def main(): ''' main function ''' options = parse_args(sys.argv[1:]) setup_logging(config_uri) settings = get_appsettings(config_uri) engine = engine_from_config(settings, 'sqlalchemy.') DBSession.configure(bind=engine) price = lookup_price(options) if options.pricing == 'OnDemand': for prc in price: for key, val in prc.items(): print '%s %s in %s: %s per %s' % (options.pricing, options.instance_type, options.region, val, key) elif options.pricing == 'Reserved': for prc in sorted(price, key=lambda x: x['PurchaseOption']): print '%s %s' % (prc.pop('LeaseContractLength'), prc.pop('PurchaseOption')) for key, val in prc.items(): if isinstance(val, dict): for unit, rate in val.iteritems(): print '\t%s - %.4f %s' % (key, float(rate), unit) else: print '\t%s - %s' % (key, val) print ''
def setUp(self):
    """Create a Pyramid test config and bind the ORM to a fresh in-memory DB."""
    self.config = testing.setUp()

    from sqlalchemy import create_engine
    db_engine = create_engine('sqlite://')
    DBSession.configure(bind=db_engine)

    # Start every test from a clean schema.
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)
def setUp(self):
    """Set up a test config, an in-memory DB, and a JSON fixture file on disk."""
    self.config = testing.setUp()

    from sqlalchemy import create_engine
    db_engine = create_engine('sqlite://')
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    # Round-trip the fixture through json so the written file is valid JSON.
    import json
    fixture_path = self.testpath + '/' + self.testfilename
    with open(fixture_path, 'w+') as fh:
        json.dump(json.loads(self.testdata), fh)
def setUp(self):
    """Bind the models to an in-memory DB and seed one AwsAccountMetadata row."""
    self.config = testing.setUp()

    from sqlalchemy import create_engine
    from budget.models import (Base, DBSession, AwsAccountMetadata)

    db_engine = create_engine('sqlite://')
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    with transaction.manager:
        DBSession.add(AwsAccountMetadata(
            account_id=0,
            account_name='account name',
            tags="Lorem,ipsum,dolor,sit,amet"))
def setUp(self):
    """Seed two AwsInstanceInventory rows (one running, one stopped)."""
    self.config = testing.setUp()

    from budget.models import Base, AwsInstanceInventory
    from sqlalchemy import create_engine
    db_engine = create_engine('sqlite://')

    # FIXME: clear any session state left over from a previous test case.
    if DBSession:
        DBSession.remove()
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    # Attributes shared by both fixture instances.
    common = dict(environment='test',
                  instance_type='m42.ultrahuge',
                  availability_zone='area-51',
                  account=1234567890,
                  launch_date=yesterday,
                  check_date=today)
    with transaction.manager:
        DBSession.add(AwsInstanceInventory(name='instance1',
                                           instance_id='i-1234abcd',
                                           status='running',
                                           **common))
        DBSession.add(AwsInstanceInventory(name='instance2',
                                           instance_id='i-4567hijk',
                                           status='stopped',
                                           **common))
def setUp(self):
    # Pyramid test scaffolding plus a fresh in-memory SQLite schema.
    self.config = testing.setUp()
    from sqlalchemy import create_engine
    engine = create_engine('sqlite://')
    DBSession.configure(bind=engine)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    # Seed one Openshift3Node whose status/meta columns hold raw YAML, as the
    # collection scripts would store it.
    # NOTE(review): the YAML indentation below was reconstructed from a
    # whitespace-collapsed source — confirm against version control.
    with transaction.manager:
        node = Openshift3Node(
            collection_date = yesterday,
            create_date = yesterday,
            end_date = None,  # None marks the node as still active
            uid = '12345678-1234-5678-1234-567812345678',
            status = '''
status:
  addresses:
  - address: 10.0.0.1
    type: InternalIP
  - address: 10.0.0.2
    type: ExternalIP
  allocatable:
    cpu: "1"
    memory: 1024Ki
    pods: "10"
  capacity:
    cpu: "1"
    memory: 1024Ki
    pods: "10"
  conditions:
  - lastHeartbeatTime: 2001-01-01T12:01:00Z
    lastTransitionTime: 2001-01-01T12:01:00Z
    message: kubelet is posting ready status
    reason: KubeletReady
    status: "True"
    type: Ready
''',
            meta = '''
metadata:
  creationTimestamp: 2001-01-01T12:00:00Z
  labels:
    color: red
    type: compute
  name: test
  uid: 12345678-1234-5678-1234-567812345678
''',
            cluster_id = 'test'
        )
        DBSession.add(node)
def runTest(self):
    """expire() should end-date every node whose uid is not in the keep list."""
    from budget.scripts.openshift_v3_stats import expire

    keep = ['12345678-1234-5678-1234-567812345678']
    expire(DBSession, Openshift3Node, keep)

    rows = DBSession.query(Openshift3Node.uid, Openshift3Node.end_date).all()
    # The kept node is untouched (no end_date set)...
    self.assertEqual(rows[0], ('12345678-1234-5678-1234-567812345678', None))
    # ...while the other node was stamped with a just-now end_date.
    self.assertEqual(rows[1][0], '23456789-2345-6789-2345-678923456789')
    self.assertLessEqual((datetime.now() - rows[1][1]).total_seconds(), 2)
def runTest(self):
    """run() should import the cached GCP billing file into GcpLineItem."""
    from budget.scripts.gcp_billing_import import run

    run({'cache.dir': os.path.dirname(__file__)}, {'nocacheupdate': True})

    rows = DBSession.query(GcpLineItem).all()
    self.assertEqual(rows[0].cost_amount, 1.234567)
def setUp(self):
    """Seed one AccountTotal AwsCostAllocation row in a fresh in-memory DB."""
    self.config = testing.setUp()

    from sqlalchemy import create_engine
    from budget.models import (
        Base,
        AwsCostAllocation,
    )

    db_engine = create_engine('sqlite://')
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    with transaction.manager:
        DBSession.add(AwsCostAllocation(
            invoice_id='invoice id',
            payer_account_id=0,
            linked_account_id=0,
            record_type='AccountTotal',
            record_id=0,
            billing_period_start_date=yesterday,
            billing_period_end_date=today,
            invoice_date=today,
            payer_account_name='payer account name',
            linked_account_name='linked account name',
            taxation_address='tax address',
            payer_po_number=0,
            product_code='product code',
            product_name='product name',
            seller_of_record='seller',
            usage_type='usage type',
            operation='operation',
            availability_zone='availability zone',
            rate_id=0,
            item_description='description',
            usage_start_date=yesterday,
            usage_end_date=today,
            usage_quantity=1.0001,
            blended_rate=2.0002,
            currency_code='currency',
            cost_before_tax=3.0003,
            credits=4.0004,
            tax_amount=5.0005,
            tax_type='tax',
            total_cost=6.0006,
            user_environment='env',
            user_node='node'))
def setUp(self):
    # Pyramid test scaffolding and a fresh in-memory SQLite schema.
    self.config = testing.setUp()
    from sqlalchemy import create_engine
    engine = create_engine('sqlite://')
    DBSession.configure(bind=engine)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    # Seed one Openshift3Node; status/meta hold raw YAML as collected from a
    # cluster. NOTE(review): YAML indentation reconstructed from a collapsed
    # source — confirm against version control.
    with transaction.manager:
        node = Openshift3Node(collection_date=yesterday,
                              create_date=yesterday,
                              end_date=None,  # still-active marker
                              uid='12345678-1234-5678-1234-567812345678',
                              status='''
status:
  addresses:
  - address: 10.0.0.1
    type: InternalIP
  - address: 10.0.0.2
    type: ExternalIP
  allocatable:
    cpu: "1"
    memory: 1024Ki
    pods: "10"
  capacity:
    cpu: "1"
    memory: 1024Ki
    pods: "10"
  conditions:
  - lastHeartbeatTime: 2001-01-01T12:01:00Z
    lastTransitionTime: 2001-01-01T12:01:00Z
    message: kubelet is posting ready status
    reason: KubeletReady
    status: "True"
    type: Ready
''',
                              meta='''
metadata:
  creationTimestamp: 2001-01-01T12:00:00Z
  labels:
    color: red
    type: compute
  name: test
  uid: 12345678-1234-5678-1234-567812345678
''',
                              cluster_id='test')
        DBSession.add(node)
def setUp(self):
    # Build DataHolder fixtures pairing one instance with one reservation for
    # the same instance type / availability zone, then seed the AwsPrice and
    # AwsProduct rows the reservation view will query.
    self.config = testing.setUp()
    from budget.views.reservation import DataHolder
    self.tup = ('m4.xlarge', 'us-east-1a')
    dummy_inst = DataHolder(
            instance_type = self.tup[0],
            availability_zone = self.tup[1],
    )
    dummy_rsrv = DataHolder(
            instance_type = self.tup[0],
            availability_zone = self.tup[1],
            instance_count = 5
    )
    self.dh = DataHolder(
            instance_type=self.tup[0],
            availability_zone=self.tup[1],
            instances={ '1234567890' : [ dummy_inst ] },
            reservations={ '1234567890' : [ dummy_rsrv ] },
            account='1234567890'
    )
    from budget.models import Base, AwsPrice, AwsProduct
    from sqlalchemy import create_engine
    engine = create_engine('sqlite://')
    DBSession.configure(bind=engine)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    with transaction.manager:
        # Reserved-pricing row (1yr / Partial Upfront): an upfront fee
        # ('Quantity') plus an hourly rate ('Hrs').
        data = AwsPrice(
                sku = 'test',
                offer_term_code = 'test1',
                price_dimensions = '{"test1.test1.test1": {"pricePerUnit": {"USD": "1"}, "appliesTo": [], "rateCode": "test.test.2TG2D8R56U", "unit": "Quantity", "description": "Upfront Fee"}, "test.test.test": {"description": "Linux/UNIX (Amazon VPC), m4.xlarge instance-hours used this month", "pricePerUnit": {"USD": "0.1234000000"}, "rateCode": "test.test.test", "endRange": "Inf", "beginRange": "0", "appliesTo": [], "unit": "Hrs"}}',
                term_attributes = '{"LeaseContractLength": "1yr", "PurchaseOption": "Partial Upfront"}',
                json = 'test1.test1'
        )
        DBSession.add(data)
        # On-demand pricing row: empty term_attributes marks it as OnDemand.
        data = AwsPrice(
                sku = 'test',
                offer_term_code = 'test2',
                price_dimensions = '{"test.test2.test2": {"description": "$0.79 per On Demand Linux m4.xlarge Instance Hour", "pricePerUnit": {"USD": "0.4567800000"}, "rateCode": "test.test2.test2", "endRange": "Inf", "beginRange": "0", "appliesTo": [], "unit": "Hrs"}}',
                term_attributes = '{}',
                json = 'test.test2'
        )
        DBSession.add(data)
        # Product row matching both price rows via sku == 'test'.
        # NOTE(review): the json literal contains '"usagetype": "test:test""'
        # (a doubled quote) and is therefore not valid JSON — harmless if the
        # column is never json.loads()'d, but worth confirming.
        data = AwsProduct(
                sku = 'test',
                location = 'US East (N. Virginia)',
                instance_type = 'm4.xlarge',
                current_generation = True,
                tenancy = 'Shared',
                usage_type = 'test:test',
                operation = 'test:test',
                operating_system = 'Linux',
                json = '{"sku": "test", "productFamily": "Compute Instance", "attributes": {"enhancedNetworkingSupported": "Yes", "networkPerformance": "High", "preInstalledSw": "NA", "instanceFamily": "Storage optimized", "vcpu": "9000", "locationType": "AWS Region", "usagetype": "test:test"", "storage": "100 x 100 MFM", "currentGeneration": "Yes", "operatingSystem": "Linux", "processorArchitecture": "8-bit", "tenancy": "Shared", "licenseModel": "No License required", "servicecode": "test", "memory": "1 ZiB", "processorFeatures": "Shiny; Hot; Metal", "clockSpeed": "0.1 MHz", "operation": "testStuff", "physicalProcessor": "IBM 8088", "instanceType": "m4.xlarge", "location": "US East (N. Virginia)"}}'
        )
        DBSession.add(data)
def insert_data():
    '''Load other_expenses.csv from the cache dir and insert one ExpensedCost
    row per positive vendor amount, then commit.

    CSV layout: column 0 is an ISO date (YYYY-MM-DD); each remaining column
    header names a vendor and each cell holds that vendor's amount.
    '''
    filename = cache_dir + '/other_expenses.csv'
    data = []
    with open(filename, 'rb') as csvfile:
        reader = csv.reader(csvfile)
        # pop the header; the builtin next() works on both Python 2.6+ and 3,
        # unlike the py2-only reader.next().
        header = next(reader)
        for row in reader:
            for idx, el in enumerate(row):
                # Skip the date column (idx 0) and zero amounts.
                if idx > 0 and float(el) > 0.0:
                    data.append(ExpensedCost(
                        vendor=header[idx],
                        invoice_date=datetime.strptime(row[0], '%Y-%m-%d'),
                        amount=el))
    DBSession.add_all(data)
    transaction.commit()
def main(argv=sys.argv):
    """Script entry point: parse CLI args, bind the DB, and load the CSV."""
    if len(argv) < 2:
        usage(argv)
    config_uri = argv[1]
    options = parse_vars(argv[2:])

    setup_logging(config_uri)
    global log
    log = logging.getLogger(__name__)

    settings = get_appsettings(config_uri, options=options)
    DBSession.configure(bind=engine_from_config(settings, 'sqlalchemy.'))

    # insert_data() reads the cache dir from this module-level global.
    global cache_dir
    cache_dir = settings['cache.dir']

    insert_data()
def runTest(self):
    """insert_data() should load the cached GCP billing JSON into GcpLineItem."""
    import os
    from budget.scripts.gcp_billing_import import insert_data

    fixture_dir = os.path.dirname(__file__) + "/gcp"
    insert_data('gcp-billing-2001-01-01.json', fixture_dir)

    rows = DBSession.query(GcpLineItem).all()
    self.assertEqual(rows[0].cost_amount, 1.234567)
def setUp(self):
    # Build DataHolder fixtures pairing one instance with one reservation for
    # the same instance type / availability zone, then seed the AwsPrice and
    # AwsProduct rows the reservation view will query.
    self.config = testing.setUp()
    from budget.views.reservation import DataHolder
    self.tup = ('m4.xlarge', 'us-east-1a')
    dummy_inst = DataHolder(
        instance_type=self.tup[0],
        availability_zone=self.tup[1],
    )
    dummy_rsrv = DataHolder(instance_type=self.tup[0],
                            availability_zone=self.tup[1],
                            instance_count=5)
    self.dh = DataHolder(instance_type=self.tup[0],
                         availability_zone=self.tup[1],
                         instances={'1234567890': [dummy_inst]},
                         reservations={'1234567890': [dummy_rsrv]},
                         account='1234567890')
    from budget.models import Base, AwsPrice, AwsProduct
    from sqlalchemy import create_engine
    engine = create_engine('sqlite://')
    DBSession.configure(bind=engine)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    with transaction.manager:
        # Reserved-pricing row (1yr / Partial Upfront): an upfront fee
        # ('Quantity') plus an hourly rate ('Hrs').
        data = AwsPrice(
            sku='test',
            offer_term_code='test1',
            price_dimensions=
            '{"test1.test1.test1": {"pricePerUnit": {"USD": "1"}, "appliesTo": [], "rateCode": "test.test.2TG2D8R56U", "unit": "Quantity", "description": "Upfront Fee"}, "test.test.test": {"description": "Linux/UNIX (Amazon VPC), m4.xlarge instance-hours used this month", "pricePerUnit": {"USD": "0.1234000000"}, "rateCode": "test.test.test", "endRange": "Inf", "beginRange": "0", "appliesTo": [], "unit": "Hrs"}}',
            term_attributes=
            '{"LeaseContractLength": "1yr", "PurchaseOption": "Partial Upfront"}',
            json='test1.test1')
        DBSession.add(data)
        # On-demand pricing row: empty term_attributes marks it as OnDemand.
        data = AwsPrice(
            sku='test',
            offer_term_code='test2',
            price_dimensions=
            '{"test.test2.test2": {"description": "$0.79 per On Demand Linux m4.xlarge Instance Hour", "pricePerUnit": {"USD": "0.4567800000"}, "rateCode": "test.test2.test2", "endRange": "Inf", "beginRange": "0", "appliesTo": [], "unit": "Hrs"}}',
            term_attributes='{}',
            json='test.test2')
        DBSession.add(data)
        # Product row matching both price rows via sku == 'test'.
        # NOTE(review): the json literal contains '"usagetype": "test:test""'
        # (a doubled quote) and is therefore not valid JSON — harmless if the
        # column is never json.loads()'d, but worth confirming.
        data = AwsProduct(
            sku='test',
            location='US East (N. Virginia)',
            instance_type='m4.xlarge',
            current_generation=True,
            tenancy='Shared',
            usage_type='test:test',
            operation='test:test',
            operating_system='Linux',
            json=
            '{"sku": "test", "productFamily": "Compute Instance", "attributes": {"enhancedNetworkingSupported": "Yes", "networkPerformance": "High", "preInstalledSw": "NA", "instanceFamily": "Storage optimized", "vcpu": "9000", "locationType": "AWS Region", "usagetype": "test:test"", "storage": "100 x 100 MFM", "currentGeneration": "Yes", "operatingSystem": "Linux", "processorArchitecture": "8-bit", "tenancy": "Shared", "licenseModel": "No License required", "servicecode": "test", "memory": "1 ZiB", "processorFeatures": "Shiny; Hot; Metal", "clockSpeed": "0.1 MHz", "operation": "testStuff", "physicalProcessor": "IBM 8088", "instanceType": "m4.xlarge", "location": "US East (N. Virginia)"}}'
        )
        DBSession.add(data)
def lookup_price(options):
    ''' Digs through a massive nest of json data to extract the on demand
        pricing for AWS instances.

        See also:
        https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/price-changes.html

        Params:
            instance_type: any valid AWS instance size. (e.g. 'm4.xlarge')
            region: an AWS region endpoint name. (e.g. 'us-east-1')
            tenancy: 'Shared' or 'Dedicated'
            pricing: 'OnDemand' or 'Reserved'
            lease_contract_length: '1yr' or '3yr'
            purchase_option: 'No Upfront' or 'Partial Upfront' or 'Full Upfront'

        Returns:
            list of dict: key - 'Hrs' or 'Quantity' (OnDemand) or term
                          attribute / description (Reserved)
                          value - price data
    '''
    region_name = region_lookup(options.region)

    # Implicit join: AwsPrice.sku == AwsProduct.sku links each price row to
    # the product rows matching the requested instance/region/tenancy/OS.
    products = DBSession.query(\
                AwsPrice.price_dimensions,
                AwsPrice.term_attributes
            ).filter(\
                AwsProduct.instance_type == options.instance_type,
                AwsProduct.location == region_name,
                AwsProduct.tenancy == options.tenancy,
                AwsProduct.operating_system == options.operating_system,
                AwsPrice.sku == AwsProduct.sku
            ).all()

    costs = []
    for prd in products:
        price_dimensions = json.loads(prd[0])
        term_attributes = json.loads(prd[1])

        if options.pricing == 'OnDemand':
            # re.escape() so the '.' in instance types like 'm4.xlarge'
            # matches literally rather than acting as a regex wildcard.
            rgx = re.compile(r'On Demand %s %s' % (
                re.escape(options.operating_system),
                re.escape(options.instance_type)))
            costs.append(_find_cost(rgx, price_dimensions))

        elif options.pricing == 'Reserved':
            # On-Demand has no term_attributes
            if term_attributes == {}:
                continue
            for _, val in price_dimensions.items():
                term_attributes.update(
                    {val['description']: val['pricePerUnit']})
            costs.append(term_attributes)
    return costs
def runTest(self):
    """Nodes absent from the uid list passed to expire() get an end_date."""
    from budget.scripts.openshift_v3_stats import expire

    surviving_uid = '12345678-1234-5678-1234-567812345678'
    expire(DBSession, Openshift3Node, [surviving_uid])

    rows = DBSession.query(Openshift3Node.uid, Openshift3Node.end_date).all()
    # First row: the survivor, untouched.
    self.assertEqual(rows[0], (surviving_uid, None))
    # Second row: expired, with an end_date stamped within the last 2s.
    self.assertEqual(rows[1][0], '23456789-2345-6789-2345-678923456789')
    elapsed = (datetime.now() - rows[1][1]).total_seconds()
    self.assertLessEqual(elapsed, 2)
def main(args=sys.argv): options = parse_args(args[1:]) setup_logging(config_uri) global log log = logging.getLogger(__name__) settings = get_appsettings(config_uri) engine = engine_from_config(settings, 'sqlalchemy.') DBSession.configure(bind=engine) price = lookup_price(options) if options.pricing == 'OnDemand': for p in price: for k,v in p.items(): print '%s %s in %s: %s per %s' % (options.pricing, options.instance_type, options.region, v,k) elif options.pricing == 'Reserved': for p in sorted(price, key=lambda x: x['PurchaseOption']): print '%s %s' % (p.pop('LeaseContractLength'), p.pop('PurchaseOption')) for k,v in p.items(): print '\t%s - %s %s' % (k, v.items()[0][1], v.items()[0][0])
def lookup_price(options):
    ''' Digs through a massive nest of json data to extract the on demand
        pricing for AWS instances.

        See also:
        https://docs.aws.amazon.com/awsaccountbilling/latest/aboutv2/price-changes.html

        Params:
            instance_type: any valid AWS instance size. (e.g. 'm4.xlarge')
            region: an AWS region endpoint name. (e.g. 'us-east-1')
            tenancy: 'Shared' or 'Dedicated'
            pricing: 'OnDemand' or 'Reserved'
            lease_contract_length: '1yr' or '3yr'
            purchase_option: 'No Upfront' or 'Partial Upfront' or 'Full Upfront'

        Returns:
            list of dict: key - 'Hrs' or 'Quantity' (OnDemand) or term
                          attribute / description (Reserved)
                          value - price data
    '''
    region_name = region_lookup(options.region)

    # Implicit join: AwsPrice.sku == AwsProduct.sku links each price row to
    # the product rows matching the requested instance/region/tenancy/OS.
    products = DBSession.query(\
                AwsPrice.price_dimensions,
                AwsPrice.term_attributes
            ).filter(\
                AwsProduct.instance_type == options.instance_type,
                AwsProduct.location == region_name,
                AwsProduct.tenancy == options.tenancy,
                AwsProduct.operating_system == options.operating_system,
                AwsPrice.sku == AwsProduct.sku
            ).all()

    costs = []
    for prd in products:
        price_dimensions = json.loads(prd[0])
        term_attributes = json.loads(prd[1])

        if options.pricing == 'OnDemand':
            # re.escape() so the '.' in instance types like 'm4.xlarge'
            # matches literally rather than acting as a regex wildcard.
            rgx = re.compile(r'On Demand %s %s' % (
                re.escape(options.operating_system),
                re.escape(options.instance_type)))
            costs.append(_find_cost(rgx, price_dimensions))

        elif options.pricing == 'Reserved':
            # On-Demand has no term_attributes
            if term_attributes == {}:
                continue
            for _, val in price_dimensions.items():
                term_attributes.update({val['description']:val['pricePerUnit']})
            costs.append(term_attributes)
    return costs
def setUp(self):
    """Seed two AwsReservationInventory rows: one active, one already expired."""
    self.config = testing.setUp()

    from budget.models import Base, AwsReservationInventory
    from sqlalchemy import create_engine
    db_engine = create_engine('sqlite://')
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    with transaction.manager:
        # Reservation that is still active (expires tomorrow).
        DBSession.add(AwsReservationInventory(
            reservation_id='r-1234abcd',
            instance_type='instance type',
            availability_zone='availability_zone',
            account=1234567890,
            purchase_date=yesterday,
            expiration_date=tomorrow,
            instance_count=314159))
        # Reservation that has already expired.
        DBSession.add(AwsReservationInventory(
            reservation_id='r-4567hijk',
            instance_type='instance type',
            availability_zone='availability_zone',
            account=1234567890,
            purchase_date=yesterday,
            expiration_date=yesterday,
            instance_count=602214))
def setUp(self):
    """Bind an in-memory DB and seed an active plus an expired reservation."""
    self.config = testing.setUp()

    from budget.models import Base, AwsReservationInventory
    from sqlalchemy import create_engine
    db_engine = create_engine('sqlite://')
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    # Fields identical across both fixture reservations.
    shared = dict(instance_type='instance type',
                  availability_zone='availability_zone',
                  account=1234567890,
                  purchase_date=yesterday)
    with transaction.manager:
        # Still active: expires tomorrow.
        DBSession.add(AwsReservationInventory(reservation_id='r-1234abcd',
                                              expiration_date=tomorrow,
                                              instance_count=314159,
                                              **shared))
        # Already expired: expiration_date in the past.
        DBSession.add(AwsReservationInventory(reservation_id='r-4567hijk',
                                              expiration_date=yesterday,
                                              instance_count=602214,
                                              **shared))
def runTest(self):
    """update() should upsert the node from the YAML dump and return its uid."""
    from budget.scripts.openshift_v3_stats import update

    # safe_load avoids yaml.load's arbitrary-object construction and the
    # PyYAML deprecation warning for calling load() without a Loader; this
    # fixture is plain data, so behavior is unchanged.
    # NOTE(review): YAML indentation reconstructed from a collapsed source —
    # confirm against version control.
    yml = yaml.safe_load('''
apiVersion: v1
items:
- apiVersion: v1
  kind: Node
  metadata:
    creationTimestamp: 2001-01-01T12:00:00Z
    labels:
      color: red
      type: compute
    name: test
    uid: 12345678-1234-5678-1234-567812345678
  spec:
    externalID: i-123456789abcdef
    providerID: test:///test/i-123456789abcdef
  status:
    addresses:
    - address: 10.0.0.5
      type: InternalIP
    - address: 10.0.0.6
      type: ExternalIP
    allocatable:
      cpu: "2"
      memory: 2048Ki
      pods: "20"
    capacity:
      cpu: "2"
      memory: 2048Ki
      pods: "20"
    conditions:
    - lastHeartbeatTime: 2001-01-01T12:01:00Z
      lastTransitionTime: 2001-01-01T12:01:00Z
      message: kubelet is posting ready status
      reason: KubeletReady
      status: "True"
      type: Ready''')

    yaml_info = {'collection_date' : datetime.now(),
                 'cluster_id' : 'test'}
    lst = update(DBSession, Openshift3Node, yml, yaml_info)
    self.assertEqual(lst, ['12345678-1234-5678-1234-567812345678'])

    result = DBSession.query(Openshift3Node.uid).all()
    self.assertEqual(result, [(u'12345678-1234-5678-1234-567812345678',)])
def runTest(self):
    """update() should upsert the node from the YAML dump and return its uid."""
    from budget.scripts.openshift_v3_stats import update

    # safe_load avoids yaml.load's arbitrary-object construction and the
    # PyYAML deprecation warning for calling load() without a Loader; this
    # fixture is plain data, so behavior is unchanged.
    # NOTE(review): YAML indentation reconstructed from a collapsed source —
    # confirm against version control.
    yml = yaml.safe_load('''
apiVersion: v1
items:
- apiVersion: v1
  kind: Node
  metadata:
    creationTimestamp: 2001-01-01T12:00:00Z
    labels:
      color: red
      type: compute
    name: test
    uid: 12345678-1234-5678-1234-567812345678
  spec:
    externalID: i-123456789abcdef
    providerID: test:///test/i-123456789abcdef
  status:
    addresses:
    - address: 10.0.0.5
      type: InternalIP
    - address: 10.0.0.6
      type: ExternalIP
    allocatable:
      cpu: "2"
      memory: 2048Ki
      pods: "20"
    capacity:
      cpu: "2"
      memory: 2048Ki
      pods: "20"
    conditions:
    - lastHeartbeatTime: 2001-01-01T12:01:00Z
      lastTransitionTime: 2001-01-01T12:01:00Z
      message: kubelet is posting ready status
      reason: KubeletReady
      status: "True"
      type: Ready''')

    yaml_info = {'collection_date': datetime.now(), 'cluster_id': 'test'}
    lst = update(DBSession, Openshift3Node, yml, yaml_info)
    self.assertEqual(lst, ['12345678-1234-5678-1234-567812345678'])

    result = DBSession.query(Openshift3Node.uid).all()
    self.assertEqual(result, [(u'12345678-1234-5678-1234-567812345678', )])
def setUp(self):
    """Seed two test instances (running and stopped) in a fresh in-memory DB."""
    self.config = testing.setUp()

    from budget.models import Base, AwsInstanceInventory
    from sqlalchemy import create_engine
    db_engine = create_engine('sqlite://')

    # FIXME: clear any session state left over from a previous test case.
    if DBSession:
        DBSession.remove()
    DBSession.configure(bind=db_engine)
    Base.metadata.drop_all(db_engine)
    Base.metadata.create_all(db_engine)

    fixtures = [('instance1', 'i-1234abcd', 'running'),
                ('instance2', 'i-4567hijk', 'stopped')]
    with transaction.manager:
        for inst_name, inst_id, inst_status in fixtures:
            DBSession.add(AwsInstanceInventory(name=inst_name,
                                               environment='test',
                                               instance_id=inst_id,
                                               instance_type='m42.ultrahuge',
                                               availability_zone='area-51',
                                               account=1234567890,
                                               status=inst_status,
                                               launch_date=yesterday,
                                               check_date=today))
def tearDown(self):
    # Drop the scoped session so DB state can't leak into the next test,
    # then undo the Pyramid testing configuration from setUp().
    DBSession.remove()
    testing.tearDown()
def tearDown(self):
    # Imported locally to match this test module's convention of importing
    # budget models inside the fixture methods.
    from budget.models import DBSession
    # Drop the scoped session, then undo the Pyramid testing configuration.
    DBSession.remove()
    testing.tearDown()
def tearDown(self):
    # Release the DB session and the Pyramid test config, then delete the
    # JSON fixture file that setUp() wrote to disk.
    DBSession.remove()
    testing.tearDown()
    os.remove(self.testpath + '/' + self.testfilename)
def main(args): ''' entry point ''' if len(args) < 1: usage() selected = None if len(args) > 2: selected = args[2] if not os.path.exists(selected): usage() config_uri = args[1] options = parse_vars(args[3:]) settings = get_appsettings(config_uri, options=options) engine = engine_from_config(settings, 'sqlalchemy.') DBSession.configure(bind=engine) setup_logging(config_uri) global log log = logging.getLogger(__name__) global cache_dir cache_dir = settings['cache.dir'] + "/v3stats" # global to enable us to handle KeyboardInterrupts without leaving zombies around. global pool pool = Pool(processes=cpu_count()*2) if not selected: selected = select_latest() objects = [] pids = [] stats_path = extract_tarbz2(selected) for filename in os.listdir(stats_path+'/stats'): try: run = pool.apply_async(read_stats, (stats_path+'/stats', filename), callback=objects.append) pids.append(run) except Exception as exc: print exc log.debug(exc) raise # get the output of all our processes for pid in pids: pid.get() # ensure the sqlalchemy objects aren't garbage-collected before we commit them. # see: http://docs.sqlalchemy.org/en/latest/orm/session_state_management.html#session-referencing-behavior merged = [] uidlist = {} for uids, arglist in objects: for table, defaults, kwargs in arglist: if table in uidlist.keys(): uidlist[table].update(uids) else: uidlist[table] = uids obj = insert_or_update(DBSession, table, defaults=defaults, **kwargs) merged.append(DBSession.merge(obj)) try: transaction.commit() except IntegrityError as exc: DBSession.rollback() log.error(exc) pool.close() pool.join() for table in uidlist: rgx = re.compile(r'v3stats-(\d{4}-\d{2}-\d{2}).tar.bz2') scandate, = rgx.search(selected).groups() expire(DBSession, table, uidlist[table], scandate) shutil.rmtree(stats_path)
def main(args): ''' entry point ''' if len(args) < 1: usage() selected = None if len(args) > 2: selected = args[2] if not os.path.exists(selected): usage() config_uri = args[1] options = parse_vars(args[3:]) settings = get_appsettings(config_uri, options=options) engine = engine_from_config(settings, 'sqlalchemy.') DBSession.configure(bind=engine) setup_logging(config_uri) global log log = logging.getLogger(__name__) global cache_dir cache_dir = settings['cache.dir'] + "/v3stats" # global to enable us to handle KeyboardInterrupts without leaving zombies around. global pool pool = Pool(processes=cpu_count() * 2) if not selected: selected = select_latest() objects = [] pids = [] stats_path = extract_tarbz2(selected) for filename in os.listdir(stats_path + '/stats'): try: run = pool.apply_async(read_stats, (stats_path + '/stats', filename), callback=objects.append) pids.append(run) except Exception as exc: print exc log.debug(exc) raise # get the output of all our processes for pid in pids: pid.get() # ensure the sqlalchemy objects aren't garbage-collected before we commit them. # see: http://docs.sqlalchemy.org/en/latest/orm/session_state_management.html#session-referencing-behavior merged = [] uidlist = {} for uids, arglist in objects: for table, defaults, kwargs in arglist: if table in uidlist.keys(): uidlist[table].update(uids) else: uidlist[table] = uids obj = insert_or_update(DBSession, table, defaults=defaults, **kwargs) merged.append(DBSession.merge(obj)) try: transaction.commit() except IntegrityError as exc: DBSession.rollback() log.error(exc) pool.close() pool.join() for table in uidlist: rgx = re.compile(r'v3stats-(\d{4}-\d{2}-\d{2}).tar.bz2') scandate, = rgx.search(selected).groups() expire(DBSession, table, uidlist[table], scandate) shutil.rmtree(stats_path)