def crawl_account_hierarchy(provider_uuid=None):
    """Crawl top level accounts to discover hierarchy.

    Args:
        provider_uuid (str): Optional. When given, only the polling accounts
            for that provider are scanned; otherwise all polling accounts are.

    Returns:
        None. Work happens as side effects of each crawler's
        ``crawl_account_hierarchy`` call.
    """
    if provider_uuid:
        _, polling_accounts = Orchestrator.get_accounts(provider_uuid=provider_uuid)
    else:
        _, polling_accounts = Orchestrator.get_accounts()
    # Use lazy %-style logger args (consistent style, no eager formatting).
    # The rendered messages are byte-identical to the previous eager versions.
    LOG.info("Account hierarchy crawler found %s accounts to scan", len(polling_accounts))
    processed = 0
    skipped = 0
    for account in polling_accounts:
        crawler = None
        # Look for a known crawler class to handle this provider
        if account.get("provider_type") == Provider.PROVIDER_AWS:
            crawler = AWSOrgUnitCrawler(account)
        if crawler:
            LOG.info(
                "Starting account hierarchy crawler for type %s with provider_uuid: %s",
                account.get("provider_type"),
                account.get("provider_uuid"),
            )
            crawler.crawl_account_hierarchy()
            processed += 1
        else:
            LOG.info(
                "No known crawler for account with provider_uuid: %s of type %s",
                account.get("provider_uuid"),
                account.get("provider_type"),
            )
            skipped += 1
    LOG.info("Account hierarchy crawler finished. %s processed and %s skipped", processed, skipped)
def upload_normalized_data(): """Scheduled task to export normalized data to s3.""" curr_date = DateAccessor().today() curr_month_range = calendar.monthrange(curr_date.year, curr_date.month) curr_month_first_day = date(year=curr_date.year, month=curr_date.month, day=1) curr_month_last_day = date(year=curr_date.year, month=curr_date.month, day=curr_month_range[1]) previous_month = curr_date - relativedelta(months=1) prev_month_range = calendar.monthrange(previous_month.year, previous_month.month) prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1) prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_range[1]) accounts, _ = Orchestrator.get_accounts() # Deduplicate schema_name since accounts may have the same schema_name but different providers schemas = set(account['schema_name'] for account in accounts) for schema in schemas: for table in table_export_settings: # Upload this month's reports query_and_upload_to_s3(schema, table, (curr_month_first_day, curr_month_last_day)) # Upload last month's reports query_and_upload_to_s3(schema, table, (prev_month_first_day, prev_month_last_day))
def test_crawl_account_hierarchy_without_provider_uuid(self, mock_crawler):
    """Test that all polling accounts for user are used when no provider_uuid is provided."""
    _, polling_accounts = Orchestrator.get_accounts()
    mock_crawler.crawl_account_hierarchy.return_value = True
    with self.assertLogs("masu.celery.tasks", "INFO") as captured_logs:
        tasks.crawl_account_hierarchy()
    account_count = len(polling_accounts)
    expected = f"Account hierarchy crawler found {account_count} accounts to scan"
    self.assertIn(expected, captured_logs.output[0])
def upload_normalized_data(): """Scheduled task to export normalized data to s3.""" LOG.info('Beginning upload_normalized_data') curr_date = DateAccessor().today() curr_month_range = calendar.monthrange(curr_date.year, curr_date.month) curr_month_first_day = date(year=curr_date.year, month=curr_date.month, day=1) curr_month_last_day = date(year=curr_date.year, month=curr_date.month, day=curr_month_range[1]) previous_month = curr_date - relativedelta(months=1) prev_month_range = calendar.monthrange(previous_month.year, previous_month.month) prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1) prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_range[1]) accounts, _ = Orchestrator.get_accounts() for account in accounts: LOG.info( 'processing schema %s provider uuid %s', account['schema_name'], account['provider_uuid'], ) for table in table_export_settings: # Celery does not serialize named tuples, convert it # to a dict before handing it off to the celery task. table_dict = dictify_table_export_settings(table) # Upload this month's reports query_and_upload_to_s3.delay( account['schema_name'], account['provider_uuid'], table_dict, curr_month_first_day, curr_month_last_day, ) # Upload last month's reports query_and_upload_to_s3.delay( account['schema_name'], account['provider_uuid'], table_dict, prev_month_first_day, prev_month_last_day, ) LOG.info('Completed upload_normalized_data')
def upload_normalized_data(): """Scheduled task to export normalized data to s3.""" if not settings.ENABLE_S3_ARCHIVING: LOG.info("S3 Archiving is disabled. Not running task.") return LOG.info("Beginning upload_normalized_data") curr_date = DateAccessor().today() curr_month_range = calendar.monthrange(curr_date.year, curr_date.month) curr_month_first_day = date(year=curr_date.year, month=curr_date.month, day=1) curr_month_last_day = date(year=curr_date.year, month=curr_date.month, day=curr_month_range[1]) previous_month = curr_date - relativedelta(months=1) prev_month_range = calendar.monthrange(previous_month.year, previous_month.month) prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1) prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_range[1]) accounts, _ = Orchestrator.get_accounts() for account in accounts: LOG.info("processing schema %s provider uuid %s", account["schema_name"], account["provider_uuid"]) for table in table_export_settings: # Celery does not serialize named tuples, convert it # to a dict before handing it off to the celery task. table_dict = dictify_table_export_settings(table) # Upload this month's reports query_and_upload_to_s3.delay(account["schema_name"], account["provider_uuid"], table_dict, curr_month_first_day, curr_month_last_day) # Upload last month's reports query_and_upload_to_s3.delay(account["schema_name"], account["provider_uuid"], table_dict, prev_month_first_day, prev_month_last_day) LOG.info("Completed upload_normalized_data")
def upload_normalized_data(): """Scheduled task to export normalized data to s3.""" log_uuid = str(uuid.uuid4()) LOG.info('%s Beginning upload_normalized_data', log_uuid) curr_date = DateAccessor().today() curr_month_range = calendar.monthrange(curr_date.year, curr_date.month) curr_month_first_day = date(year=curr_date.year, month=curr_date.month, day=1) curr_month_last_day = date(year=curr_date.year, month=curr_date.month, day=curr_month_range[1]) previous_month = curr_date - relativedelta(months=1) prev_month_range = calendar.monthrange(previous_month.year, previous_month.month) prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1) prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_range[1]) accounts, _ = Orchestrator.get_accounts() for account in accounts: LOG.info( '%s processing schema %s provider uuid %s', log_uuid, account['schema_name'], account['provider_uuid'], ) for table in table_export_settings: # Upload this month's reports query_and_upload_to_s3( account['schema_name'], account['provider_uuid'], table, (curr_month_first_day, curr_month_last_day), ) # Upload last month's reports query_and_upload_to_s3( account['schema_name'], account['provider_uuid'], table, (prev_month_first_day, prev_month_last_day), ) LOG.info('%s Completed upload_normalized_data', log_uuid)