def crawl_account_hierarchy(provider_uuid=None):
    """Crawl top level accounts to discover hierarchy.

    Args:
        provider_uuid (str): Optional provider uuid; when given, only that
            provider's polling account is scanned.

    Returns:
        None. Emits INFO logs summarizing processed/skipped accounts.
    """
    if provider_uuid:
        _, polling_accounts = Orchestrator.get_accounts(provider_uuid=provider_uuid)
    else:
        _, polling_accounts = Orchestrator.get_accounts()
    # Use lazy %-style logging args throughout (original mixed eager "%",
    # str.format and f-strings) so formatting only happens when emitted.
    LOG.info("Account hierarchy crawler found %s accounts to scan", len(polling_accounts))
    processed = 0
    skipped = 0
    for account in polling_accounts:
        crawler = None
        # Look for a known crawler class to handle this provider
        if account.get("provider_type") == Provider.PROVIDER_AWS:
            crawler = AWSOrgUnitCrawler(account)
        if crawler:
            LOG.info(
                "Starting account hierarchy crawler for type %s with provider_uuid: %s",
                account.get("provider_type"),
                account.get("provider_uuid"),
            )
            crawler.crawl_account_hierarchy()
            processed += 1
        else:
            LOG.info(
                "No known crawler for account with provider_uuid: %s of type %s",
                account.get("provider_uuid"),
                account.get("provider_type"),
            )
            skipped += 1
    LOG.info("Account hierarchy crawler finished. %s processed and %s skipped", processed, skipped)
def test_prepare_w_exception(self, mock_task, mock_labeler, mock_inspect):
    """Test that Orchestrator.prepare() handles broad exceptions."""
    # A downloader exception must not prevent the task from being queued,
    # but labeling should never run.
    Orchestrator().prepare()
    mock_task.assert_called()
    mock_labeler.assert_not_called()
def test_prepare_w_no_manifest_found(self, mock_task, mock_labeler, mock_inspect):
    """Test that Orchestrator.prepare() is skipped when no manifest is found."""
    # Without a manifest, neither processing nor labeling should be queued.
    Orchestrator().prepare()
    mock_task.assert_not_called()
    mock_labeler.assert_not_called()
def test_prepare_no_accounts(self, mock_downloader, mock_accounts_accessor):
    """Test downloading cost usage reports."""
    # With no accounts, prepare() has nothing to report on.
    self.assertIsNone(Orchestrator().prepare())
def test_start_manifest_processing(self, mock_download_manifest, mock_task):
    """Test start_manifest_processing."""
    manifest_with_files = {
        "manifest_id": 1,
        "files": [{"local_file": "file1.csv", "key": "filekey"}],
    }
    scenarios = [
        {"mock_downloader_manifest": {}, "expect_chord_called": False},
        {"mock_downloader_manifest": manifest_with_files, "expect_chord_called": True},
    ]
    account = self.mock_accounts[0]
    for scenario in scenarios:
        mock_download_manifest.return_value = scenario.get("mock_downloader_manifest")
        # The chord should fire only when the downloaded manifest has files.
        Orchestrator().start_manifest_processing(
            account.get("customer_name"),
            account.get("authentication"),
            account.get("billing_source"),
            "AWS-local",
            account.get("schema_name"),
            account.get("provider_uuid"),
            DateAccessor().get_billing_months(1)[0],
        )
        if scenario.get("expect_chord_called"):
            mock_task.assert_called()
        else:
            mock_task.assert_not_called()
def test_prepare_w_status_backoff(self, mock_task, mock_accessor):
    """Test that Orchestrator.prepare() is skipped when backing off."""
    # Invalid status plus active backoff means the poll cycle is skipped.
    mock_accessor.is_valid.return_value = False
    mock_accessor.is_backing_off.return_value = True
    Orchestrator().prepare()
    mock_task.assert_not_called()
def test_prepare_w_downloader_error(self, mock_task, mock_labeler, mock_inspect):
    """Test that Orchestrator.prepare() handles downloader errors."""
    # Downloader errors should not stop task submission; labeling never runs.
    Orchestrator().prepare()
    mock_task.assert_called()
    mock_labeler.assert_not_called()
def test_prepare_no_accounts(self, mock_downloader, mock_accounts_accessor, mock_inspect, mock_account_labler):
    """Test downloading cost usage reports."""
    # No accounts: prepare() yields nothing and account labeling never starts.
    self.assertIsNone(Orchestrator().prepare())
    mock_account_labler.assert_not_called()
def test_prepare_w_manifest_processing_successful(self, mock_task, mock_labeler, mock_inspect):
    """Test that Orchestrator.prepare() works when manifest processing is successful."""
    # Successful manifest processing should trigger account labeling.
    mock_labeler().get_label_details.return_value = (True, True)
    Orchestrator().prepare()
    mock_labeler.assert_called()
def test_remove_expired_report_data_no_accounts(self, mock_task, mock_remover, mock_accessor):
    """Test removing expired report data with no accounts."""
    mock_remover.return_value = [
        {"account_payer_id": "999999999", "billing_period_start": "2018-06-24 15:47:33.052509"}
    ]
    # No accounts discovered, so nothing should be queued for removal.
    mock_accessor.return_value = []
    self.assertEqual(Orchestrator().remove_expired_report_data(), [])
def test_prepare_w_status_valid(self, mock_task, mock_accessor, mock_labeler):
    """Test that Orchestrator.prepare() works when status is valid."""
    mock_labeler().get_label_details.return_value = (True, True)
    # A valid, non-backing-off provider status allows processing to proceed.
    status = mock_accessor()
    status.is_valid.return_value = True
    status.is_backing_off.return_value = False
    Orchestrator().prepare()
    mock_task.assert_called()
def test_start_manifest_processing_priority_queue(self, mock_download_manifest, mock_task, mock_inspect):
    """Test start_manifest_processing using priority queue."""
    # Priority routing requires BOTH a provider_uuid and a queue name;
    # otherwise processing falls back to the summary queue.
    cases = [
        {"name": "qe-account", "provider_uuid": str(uuid4()), "queue-name": "priority", "expected": "priority"},
        {"name": "qe-account", "provider_uuid": None, "queue-name": "priority", "expected": "summary"},
        {"name": "qe-account", "provider_uuid": str(uuid4()), "queue-name": None, "expected": "summary"},
    ]
    downloader_manifest = {
        "manifest_id": 1,
        "files": [{"local_file": "file1.csv", "key": "filekey"}],
    }
    account = self.mock_accounts[0]
    for case in cases:
        with self.subTest(test=case.get("name")):
            mock_download_manifest.return_value = downloader_manifest
            orchestrator = Orchestrator(
                provider_uuid=case.get("provider_uuid"), queue_name=case.get("queue-name")
            )
            orchestrator.start_manifest_processing(
                account.get("customer_name"),
                account.get("credentials"),
                account.get("data_source"),
                "AWS-local",
                account.get("schema_name"),
                account.get("provider_uuid"),
                DateAccessor().get_billing_months(1)[0],
            )
            actual_queue = mock_task.call_args.args[1].options.get("queue")
            self.assertEqual(actual_queue, case.get("expected"))
def expired_data():
    """Return expired data."""
    # Only a DELETE in debug mode performs a real (non-simulated) removal.
    simulate = not (request.method == 'DELETE' and Config.DEBUG)
    LOG.info('Simulate Flag: %s', simulate)
    async_delete_results = Orchestrator().remove_expired_report_data(simulate=simulate)
    response_key = 'Async jobs for expired data removal'
    if simulate:
        response_key += ' (simulated)'
    return jsonify({response_key: str(async_delete_results)})
def expired_data(request):
    """Return expired data."""
    # Only a DELETE in debug mode performs a real (non-simulated) removal.
    simulate = not (request.method == "DELETE" and Config.DEBUG)
    LOG.info("Simulate Flag: %s", simulate)
    async_delete_results = Orchestrator().remove_expired_report_data(simulate=simulate)
    response_key = "Async jobs for expired data removal"
    if simulate:
        response_key += " (simulated)"
    return Response({response_key: str(async_delete_results)})
def test_start_manifest_processing_in_progress(self, mock_record_report_status, mock_download_manifest, mock_task):
    """Test start_manifest_processing with report in progressed."""
    account = self.mock_accounts[0]
    # A report already in progress must not be queued a second time.
    Orchestrator().start_manifest_processing(
        account.get("customer_name"),
        account.get("authentication"),
        account.get("billing_source"),
        "AWS-local",
        account.get("schema_name"),
        account.get("provider_uuid"),
        DateAccessor().get_billing_months(1)[0],
    )
    mock_task.assert_not_called()
def test_remove_expired_report_data(self, mock_task, mock_remover):
    """Test removing expired report data."""
    mock_remover.return_value = [
        {"account_payer_id": "999999999", "billing_period_start": "2018-06-24 15:47:33.052509"}
    ]
    expected = "INFO:masu.processor.orchestrator:Expired data removal queued - schema_name: acct10001, Task ID: {}"
    # unset disabling all logging below CRITICAL from masu/__init__.py
    logging.disable(logging.NOTSET)
    with self.assertLogs("masu.processor.orchestrator", level="INFO") as captured:
        results = Orchestrator().remove_expired_report_data()
        self.assertTrue(results)
        self.assertEqual(len(results), 4)
        async_id = results.pop().get("async_id")
        self.assertIn(expected.format(async_id), captured.output)
def test_init_all_accounts_error(self, mock_accessor):
    """Test initializing orchestrator accounts error."""
    # Orchestrator() should swallow accessor failures instead of raising.
    mock_accessor.side_effect = AccountsAccessorError("Sample timeout error")
    try:
        Orchestrator()
    except Exception:
        self.fail("unexpected error")
def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    today = DateAccessor().today()
    # Last calendar day of the current month.
    curr_month_days = calendar.monthrange(today.year, today.month)[1]
    curr_month_first_day = date(year=today.year, month=today.month, day=1)
    curr_month_last_day = date(year=today.year, month=today.month, day=curr_month_days)

    previous_month = today - relativedelta(months=1)
    prev_month_days = calendar.monthrange(previous_month.year, previous_month.month)[1]
    prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1)
    prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_days)

    accounts, _ = Orchestrator.get_accounts()
    # Deduplicate schema_name since accounts may have the same schema_name but different providers
    schemas = {account['schema_name'] for account in accounts}
    for schema in schemas:
        for table in table_export_settings:
            # Upload this month's reports
            query_and_upload_to_s3(schema, table, (curr_month_first_day, curr_month_last_day))
            # Upload last month's reports
            query_and_upload_to_s3(schema, table, (prev_month_first_day, prev_month_last_day))
def test_crawl_account_hierarchy_without_provider_uuid(self, mock_crawler):
    """Test that all polling accounts for user are used when no provider_uuid is provided."""
    _, polling_accounts = Orchestrator.get_accounts()
    mock_crawler.crawl_account_hierarchy.return_value = True
    with self.assertLogs("masu.celery.tasks", "INFO") as captured_logs:
        tasks.crawl_account_hierarchy()
    # The crawler should report scanning every polling account.
    expected_log_msg = f"Account hierarchy crawler found {len(polling_accounts)} accounts to scan"
    self.assertIn(expected_log_msg, captured_logs.output[0])
def test_start_manifest_processing_already_progressed(
        self, mock_record_report_status, mock_download_manifest, mock_task, mock_inspect):
    """Test start_manifest_processing with report already processed."""
    account = self.mock_accounts[0]
    # An already-processed report must not be queued again.
    Orchestrator().start_manifest_processing(
        account.get("customer_name"),
        account.get("credentials"),
        account.get("data_source"),
        "AWS-local",
        account.get("schema_name"),
        account.get("provider_uuid"),
        DateAccessor().get_billing_months(1)[0],
    )
    mock_task.assert_not_called()
def test_init_with_billing_source(self, mock_accessor):
    """Test initializing orchestrator with forced billing source."""
    mock_accessor.return_value = self.mock_accounts
    fake_source = random.choice(self.mock_accounts)
    # Forcing a billing source should narrow the orchestrator to one account.
    orchestrator = Orchestrator(fake_source.get("billing_source"))
    self.assertEqual(len(orchestrator._accounts), 1)
    self.assertEqual(
        orchestrator._accounts[0].get("billing_source"), fake_source.get("billing_source")
    )
def test_initializer(self, mock_inspect):
    """Test to init."""
    aws_types = (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL)
    azure_types = (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL)

    def check_account(account, allow_ocp):
        # Verify one account dict against the fixture values for its provider
        # type; OCP is only expected among the full (non-polling) accounts.
        provider_type = account.get("provider_type")
        if provider_type in aws_types:
            self.assertEqual(account.get("credentials"), self.aws_credentials)
            self.assertEqual(account.get("data_source"), self.aws_data_source)
            self.assertEqual(account.get("customer_name"), self.schema)
        elif allow_ocp and provider_type == Provider.PROVIDER_OCP:
            self.assertIn(account.get("credentials"), self.ocp_credentials)
            self.assertEqual(account.get("data_source"), self.ocp_data_source)
            self.assertEqual(account.get("customer_name"), self.schema)
        elif provider_type in azure_types:
            self.assertEqual(account.get("credentials"), self.azure_credentials)
            self.assertEqual(account.get("data_source"), self.azure_data_source)
            self.assertEqual(account.get("customer_name"), self.schema)
        else:
            self.fail("Unexpected provider")

    orchestrator = Orchestrator()
    provider_count = Provider.objects.filter(active=True).count()
    if len(orchestrator._accounts) != provider_count:
        self.fail("Unexpected number of test accounts")
    for account in orchestrator._accounts:
        with self.subTest(provider_type=account.get("provider_type")):
            check_account(account, allow_ocp=True)

    if len(orchestrator._polling_accounts) != 2:
        self.fail("Unexpected number of listener test accounts")
    for account in orchestrator._polling_accounts:
        with self.subTest(provider_type=account.get("provider_type")):
            check_account(account, allow_ocp=False)
def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    LOG.info('Beginning upload_normalized_data')
    today = DateAccessor().today()
    # Last calendar day of the current month.
    curr_month_days = calendar.monthrange(today.year, today.month)[1]
    curr_month_first_day = date(year=today.year, month=today.month, day=1)
    curr_month_last_day = date(year=today.year, month=today.month, day=curr_month_days)

    previous_month = today - relativedelta(months=1)
    prev_month_days = calendar.monthrange(previous_month.year, previous_month.month)[1]
    prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1)
    prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_days)

    accounts, _ = Orchestrator.get_accounts()
    for account in accounts:
        schema = account['schema_name']
        provider_uuid = account['provider_uuid']
        LOG.info('processing schema %s provider uuid %s', schema, provider_uuid)
        for table in table_export_settings:
            # Celery does not serialize named tuples, convert it
            # to a dict before handing it off to the celery task.
            table_dict = dictify_table_export_settings(table)
            # Upload this month's reports
            query_and_upload_to_s3.delay(
                schema, provider_uuid, table_dict, curr_month_first_day, curr_month_last_day
            )
            # Upload last month's reports
            query_and_upload_to_s3.delay(
                schema, provider_uuid, table_dict, prev_month_first_day, prev_month_last_day
            )
    LOG.info('Completed upload_normalized_data')
def post_notification():
    """Packages response for class-based view."""
    header_list = request.headers.to_wsgi_list()
    body = request.data.decode('utf-8')
    logger.debug('Received Header: %s', str(request.headers))
    logger.debug('Received Body: %s', str(body))
    notified_billing_source = None
    try:
        # A handler error means a malformed notification; a filter means
        # the notification was deliberately ignored.
        notified_billing_source = NotificationHandler(header_list, body).billing_source()
    except NotificationHandlerError as error:
        logger.error(str(error))
    except NotificationHandlerFilter as info:
        logger.info(str(info))
    if notified_billing_source:
        Orchestrator(notified_billing_source).prepare()
    return ('', 204)
def test_get_reports(self, fake_accessor, mock_inspect):
    """Test get_reports for combinations of setup_complete and ingest override."""
    initial_month_qty = Config.INITIAL_INGEST_NUM_MONTHS
    # (setup_complete, ingest_override, test_months, expected_month_length)
    cases = [
        (True, True, 5, 5),
        (False, True, 5, 5),
        (True, False, 5, 2),
        (False, False, 5, 5),
    ]
    for setup_complete, ingest_override, test_months, expected_length in cases:
        fake_accessor.return_value = setup_complete
        Config.INGEST_OVERRIDE = ingest_override
        Config.INITIAL_INGEST_NUM_MONTHS = test_months
        months = Orchestrator().get_reports(self.aws_provider_uuid)
        self.assertEqual(expected_length, len(months))
    # Restore global config so other tests are unaffected.
    Config.INGEST_OVERRIDE = False
    Config.INITIAL_INGEST_NUM_MONTHS = initial_month_qty
def test_remove_expired_report_data(self, mock_task, mock_remover):
    """Test removing expired report data."""
    mock_remover.return_value = [{
        'account_payer_id': '999999999',
        'billing_period_start': '2018-06-24 15:47:33.052509',
    }]
    expected = 'INFO:masu.processor.orchestrator:Expired data removal queued - customer: acct10001, Task ID: {}'
    # unset disabling all logging below CRITICAL from masu/__init__.py
    logging.disable(logging.NOTSET)
    with self.assertLogs('masu.processor.orchestrator', level='INFO') as captured:
        results = Orchestrator().remove_expired_report_data()
        self.assertTrue(results)
        self.assertEqual(len(results), 2)
        async_id = results.pop().get('async_id')
        self.assertIn(expected.format(async_id), captured.output)
def test_initializer(self):
    """Test to init."""
    aws_types = (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL)
    azure_types = (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL)

    def check_account(account, allow_ocp):
        # Verify one account dict against the fixture values for its provider
        # type; OCP is only expected among the full (non-polling) accounts.
        provider_type = account.get("provider_type")
        if provider_type in aws_types:
            self.assertEqual(account.get("authentication"), self.aws_provider_resource_name)
            self.assertEqual(account.get("billing_source"), self.aws_billing_source)
            self.assertEqual(account.get("customer_name"), self.schema)
        elif allow_ocp and provider_type == Provider.PROVIDER_OCP:
            self.assertIn(account.get("authentication"), self.ocp_provider_resource_names)
            self.assertEqual(account.get("billing_source"), self.ocp_billing_source)
            self.assertEqual(account.get("customer_name"), self.schema)
        elif provider_type in azure_types:
            self.assertEqual(account.get("authentication"), self.azure_credentials)
            self.assertEqual(account.get("billing_source"), self.azure_data_source)
            self.assertEqual(account.get("customer_name"), self.schema)
        else:
            self.fail("Unexpected provider")

    orchestrator = Orchestrator()
    if len(orchestrator._accounts) != Provider.objects.count():
        self.fail("Unexpected number of test accounts")
    for account in orchestrator._accounts:
        check_account(account, allow_ocp=True)

    if len(orchestrator._polling_accounts) != 2:
        self.fail("Unexpected number of listener test accounts")
    for account in orchestrator._polling_accounts:
        check_account(account, allow_ocp=False)
def test_initializer(self):
    """Test to init"""
    orchestrator = Orchestrator()
    if len(orchestrator._accounts) != 1:
        self.fail("Unexpected number of test accounts")
    account = orchestrator._accounts.pop()
    # Expected fixture values for the single configured AWS account.
    expected = {
        'authentication': 'arn:aws:iam::111111111111:role/CostManagement',
        'billing_source': 'test-bucket',
        'customer_name': 'Test Customer',
        'provider_type': AMAZON_WEB_SERVICES,
    }
    for key, value in expected.items():
        self.assertEqual(account.get(key), value)
def expired_data(request):
    """Return expired data."""
    # Only a DELETE in debug mode performs a real (non-simulated) removal.
    simulate = not (request.method == "DELETE" and Config.DEBUG)
    LOG.info("Simulate Flag: %s", simulate)
    acceptabools = ["true", "false"]
    line_items_only = request.query_params.get("line_items_only", "false").lower()
    if line_items_only not in acceptabools:
        errmsg = "The param line_items_only must be {}.".format(str(acceptabools))
        return Response({"Error": errmsg}, status=status.HTTP_400_BAD_REQUEST)
    # "true"/"false" parse to Python booleans via the JSON loader.
    line_items_only = json.loads(line_items_only)
    async_delete_results = Orchestrator().remove_expired_report_data(
        simulate=simulate, line_items_only=line_items_only
    )
    response_key = "Async jobs for expired data removal"
    if simulate:
        response_key += " (simulated)"
    return Response({response_key: str(async_delete_results)})
def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    # Feature-flag guard: bail out early when archiving is off.
    if not settings.ENABLE_S3_ARCHIVING:
        LOG.info("S3 Archiving is disabled. Not running task.")
        return
    LOG.info("Beginning upload_normalized_data")
    today = DateAccessor().today()
    curr_month_days = calendar.monthrange(today.year, today.month)[1]
    curr_month_first_day = date(year=today.year, month=today.month, day=1)
    curr_month_last_day = date(year=today.year, month=today.month, day=curr_month_days)

    previous_month = today - relativedelta(months=1)
    prev_month_days = calendar.monthrange(previous_month.year, previous_month.month)[1]
    prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1)
    prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_days)

    accounts, _ = Orchestrator.get_accounts()
    for account in accounts:
        schema = account["schema_name"]
        provider_uuid = account["provider_uuid"]
        LOG.info("processing schema %s provider uuid %s", schema, provider_uuid)
        for table in table_export_settings:
            # Celery does not serialize named tuples, convert it
            # to a dict before handing it off to the celery task.
            table_dict = dictify_table_export_settings(table)
            # Queue this month's then last month's reports.
            query_and_upload_to_s3.delay(
                schema, provider_uuid, table_dict, curr_month_first_day, curr_month_last_day
            )
            query_and_upload_to_s3.delay(
                schema, provider_uuid, table_dict, prev_month_first_day, prev_month_last_day
            )
    LOG.info("Completed upload_normalized_data")