Exemple #1
0
def crawl_account_hierarchy(provider_uuid=None):
    """Run the account hierarchy crawler over the polling accounts.

    Args:
        provider_uuid: Optional provider identifier. When truthy it is
            forwarded to ``Orchestrator.get_accounts``; otherwise the
            accounts are requested without any argument.

    Accounts whose ``provider_type`` has no matching crawler class are
    logged and counted as skipped; the totals are logged at the end.
    """
    # Only pass provider_uuid along when the caller actually supplied one.
    lookup_kwargs = {"provider_uuid": provider_uuid} if provider_uuid else {}
    _, accounts_to_scan = Orchestrator.get_accounts(**lookup_kwargs)
    LOG.info("Account hierarchy crawler found %s accounts to scan" %
             len(accounts_to_scan))

    start_template = "Starting account hierarchy crawler for type {} with provider_uuid: {}"
    skip_template = "No known crawler for account with provider_uuid: {} of type {}"

    processed = 0
    skipped = 0
    for account in accounts_to_scan:
        # Pick the crawler class that handles this provider type, if any.
        org_crawler = (AWSOrgUnitCrawler(account)
                       if account.get("provider_type") == Provider.PROVIDER_AWS
                       else None)

        if not org_crawler:
            LOG.info(
                skip_template.format(account.get("provider_uuid"),
                                     account.get("provider_type")))
            skipped += 1
            continue

        LOG.info(
            start_template.format(account.get("provider_type"),
                                  account.get("provider_uuid")))
        org_crawler.crawl_account_hierarchy()
        processed += 1

    LOG.info(
        f"Account hierarchy crawler finished. {processed} processed and {skipped} skipped"
    )
Exemple #2
0
def upload_normalized_data():
    """Scheduled task that exports normalized data to s3.

    For every distinct schema name among the accounts and every entry of
    ``table_export_settings``, the current month's date range is uploaded
    first, followed by the previous month's date range.
    """
    today = DateAccessor().today()
    days_this_month = calendar.monthrange(today.year, today.month)[1]
    this_month = (
        date(year=today.year, month=today.month, day=1),
        date(year=today.year, month=today.month, day=days_this_month),
    )

    month_ago = today - relativedelta(months=1)
    days_last_month = calendar.monthrange(month_ago.year, month_ago.month)[1]
    last_month = (
        date(year=month_ago.year, month=month_ago.month, day=1),
        date(year=month_ago.year, month=month_ago.month, day=days_last_month),
    )

    accounts, _ = Orchestrator.get_accounts()

    # Several providers can share a schema_name, so export each schema once.
    unique_schemas = {acct['schema_name'] for acct in accounts}
    for schema in unique_schemas:
        for table in table_export_settings:
            # This month's reports go first, then last month's.
            for date_range in (this_month, last_month):
                query_and_upload_to_s3(schema, table, date_range)
Exemple #3
0
 def test_crawl_account_hierarchy_without_provider_uuid(self, mock_crawler):
     """Verify the task reports the full polling account count when no provider_uuid is given."""
     _, all_polling_accounts = Orchestrator.get_accounts()
     mock_crawler.crawl_account_hierarchy.return_value = True
     expected_fragment = "Account hierarchy crawler found %s accounts to scan" % len(all_polling_accounts)
     with self.assertLogs("masu.celery.tasks", "INFO") as log_watcher:
         tasks.crawl_account_hierarchy()
         # The account-count message is expected in the first captured line.
         first_line = log_watcher.output[0]
         self.assertIn(expected_fragment, first_line)
Exemple #4
0
def upload_normalized_data():
    """Scheduled task that queues exports of normalized data to s3.

    For each account and each entry of ``table_export_settings``, one
    ``query_and_upload_to_s3`` task is queued for the current month and
    then one for the previous month.
    """
    LOG.info('Beginning upload_normalized_data')
    today = DateAccessor().today()
    days_this_month = calendar.monthrange(today.year, today.month)[1]
    this_month_start = date(year=today.year, month=today.month, day=1)
    this_month_end = date(year=today.year, month=today.month, day=days_this_month)

    month_ago = today - relativedelta(months=1)
    days_last_month = calendar.monthrange(month_ago.year, month_ago.month)[1]
    last_month_start = date(year=month_ago.year, month=month_ago.month, day=1)
    last_month_end = date(year=month_ago.year, month=month_ago.month, day=days_last_month)

    # Current month first, then the previous month.
    periods = (
        (this_month_start, this_month_end),
        (last_month_start, last_month_end),
    )

    accounts, _ = Orchestrator.get_accounts()

    for account in accounts:
        schema_name = account['schema_name']
        provider_uuid = account['provider_uuid']
        LOG.info('processing schema %s provider uuid %s', schema_name, provider_uuid)

        for table in table_export_settings:
            # Celery does not serialize named tuples, so hand the task a dict.
            table_dict = dictify_table_export_settings(table)

            for period_start, period_end in periods:
                query_and_upload_to_s3.delay(
                    schema_name,
                    provider_uuid,
                    table_dict,
                    period_start,
                    period_end,
                )
    LOG.info('Completed upload_normalized_data')
Exemple #5
0
def upload_normalized_data():
    """Scheduled task that queues exports of normalized data to s3.

    Returns immediately when ``settings.ENABLE_S3_ARCHIVING`` is falsy.
    Otherwise, for each account and each entry of
    ``table_export_settings``, a ``query_and_upload_to_s3`` task is queued
    for the current month and then for the previous month.
    """
    if not settings.ENABLE_S3_ARCHIVING:
        LOG.info("S3 Archiving is disabled. Not running task.")
        return

    def month_bounds(day):
        """Return the (first, last) dates of the month containing ``day``."""
        _, days_in_month = calendar.monthrange(day.year, day.month)
        first = date(year=day.year, month=day.month, day=1)
        last = date(year=day.year, month=day.month, day=days_in_month)
        return first, last

    LOG.info("Beginning upload_normalized_data")
    today = DateAccessor().today()
    this_month = month_bounds(today)
    last_month = month_bounds(today - relativedelta(months=1))

    accounts, _ = Orchestrator.get_accounts()

    for account in accounts:
        schema_name = account["schema_name"]
        provider_uuid = account["provider_uuid"]
        LOG.info("processing schema %s provider uuid %s",
                 schema_name, provider_uuid)

        for table in table_export_settings:
            # Celery does not serialize named tuples, so convert the
            # settings entry to a dict before queueing the task.
            table_dict = dictify_table_export_settings(table)

            # This month's reports are queued before last month's.
            for range_start, range_end in (this_month, last_month):
                query_and_upload_to_s3.delay(schema_name, provider_uuid,
                                             table_dict, range_start,
                                             range_end)
    LOG.info("Completed upload_normalized_data")
Exemple #6
0
def upload_normalized_data():
    """Scheduled task that exports normalized data to s3.

    A fresh UUID is generated per run and prefixed to every log line of
    that run. For each account and each entry of
    ``table_export_settings``, the current month's date range is uploaded
    first, followed by the previous month's.
    """
    run_id = str(uuid.uuid4())
    LOG.info('%s Beginning upload_normalized_data', run_id)

    today = DateAccessor().today()
    days_this_month = calendar.monthrange(today.year, today.month)[1]
    this_month = (
        date(year=today.year, month=today.month, day=1),
        date(year=today.year, month=today.month, day=days_this_month),
    )

    month_ago = today - relativedelta(months=1)
    days_last_month = calendar.monthrange(month_ago.year, month_ago.month)[1]
    last_month = (
        date(year=month_ago.year, month=month_ago.month, day=1),
        date(year=month_ago.year, month=month_ago.month, day=days_last_month),
    )

    accounts, _ = Orchestrator.get_accounts()

    for account in accounts:
        schema_name = account['schema_name']
        provider_uuid = account['provider_uuid']
        LOG.info(
            '%s processing schema %s provider uuid %s',
            run_id,
            schema_name,
            provider_uuid,
        )
        for table in table_export_settings:
            # This month's reports go first, then last month's.
            for date_range in (this_month, last_month):
                query_and_upload_to_s3(schema_name, provider_uuid, table, date_range)
    LOG.info('%s Completed upload_normalized_data', run_id)