def test_populate_from_scope_record(self):

        scope_id = gen_string_id()
        sweep_id = gen_string_id()

        console_token = 'console token'
        platform_token = 'platform token'

        scope_record = AssetScope()
        scope_record.scope = scope_id
        scope_record.scope_api_token = console_token
        scope_record.set_cache(platform_tokens={platform_token})

        PlatformTokenManager.populate_from_scope_entity(scope_record, sweep_id)

        # now let's make sure we see those tokens:

        # Scope-centered jobs must result in scope-centered key for token storage
        job_scope = JobScope(sweep_id=sweep_id,
                             entity_type=Entity.Scope,
                             entity_id=scope_id)
        assert console_token == PlatformTokenManager.from_job_scope(
            job_scope).get_best_token()

        job_scope = JobScope(
            sweep_id=sweep_id,
            # relies on the .namespace default as the 2nd value in the Redis key; no need to set it here
        )
        assert platform_token == PlatformTokenManager.from_job_scope(
            job_scope).get_best_token()
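
The two assertions above hinge on how PlatformTokenManager derives its Redis queue key from the job's entity type. A minimal sketch of that key scheme, assuming only the '{asset_scope}-{sweep_id}-sorted-token-queue' pattern exercised in test_from_job_scope further below (the helper name here is hypothetical):

def _token_queue_key(job_scope: JobScope) -> str:
    # Scope-centered jobs key the token queue by the scope's entity_id;
    # everything else falls back to the platform namespace default ('fb').
    asset_scope = (job_scope.entity_id
                   if job_scope.entity_type == Entity.Scope
                   else JobScope.namespace)
    return f'{asset_scope}-{job_scope.sweep_id}-sorted-token-queue'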
Example #2
def _report_failure(job_scope: JobScope, start_time: float, exc: Exception,
                    **kwargs: Any):
    """Report task stats when task fails."""
    end_time = time.time()
    job_scope.running_time = math.ceil(end_time - start_time)
    job_scope.datapoint_count = kwargs.get('partial_datapoint_count')

    ErrorInspector.inspect(exc, job_scope.ad_account_id,
                           {'job_scope': job_scope})

    if isinstance(exc, FacebookRequestError):
        failure_status, failure_bucket = FacebookApiErrorInspector(
            exc).get_status_and_bucket()
    else:
        failure_status, failure_bucket = ExternalPlatformJobStatus.GenericError, FailureBucket.Other

    # No entity type means we don't know what table to target
    if failure_bucket == FailureBucket.InaccessibleObject and job_scope.entity_type is not None:
        set_inaccessible_entity_task.delay(job_scope)

    report_job_status_task.delay(failure_status, job_scope)
    PlatformTokenManager.from_job_scope(
        job_scope).report_usage_per_failure_bucket(job_scope.token,
                                                   failure_bucket)
    SweepStatusTracker(job_scope.sweep_id).report_status(failure_bucket)
    _send_measurement_task_runtime(job_scope, failure_bucket)
Example #3
def _report_success(job_scope: JobScope, start_time: float, ret_value: Any):
    """Report task stats when successful."""
    end_time = time.time()
    job_scope.running_time = math.ceil(end_time - start_time)

    if isinstance(ret_value, int):
        job_scope.datapoint_count = ret_value

    report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)
    SweepStatusTracker(job_scope.sweep_id).report_status(FailureBucket.Success)
    _send_measurement_task_runtime(job_scope, FailureBucket.Success)
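
_report_failure and _report_success are two halves of one bracket around a task body. A hypothetical wrapper (an assumption, not project code) showing how they would typically be wired together:

def run_with_reporting(job_scope: JobScope, task_fn, *args, **kwargs):
    # Capture start_time once so both reporters compute the same running_time.
    start_time = time.time()
    try:
        ret_value = task_fn(job_scope, *args, **kwargs)
    except Exception as exc:
        _report_failure(job_scope, start_time, exc)
        raise
    _report_success(job_scope, start_time, ret_value)
    return ret_value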
def sync_expectations(job_scope: JobScope):
    assert (job_scope.report_type == ReportType.sync_expectations
            ), 'Only sync_expectations report type is processed by this task'

    if job_scope.ad_account_id:
        # this is a per-ad-account task; no need to iterate over all accounts
        ad_account_ids_iter = [job_scope.ad_account_id]
    else:
        ad_account_ids_iter = expecations_store.iter_expectations_ad_accounts(
            sweep_id=job_scope.sweep_id)

    for ad_account_id in ad_account_ids_iter:

        ad_account_scoped_job_scope = JobScope(job_scope.to_dict(),
                                               ad_account_id=ad_account_id,
                                               entity_type=Entity.AdAccount,
                                               entity_id=ad_account_id)

        with ChunkDumpStore(ad_account_scoped_job_scope,
                            chunk_size=200) as store:

            job_ids_iter = expecations_store.iter_expectations_per_ad_account(
                ad_account_id, ad_account_scoped_job_scope.sweep_id)

            for job_id in job_ids_iter:
                job_id_parts = parse_id_parts(job_id)

                # default is platform namespace and we communicate out only those
                if job_id_parts.namespace == JobScope.namespace:
                    store({
                        'job_id': job_id,
                        # 'status': 'expected',
                        'account_id': job_id_parts.ad_account_id,
                        'entity_type': job_id_parts.entity_type,
                        'entity_id': job_id_parts.entity_id,
                        'report_type': job_id_parts.report_type,
                        'report_variant': job_id_parts.report_variant,
                        'range_start': _to_date_string_if_set(job_id_parts.range_start),
                        'range_end': _to_date_string_if_set(job_id_parts.range_end),
                        'platform_namespace': job_id_parts.namespace,
                    })
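
_to_date_string_if_set is used above but not shown. A plausible minimal version (an assumption based on the name, and on test_task_is_called_with_right_data below, which expects range_start stringified as '%Y-%m-%d'):

from datetime import date, datetime

def _to_date_string_if_set(value):
    # Stringify real dates/datetimes; pass None (and anything unset) through.
    if isinstance(value, (date, datetime)):
        return value.strftime('%Y-%m-%d')
    return value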
def iter_collect_entities_per_page_post(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for a page post
    """
    entity_type = job_scope.report_variant

    page_token_manager = PageTokenManager.from_job_scope(job_scope)
    with PlatformApiContext(
            page_token_manager.get_best_token(
                job_scope.ad_account_id)) as fb_ctx:
        root_fb_entity = fb_ctx.to_fb_model(job_scope.entity_id,
                                            Entity.PagePost)

    entities = iter_native_entities_per_page_post(root_fb_entity, entity_type)

    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)
    del record_id_base_data['entity_id']

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        for entity in entities:
            entity_data = entity.export_all_data()
            entity_data = add_vendor_data(entity_data,
                                          id=generate_universal_id(
                                              entity_id=entity_data.get('id'),
                                              **record_id_base_data))
            entity_data['page_id'] = job_scope.ad_account_id
            entity_data['page_post_id'] = job_scope.entity_id

            # Store the individual datum, use job context for the cold
            # storage thing to divine whatever it needs from the job context
            store(entity_data)

            yield entity_data
Example #6
def test_set_inaccessible_entity_task():
    mock_model = Mock()
    mock_factory = Mock(return_value=mock_model)
    with patch.dict('oozer.set_inaccessible_entity_task.ENTITY_TYPE_MODEL_MAP', {Entity.PagePost: mock_factory}):
        set_inaccessible_entity_task(JobScope(report_variant=Entity.PagePost))

        mock_model.update.assert_called_once_with(actions=[mock_factory.is_accessible.set(False)])
    def test_fails_without_a_token(self):
        job_scope = JobScope(tokens=[None],
                             report_time=datetime.utcnow(),
                             report_type='entity',
                             report_variant=Entity.Page,
                             sweep_id='1')

        with SweepRunningFlag(job_scope.sweep_id), mock.patch.object(
                report_job_status_task, 'delay') as status_task, mock.patch(
                    'common.error_inspector.BugSnagContextData.notify'
                ) as bugsnag_notify, mock.patch(
                    'common.error_inspector.API_KEY', 'something'):
            collect_pages_from_business_task(job_scope, JobContext())

            assert bugsnag_notify.called
            actual_args, actual_kwargs = bugsnag_notify.call_args
            assert (
                str(actual_args[0]) ==
                'Job fb||||entity|P cannot proceed. No platform tokens provided.'
            ), 'Notify bugsnag correctly using correct Exception'
            assert {
                'severity': SEVERITY_ERROR,
                'job_scope': job_scope,
                'error_type': ErrorTypesReport.UNKNOWN,
            } == actual_kwargs, 'Notify bugsnag correctly'
            assert status_task.called

            status_task_args, _ = status_task.call_args
            assert (
                JobStatus.GenericError, job_scope
            ) == status_task_args, 'Must report status correctly on failure'
    def test_fails_with_wrong_report_variant(self):
        job_scope = JobScope(tokens=['blah'],
                             report_time=datetime.utcnow(),
                             report_type='entity',
                             report_variant=None,
                             sweep_id='1')

        with SweepRunningFlag(job_scope.sweep_id), mock.patch.object(
                report_job_status_task, 'delay') as status_task, mock.patch(
                    'common.error_inspector.BugSnagContextData.notify'
                ) as bugsnag_notify, mock.patch(
                    'common.error_inspector.API_KEY', 'something'):
            collect_pages_from_business_task(job_scope, JobContext())

            assert bugsnag_notify.called
            actual_args, actual_kwargs = bugsnag_notify.call_args
            assert (
                isinstance(actual_args[0], ValueError)
                and str(actual_args[0]) == 'Report level None specified is not: P'
            ), 'Notify bugsnag correctly using correct Exception'
            assert {
                'severity': SEVERITY_ERROR,
                'job_scope': job_scope,
                'error_type': ErrorTypesReport.UNKNOWN,
            } == actual_kwargs, 'Notify bugsnag correctly'
            assert status_task.called
            parameters, _ = status_task.call_args
            assert (JobStatus.GenericError, job_scope
                    ) == parameters, 'Must report status correctly on failure'
    def test_lifetime_campaigns(self):

        job_scope = JobScope(
            ad_account_id=AD_ACCOUNT,
            report_type=ReportType.lifetime,
            report_variant=Entity.Campaign,
            sweep_id='sweep',
            tokens=[TOKEN],
        )

        captured_data = []  # type: List[Tuple[Dict, JobScope, int]]

        def _store(data, job_scope, chunk_marker=0):
            captured_data.append((data, job_scope, chunk_marker))

        with mock.patch.object(cold_storage, 'store', _store):
            data_iter = Insights.iter_collect_insights(job_scope, None)
            cnt = 0
            for datum in data_iter:
                cnt += 1
                if cnt == 4:
                    break

        assert cnt == 4

        for datum, job_scope_inner, _ in captured_data:
            assert datum['campaign_id'] == job_scope_inner.entity_id
def collect_page(job_scope: JobScope, _job_context: JobContext):
    """
    Collect a single facebook page
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.Page}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    # We don't use it for getting a token. Something else that calls us does.
    # However, we use it to report usages of the token we got.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        page_inst = page.Page(fbid=job_scope.entity_id, api=fb_ctx.api)
        page_fetched = page_inst.api_get(fields=get_default_fields(Page))
        report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched,
                                     job_scope)
        token_manager.report_usage(token, 2)

        record_id_data = job_scope.to_dict()
        record_id_data.update(entity_type=Entity.Page,
                              entity_id=job_scope.entity_id,
                              report_variant=None)
        entity_data = page_fetched.export_all_data()
        entity_data = add_vendor_data(
            entity_data, id=generate_universal_id(**record_id_data))
        store = NormalStore(job_scope)
        store.store(entity_data)
    def test_key_s3_date_snapped_with_chunk_id(self):
        """
        Check that the key is constructed as we expect
        """

        job_scope = JobScope(
            ad_account_id=gen_string_id(),
            report_type=ReportType.day_platform,
            report_variant=Entity.Ad,
            range_start=date(2000, 1, 2),
        )

        chunk_marker = 7

        dt_should_be = datetime(2000, 1, 2, 0, 0, 0)
        with mock.patch.object(uuid, 'uuid4', return_value='UUID-HERE'):
            storage_key = cold_storage.store({'data': 'yeah!'}, job_scope, chunk_marker=chunk_marker)

        prefix = xxhash.xxh64(job_scope.ad_account_id).hexdigest()[:6]
        expected_key = (
            f'fb/'
            f'{prefix}-{job_scope.ad_account_id}/'
            f'{job_scope.report_type}/'
            f'{dt_should_be.strftime("%Y")}/'
            f'{dt_should_be.strftime("%m")}/'
            f'{dt_should_be.strftime("%d")}/'
            f'{dt_should_be.strftime("%Y-%m-%dT%H:%M:%SZ")}-'
            f'{job_scope.job_id}-'
            f'{chunk_marker}-'
            'UUID-HERE'
            '.json'
        )

        assert storage_key == expected_key
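
The six-character prefix in both key tests hashes the account id, presumably to randomize the head of otherwise similar-looking keys and spread them across S3 partitions. A standalone illustration of just the prefix computation (the account id value is made up):

import xxhash

account_id = '23845179'
prefix = xxhash.xxh64(account_id).hexdigest()[:6]
# the key root then becomes f'fb/{prefix}-{account_id}/...'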
    def test_key_s3_date_less(self):
        """
        Check that the key is constructed as we expect
        """
        import common.tztools

        job_scope = JobScope(
            ad_account_id=gen_string_id(), report_type=ReportType.entity, report_variant=Entity.Campaign
        )

        now_dt = datetime(2000, 1, 2, 3, 4, 5)
        with mock.patch.object(common.tztools, 'now', return_value=now_dt) as now_mocked, mock.patch.object(
            uuid, 'uuid4', return_value='UUID-HERE'
        ):

            storage_key = cold_storage.store({'data': 'yeah!'}, job_scope)

        assert now_mocked.called

        prefix = xxhash.xxh64(job_scope.ad_account_id).hexdigest()[:6]

        expected_key = (
            f'fb/'
            f'{prefix}-{job_scope.ad_account_id}/'
            f'{job_scope.report_type}/'
            f'{now_dt.strftime("%Y")}/'
            f'{now_dt.strftime("%m")}/'
            f'{now_dt.strftime("%d")}/'
            f'{now_dt.strftime("%Y-%m-%dT%H:%M:%SZ")}-'
            f'{job_scope.job_id}-'
            'UUID-HERE'
            '.json'
        )

        assert storage_key == expected_key
Example #13
    def test_task_error_is_logged_into_job_report(self):
        from oozer.common.report_job_status_task import report_job_status_task

        class MyException(Exception):
            pass

        sync_expectations_job_scope = JobScope(
            sweep_id=random.gen_string_id(),
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.sync_expectations,
        )

        with mock.patch.object(report_job_status_task,
                               'delay') as job_report, mock.patch.object(
                                   sync_expectations_task,
                                   'sync_expectations',
                                   side_effect=MyException('nope!')):

            with self.assertRaises(MyException):
                sync_expectations_task.sync_expectations_task.delay(
                    sync_expectations_job_scope, None)

        assert job_report.called

        aa, kk = job_report.call_args

        assert not kk
        code, job_scope_actual = aa
        assert code < 0  # some sort of *Failure* code
        assert job_scope_actual == sync_expectations_job_scope
    def test_resolve_job_scope_to_celery_task_ad_account(
            self, mock_lifetime_iter, mock_breakdowns_iter):
        real_claim = RealityClaim(entity_id='A1',
                                  ad_account_id='A1',
                                  entity_type=Entity.AdAccount,
                                  timezone='America/Los_Angeles')
        mock_lifetime_iter.return_value = []
        mock_breakdowns_iter.return_value = [
            RealityClaim(
                entity_id='AD1',
                ad_account_id='A1',
                entity_type=Entity.Ad,
                range_start=datetime(2019, 1, 20, 12, 0),
                timezone='America/Los_Angeles',
            )
        ]
        for job_generator in entity_expectation_generator_map[
                Entity.AdAccount]:
            for exp_claim in job_generator(real_claim):
                with self.subTest(job_generator=job_generator,
                                  exp_claim=exp_claim):
                    job_scope = JobScope(parse_id(exp_claim.job_id))

                    assert inventory.resolve_job_scope_to_celery_task(
                        job_scope)
Example #15
    def test_task_does_not_blow_up(self):
        # This is almost the same as the next test, where we check that the
        # call signature is right. But when the call signature changes and our
        # tests don't, tests that check for the wrong thing are worthless.
        # So here we actually call "store", and in the next test we intercept
        # the call and check the payload.
        # Don't remove me. Not a duplicate.

        expectation_job_id = generate_id(
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.day_hour,
            report_variant=Entity.Ad,
            range_start='2000-01-01',
        )
        rr = [expectation_job_id]

        sync_expectations_job_scope = JobScope(
            sweep_id=random.gen_string_id(),
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.sync_expectations,
        )

        with mock.patch.object(expecations_store,
                               'iter_expectations_per_ad_account',
                               return_value=rr):
            sync_expectations_task.sync_expectations(
                sync_expectations_job_scope)
Example #16
def collect_organic_insights_task(job_scope: JobScope, _: JobContext):
    logger.info(f'{job_scope} started')

    if not job_scope.tokens:
        good_token = PlatformTokenManager.from_job_scope(
            job_scope).get_best_token()
        if good_token is not None:
            job_scope.tokens = [good_token]

    data_iter = InsightsOrganic.iter_collect_insights(job_scope)

    cnt = 0
    try:
        for _ in data_iter:
            cnt += 1
            if cnt % 100 == 0:
                logger.info(f'{job_scope} processed {cnt} data points so far')
    except Exception as e:
        # Wrapping the exception loses the original traceback, so record it
        # via ErrorInspector first and chain the cause explicitly.
        ErrorInspector.inspect(e, job_scope.ad_account_id,
                               {'job_scope': job_scope})
        raise CollectionError(e, cnt) from e

    logger.info(f'{job_scope} completed a total of {cnt} data points')
    return cnt
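
CollectionError is project-specific and not shown here. A plausible minimal shape (an assumption based on its two-argument use above and on the 'partial_datapoint_count' kwarg read in _report_failure):

class CollectionError(Exception):
    def __init__(self, inner: Exception, partial_datapoint_count: int):
        super().__init__(f'{inner} (after {partial_datapoint_count} data points)')
        self.inner = inner
        self.partial_datapoint_count = partial_datapoint_count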
Example #17
    def iter_tasks(
        self
    ) -> Generator[Tuple[CeleryTask, JobScope, JobContext, int], None, None]:
        """Read persisted jobs and pass-through context objects for inspection"""
        with self.queue.JobsReader() as jobs_iter:
            for job_id, job_scope_additional_data, score in jobs_iter:

                job_id_parts = parse_id(job_id)
                job_scope = JobScope(job_scope_additional_data,
                                     job_id_parts,
                                     sweep_id=self.sweep_id,
                                     score=score)

                try:
                    celery_task = resolve_job_scope_to_celery_task(job_scope)
                    # TODO: Decide what to do with this.
                    # Was designed for massive hash collection and such, but it
                    # cannot hold much data because we pickle it and put it on Redis.
                    job_context = JobContext()
                    yield celery_task, job_scope, job_context, score
                    logger.info(
                        f"#{self.sweep_id}: Scheduling job_id {job_id} with score {score}."
                    )
                except InvalidJobScopeException as e:
                    ErrorInspector.inspect(e, job_scope.ad_account_id, {
                        'sweep_id': job_scope.sweep_id,
                        'job_id': job_scope.job_id
                    })
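
A hypothetical consumer of iter_tasks (an assumption; 'looper' stands in for whatever object exposes the generator above):

def schedule_sweep_jobs(looper) -> int:
    """Drain iter_tasks and fire each resolved Celery task; returns the count fired."""
    cnt = 0
    for celery_task, job_scope, job_context, _score in looper.iter_tasks():
        celery_task.delay(job_scope, job_context)
        cnt += 1
    return cnt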
def test__job_scope_to_metadata():
    scope = JobScope(
        job_id='job identifier',
        namespace='fb',
        ad_account_id='007',
        report_type='report type',
        entity_type=Entity.Campaign,
        report_variant=Entity.Ad,
        range_start=datetime.fromtimestamp(1),
        score=10,
    )

    result = _job_scope_to_metadata(scope)

    result.pop('extracted_at')
    result.pop('build_id')

    assert {
        'job_id': 'fb|007|C||report+type|A|1970-01-01T00%3A00%3A01',
        'ad_account_id': '007',
        'report_type': 'report type',
        'entity_type': 'A',
        'platform_api_version': 'v6.0',
        'platform': 'fb',
        'score': '10',
    } == result
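
The expected job_id encodes the scope's fields joined with '|', each part URL-quoted: the space in 'report type' becomes '+' and the colons in the timestamp become '%3A'. A standalone illustration with the stdlib (the exact part order is inferred from the expected string):

from urllib.parse import quote_plus

parts = ['fb', '007', 'C', '', 'report type', 'A', '1970-01-01T00:00:01']
job_id = '|'.join(quote_plus(p) for p in parts)
assert job_id == 'fb|007|C||report+type|A|1970-01-01T00%3A00%3A01'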
def collect_pages_from_business(job_scope: JobScope,
                                _job_context: JobContext) -> int:
    """
    Collect all facebook pages that are active
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.Page}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    # We don't use it for getting a token. Something else that calls us does.
    # However, we use it to report usages of the token we got.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        fb_req = FacebookRequest(node_id="me",
                                 method="GET",
                                 endpoint="/businesses",
                                 api=fb_ctx.api,
                                 api_type='EDGE',
                                 target_class=Business)
        businesses = fb_req.execute()

    report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched,
                                 job_scope)
    token_manager.report_usage(token)

    entity_type = Entity.Page

    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)

    cnt = 0
    for biz in businesses:
        client_pages = list(
            biz.get_client_pages(fields=get_default_fields(Page)))
        owned_pages = list(
            biz.get_owned_pages(fields=get_default_fields(Page)))
        pages_list = client_pages + owned_pages

        for page_inst in pages_list:

            entity_data = page_inst.export_all_data()
            record_id_base_data.update(entity_id=entity_data.get('id'))
            entity_data = add_vendor_data(
                entity_data, id=generate_universal_id(**record_id_base_data))

            store = NormalStore(job_scope)
            store.store(entity_data)
            cnt += 1

    report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)
    return cnt
Example #20
    def test_task_is_called_with_right_data(self):

        range_start = now()
        range_start_should_be = range_start.strftime('%Y-%m-%d')

        expected_job_id = generate_id(
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.day_hour,
            report_variant=Entity.Ad,
            range_start=range_start,
        )
        rr = [expected_job_id]
        expected_job_id_parts = parse_id_parts(expected_job_id)

        sync_expectations_job_scope = JobScope(
            sweep_id=random.gen_string_id(),
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.sync_expectations,
        )

        with mock.patch.object(expecations_store,
                               'iter_expectations_per_ad_account',
                               return_value=rr) as jid_iter, mock.patch.object(
                                   cold_storage.ChunkDumpStore,
                                   'store') as store:

            sync_expectations_task.sync_expectations(
                sync_expectations_job_scope)

        assert jid_iter.called
        aa, kk = jid_iter.call_args
        assert not kk
        assert aa == (sync_expectations_job_scope.ad_account_id,
                      sync_expectations_job_scope.sweep_id)

        assert store.called
        aa, kk = store.call_args
        assert not kk
        assert len(aa) == 1

        data = aa[0]

        assert data == {
            'job_id': expected_job_id,
            # missing "ad_" is intentional.
            # this matches this attr name as sent by FB
            # and ysed by us elsewhere in the company
            'account_id': expected_job_id_parts.ad_account_id,
            'entity_type': expected_job_id_parts.entity_type,
            'entity_id': expected_job_id_parts.entity_id,
            'report_type': expected_job_id_parts.report_type,
            'report_variant': expected_job_id_parts.report_variant,
            'range_start': range_start_should_be,  # checked manually to ensure it's properly stringified
            'range_end': None,
            'platform_namespace': JobScope.namespace,  # default platform value
        }
    def setUp(self):

        self.job_scope = JobScope(
            sweep_id=gen_string_id(),
            ad_account_id=gen_string_id(),
            entity_id=gen_string_id(),
            entity_type=Entity.Campaign,
            report_type=ReportType.entity,
        )
Example #22
def collect_adaccount(job_scope: JobScope) -> Dict[str, Any]:
    """
    Collects ad account data for a AA specific JobScope definition.
    :param JobScope job_scope: The JobScope as we get it from the task itself
    """
    if job_scope.report_variant != Entity.AdAccount:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.AdAccount}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    assert (
        job_scope.ad_account_id == job_scope.entity_id
    ), 'This is an ad account entity job; ad_account_id should equal entity_id'

    # Used to report token usage by this job
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(job_scope.ad_account_id,
                                        Entity.AdAccount)

        fields = get_default_fields(ad_account.__class__)

        # Read just the fields we need, then export the object to a dict
        ad_account_with_selected_fields = ad_account.api_get(fields=fields)
        ad_account_data_dict = ad_account_with_selected_fields.export_all_data()

        token_manager.report_usage(token)

        job_scope_base = {
            # Duplicate the job_scope data to avoid mutating it
            **job_scope.to_dict(),
            'entity_type': Entity.AdAccount,
            'report_variant': None,
        }

        augmented_ad_account_data = add_vendor_data(
            # Augment the data returned from the remote API with our vendor data
            ad_account_data_dict,
            id=generate_universal_id(**job_scope_base),
        )
        feedback_entity_task.delay(ad_account_data_dict,
                                   job_scope.report_variant)
        store = NormalStore(job_scope)
        store.store(augmented_ad_account_data)

        # TODO: feedback account? this probably wouldn't make sense at the moment
        # because ad accounts are discovered from console and their lifecycle is controlled from there.

        return ad_account_data_dict
Example #23
def collect_adaccount_task(job_scope: JobScope, _: JobContext):
    logger.info(f'{job_scope} started')

    if not job_scope.tokens:
        good_token = PlatformTokenManager.from_job_scope(
            job_scope).get_best_token()
        if good_token is not None:
            job_scope.tokens = [good_token]

    collect_adaccount(job_scope)
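
The token-backfill preamble in collect_adaccount_task also appears verbatim in collect_organic_insights_task above and collect_page_task below. A hypothetical helper (not project code) that would factor it out:

def ensure_tokens(job_scope: JobScope) -> None:
    # Backfill tokens from the manager only when the scope arrived without any.
    if not job_scope.tokens:
        good_token = PlatformTokenManager.from_job_scope(job_scope).get_best_token()
        if good_token is not None:
            job_scope.tokens = [good_token]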
Example #24
    def test_runs_correctly(self):
        account_id = random.gen_string_id()
        job_scope = JobScope(
            ad_account_id=self.ad_account_id,
            entity_id=self.ad_account_id,
            tokens=['A_REAL_TOKEN'],
            report_time=datetime.utcnow(),
            report_type='entity',
            report_variant=Entity.AdAccount,
            sweep_id='1',
        )

        universal_id_should_be = generate_universal_id(
            ad_account_id=self.ad_account_id,
            report_type=ReportType.entity,
            entity_id=self.ad_account_id,
            entity_type=Entity.AdAccount,
        )

        account_data = AdAccount(fbid=account_id)
        # Did not find a better way to set this data on the inner AbstractCrudObject.
        timezone = 'Europe/Prague'
        account_data._data['timezone_name'] = timezone
        account_data._data['account_id'] = account_id

        with mock.patch.object(FB_ADACCOUNT_MODEL,
                               'api_get',
                               return_value=account_data), mock.patch.object(
                                   NormalStore, 'store') as store:
            collect_adaccount(job_scope)

        assert store.called, 'Data should be stored with the cold store module'
        store_args, store_keyword_args = store.call_args
        assert not store_keyword_args
        assert len(store_args) == 1, 'Store method should be called with just 1 parameter'

        data_actual = store_args[0]

        vendor_data_key = '__oprm'

        ad_account_dynamo = AdAccountEntity.get(DEFAULT_SCOPE, account_id)
        assert ad_account_dynamo.timezone == timezone
        assert ad_account_dynamo.ad_account_id == account_id

        assert (vendor_data_key in data_actual
                and type(data_actual[vendor_data_key])
                == dict), 'Special vendor key is present in the returned data'
        assert data_actual[vendor_data_key] == {
            'id': universal_id_should_be
        }, 'Vendor data is set with the right universal id'
    def test_resolve_job_scope_to_celery_task_page_post(self):
        real_claim = RealityClaim(entity_id='PP1',
                                  ad_account_id='P1',
                                  entity_type=Entity.PagePost)
        for job_generator in entity_expectation_generator_map[Entity.PagePost]:
            with self.subTest(job_generator=job_generator):
                exp_claim = next(job_generator(real_claim))

                job_scope = JobScope(parse_id(exp_claim.job_id))

                assert inventory.resolve_job_scope_to_celery_task(job_scope)
def iter_collect_entities_per_page(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for a page
    """
    token, entity_type, root_fb_entity = _extract_token_entity_type_parent_entity(
        job_scope, [Entity.PagePost, Entity.PageVideo], Entity.Page,
        'ad_account_id')

    entities = iter_native_entities_per_page(root_fb_entity, entity_type)

    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)

    token_manager = PlatformTokenManager.from_job_scope(job_scope)
    with ChunkDumpStore(
            job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store, ChunkDumpStore(
                job_scope,
                chunk_size=DEFAULT_CHUNK_SIZE,
                bucket_type=ColdStoreBucketType.RAW_BUCKET,
                custom_namespace=NAMESPACE_RAW,
            ) as raw_store:
        cnt = 0
        for entity in entities:
            entity_data = entity.export_all_data()

            entity_data = add_vendor_data(entity_data,
                                          id=generate_universal_id(
                                              entity_id=entity_data.get('id'),
                                              **record_id_base_data))
            entity_data['page_id'] = job_scope.ad_account_id

            if entity_type == Entity.PagePost:
                # store raw version of response (just to remain consistent)
                raw_store(entity_data)
                entity_data = _augment_page_post(entity_data)

            # Store the individual datum, use job context for the cold
            # storage thing to divine whatever it needs from the job context
            store(entity_data)

            # Signal to the system the new entity
            feedback_entity_task.delay(entity_data, entity_type)

            yield entity_data
            cnt += 1

            if cnt % 1000 == 0:
                # default paging size for entities per parent is typically
                # around 200, so every 1000 results means roughly 5 hits to FB
                token_manager.report_usage(token, 5)

    token_manager.report_usage(token)
def collect_page_task(job_scope: JobScope, job_context: JobContext) -> int:
    logger.info(f'{job_scope} started')

    if not job_scope.tokens:
        good_token = PlatformTokenManager.from_job_scope(
            job_scope).get_best_token()
        if good_token is not None:
            job_scope.tokens = [good_token]

    collect_page(job_scope, job_context)
    return 1  # we collect 1 page at a time
    def test_from_job_scope(self):

        key_gen = '{asset_scope}-{sweep_id}-sorted-token-queue'.format

        sweep_id = gen_string_id()
        entity_id = gen_string_id()
        scope_id = gen_string_id()

        # Scope-centered jobs must result in scope-centered key for token storage
        job_scope = JobScope(sweep_id=sweep_id,
                             entity_type=Entity.Scope,
                             entity_id=scope_id)
        token_manager = PlatformTokenManager.from_job_scope(job_scope)
        assert token_manager.queue_key == key_gen(asset_scope=scope_id,
                                                  sweep_id=sweep_id)

        # non-Scope-centered jobs must result in 'fb'-centered key for token storage
        job_scope = JobScope(sweep_id=sweep_id)
        token_manager = PlatformTokenManager.from_job_scope(job_scope)
        assert token_manager.queue_key == key_gen(
            asset_scope=JobScope.namespace, sweep_id=sweep_id)
    def store(self, datum):
        from oozer.common.report_job_status_task import report_job_status_task

        entity_id = datum.get(self.id_attribute_name) or datum.get('id')
        assert entity_id, 'This code must have an entity ID to build a unique insertion ID'
        normative_job_scope = JobScope(self.job_scope_base_data, entity_id=entity_id)
        # and store data under that per-entity, normative JobScope.
        self._store(datum, normative_job_scope, DEFAULT_CHUNK_NUMBER, self.bucket_type)
        # since we report for many entities in this code, we must also
        # communicate the status out per entity, at the normative level
        report_job_status_task.delay(JobStatus.Done, normative_job_scope)
Example #30
    def test_task_complains_about_bad_report_type(self):

        sync_expectations_job_scope = JobScope(
            sweep_id=random.gen_string_id(),
            ad_account_id=random.gen_string_id(),
            report_type=ReportType.lifetime,  # <----------- this is wrong
        )

        with self.assertRaises(AssertionError) as ex_catcher:
            sync_expectations_task.sync_expectations(
                sync_expectations_job_scope)

        assert 'Only sync_expectations report' in str(ex_catcher.exception)