def test_populate_from_scope_record(self):
    """Tokens loaded from a scope record are served back through matching job scopes."""
    scope_id = gen_string_id()
    sweep_id = gen_string_id()
    console_token, platform_token = 'console token', 'platform token'

    asset_scope = AssetScope()
    asset_scope.scope = scope_id
    asset_scope.scope_api_token = console_token
    asset_scope.set_cache(platform_tokens={platform_token})

    PlatformTokenManager.populate_from_scope_entity(asset_scope, sweep_id)

    # Now make sure we can see those tokens.
    # Scope-centered jobs must result in a scope-centered key for token storage.
    scope_centered = JobScope(
        sweep_id=sweep_id,
        entity_type=Entity.Scope,
        entity_id=scope_id,
    )
    scope_manager = PlatformTokenManager.from_job_scope(scope_centered)
    assert console_token == scope_manager.get_best_token()

    # Uses the .namespace default value as the 2nd value in the redis key,
    # so there is no need to set it here.
    platform_centered = JobScope(sweep_id=sweep_id)
    platform_manager = PlatformTokenManager.from_job_scope(platform_centered)
    assert platform_token == platform_manager.get_best_token()
def _report_failure(job_scope: JobScope, start_time: float, exc: Exception, **kwargs: Any):
    """
    Report task stats when the task fails.

    :param job_scope: Scope of the failed job; running time and datapoint count are written onto it
    :param start_time: Timestamp (as produced by ``time.time()``) of when the task started
    :param exc: The exception the task failed with
    :param kwargs: May carry ``partial_datapoint_count`` gathered before the failure
    """
    elapsed = time.time() - start_time
    job_scope.running_time = math.ceil(elapsed)
    job_scope.datapoint_count = kwargs.get('partial_datapoint_count')

    ErrorInspector.inspect(exc, job_scope.ad_account_id, {'job_scope': job_scope})

    # Only Facebook request errors get a detailed classification.
    if not isinstance(exc, FacebookRequestError):
        failure_status = ExternalPlatformJobStatus.GenericError
        failure_bucket = FailureBucket.Other
    else:
        inspector = FacebookApiErrorInspector(exc)
        failure_status, failure_bucket = inspector.get_status_and_bucket()

    # No entity type means we don't know what table to target
    is_inaccessible = failure_bucket == FailureBucket.InaccessibleObject
    if is_inaccessible and job_scope.entity_type is not None:
        set_inaccessible_entity_task.delay(job_scope)

    report_job_status_task.delay(failure_status, job_scope)

    token_manager = PlatformTokenManager.from_job_scope(job_scope)
    token_manager.report_usage_per_failure_bucket(job_scope.token, failure_bucket)

    SweepStatusTracker(job_scope.sweep_id).report_status(failure_bucket)
    _send_measurement_task_runtime(job_scope, failure_bucket)
def _report_success(job_scope: JobScope, start_time: float, ret_value: Any):
    """
    Report task stats when the task succeeds.

    :param job_scope: Scope of the finished job; running time (and possibly datapoint count) is written onto it
    :param start_time: Timestamp (as produced by ``time.time()``) of when the task started
    :param ret_value: Value returned by the task; used as datapoint count only when it is an int
    """
    elapsed = time.time() - start_time
    job_scope.running_time = math.ceil(elapsed)

    if isinstance(ret_value, int):
        job_scope.datapoint_count = ret_value

    report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)

    tracker = SweepStatusTracker(job_scope.sweep_id)
    tracker.report_status(FailureBucket.Success)

    _send_measurement_task_runtime(job_scope, FailureBucket.Success)
def sync_expectations(job_scope: JobScope):
    """
    Dump expected job ids, per ad account, into chunked storage.

    :param job_scope: Must carry the ``sync_expectations`` report type. When it
        names an ad account only that account is processed; otherwise all
        accounts with expectations in the sweep are iterated.
    :raises AssertionError: when the report type is not ``sync_expectations``
    """
    assert (
        job_scope.report_type == ReportType.sync_expectations
    ), 'Only sync_expectations report type is processed by this task'

    # A per-AA task has no need to iterate over all accounts.
    account_ids = (
        [job_scope.ad_account_id]
        if job_scope.ad_account_id
        else expecations_store.iter_expectations_ad_accounts(sweep_id=job_scope.sweep_id)
    )

    for account_id in account_ids:
        account_job_scope = JobScope(
            job_scope.to_dict(),
            ad_account_id=account_id,
            entity_type=Entity.AdAccount,
            entity_id=account_id,
        )

        with ChunkDumpStore(account_job_scope, chunk_size=200) as store:
            expected_job_ids = expecations_store.iter_expectations_per_ad_account(
                account_id, account_job_scope.sweep_id
            )

            for job_id in expected_job_ids:
                parts = parse_id_parts(job_id)

                # default is platform namespace and we communicate out only those
                if parts.namespace != JobScope.namespace:
                    continue

                record = {
                    'job_id': job_id,
                    # 'status':'expected',
                    'account_id': parts.ad_account_id,
                    'entity_type': parts.entity_type,
                    'entity_id': parts.entity_id,
                    'report_type': parts.report_type,
                    'report_variant': parts.report_variant,
                    'range_start': _to_date_string_if_set(parts.range_start),
                    'range_end': _to_date_string_if_set(parts.range_end),
                    'platform_namespace': parts.namespace,
                }
                store(record)
def iter_collect_entities_per_page_post(job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collect entities of the type named by ``job_scope.report_variant`` for a page post.

    Every collected record is stamped with vendor data, ``page_id`` and
    ``page_post_id``, handed to the chunked store, and then yielded.
    """
    entity_type = job_scope.report_variant

    token_manager = PageTokenManager.from_job_scope(job_scope)
    page_token = token_manager.get_best_token(job_scope.ad_account_id)
    with PlatformApiContext(page_token) as fb_ctx:
        page_post = fb_ctx.to_fb_model(job_scope.entity_id, Entity.PagePost)

    native_entities = iter_native_entities_per_page_post(page_post, entity_type)

    # Base for the per-record universal id: the job's own entity_id is removed
    # because each record supplies its own.
    id_fields = job_scope.to_dict()
    id_fields.update(entity_type=entity_type, report_variant=None)
    del id_fields['entity_id']

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        for native_entity in native_entities:
            raw_data = native_entity.export_all_data()
            universal_id = generate_universal_id(entity_id=raw_data.get('id'), **id_fields)
            record = add_vendor_data(raw_data, id=universal_id)

            record['page_id'] = job_scope.ad_account_id
            record['page_post_id'] = job_scope.entity_id

            # Store the individual datum, use job context for the cold
            # storage thing to divine whatever it needs from the job context
            store(record)

            yield record
def test_set_inaccessible_entity_task():
    """The task must update the mapped model with ``is_accessible`` set to False."""
    model_instance = Mock()
    model_factory = Mock(return_value=model_instance)
    patched_map = {Entity.PagePost: model_factory}

    with patch.dict('oozer.set_inaccessible_entity_task.ENTITY_TYPE_MODEL_MAP', patched_map):
        page_post_scope = JobScope(report_variant=Entity.PagePost)
        set_inaccessible_entity_task(page_post_scope)

    expected_actions = [model_factory.is_accessible.set(False)]
    model_instance.update.assert_called_once_with(actions=expected_actions)
def test_fails_without_a_token(self):
    """A job without a usable token must notify bugsnag and report a generic error."""
    job_scope = JobScope(
        tokens=[None],
        report_time=datetime.utcnow(),
        report_type='entity',
        report_variant=Entity.Page,
        sweep_id='1',
    )

    with SweepRunningFlag(job_scope.sweep_id):
        with mock.patch.object(report_job_status_task, 'delay') as status_task:
            with mock.patch('common.error_inspector.BugSnagContextData.notify') as bugsnag_notify:
                with mock.patch('common.error_inspector.API_KEY', 'something'):
                    collect_pages_from_business_task(job_scope, JobContext())

    assert bugsnag_notify.called
    notify_args, notify_kwargs = bugsnag_notify.call_args

    reported_message = str(notify_args[0])
    assert (
        reported_message == 'Job fb||||entity|P cannot proceed. No platform tokens provided.'
    ), 'Notify bugsnag correctly using correct Exception'

    expected_kwargs = {
        'severity': SEVERITY_ERROR,
        'job_scope': job_scope,
        'error_type': ErrorTypesReport.UNKNOWN,
    }
    assert expected_kwargs == notify_kwargs, 'Notify bugsnag correctly'

    assert status_task.called
    status_task_args, _ = status_task.call_args
    expected_status_args = (JobStatus.GenericError, job_scope)
    assert expected_status_args == status_task_args, 'Must report status correctly on failure'
def test_fails_with_wrong_report_variant(self):
    """A job with a non-Page report variant must notify bugsnag with a ValueError."""
    job_scope = JobScope(
        tokens=['blah'],
        report_time=datetime.utcnow(),
        report_type='entity',
        report_variant=None,
        sweep_id='1',
    )

    with SweepRunningFlag(job_scope.sweep_id):
        with mock.patch.object(report_job_status_task, 'delay') as status_task:
            with mock.patch('common.error_inspector.BugSnagContextData.notify') as bugsnag_notify:
                with mock.patch('common.error_inspector.API_KEY', 'something'):
                    collect_pages_from_business_task(job_scope, JobContext())

    assert bugsnag_notify.called
    notify_args, notify_kwargs = bugsnag_notify.call_args

    reported_exc = notify_args[0]
    assert (
        isinstance(reported_exc, ValueError)
        and str(reported_exc) == 'Report level None specified is not: P'
    ), 'Notify bugsnag correctly using correct Exception'

    expected_kwargs = {
        'severity': SEVERITY_ERROR,
        'job_scope': job_scope,
        'error_type': ErrorTypesReport.UNKNOWN,
    }
    assert expected_kwargs == notify_kwargs, 'Notify bugsnag correctly'

    assert status_task.called
    status_task_args, _ = status_task.call_args
    expected_status_args = (JobStatus.GenericError, job_scope)
    assert expected_status_args == status_task_args, 'Must report status correctly on failure'
def test_lifetime_campaigns(self):
    """Pull four lifetime campaign insights and check what was handed to cold storage."""
    job_scope = JobScope(
        ad_account_id=AD_ACCOUNT,
        report_type=ReportType.lifetime,
        report_variant=Entity.Campaign,
        sweep_id='sweep',
        tokens=[TOKEN],
    )

    stored_calls = []  # type: List[Tuple[Dict, JobScope, int]]

    def _capture_store(data, job_scope, chunk_marker=0):
        # Stand-in for cold_storage.store that only records its arguments.
        stored_calls.append((data, job_scope, chunk_marker))

    with mock.patch.object(cold_storage, 'store', _capture_store):
        insights = Insights.iter_collect_insights(job_scope, None)

        # Consume at most four data points.
        cnt = 0
        for cnt, _ in enumerate(insights, start=1):
            if cnt == 4:
                break

    assert cnt == 4

    for stored_datum, stored_job_scope, _ in stored_calls:
        assert stored_datum['campaign_id'] == stored_job_scope.entity_id
def collect_page(job_scope: JobScope, _job_context: JobContext):
    """
    Collect a single facebook page and store it.

    :param job_scope: Must have the Page report variant and carry a token
    :param _job_context: Unused
    :raises ValueError: when the report variant is not Page or no token is available
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(f"Report level {job_scope.report_variant} specified is not: {Entity.Page}")

    token = job_scope.token
    if not token:
        raise ValueError(f"Job {job_scope.job_id} cannot proceed. No platform tokens provided.")

    # We don't use it for getting a token. Something else that calls us does.
    # However, we use it to report usages of the token we got.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        requested_fields = get_default_fields(Page)
        fetched_page = page.Page(fbid=job_scope.entity_id, api=fb_ctx.api).api_get(fields=requested_fields)

        report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched, job_scope)
        token_manager.report_usage(token, 2)

        # Fields the universal record id is built from.
        id_fields = job_scope.to_dict()
        id_fields.update(
            entity_type=Entity.Page,
            entity_id=job_scope.entity_id,
            report_variant=None,
        )

        universal_id = generate_universal_id(**id_fields)
        page_record = add_vendor_data(fetched_page.export_all_data(), id=universal_id)

        NormalStore(job_scope).store(page_record)
def test_key_s3_date_snapped_with_chunk_id(self):
    """
    Check that the storage key is built from the job's range start date and
    includes the chunk marker.
    """
    job_scope = JobScope(
        ad_account_id=gen_string_id(),
        report_type=ReportType.day_platform,
        report_variant=Entity.Ad,
        range_start=date(2000, 1, 2),
    )
    chunk_marker = 7
    dt_should_be = datetime(2000, 1, 2, 0, 0, 0)

    with mock.patch.object(uuid, 'uuid4', return_value='UUID-HERE'):
        storage_key = cold_storage.store({'data': 'yeah!'}, job_scope, chunk_marker=chunk_marker)

    prefix = xxhash.xxh64(job_scope.ad_account_id).hexdigest()[:6]
    date_path = dt_should_be.strftime("%Y/%m/%d")
    timestamp = dt_should_be.strftime("%Y-%m-%dT%H:%M:%SZ")

    expected_key = (
        f'fb/{prefix}-{job_scope.ad_account_id}/'
        f'{job_scope.report_type}/'
        f'{date_path}/'
        f'{timestamp}-{job_scope.job_id}-{chunk_marker}-UUID-HERE.json'
    )

    assert storage_key == expected_key
def test_key_s3_date_less(self):
    """
    Check that a job without a date range gets a storage key built from the
    current time.
    """
    import common.tztools

    job_scope = JobScope(
        ad_account_id=gen_string_id(),
        report_type=ReportType.entity,
        report_variant=Entity.Campaign,
    )
    now_dt = datetime(2000, 1, 2, 3, 4, 5)

    with mock.patch.object(common.tztools, 'now', return_value=now_dt) as now_mocked:
        with mock.patch.object(uuid, 'uuid4', return_value='UUID-HERE'):
            storage_key = cold_storage.store({'data': 'yeah!'}, job_scope)

    assert now_mocked.called

    prefix = xxhash.xxh64(job_scope.ad_account_id).hexdigest()[:6]
    date_path = now_dt.strftime("%Y/%m/%d")
    timestamp = now_dt.strftime("%Y-%m-%dT%H:%M:%SZ")

    expected_key = (
        f'fb/{prefix}-{job_scope.ad_account_id}/'
        f'{job_scope.report_type}/'
        f'{date_path}/'
        f'{timestamp}-{job_scope.job_id}-UUID-HERE.json'
    )

    assert storage_key == expected_key
def test_task_error_is_logged_into_job_report(self):
    """An exception raised inside the task must end up in the job status report."""
    from oozer.common.report_job_status_task import report_job_status_task

    class MyException(Exception):
        pass

    failing_job_scope = JobScope(
        sweep_id=random.gen_string_id(),
        ad_account_id=random.gen_string_id(),
        report_type=ReportType.sync_expectations,
    )

    with mock.patch.object(report_job_status_task, 'delay') as job_report:
        with mock.patch.object(
            sync_expectations_task, 'sync_expectations', side_effect=MyException('nope!')
        ):
            with self.assertRaises(MyException):
                sync_expectations_task.sync_expectations_task.delay(failing_job_scope, None)

    assert job_report.called

    positional, keyword = job_report.call_args
    assert not keyword

    status_code, reported_job_scope = positional
    assert status_code < 0  # some sort of *Failure* code
    assert reported_job_scope == failing_job_scope
def test_resolve_job_scope_to_celery_task_ad_account(self, mock_lifetime_iter, mock_breakdowns_iter):
    """Every expectation generated for an ad account must resolve to a celery task."""
    account_claim = RealityClaim(
        entity_id='A1',
        ad_account_id='A1',
        entity_type=Entity.AdAccount,
        timezone='America/Los_Angeles',
    )

    ad_claim = RealityClaim(
        entity_id='AD1',
        ad_account_id='A1',
        entity_type=Entity.Ad,
        range_start=datetime(2019, 1, 20, 12, 0),
        timezone='America/Los_Angeles',
    )
    mock_lifetime_iter.return_value = []
    mock_breakdowns_iter.return_value = [ad_claim]

    generators = entity_expectation_generator_map[Entity.AdAccount]
    for job_generator in generators:
        for exp_claim in job_generator(account_claim):
            with self.subTest(job_generator=job_generator, exp_claim=exp_claim):
                parsed_job_scope = JobScope(parse_id(exp_claim.job_id))
                assert inventory.resolve_job_scope_to_celery_task(parsed_job_scope)
def test_task_does_not_blow_up(self):
    """Run sync_expectations end to end with only the expectations iterator patched."""
    # this is almost same thing as the next test
    # where we check that call signature is right,
    # but when call signature changes and our tests don't,
    # it becomes irrelevant if we have tests - they check for wrong thing
    # So, here we actually call "store" and in next test
    # we intercept the call and check payload.
    # Don't remove me. Not duplicate.
    expectation_job_id = generate_id(
        ad_account_id=random.gen_string_id(),
        report_type=ReportType.day_hour,
        report_variant=Entity.Ad,
        range_start='2000-01-01',
    )
    expected_job_ids = [expectation_job_id]

    task_job_scope = JobScope(
        sweep_id=random.gen_string_id(),
        ad_account_id=random.gen_string_id(),
        report_type=ReportType.sync_expectations,
    )

    patched_iter = mock.patch.object(
        expecations_store,
        'iter_expectations_per_ad_account',
        return_value=expected_job_ids,
    )
    with patched_iter:
        sync_expectations_task.sync_expectations(task_job_scope)
def collect_organic_insights_task(job_scope: JobScope, _: JobContext):
    """
    Collect organic insights for the given job scope and count the data points.

    When the job scope carries no tokens, the best token known to the
    PlatformTokenManager (if any) is attached to it first.

    :param job_scope: Scope describing what to collect
    :param _: Job context; unused
    :return: Number of data points collected
    :raises CollectionError: wraps any exception raised during collection,
        together with the number of data points processed before it
    """
    logger.info(f'{job_scope} started')

    if not job_scope.tokens:
        good_token = PlatformTokenManager.from_job_scope(job_scope).get_best_token()
        if good_token is not None:
            job_scope.tokens = [good_token]

    data_iter = InsightsOrganic.iter_collect_insights(job_scope)

    cnt = 0
    try:
        # A distinct loop variable is used so the unused ``_`` parameter is not rebound.
        for _datum in data_iter:
            cnt += 1
            if cnt % 100 == 0:
                logger.info(f'{job_scope} processed {cnt} data points so far')
    except Exception as e:
        # The wrapping error hides the original one from callers, so inspect it
        # here and chain it explicitly to keep the original traceback attached.
        ErrorInspector.inspect(e, job_scope.ad_account_id, {'job_scope': job_scope})
        raise CollectionError(e, cnt) from e

    logger.info(f'{job_scope} complete a total of {cnt} data points')
    return cnt
def iter_tasks(
    self
) -> Generator[Tuple[CeleryTask, JobScope, JobContext, int], None, None]:
    """
    Read persisted jobs and pass-through context objects for inspection

    Yields one ``(celery_task, job_scope, job_context, score)`` tuple per job
    read from the queue. Jobs whose scope cannot be resolved to a celery task
    are reported through ErrorInspector and skipped.
    """
    with self.queue.JobsReader() as jobs_iter:
        for job_id, job_scope_additional_data, score in jobs_iter:
            job_id_parts = parse_id(job_id)
            # Combine the data stored with the job and the parts parsed from
            # its id, then stamp on this sweep's id and the job's score.
            job_scope = JobScope(job_scope_additional_data,
                                 job_id_parts,
                                 sweep_id=self.sweep_id,
                                 score=score)
            try:
                celery_task = resolve_job_scope_to_celery_task(job_scope)
                # TODO: Decide what to do with this.
                # Was designed for massive hash collection and such,
                # but cannot have too much data in there because we pickle it and put in on Redis
                job_context = JobContext()
                yield celery_task, job_scope, job_context, score
                # NOTE: this line runs only once the consumer asks for the
                # next item, i.e. after the yielded job has been handed over.
                logger.info(
                    f"#{self.sweep_id}: Scheduling job_id {job_id} with score {score}."
                )
            except InvalidJobScopeException as e:
                # Report the unresolvable job and carry on with the next one.
                ErrorInspector.inspect(e, job_scope.ad_account_id, {
                    'sweep_id': job_scope.sweep_id,
                    'job_id': job_scope.job_id
                })
def test__job_scope_to_metadata():
    """Check the metadata dict produced for a job scope, ignoring volatile keys."""
    scope = JobScope(
        job_id='job identifier',
        namespace='fb',
        ad_account_id='007',
        report_type='report type',
        entity_type=Entity.Campaign,
        report_variant=Entity.Ad,
        # Built explicitly rather than via datetime.fromtimestamp(1), which
        # converts to local time and made the expected job id below hold
        # only on machines running in UTC.
        range_start=datetime(1970, 1, 1, 0, 0, 1),
        score=10,
    )

    result = _job_scope_to_metadata(scope)

    # These two values are dropped before comparing against the fixed expectation.
    result.pop('extracted_at')
    result.pop('build_id')

    assert {
        'job_id': 'fb|007|C||report+type|A|1970-01-01T00%3A00%3A01',
        'ad_account_id': '007',
        'report_type': 'report type',
        'entity_type': 'A',
        'platform_api_version': 'v6.0',
        'platform': 'fb',
        'score': '10',
    } == result
def collect_pages_from_business(job_scope: JobScope, _job_context: JobContext) -> int:
    """
    Collect the client and owned facebook pages of every business visible to the token.

    :param job_scope: Must have the Page report variant and carry a token
    :param _job_context: Unused
    :return: Number of pages stored
    :raises ValueError: when the report variant is not Page or no token is available
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(f"Report level {job_scope.report_variant} specified is not: {Entity.Page}")

    token = job_scope.token
    if not token:
        raise ValueError(f"Job {job_scope.job_id} cannot proceed. No platform tokens provided.")

    # We don't use it for getting a token. Something else that calls us does.
    # However, we use it to report usages of the token we got.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        businesses_request = FacebookRequest(
            node_id="me",
            method="GET",
            endpoint="/businesses",
            api=fb_ctx.api,
            api_type='EDGE',
            target_class=Business,
        )
        businesses = businesses_request.execute()

        report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched, job_scope)
        token_manager.report_usage(token)

        entity_type = Entity.Page
        id_fields = job_scope.to_dict()
        id_fields.update(entity_type=entity_type, report_variant=None)

        stored_count = 0
        for business in businesses:
            # Client pages are fetched in full before owned pages.
            client_pages = list(business.get_client_pages(fields=get_default_fields(Page)))
            owned_pages = list(business.get_owned_pages(fields=get_default_fields(Page)))

            for page_inst in client_pages + owned_pages:
                page_data = page_inst.export_all_data()
                id_fields.update(entity_id=page_data.get('id'))
                page_record = add_vendor_data(page_data, id=generate_universal_id(**id_fields))

                NormalStore(job_scope).store(page_record)
                stored_count += 1

        report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)
        return stored_count
def test_task_is_called_with_right_data(self):
    """Intercept the store call and check the exact payload built for an expected job."""
    range_start = now()
    range_start_should_be = range_start.strftime('%Y-%m-%d')

    expected_job_id = generate_id(
        ad_account_id=random.gen_string_id(),
        report_type=ReportType.day_hour,
        report_variant=Entity.Ad,
        range_start=range_start,
    )
    job_ids = [expected_job_id]
    expected_parts = parse_id_parts(expected_job_id)

    task_job_scope = JobScope(
        sweep_id=random.gen_string_id(),
        ad_account_id=random.gen_string_id(),
        report_type=ReportType.sync_expectations,
    )

    with mock.patch.object(
        expecations_store, 'iter_expectations_per_ad_account', return_value=job_ids
    ) as jid_iter:
        with mock.patch.object(cold_storage.ChunkDumpStore, 'store') as store:
            sync_expectations_task.sync_expectations(task_job_scope)

    # The expectations iterator must be asked for this account and sweep, positionally.
    assert jid_iter.called
    iter_args, iter_kwargs = jid_iter.call_args
    assert not iter_kwargs
    assert iter_args == (task_job_scope.ad_account_id, task_job_scope.sweep_id)

    # The store must receive exactly one positional argument: the payload.
    assert store.called
    store_args, store_kwargs = store.call_args
    assert not store_kwargs
    assert len(store_args) == 1

    payload = store_args[0]
    assert payload == {
        'job_id': expected_job_id,
        # missing "ad_" is intentional.
        # this matches this attr name as sent by FB
        # and used by us elsewhere in the company
        'account_id': expected_parts.ad_account_id,
        'entity_type': expected_parts.entity_type,
        'entity_id': expected_parts.entity_id,
        'report_type': expected_parts.report_type,
        'report_variant': expected_parts.report_variant,
        'range_start': range_start_should_be,  # checking manually to ensure it's properly stringified
        'range_end': None,
        'platform_namespace': JobScope.namespace,  # default platform value
    }
def setUp(self):
    """Create a campaign entity job scope with random identifiers for each test."""
    sweep_id = gen_string_id()
    ad_account_id = gen_string_id()
    entity_id = gen_string_id()

    self.job_scope = JobScope(
        sweep_id=sweep_id,
        ad_account_id=ad_account_id,
        entity_id=entity_id,
        entity_type=Entity.Campaign,
        report_type=ReportType.entity,
    )
def collect_adaccount(job_scope: JobScope) -> Dict[str, Any]:
    """
    Fetch one ad account from the platform, store it, and return the exported data.

    :param JobScope job_scope: The JobScope as we get it from the task itself
    :return: The dict exported from the fetched ad account object
    :raises ValueError: when the report variant is not AdAccount or no token is available
    :raises AssertionError: when ad_account_id and entity_id differ
    """
    if job_scope.report_variant != Entity.AdAccount:
        raise ValueError(f"Report level {job_scope.report_variant} specified is not: {Entity.AdAccount}")

    token = job_scope.token
    if not token:
        raise ValueError(f"Job {job_scope.job_id} cannot proceed. No platform tokens provided.")

    assert (
        job_scope.ad_account_id == job_scope.entity_id
    ), f'This is an ad account entity job, account_id should be equal to entity_id'

    # Used to report token usage by this job
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(job_scope.ad_account_id, Entity.AdAccount)

        # Read just the fields we need, then export the object to a dict
        fields = get_default_fields(ad_account.__class__)
        fetched_account = ad_account.api_get(fields=fields)
        ad_account_data_dict = fetched_account.export_all_data()

        token_manager.report_usage(token)

        # Duplicate the job_scope data to avoid mutating it
        universal_id_fields = dict(
            job_scope.to_dict(),
            entity_type=Entity.AdAccount,
            report_variant=None,
        )

        # Augment the data returned from the remote API with our vendor data
        universal_id = generate_universal_id(**universal_id_fields)
        augmented_ad_account_data = add_vendor_data(ad_account_data_dict, id=universal_id)

        feedback_entity_task.delay(ad_account_data_dict, job_scope.report_variant)

        NormalStore(job_scope).store(augmented_ad_account_data)

        # TODO: feedback account? this probably wouldn't make sense at the moment
        # because ad accounts are discovered from console and their lifecycle is controlled from there.
        return ad_account_data_dict
def collect_adaccount_task(job_scope: JobScope, _: JobContext):
    """
    Task entry point: make sure the job scope has a token, then collect the ad account.

    :param job_scope: Scope describing the ad account to collect
    :param _: Job context; unused
    """
    logger.info(f'{job_scope} started')

    has_tokens = bool(job_scope.tokens)
    if not has_tokens:
        # Fall back to the best token the token manager knows about, if any.
        token_manager = PlatformTokenManager.from_job_scope(job_scope)
        best_token = token_manager.get_best_token()
        if best_token is not None:
            job_scope.tokens = [best_token]

    collect_adaccount(job_scope)
def test_runs_correctly(self):
    """
    Collect an ad account with the API call and the store patched out, then
    check the stored payload and the ad account entity record.
    """
    account_id = random.gen_string_id()

    job_scope = JobScope(
        ad_account_id=self.ad_account_id,
        entity_id=self.ad_account_id,
        tokens=['A_REAL_TOKEN'],
        report_time=datetime.utcnow(),
        report_type='entity',
        report_variant=Entity.AdAccount,
        sweep_id='1',
    )

    universal_id_should_be = generate_universal_id(
        ad_account_id=self.ad_account_id,
        report_type=ReportType.entity,
        entity_id=self.ad_account_id,
        entity_type=Entity.AdAccount,
    )

    account_data = AdAccount(fbid=account_id)
    # Did not find a better way how to set this data on the inner AbstractCrudObject.
    timezone = 'Europe/Prague'
    account_data._data['timezone_name'] = timezone
    account_data._data['account_id'] = account_id

    with mock.patch.object(FB_ADACCOUNT_MODEL, 'api_get', return_value=account_data), mock.patch.object(
        NormalStore, 'store'
    ) as store:
        collect_adaccount(job_scope)

    # NOTE: this used to be `assert store.called_with(account_data)`. That is
    # not a Mock assertion: it auto-creates a truthy child mock, so the check
    # could never fail (and raises AttributeError on Python 3.12+). The real
    # payload is verified through `call_args` below.
    assert store.called, 'Data should be stored with the cold store module'

    store_args, store_keyword_args = store.call_args
    assert not store_keyword_args
    assert len(store_args) == 1, 'Store method should be called with just 1 parameter'

    data_actual = store_args[0]

    vendor_data_key = '__oprm'

    ad_account_dynamo = AdAccountEntity.get(DEFAULT_SCOPE, account_id)
    assert ad_account_dynamo.timezone == timezone
    assert ad_account_dynamo.ad_account_id == account_id

    assert vendor_data_key in data_actual and isinstance(
        data_actual[vendor_data_key], dict
    ), 'Special vendor key is present in the returned data'
    assert data_actual[vendor_data_key] == {
        'id': universal_id_should_be
    }, 'Vendor data is set with the right universal id'
def test_resolve_job_scope_to_celery_task_page_post(self):
    """The first expectation of every page post generator must resolve to a celery task."""
    page_post_claim = RealityClaim(
        entity_id='PP1',
        ad_account_id='P1',
        entity_type=Entity.PagePost,
    )

    generators = entity_expectation_generator_map[Entity.PagePost]
    for job_generator in generators:
        with self.subTest(job_generator=job_generator):
            first_expectation = next(job_generator(page_post_claim))
            parsed_job_scope = JobScope(parse_id(first_expectation.job_id))
            assert inventory.resolve_job_scope_to_celery_task(parsed_job_scope)
def iter_collect_entities_per_page(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collects an arbitrary entity for a page

    Each collected record gets vendor data and a ``page_id``, is written to
    the chunked store, announced through ``feedback_entity_task`` and then
    yielded. Page posts are additionally written to the raw store before
    being augmented.
    """
    # Accepted entity types are PagePost and PageVideo; the parent is a Page
    # identified by the job scope's 'ad_account_id' attribute.
    token, entity_type, root_fb_entity = _extract_token_entity_type_parent_entity(
        job_scope, [Entity.PagePost, Entity.PageVideo], Entity.Page,
        'ad_account_id')
    entities = iter_native_entities_per_page(root_fb_entity, entity_type)
    # Base fields for each record's universal id; the record's own id is
    # supplied per entity below.
    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)
    token_manager = PlatformTokenManager.from_job_scope(job_scope)
    with ChunkDumpStore(
            job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store, ChunkDumpStore(
                job_scope,
                chunk_size=DEFAULT_CHUNK_SIZE,
                bucket_type=ColdStoreBucketType.RAW_BUCKET,
                custom_namespace=NAMESPACE_RAW,
            ) as raw_store:
        cnt = 0
        for entity in entities:
            entity_data = entity.export_all_data()
            entity_data = add_vendor_data(entity_data,
                                          id=generate_universal_id(
                                              entity_id=entity_data.get('id'),
                                              **record_id_base_data))
            entity_data['page_id'] = job_scope.ad_account_id
            if entity_type == Entity.PagePost:
                # store raw version of response (just to remain consistent)
                raw_store(entity_data)
                entity_data = _augment_page_post(entity_data)
            # Store the individual datum, use job context for the cold
            # storage thing to divine whatever it needs from the job context
            store(entity_data)
            # Signal to the system the new entity
            feedback_entity_task.delay(entity_data, entity_type)
            yield entity_data
            cnt += 1
            if cnt % 1000 == 0:
                # default paging size for entities per parent
                # is typically around 200. So, each 1000 results
                # means about 5 hits to FB
                token_manager.report_usage(token, 5)
    # One more usage report once iteration is over.
    # NOTE(review): nesting reconstructed from a flattened source; confirm
    # this call sits after the loop rather than inside it.
    token_manager.report_usage(token)
def collect_page_task(job_scope: JobScope, job_context: JobContext) -> int:
    """
    Task entry point: make sure the job scope has a token, then collect one page.

    :param job_scope: Scope describing the page to collect
    :param job_context: Passed through to ``collect_page``
    :return: Always 1
    """
    logger.info(f'{job_scope} started')

    has_tokens = bool(job_scope.tokens)
    if not has_tokens:
        # Fall back to the best token the token manager knows about, if any.
        token_manager = PlatformTokenManager.from_job_scope(job_scope)
        best_token = token_manager.get_best_token()
        if best_token is not None:
            job_scope.tokens = [best_token]

    collect_page(job_scope, job_context)

    # we collect 1 page at a time
    return 1
def test_from_job_scope(self):
    """The token queue key must depend on whether the job is Scope-centered."""

    def expected_queue_key(asset_scope, sweep_id):
        return '{asset_scope}-{sweep_id}-sorted-token-queue'.format(
            asset_scope=asset_scope, sweep_id=sweep_id
        )

    sweep_id = gen_string_id()
    entity_id = gen_string_id()
    scope_id = gen_string_id()

    # Scope-centered jobs must result in scope-centered key for token storage
    scope_centered = JobScope(
        sweep_id=sweep_id,
        entity_type=Entity.Scope,
        entity_id=scope_id,
    )
    scope_manager = PlatformTokenManager.from_job_scope(scope_centered)
    assert scope_manager.queue_key == expected_queue_key(asset_scope=scope_id, sweep_id=sweep_id)

    # non-Scope-centered jobs must result in 'fb'-centered key for token storage
    platform_centered = JobScope(sweep_id=sweep_id)
    platform_manager = PlatformTokenManager.from_job_scope(platform_centered)
    assert platform_manager.queue_key == expected_queue_key(
        asset_scope=JobScope.namespace, sweep_id=sweep_id
    )
def store(self, datum):
    """
    Store one datum under its own per-entity job scope and report that scope as done.

    :param datum: Mapping holding the entity id under ``self.id_attribute_name`` or ``'id'``
    :raises AssertionError: when no entity id can be found in the datum
    """
    from oozer.common.report_job_status_task import report_job_status_task

    # Prefer the configured id attribute; fall back to the plain 'id' key.
    entity_id = datum.get(self.id_attribute_name)
    if not entity_id:
        entity_id = datum.get('id')
    assert entity_id, "This code must have an entity ID for building of unique insertion ID"

    per_entity_job_scope = JobScope(self.job_scope_base_data, entity_id=entity_id)

    # and store data under that per-entity, normative JobScope.
    self._store(datum, per_entity_job_scope, DEFAULT_CHUNK_NUMBER, self.bucket_type)

    # since we report for many entities in this code,
    # must also communicate out the status inside of the for-loop
    # at the normative level.
    report_job_status_task.delay(JobStatus.Done, per_entity_job_scope)
def test_task_complains_about_bad_report_type(self):
    """sync_expectations must refuse a job scope with any other report type."""
    wrong_type_job_scope = JobScope(
        sweep_id=random.gen_string_id(),
        ad_account_id=random.gen_string_id(),
        report_type=ReportType.lifetime,  # <----------- this is wrong
    )

    with self.assertRaises(AssertionError) as ex_catcher:
        sync_expectations_task.sync_expectations(wrong_type_job_scope)

    error_text = str(ex_catcher.exception)
    assert 'Only sync_expectations report' in error_text