def iter_collect_insights(cls, job_scope: JobScope):
    """
    Generic entry point for organic insights fetching tasks.

    Works out from the JobScope which kind of organic report is requested,
    builds the matching Facebook SDK root entity using the appropriate token
    and streams the collected data back to the caller.

    :param job_scope: The JobScope as we get it from the task itself
    :raises ValueError: when the job carries no platform tokens, or when the
        detected report entity kind is not supported
    """
    if not job_scope.tokens:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    token = job_scope.token
    token_manager = PlatformTokenManager.from_job_scope(job_scope)
    report_entity_kind = InsightsOrganic._detect_report_api_kind(job_scope)
    page_token_manager = PageTokenManager.from_job_scope(job_scope)

    # Video reports use the platform token; page and post reports need a page token
    if report_entity_kind == ReportEntityApiKind.Video:
        access_token = job_scope.token
    elif report_entity_kind in {
            ReportEntityApiKind.Page, ReportEntityApiKind.Post
    }:
        access_token = page_token_manager.get_best_token(
            job_scope.ad_account_id)
    else:
        raise ValueError(
            f'Unsupported report entity kind "{report_entity_kind}" to collect organic insights'
        )

    with PlatformApiContext(access_token) as fb_ctx:
        report_root_fb_entity = fb_ctx.to_fb_model(job_scope.entity_id,
                                                   job_scope.report_variant)

    if report_entity_kind == ReportEntityApiKind.Video:
        data_iter = cls.iter_video_insights(report_root_fb_entity)
    else:
        data_iter = cls.iter_page_entities_lifetime_insights(
            report_root_fb_entity, report_entity_kind)

    for insight_record in cls._iter_collect_organic_insights(
            data_iter, job_scope):
        yield insight_record
        # right now, we support fetching insights for only one entity at a time
        # so no reason to report usage per record here

    token_manager.report_usage(token)
def iter_collect_entities_per_page_post(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Stream entities of the requested type that belong to a single page post.

    Each yielded record is the exported entity data augmented with vendor
    data (universal record ID), ``page_id`` and ``page_post_id``. Every
    record is also handed to a chunked cold store before being yielded.

    :param job_scope: The JobScope as we get it from the task itself;
        ``report_variant`` names the entity type to collect and
        ``entity_id`` is the page post ID
    """
    entity_type = job_scope.report_variant

    page_token_manager = PageTokenManager.from_job_scope(job_scope)
    page_token = page_token_manager.get_best_token(job_scope.ad_account_id)
    with PlatformApiContext(page_token) as fb_ctx:
        root_fb_entity = fb_ctx.to_fb_model(job_scope.entity_id,
                                            Entity.PagePost)

    entities = iter_native_entities_per_page_post(root_fb_entity, entity_type)

    # Base for per-record universal IDs: the job's own data, retargeted at
    # the collected entity type. The job-level entity_id (the page post) is
    # dropped because each record supplies its own.
    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)
    del record_id_base_data['entity_id']

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        for native_entity in entities:
            exported = native_entity.export_all_data()
            record_id = generate_universal_id(entity_id=exported.get('id'),
                                              **record_id_base_data)
            entity_data = add_vendor_data(exported, id=record_id)
            entity_data['page_id'] = job_scope.ad_account_id
            entity_data['page_post_id'] = job_scope.entity_id

            # Store the individual datum, use job context for the cold
            # storage thing to divine whatever it needs from the job context
            store(entity_data)

            yield entity_data
def collect_page(job_scope: JobScope, _job_context: JobContext):
    """
    Fetch one Facebook page and write it to the store.

    :param job_scope: The JobScope as we get it from the task itself;
        ``entity_id`` is the ID of the page to fetch
    :param _job_context: not used by this function
    :raises ValueError: when the report variant is not ``Entity.Page`` or
        when the job carries no platform token
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.Page}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    # The token itself comes from whoever scheduled us; the manager is only
    # needed here to report how much we used that token.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        page_fetched = page.Page(
            fbid=job_scope.entity_id,
            api=fb_ctx.api).api_get(fields=get_default_fields(Page))

    report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched,
                                 job_scope)
    token_manager.report_usage(token, 2)

    record_id_data = job_scope.to_dict()
    record_id_data.update(
        entity_type=Entity.Page,
        entity_id=job_scope.entity_id,
        report_variant=None,
    )

    entity_data = add_vendor_data(
        page_fetched.export_all_data(),
        id=generate_universal_id(**record_id_data),
    )

    NormalStore(job_scope).store(entity_data)
def collect_pages_from_business(job_scope: JobScope,
                                _job_context: JobContext) -> int:
    """
    Collect the client and owned pages of every business visible to the token.

    :param job_scope: The JobScope as we get it from the task itself
    :param _job_context: not used by this function
    :return: number of page records written to the store
    :raises ValueError: when the report variant is not ``Entity.Page`` or
        when the job carries no platform token
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.Page}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    # The token itself comes from whoever scheduled us; the manager is only
    # needed here to report how much we used that token.
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        businesses = FacebookRequest(
            node_id="me",
            method="GET",
            endpoint="/businesses",
            api=fb_ctx.api,
            api_type='EDGE',
            target_class=Business,
        ).execute()

    report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched,
                                 job_scope)
    token_manager.report_usage(token)

    entity_type = Entity.Page
    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)

    stored_pages = 0
    for business in businesses:
        # Both edges are fully materialised before anything is stored
        client_pages = list(
            business.get_client_pages(fields=get_default_fields(Page)))
        owned_pages = list(
            business.get_owned_pages(fields=get_default_fields(Page)))

        for page_inst in client_pages + owned_pages:
            exported = page_inst.export_all_data()
            record_id_base_data.update(entity_id=exported.get('id'))
            entity_data = add_vendor_data(
                exported, id=generate_universal_id(**record_id_base_data))

            NormalStore(job_scope).store(entity_data)
            stored_pages += 1

    report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)
    return stored_pages
def collect_adaccount(job_scope: JobScope) -> Dict[str, Any]:
    """
    Fetch a single ad account, store it and return its data.

    :param JobScope job_scope: The JobScope as we get it from the task itself
    :return: the ad account data as exported from the SDK object
        (the stored copy additionally carries vendor data)
    :raises ValueError: when the report variant is not ``Entity.AdAccount``
        or when the job carries no platform token
    """
    if job_scope.report_variant != Entity.AdAccount:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.AdAccount}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    assert job_scope.ad_account_id == job_scope.entity_id, \
        'This is an ad account entity job, account_id should be equal to entity_id'

    # Used to report token usage by this job
    token_manager = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(job_scope.ad_account_id,
                                        Entity.AdAccount)
        # Read just the fields we need, then export the object to a dict
        default_fields = get_default_fields(ad_account.__class__)
        ad_account_data_dict = ad_account.api_get(
            fields=default_fields).export_all_data()

    token_manager.report_usage(token)

    # Work on a copy of the job_scope data to avoid mutating it
    record_id_data = dict(job_scope.to_dict())
    record_id_data['entity_type'] = Entity.AdAccount
    record_id_data['report_variant'] = None

    # Augment the data returned from the remote API with our vendor data
    augmented_ad_account_data = add_vendor_data(
        ad_account_data_dict,
        id=generate_universal_id(**record_id_data),
    )

    feedback_entity_task.delay(ad_account_data_dict, job_scope.report_variant)

    NormalStore(job_scope).store(augmented_ad_account_data)

    # TODO: feedback account? this probably wouldn't make sense at the moment
    # because ad accounts are discovered from console and their lifecycle is controlled from there.
    return ad_account_data_dict
def test_fetch_all_page_posts(self):
    """Smoke test: at least one page post is returned for the page."""
    with PlatformApiContext(TOKEN) as ctx:
        page = ctx.to_fb_model(PAGE, Entity.Page)
        page_posts = iter_native_entities_per_page(page, Entity.PagePost)

        # Pull at most one item; we only care that the stream is not empty
        fetched_any = any(True for _ in page_posts)

        assert fetched_any
def test_fetch_all_custom_audiences(self):
    """Smoke test: at least one custom audience is returned for the ad account."""
    with PlatformApiContext(TOKEN) as ctx:
        ad_account = ctx.to_fb_model(AD_ACCOUNT, Entity.AdAccount)
        audiences = iter_native_entities_per_adaccount(
            ad_account, Entity.CustomAudience)

        # Pull at most one item; we only care that the stream is not empty
        fetched_any = any(True for _ in audiences)

        assert fetched_any
def test_fetch_all_ad_videos(self):
    """Smoke test: the first ad video returned carries the ad account ID."""
    with PlatformApiContext(TOKEN) as ctx:
        ad_account = ctx.to_fb_model(AD_ACCOUNT, Entity.AdAccount)
        ad_videos = iter_native_entities_per_adaccount(ad_account,
                                                       Entity.AdVideo)

        inspected = 0
        for ad_video in ad_videos:
            # This tests if we're augmenting correctly
            assert ad_video['account_id'] == AD_ACCOUNT
            inspected += 1
            # One record is enough for a smoke test
            break

        assert inspected
def test_fetch_insights_adaccount_ad_lifetime(self):
    """Smoke test: ad-level lifetime insights for the ad account yield a truthy first record."""
    report_params = {'date_preset': 'lifetime', 'level': 'ad'}

    with PlatformApiContext(TOKEN) as context:
        entity = context.to_fb_model(AD_ACCOUNT, Entity.AdAccount)
        metrics = Insights.iter_ads_insights(entity, report_params)

        # Take only the first record (None when nothing came back)
        first_datum = next(iter(metrics), None)

        assert first_datum
def page_remote_view(cts, scope, id=None, token=None):
    """
    Print every page returned by the ``accounts`` edge of the token's user ("me").

    :param cts: not used by this function (presumably the task-runner
        context; confirm against the caller)
    :param scope: identifier passed to ``AssetScope.get``; only consulted
        when no ``token`` is supplied
    :param id: not used by this function; kept for interface compatibility
    :param token: optional token to use instead of the scope's first platform
        token. May be either a raw access-token string or an object exposing
        the access token as ``.token``
    """
    from oozer.common.facebook_api import PlatformApiContext
    from facebook_business.adobjects.user import User

    if not token:
        scope = AssetScope.get(scope)
        token = PlatformToken.get(list(scope.platform_token_ids)[0])

    # A caller-supplied token may be the raw access-token string, which has no
    # ``.token`` attribute; a looked-up PlatformToken carries the string in
    # ``.token``. Accept both.
    access_token = getattr(token, 'token', token)

    with PlatformApiContext(access_token) as fb_ctx:
        pages = User(fbid='me', api=fb_ctx.api).get_accounts()
        for page in pages:
            print(page)
def iter_collect_entities_per_page_graph(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Stream entities of the requested type for a page, fetched through the graph API.

    Each record is augmented with vendor data (universal record ID) and
    ``page_id``, written to the chunked cold store, announced through
    ``feedback_entity_task`` and then yielded. For promotable page posts the
    un-augmented record is additionally written to the raw bucket before
    ``_augment_page_post`` is applied.

    :param job_scope: The JobScope as we get it from the task itself;
        ``report_variant`` names the entity type to collect and
        ``ad_account_id`` holds the page ID
    """
    page_token_manager = PageTokenManager.from_job_scope(job_scope)
    page_token = page_token_manager.get_best_token(job_scope.ad_account_id)
    with PlatformApiContext(page_token) as fb_ctx:
        page_root_fb_entity = fb_ctx.to_fb_model(job_scope.ad_account_id,
                                                 Entity.Page)

    entity_type = job_scope.report_variant

    # page size reduced to avoid error:
    # "Please reduce the amount of data you're asking for, then retry your request"
    entities = iter_native_entities_per_page_graph(page_root_fb_entity,
                                                   entity_type,
                                                   page_size=30)

    record_id_base_data = job_scope.to_dict()
    record_id_base_data.update(entity_type=entity_type, report_variant=None)

    keeps_raw_copy = entity_type == Entity.PagePostPromotable

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        with ChunkDumpStore(
                job_scope,
                chunk_size=DEFAULT_CHUNK_SIZE,
                bucket_type=ColdStoreBucketType.RAW_BUCKET,
                custom_namespace=NAMESPACE_RAW,
        ) as raw_store:
            for native_entity in entities:
                exported = native_entity.export_all_data()
                record_id = generate_universal_id(
                    entity_id=exported.get('id'), **record_id_base_data)
                entity_data = add_vendor_data(exported, id=record_id)
                entity_data['page_id'] = job_scope.ad_account_id

                if keeps_raw_copy:
                    # store raw version of response (just to remain consistent)
                    raw_store(entity_data)
                    entity_data = _augment_page_post(entity_data)

                # Store the individual datum, use job context for the cold
                # storage thing to divine whatever it needs from the job context
                store(entity_data)

                # Signal to the system the new entity
                feedback_entity_task.delay(entity_data, entity_type)

                yield entity_data
def ad_account_remote_view(cts, scope, id, token=None):
    """
    Fetch the ``id`` and ``name`` fields of an ad account and print them.

    :param cts: not used by this function (presumably the task-runner
        context; confirm against the caller)
    :param scope: identifier passed to ``AssetScope.get``; only consulted
        when no ``token`` is supplied
    :param id: ID of the ad account to fetch
    :param token: optional token to use instead of the scope's first platform
        token. May be either a raw access-token string or an object exposing
        the access token as ``.token``
    """
    from oozer.common.facebook_api import PlatformApiContext
    from common.enums.entity import Entity

    if not token:
        scope = AssetScope.get(scope)
        token = PlatformToken.get(list(scope.platform_token_ids)[0])

    # A caller-supplied token may be the raw access-token string, which has no
    # ``.token`` attribute; a looked-up PlatformToken carries the string in
    # ``.token``. Accept both.
    access_token = getattr(token, 'token', token)

    with PlatformApiContext(access_token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(id, Entity.AdAccount)

        # Read just the fields we need
        ad_account_with_selected_fields = ad_account.api_get(
            fields=['id', 'name'])
        # Export the object to a dict
        ad_account_data_dict = ad_account_with_selected_fields.export_all_data()

        print(ad_account_data_dict)
def populate_from_scope_entity(cls, scope_entity: AssetScope, sweep_id: str):
    """
    Load page access tokens for a scope into a ``PageTokenManager``.

    Uses the first platform token of the scope to page through the
    ``/accounts`` edge of "me" and registers every returned page ID together
    with its access token. This is best-effort: any failure is logged (with
    traceback) and swallowed, so the caller is never interrupted.

    :param scope_entity: scope whose ``platform_tokens`` provide the token
        used for the request
    :param sweep_id: sweep identifier handed to the ``PageTokenManager``
    """
    asset_scope = JobScope.namespace
    tokens = list(scope_entity.platform_tokens)

    try:
        manager = PageTokenManager(asset_scope, sweep_id)
        with PlatformApiContext(tokens[0]) as fb_ctx:
            request = FacebookRequest(node_id='me',
                                      method='GET',
                                      endpoint='/accounts',
                                      api=fb_ctx.api,
                                      api_type='NODE')
            request.add_params({'limit': DEFAULT_PAGE_ACCESS_TOKEN_LIMIT})

            cnt = 0
            while True:
                # I assume that there's a better way to do paginate over this,
                # but I wasn't able to find the corresponding target class in SDK :/
                response_json = request.execute().json()
                for page in response_json['data']:
                    manager.add(page['id'], page['access_token'])
                    cnt += 1

                # A response may carry no "paging" block at all (e.g. when
                # there is nothing more to return); treat that as the last
                # page instead of failing the whole load with a KeyError.
                next_url = (response_json.get('paging') or {}).get('next')
                if not next_url:
                    break
                # Follow the pagination link by re-pointing the same request
                request._path = next_url

        logger.warning(
            f'Loaded {cnt} page tokens for scope "{scope_entity.scope}"')
    except Exception:
        # Deliberately broad: a failure here must not break the sweep.
        # Keep the traceback in the log instead of printing to stdout.
        logger.warning(
            'Fetching page tokens has failed so organic data jobs will not work in this sweep',
            exc_info=True,
        )
def _extract_token_entity_type_parent_entity(
        job_scope: JobScope, allowed_entity_types: List[str],
        parent_entity_type: str,
        parent_entity_id_key: str) -> Tuple[str, str, Any]:
    """
    Validate the job and build the parent entity to collect from.

    :param job_scope: The JobScope as we get it from the task itself
    :param allowed_entity_types: report variants this collector accepts
    :param parent_entity_type: entity type of the parent to instantiate
    :param parent_entity_id_key: key used to look up the parent's ID on
        ``job_scope``
    :return: token, entity_type and initialized parent entity from Facebook SDK
    :raises ValueError: when the report variant is not allowed or when the
        job carries no platform token
    """
    entity_type = job_scope.report_variant
    if entity_type not in allowed_entity_types:
        raise ValueError(
            f"Report level {entity_type} specified is not one of supported values: {allowed_entity_types}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    parent_entity_id = job_scope[parent_entity_id_key]
    with PlatformApiContext(token) as fb_ctx:
        root_fb_entity = fb_ctx.to_fb_model(parent_entity_id,
                                            parent_entity_type)

    return token, entity_type, root_fb_entity
def __init__(self, job_scope: JobScope, report_entity_api_kind: str):
    """
    Prepare everything needed to run one insights report job.

    Sets on the instance:

    - ``report_params``: parameters for the insights request
    - ``datum_handler``: cold-store writer used for the report data
    - ``report_root_fb_entity``: SDK entity the report is requested from
    - ``augment_with_vendor_data``: callable adding vendor data to a datum

    :param job_scope: The JobScope as we get it from the task itself
    :param report_entity_api_kind: API kind of the report; the ``level``
        request parameter is only set for ``ReportEntityApiKind.Ad``
    :raises ValueError: when the report type is not a metrics report type or
        has no mapped platform-side breakdown
    """
    if job_scope.report_type not in ReportType.ALL_METRICS:
        # Interpolate the collection instead of concatenating it to the
        # message: ``str + <collection>`` would raise TypeError and mask the
        # intended ValueError.
        raise ValueError(
            f"Report type {job_scope.report_type} specified is not one of supported values: "
            f"{ReportType.ALL_METRICS}")

    # cool. we are in the right place...

    self.report_params = {
        'fields': DEFAULT_REPORT_FIELDS,
        'action_attribution_windows': [
            # https://developers.facebook.com/docs/marketing-api/reference/adgroup/insights/
            # https://developers.facebook.com/docs/marketing-api/insights#sample
            # 'actions' and 'action_values' can contain values per different measurement window
            # In case of 'actions', default 'value' is always 1d_view PLUS 28d_click and cannot be removed.
            # In case of 'action_values', default 'value' is some weighted sum of
            # 1d_view AND 28d_click $ values, that may be smaller than raw 1d_view PLUS 28d_click $ values.
            # Many customers interpret their conversions / actions in different attribution windows.
            # The more windows we ask the data for, the less reliably it returns reports.
            # Be super conservative about asking for more / all.
            AdsInsights.ActionAttributionWindows.value_1d_view,
            AdsInsights.ActionAttributionWindows.value_7d_view,
            AdsInsights.ActionAttributionWindows.value_28d_view,
            AdsInsights.ActionAttributionWindows.value_1d_click,
            AdsInsights.ActionAttributionWindows.value_7d_click,
            AdsInsights.ActionAttributionWindows.value_28d_click,
        ],
    }

    # Next is (a) vs (b) - abstraction level determination
    is_per_parent_report = not job_scope.entity_id and job_scope.report_variant in Entity.ALL

    if is_per_parent_report:
        # The report is requested from the ad account on behalf of its children
        entity_id = job_scope.ad_account_id
        entity_type = Entity.AdAccount
        entity_type_reporting = job_scope.report_variant
        if report_entity_api_kind == ReportEntityApiKind.Ad:
            self.report_params.update(
                level=ENUM_LEVEL_MAP[job_scope.report_variant])
    else:  # direct, per-entity report
        entity_id = job_scope.entity_id
        entity_type = job_scope.entity_type
        entity_type_reporting = job_scope.report_variant
        if report_entity_api_kind == ReportEntityApiKind.Ad:
            self.report_params.update(
                level=ENUM_LEVEL_MAP[entity_type_reporting])

    # Now, (c), (d), (e), (f), (g) choices
    # we already checked above that this is one of metrics report types
    # So we know it will be either lifetime or day-with-breakdown type
    # TODO: add fields listings appropriate for each type
    if job_scope.report_type == ReportType.lifetime:
        self.report_params.update(
            date_preset=AdsInsights.DatePreset.lifetime)
    elif job_scope.report_type in REPORT_TYPE_FB_BREAKDOWN_ENUM:  # some day-with-breakdown type
        self.report_params.update(
            time_increment=1,  # group by calendar day (in AA tz)
            time_range={
                'since':
                _convert_and_validate_date_format(job_scope.range_start),
                # No value for job_scope.range_end means 1-day report for range_start day
                'until':
                _convert_and_validate_date_format(job_scope.range_end
                                                  or job_scope.range_start),
            },
            breakdowns=REPORT_TYPE_FB_BREAKDOWN_ENUM[job_scope.report_type],
        )
    else:
        raise ValueError(
            f"Report type {job_scope.report_type} does not have a mapped Platform-side breakdown value."
        )

    # Whole-report bundle write: when the report is for one specific entity ID
    # AND is one of the per-day-with-breakdown types, the whole
    # per-Entity-ID-per-day report is spooled and written at once.
    # This results in a single write to the cold store under
    # single normative ID.
    # The short-circuiting ``and`` chain guarantees 'time_range' is only read
    # for day-breakdown report types.
    is_whole_report_bundle_write = (
        # must be one of those per-day reports
        job_scope.report_type in ReportType.ALL_DAY_BREAKDOWNS and

        # except for DMA-based data, as these can be very long,
        # - 10s of thousands of records per day
        job_scope.report_type not in [
            ReportType.day_dma, ReportType.day_region, ReportType.day_country
        ] and

        # and the report is per single entity ID
        job_scope.entity_id and not job_scope.report_variant and

        # and report is for a single calendar day
        # ReportType.ALL_DAY_BREAKDOWNS means there must be a non-Null
        # value in time_range, but we check anyway
        self.report_params['time_range']['since'] and
        self.report_params['time_range']['since'] ==
        self.report_params['time_range']['until'])

    # Reference only: a fuller range of cold store handler choices that is
    # currently disabled. It distinguished "naturally normative" per-datum
    # writes (each datum stored under a normative ID derived from the datum),
    # whole-report bundle writes, chunked writes (for reports too large to
    # spool in memory, written under the effective ID with a
    # monotonically-increasing chunk suffix) and plain writes.
    # When / if there is value to it, steal from this commented out code.
    #
    # if is_naturally_normative_child:
    #     self.datum_handler = batch_store.NaturallyNormativeChildStore(job_scope)
    # elif is_whole_report_bundle_write:
    #     self.datum_handler = batch_store.MemorySpoolStore(job_scope)
    # elif is_chunk_write:
    #     self.datum_handler = batch_store.ChunkDumpStore(job_scope)
    # else:
    #     self.datum_handler = batch_store.NormalStore(job_scope)

    # let's be more aggressive about doing bundled writes to cold store
    # and (temporarily) get away from "normative" and single-datum writes
    # There are two ways we can get closer to bundled writes:
    #  - spool entire report in memory and flush out at the end, when we know we can tolerate that
    #  - spool large chunks of report in memory and flush them periodically if we fear large sizes in report.
    if is_whole_report_bundle_write:
        self.datum_handler = batch_store.MemorySpoolStore(job_scope)
    else:
        self.datum_handler = batch_store.ChunkDumpStore(job_scope,
                                                        chunk_size=200)

    with PlatformApiContext(job_scope.token) as fb_ctx:
        self.report_root_fb_entity = fb_ctx.to_fb_model(
            entity_id, entity_type)

    # here we configure code that will augment each datum with record ID
    vendor_data_extractor = report_type_vendor_data_extractor_map[
        job_scope.report_type]
    if job_scope.report_type == ReportType.day_hour:
        # hour report type's ID extractor function needs extra leading arg - timezone
        vendor_data_extractor = functools.partial(
            vendor_data_extractor, job_scope.ad_account_timezone_name)

    aux_data = {
        'ad_account_id': job_scope.ad_account_id,
        'entity_type': entity_type_reporting,
        'report_type': job_scope.report_type,
    }

    self.augment_with_vendor_data = lambda data: add_vendor_data(
        data, **vendor_data_extractor(data, **aux_data))