コード例 #1
0
    def iter_collect_insights(cls, job_scope: JobScope):
        """
        Generic entry point of the organic insights fetcher task.

        Celery tasks fetching organic metrics are expected to call this
        method. It reads the JobScope, detects which kind of entity the
        report is for, builds the matching Facebook SDK object and yields
        every datum produced by ``_iter_collect_organic_insights``.

        :param job_scope: The JobScope as we get it from the task itself
        :raises ValueError: if the job scope carries no platform tokens, or
            the detected report entity kind is not Video, Page or Post
        """
        if not job_scope.tokens:
            raise ValueError(
                f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
            )

        token = job_scope.token
        token_manager = PlatformTokenManager.from_job_scope(job_scope)
        report_entity_kind = InsightsOrganic._detect_report_api_kind(job_scope)
        page_token_manager = PageTokenManager.from_job_scope(job_scope)

        # Video reports use the job's own token, Page / Post reports need
        # the best page token known for the page.
        wants_video = report_entity_kind == ReportEntityApiKind.Video
        if wants_video:
            api_token = job_scope.token
        elif report_entity_kind in {
                ReportEntityApiKind.Page, ReportEntityApiKind.Post
        }:
            api_token = page_token_manager.get_best_token(
                job_scope.ad_account_id)
        else:
            raise ValueError(
                f'Unsupported report entity kind "{report_entity_kind}" to collect organic insights'
            )

        with PlatformApiContext(api_token) as fb_ctx:
            report_root_fb_entity = fb_ctx.to_fb_model(
                job_scope.entity_id, job_scope.report_variant)

        if wants_video:
            data_iter = cls.iter_video_insights(report_root_fb_entity)
        else:
            data_iter = cls.iter_page_entities_lifetime_insights(
                report_root_fb_entity, report_entity_kind)

        for insight in cls._iter_collect_organic_insights(
                data_iter, job_scope):
            yield insight

        # Insights are fetched for only one entity at a time, so token usage
        # is reported a single time, once all data has been yielded.
        token_manager.report_usage(token)
コード例 #2
0
def iter_collect_entities_per_page_post(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collect entities of the requested type that hang off a single page post.

    The entity type comes from ``job_scope.report_variant`` and the page post
    from ``job_scope.entity_id``. Every exported entity gets vendor data plus
    ``page_id`` / ``page_post_id`` added, is written to the chunked store and
    is then yielded to the caller.
    """
    entity_type = job_scope.report_variant

    best_page_token = PageTokenManager.from_job_scope(
        job_scope).get_best_token(job_scope.ad_account_id)
    with PlatformApiContext(best_page_token) as fb_ctx:
        page_post = fb_ctx.to_fb_model(job_scope.entity_id, Entity.PagePost)

    native_entities = iter_native_entities_per_page_post(
        page_post, entity_type)

    # Base arguments for the universal id. The job's own entity_id (the page
    # post) is dropped because each record supplies its own id below.
    id_base = job_scope.to_dict()
    id_base['entity_type'] = entity_type
    id_base['report_variant'] = None
    id_base.pop('entity_id')

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        for native_entity in native_entities:
            exported = native_entity.export_all_data()
            universal_id = generate_universal_id(
                entity_id=exported.get('id'), **id_base)
            record = add_vendor_data(exported, id=universal_id)
            record['page_id'] = job_scope.ad_account_id
            record['page_post_id'] = job_scope.entity_id

            # Persist the record first; the store works out what it needs
            # from the job scope it was created with.
            store(record)

            yield record
コード例 #3
0
def collect_page(job_scope: JobScope, _job_context: JobContext):
    """
    Fetch one Facebook page and write it to the normal store.

    :param job_scope: job definition; ``report_variant`` must be
        ``Entity.Page`` and ``entity_id`` identifies the page to fetch
    :param _job_context: not used by this function
    :raises ValueError: on a wrong report variant or a missing token
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.Page}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    # The token itself is handed to us by the caller; the manager is needed
    # only to report how much we used it.
    usage_reporter = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        fetched_page = page.Page(
            fbid=job_scope.entity_id,
            api=fb_ctx.api).api_get(fields=get_default_fields(Page))
        report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched,
                                     job_scope)
        usage_reporter.report_usage(token, 2)

        # Arguments for the universal record id, derived from the job scope.
        id_args = job_scope.to_dict()
        id_args['entity_type'] = Entity.Page
        id_args['entity_id'] = job_scope.entity_id
        id_args['report_variant'] = None

        exported = fetched_page.export_all_data()
        record = add_vendor_data(exported,
                                 id=generate_universal_id(**id_args))
        NormalStore(job_scope).store(record)
コード例 #4
0
def collect_pages_from_business(job_scope: JobScope,
                                _job_context: JobContext) -> int:
    """
    Store every client and owned page of the businesses visible to the token.

    :param job_scope: job definition; ``report_variant`` must be
        ``Entity.Page``
    :param _job_context: not used by this function
    :return: number of page records written to the store
    :raises ValueError: on a wrong report variant or a missing token
    """
    if job_scope.report_variant != Entity.Page:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.Page}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    # The token is supplied by the caller; the manager is used purely to
    # report usage of that token.
    usage_reporter = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        businesses = FacebookRequest(node_id="me",
                                     method="GET",
                                     endpoint="/businesses",
                                     api=fb_ctx.api,
                                     api_type='EDGE',
                                     target_class=Business).execute()

    report_job_status_task.delay(ExternalPlatformJobStatus.DataFetched,
                                 job_scope)
    usage_reporter.report_usage(token)

    # Shared universal id arguments; entity_id is overwritten per page.
    id_args = job_scope.to_dict()
    id_args['entity_type'] = Entity.Page
    id_args['report_variant'] = None

    stored = 0
    for business in businesses:
        client_pages = list(
            business.get_client_pages(fields=get_default_fields(Page)))
        owned_pages = list(
            business.get_owned_pages(fields=get_default_fields(Page)))

        for fb_page in client_pages + owned_pages:
            exported = fb_page.export_all_data()
            id_args['entity_id'] = exported.get('id')
            record = add_vendor_data(exported,
                                     id=generate_universal_id(**id_args))

            NormalStore(job_scope).store(record)
            stored += 1

    report_job_status_task.delay(ExternalPlatformJobStatus.Done, job_scope)
    return stored
コード例 #5
0
def collect_adaccount(job_scope: JobScope) -> Dict[str, Any]:
    """
    Fetch a single ad account, store it and return its exported data.

    :param JobScope job_scope: The JobScope as we get it from the task itself;
        ``report_variant`` must be ``Entity.AdAccount`` and ``ad_account_id``
        must equal ``entity_id``
    :return: the dict exported from the fetched ad account object
    :raises ValueError: on a wrong report variant or a missing token
    """
    if job_scope.report_variant != Entity.AdAccount:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not: {Entity.AdAccount}"
        )

    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    assert (
        job_scope.ad_account_id == job_scope.entity_id
    ), f'This is an ad account entity job, account_id should be equal to entity_id'

    # Only needed to report the usage of the token by this job.
    usage_reporter = PlatformTokenManager.from_job_scope(job_scope)

    with PlatformApiContext(token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(job_scope.ad_account_id,
                                        Entity.AdAccount)

        # Ask the API for the default field set of this model only, then
        # export the returned object to a plain dict.
        default_fields = get_default_fields(ad_account.__class__)
        ad_account_data_dict = ad_account.api_get(
            fields=default_fields).export_all_data()

        usage_reporter.report_usage(token)

        # Built as a fresh dict so the data returned by job_scope.to_dict()
        # is not mutated.
        universal_id_args = dict(
            job_scope.to_dict(),
            entity_type=Entity.AdAccount,
            report_variant=None,
        )

        # Augment the data returned from the remote API with our vendor data.
        augmented_ad_account_data = add_vendor_data(
            ad_account_data_dict,
            id=generate_universal_id(**universal_id_args),
        )
        feedback_entity_task.delay(ad_account_data_dict,
                                   job_scope.report_variant)
        NormalStore(job_scope).store(augmented_ad_account_data)

        # TODO: feedback account? this probably wouldn't make sense at the moment
        # because ad accounts are discovered from console and their lifecycle is controlled from there.

        return ad_account_data_dict
コード例 #6
0
    def test_fetch_all_page_posts(self):
        """Smoke test: the page post iterator yields at least one entity."""
        with PlatformApiContext(TOKEN) as ctx:
            page = ctx.to_fb_model(PAGE, Entity.Page)
            page_posts = iter_native_entities_per_page(page, Entity.PagePost)

            # any() stops after the first item, so only one entity is pulled.
            got_one = any(True for _ in page_posts)

            assert got_one
コード例 #7
0
    def test_fetch_all_custom_audiences(self):
        """Smoke test: the custom audience iterator yields at least one entity."""
        with PlatformApiContext(TOKEN) as ctx:
            ad_account = ctx.to_fb_model(AD_ACCOUNT, Entity.AdAccount)
            audiences = iter_native_entities_per_adaccount(
                ad_account, Entity.CustomAudience)

            # any() stops after the first item, so only one entity is pulled.
            got_one = any(True for _ in audiences)

            assert got_one
コード例 #8
0
    def test_fetch_all_ad_videos(self):
        """Smoke test: the first ad video is yielded and carries the account id."""
        with PlatformApiContext(TOKEN) as ctx:
            ad_account = ctx.to_fb_model(AD_ACCOUNT, Entity.AdAccount)

            videos = iter_native_entities_per_adaccount(
                ad_account, Entity.AdVideo)

            nothing = object()
            first_video = next(iter(videos), nothing)
            if first_video is not nothing:
                # This tests if we're augmenting correctly
                assert first_video['account_id'] == AD_ACCOUNT

            assert first_video is not nothing
コード例 #9
0
    def test_fetch_insights_adaccount_ad_lifetime(self):
        """Smoke test: lifetime ad-level insights yield a non-empty first datum."""
        with PlatformApiContext(TOKEN) as context:
            ad_account = context.to_fb_model(AD_ACCOUNT, Entity.AdAccount)

            report_params = {'date_preset': 'lifetime', 'level': 'ad'}
            metrics = Insights.iter_ads_insights(ad_account, report_params)

            # Only the first datum is pulled; None stands in for "no data".
            datum = next(iter(metrics), None)
            assert datum
コード例 #10
0
def page_remote_view(cts, scope, id=None, token=None):
    """
    Print every account (page) returned for the ``me`` user of a token.

    When ``token`` is not given, the first platform token id of the asset
    scope named by ``scope`` is looked up and used. ``cts`` and ``id`` are not
    read by this function.
    """
    from oozer.common.facebook_api import PlatformApiContext, get_default_fields
    from facebook_business.adobjects.user import User

    platform_token = token
    if not platform_token:
        asset_scope = AssetScope.get(scope)
        first_token_id = list(asset_scope.platform_token_ids)[0]
        platform_token = PlatformToken.get(first_token_id)

    with PlatformApiContext(platform_token.token) as fb_ctx:
        me = User(fbid='me', api=fb_ctx.api)

        for remote_page in me.get_accounts():
            print(remote_page)
コード例 #11
0
def iter_collect_entities_per_page_graph(
        job_scope: JobScope) -> Generator[Dict[str, Any], None, None]:
    """
    Collect entities of the requested type for a page through the graph API.

    The page is ``job_scope.ad_account_id`` and the entity type is
    ``job_scope.report_variant``. Each exported entity gets vendor data and a
    ``page_id``, is written to the chunked store, announced through
    ``feedback_entity_task`` and yielded. Promotable page posts are also
    written unmodified to the raw store before being augmented.
    """
    best_page_token = PageTokenManager.from_job_scope(
        job_scope).get_best_token(job_scope.ad_account_id)
    with PlatformApiContext(best_page_token) as fb_ctx:
        page_entity = fb_ctx.to_fb_model(job_scope.ad_account_id,
                                         Entity.Page)

    entity_type = job_scope.report_variant
    # page size reduced to avoid error:
    #  "Please reduce the amount of data you're asking for, then retry your request"
    native_entities = iter_native_entities_per_page_graph(page_entity,
                                                          entity_type,
                                                          page_size=30)

    # Shared universal id arguments; each record adds its own entity_id.
    id_base = job_scope.to_dict()
    id_base['entity_type'] = entity_type
    id_base['report_variant'] = None

    keeps_raw_copy = entity_type == Entity.PagePostPromotable

    with ChunkDumpStore(job_scope, chunk_size=DEFAULT_CHUNK_SIZE) as store:
        with ChunkDumpStore(
                job_scope,
                chunk_size=DEFAULT_CHUNK_SIZE,
                bucket_type=ColdStoreBucketType.RAW_BUCKET,
                custom_namespace=NAMESPACE_RAW,
        ) as raw_store:
            for native_entity in native_entities:
                exported = native_entity.export_all_data()
                universal_id = generate_universal_id(
                    entity_id=exported.get('id'), **id_base)
                record = add_vendor_data(exported, id=universal_id)
                record['page_id'] = job_scope.ad_account_id

                if keeps_raw_copy:
                    # store raw version of response (just to remain consistent)
                    raw_store(record)
                    record = _augment_page_post(record)

                # Persist the record; the store works out what it needs
                # from the job scope it was created with.
                store(record)

                # Signal to the system the new entity
                feedback_entity_task.delay(record, entity_type)
                yield record
コード例 #12
0
def ad_account_remote_view(cts, scope, id, token=None):
    """
    Fetch the ``id`` and ``name`` of a remote ad account and print them.

    When ``token`` is not given, the first platform token id of the asset
    scope named by ``scope`` is looked up and used. ``cts`` is not read by
    this function.

    :param id: ad account id passed to ``fb_ctx.to_fb_model``
    :param token: optional object exposing the raw token as ``.token``
    """
    from oozer.common.facebook_api import PlatformApiContext
    from common.enums.entity import Entity

    if not token:
        scope = AssetScope.get(scope)
        token = PlatformToken.get(list(scope.platform_token_ids)[0])

    with PlatformApiContext(token.token) as fb_ctx:
        ad_account = fb_ctx.to_fb_model(id, Entity.AdAccount)
        # Only id and name are requested. The default field list used to be
        # computed here as well but was never passed to the API call, so
        # that dead computation has been dropped.
        ad_account_with_selected_fields = ad_account.api_get(
            fields=['id', 'name'])
        # Export the object to a dict
        ad_account_data_dict = ad_account_with_selected_fields.export_all_data()
        print(ad_account_data_dict)
コード例 #13
0
    def populate_from_scope_entity(cls, scope_entity: AssetScope,
                                   sweep_id: str):
        """
        Load page access tokens for a scope into a PageTokenManager.

        Uses the first platform token of ``scope_entity`` to request
        ``/me/accounts`` page by page and registers every returned page id /
        access token pair with a ``PageTokenManager`` created for
        ``JobScope.namespace`` and ``sweep_id``.

        This is best-effort: any failure is logged as a warning (with the
        traceback) and swallowed, so the caller is never interrupted.

        :param scope_entity: asset scope whose platform tokens are used
        :param sweep_id: sweep the page tokens are registered for
        """
        asset_scope = JobScope.namespace
        tokens = list(scope_entity.platform_tokens)

        try:
            manager = PageTokenManager(asset_scope, sweep_id)
            with PlatformApiContext(tokens[0]) as fb_ctx:
                request = FacebookRequest(node_id='me',
                                          method='GET',
                                          endpoint='/accounts',
                                          api=fb_ctx.api,
                                          api_type='NODE')
                request.add_params({'limit': DEFAULT_PAGE_ACCESS_TOKEN_LIMIT})
                cnt = 0
                while True:
                    # I assume that there's a better way to do paginate over this,
                    # but I wasn't able to find the corresponding target class in SDK :/
                    response_json = request.execute().json()
                    for page in response_json['data']:
                        manager.add(page['id'], page['access_token'])
                        cnt += 1

                    # A response may carry no 'paging' block at all (e.g. an
                    # empty result); treat that the same as "no next page"
                    # instead of failing with a KeyError.
                    next_url = (response_json.get('paging') or {}).get('next')
                    if not next_url:
                        break
                    request._path = next_url

                logger.warning(
                    f'Loaded {cnt} page tokens for scope "{scope_entity.scope}"'
                )
        except Exception:
            # Deliberately broad: page tokens are optional for the sweep.
            # exc_info keeps the actual cause and traceback in the log.
            logger.warning(
                'Fetching page tokens has failed so organic data jobs will not work in this sweep',
                exc_info=True,
            )
コード例 #14
0
def _extract_token_entity_type_parent_entity(
        job_scope: JobScope, allowed_entity_types: List[str],
        parent_entity_type: str,
        parent_entity_id_key: str) -> Tuple[str, str, Any]:
    """
    Validate the job scope and build the parent Facebook SDK entity.

    :param job_scope: job definition supplying report variant, token and
        the parent entity id (looked up as ``job_scope[parent_entity_id_key]``)
    :param allowed_entity_types: report variants the caller supports
    :param parent_entity_type: entity type of the parent object to build
    :param parent_entity_id_key: job scope key holding the parent entity id
    :return: tuple of (token, entity type, initialized parent SDK entity)
    :raises ValueError: on an unsupported report variant or a missing token
    """
    variant_supported = job_scope.report_variant in allowed_entity_types
    if not variant_supported:
        raise ValueError(
            f"Report level {job_scope.report_variant} specified is not one of supported values: {allowed_entity_types}"
        )

    entity_type = job_scope.report_variant
    token = job_scope.token
    if not token:
        raise ValueError(
            f"Job {job_scope.job_id} cannot proceed. No platform tokens provided."
        )

    parent_entity_id = job_scope[parent_entity_id_key]
    with PlatformApiContext(token) as fb_ctx:
        parent_entity = fb_ctx.to_fb_model(parent_entity_id,
                                           parent_entity_type)

    return token, entity_type, parent_entity
コード例 #15
0
    def __init__(self, job_scope: JobScope, report_entity_api_kind: str):
        """
        Prepare everything needed to run one insights report for a job.

        Sets the following attributes:

        - ``report_params``: parameters for the insights API call
        - ``datum_handler``: store used to persist the returned data
        - ``report_root_fb_entity``: SDK entity the report is requested on
        - ``augment_with_vendor_data``: callable adding vendor data to a datum

        :param job_scope: job definition the report is derived from
        :param report_entity_api_kind: a ``ReportEntityApiKind`` value; the
            ``level`` report parameter is set only for the ``Ad`` kind
        :raises ValueError: if the report type is not a metrics report type,
            or has no mapped platform-side breakdown value
        """
        if job_scope.report_type not in ReportType.ALL_METRICS:
            # The supported values are interpolated rather than concatenated:
            # "str + collection" would raise TypeError and hide this error.
            raise ValueError(
                f"Report type {job_scope.report_type} specified is not one of supported values: "
                f"{ReportType.ALL_METRICS}")
        # cool. we are in the right place...

        self.report_params = {
            'fields':
            DEFAULT_REPORT_FIELDS,
            'action_attribution_windows': [
                # https://developers.facebook.com/docs/marketing-api/reference/adgroup/insights/
                # https://developers.facebook.com/docs/marketing-api/insights#sample
                # 'actions' and 'action_values' can contain values per different measurement window
                # In case of 'actions', default 'value' is always 1d_view PLUS 28d_click and cannot be removed.
                # In case of 'action_values', default 'value' is some weighted sum of
                #  1d_view AND 28d_click $ values, that may be smaller than raw 1d_view PLUS 28d_click $ values.
                # Many customers interpret their conversions / actions in different attribution windows.
                # The more windows we ask the data for, the less reliably it returns reports.
                # Be super conservative about asking for more / all.
                AdsInsights.ActionAttributionWindows.value_1d_view,
                AdsInsights.ActionAttributionWindows.value_7d_view,
                AdsInsights.ActionAttributionWindows.value_28d_view,
                AdsInsights.ActionAttributionWindows.value_1d_click,
                AdsInsights.ActionAttributionWindows.value_7d_click,
                AdsInsights.ActionAttributionWindows.value_28d_click,
            ],
        }

        # Next is (a) vs (b) - abstraction level determination
        is_per_parent_report = not job_scope.entity_id and job_scope.report_variant in Entity.ALL

        if is_per_parent_report:
            # report is requested on the ad account, for all children of
            # the reporting entity type
            entity_id = job_scope.ad_account_id
            entity_type = Entity.AdAccount
            entity_type_reporting = job_scope.report_variant
            if report_entity_api_kind == ReportEntityApiKind.Ad:
                self.report_params.update(
                    level=ENUM_LEVEL_MAP[job_scope.report_variant])
        else:
            # direct, per-entity report
            entity_id = job_scope.entity_id
            entity_type = job_scope.entity_type
            entity_type_reporting = job_scope.report_variant
            if report_entity_api_kind == ReportEntityApiKind.Ad:
                self.report_params.update(
                    level=ENUM_LEVEL_MAP[entity_type_reporting])

        # Now, (c), (d), (e), (f), (g) choices
        # we already checked above that this is one of metrics report types
        # So we know it will be either lifetime or day-with-breakdown type
        # TODO: add fields listings appropriate for each type
        if job_scope.report_type == ReportType.lifetime:
            self.report_params.update(
                date_preset=AdsInsights.DatePreset.lifetime)
        elif job_scope.report_type in REPORT_TYPE_FB_BREAKDOWN_ENUM:  # some day-with-breakdown type
            self.report_params.update(
                time_increment=1,  # group by calendar day (in AA tz)
                time_range={
                    'since':
                    _convert_and_validate_date_format(job_scope.range_start),
                    # No value for job_scope.range_end means 1-day report for range_start day
                    'until':
                    _convert_and_validate_date_format(
                        job_scope.range_end or job_scope.range_start),
                },
                breakdowns=REPORT_TYPE_FB_BREAKDOWN_ENUM[
                    job_scope.report_type],
            )
        else:
            raise ValueError(
                f"Report type {job_scope.report_type} does not have a mapped Platform-side breakdown value."
            )

        # Indicates that datum returned in a per-parent report is by itself
        # naturally mapped to some single normative job,
        # meaning each element can be stored separately
        # but only under normative ID computed on the fly
        # from the datum.
        # This must be accompanied by a transform fn that
        # derives a normative ID from data.

        # special case.
        # when report type is per-specific-single-entity-ID
        # AND one of per-day-with-breakdown
        # per-Entity-ID-per-day bundle with 24 records before saving it.
        # This results in a single write to the cold store under
        # single normative ID.
        # NOTE: the 'time_range' lookups below are reached only when every
        # preceding condition holds (short-circuit evaluation).
        is_whole_report_bundle_write = (
            # must be one of those per-day reports
            job_scope.report_type in ReportType.ALL_DAY_BREAKDOWNS and
            # except for DMA-based data, as these can be very long,
            # - 10s of thousands of records per day
            job_scope.report_type not in [
                ReportType.day_dma, ReportType.day_region,
                ReportType.day_country
            ] and
            # and the report is per single entity ID
            job_scope.entity_id and not job_scope.report_variant and
            # and report is for a single calendar day
            # ReportType.ALL_DAY_BREAKDOWNS means there must be a non-Null
            # value in time_range, but we check anyway
            self.report_params['time_range']['since']
            and self.report_params['time_range']['since']
            == self.report_params['time_range']['until'])

        # a more complex variant of whole_report_bundle_write
        # where, while we canNOT spool entire report into memory to
        # write it as one bundle, we cannot really write each
        # individual result out either, as there will be a very large
        # number of them and we have to write in some sort of batching mode,
        # but cannot cleanly group the bundles into per-normative-ID bundles,
        # and instead will write under effective ID, but with a suffix
        # indicating the monotonically-increasing chunk number.

        # Disabled but kept for reference to compare to shorter version immediately below
        # These represent good range of choices for cold store handlers.
        # When / if there is value to it, steal from this commented out code.
        # if is_naturally_normative_child:
        #     self.datum_handler = batch_store.NaturallyNormativeChildStore(job_scope)
        # elif is_whole_report_bundle_write:
        #     self.datum_handler = batch_store.MemorySpoolStore(job_scope)
        # elif is_chunk_write:
        #     self.datum_handler = batch_store.ChunkDumpStore(job_scope)
        # else:
        #     self.datum_handler = batch_store.NormalStore(job_scope)

        # let's be more aggressive about doing bundled writes to cold store
        # and (temporarily) get away from "normative" and single-datum writes
        # There are two ways we can get closer to bundled writes:
        #  - spool entire report in memory and flush out at the end, when we know we can tolerate that
        #  - spool large chunks of report in memory and flush them periodically if we fear large sizes in report.
        if is_whole_report_bundle_write:
            self.datum_handler = batch_store.MemorySpoolStore(job_scope)
        else:
            self.datum_handler = batch_store.ChunkDumpStore(job_scope,
                                                            chunk_size=200)

        with PlatformApiContext(job_scope.token) as fb_ctx:
            self.report_root_fb_entity = fb_ctx.to_fb_model(
                entity_id, entity_type)

        # here we configure code that will augment each datum with record ID
        vendor_data_extractor = report_type_vendor_data_extractor_map[
            job_scope.report_type]
        if job_scope.report_type == ReportType.day_hour:
            # hour report type's ID extractor function needs extra leading arg - timezone
            vendor_data_extractor = functools.partial(
                vendor_data_extractor, job_scope.ad_account_timezone_name)

        aux_data = {
            'ad_account_id': job_scope.ad_account_id,
            'entity_type': entity_type_reporting,
            'report_type': job_scope.report_type,
        }

        self.augment_with_vendor_data = lambda data: add_vendor_data(
            data, **vendor_data_extractor(data, **aux_data))