def test_generate_scorable_job_report_prefer_job_breakdown_false(
        mock_generate_child_claims, mock_prefer_job_breakdown,
        mock_fetch_job_report):
    """A divisible ad set claim is replaced by its single child ad claim.

    The three ``mock_*`` arguments are presumably injected by ``patch``
    decorators that sit above this function (not visible here).
    """
    # The only child claim the patched child-claim generator will hand back.
    child_ad_claim = Mock(
        entity_id='ad_id',
        entity_type=Entity.Ad,
        report_type=ReportType.lifetime,
        job_signature=JobSignature('job_id'),
    )
    parent_adset_claim = Mock(
        entity_id='adset_id',
        entity_type=Entity.AdSet,
        report_type=ReportType.lifetime,
        job_signature=JobSignature('job_id'),
        is_divisible=True,
    )

    mock_generate_child_claims.return_value = [child_ad_claim]
    mock_fetch_job_report.return_value = sentinel.job_report
    # Successive calls answer True first, then False.
    mock_prefer_job_breakdown.side_effect = [True, False]

    scorable_claims = list(generate_scorable(parent_adset_claim))

    assert len(scorable_claims) == 1
    assert scorable_claims[0].entity_id == 'ad_id'
def test_day_metrics_per_entity_under_ad_account_not_divisible(
        mock_iter_reality_per_ad_account):
    """Day-level claims are produced per calendar day covered by child ads.

    Two ads are fed in: one alive on 2019-01-01..02 and one on 2019-01-03.
    The generator is expected to yield one ad-account level claim for each
    of the three days.
    """
    account_claim = RealityClaim(
        ad_account_id='ad-account-id',
        entity_id='ad-account-id',
        entity_type=Entity.AdAccount,
        timezone='America/Los_Angeles',
    )
    first_ad = RealityClaim(
        entity_type=Entity.Ad,
        campaign_id='campaign-1',
        adset_id='adset-1',
        entity_id='ad-1',
        bol=datetime(2019, 1, 1, 12, 0),
        eol=datetime(2019, 1, 2, 12, 0),
    )
    second_ad = RealityClaim(
        entity_type=Entity.Ad,
        entity_id='ad-2',
        bol=datetime(2019, 1, 3, 12, 0),
        eol=datetime(2019, 1, 3, 12, 0),
    )
    mock_iter_reality_per_ad_account.return_value = [first_ad, second_ad]

    def expected_claim_for(day):
        # One ad-account level claim per day; the date is the last id segment.
        return ExpectationClaim(
            'ad-account-id',
            Entity.AdAccount,
            ReportType.day,
            Entity.Ad,
            JobSignature(f'fb|ad-account-id|||day|A|{day.isoformat()}'),
            ad_account_id='ad-account-id',
            timezone='America/Los_Angeles',
            range_start=day,
        )

    actual = list(
        day_metrics_per_ads_under_ad_account([ReportType.day], account_claim))

    expected = [
        expected_claim_for(date(2019, 1, day_of_month))
        for day_of_month in (1, 2, 3)
    ]
    assert actual == expected
def test_assign_score(report_type, historical_ratio, skew_ratio, score):
    """``ScoreCalculator.assign_score`` yields ``score`` (within 0.1).

    Both ratio helpers are patched to return the supplied values. The
    arguments are presumably provided by a parametrize decorator that is
    not visible here.
    """
    claim = ScorableClaim(
        'A1', Entity.Ad, report_type, Entity.Ad, JobSignature('jobid'), None)

    historical_patch = patch.object(
        ScoreCalculator, 'historical_ratio', return_value=historical_ratio)
    skew_patch = patch.object(
        ScoreCalculator, 'skew_ratio', return_value=skew_ratio)

    with historical_patch, skew_patch:
        computed = ScoreCalculator.assign_score(claim)

    assert computed == pytest.approx(score, abs=0.1)
def lifetime_metrics_per_entity_under_ad_account(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield the ad-account level lifetime expectation claim.

    Nothing is yielded when the reality claim carries no timezone.
    Otherwise a hierarchy tree rooted at the claim's entity is built from
    the child claims of ``entity_type``. It is attached to the yielded
    claim only if every child reported all of its parent ids.
    """
    if not reality_claim.timezone:
        return

    hierarchy_root = EntityNode(reality_claim.entity_id,
                                reality_claim.entity_type)

    # TODO: Remove once all entities have parent ids
    # Divide tasks only if parent levels are defined for all ads
    is_dividing_possible = True
    children = iter_reality_per_ad_account_claim(
        reality_claim, entity_types=[entity_type])
    for child in children:
        # Once the flag is falsy it stays that way and the child's
        # ``all_parent_ids_set`` is no longer consulted.
        if is_dividing_possible:
            is_dividing_possible = child.all_parent_ids_set
        hierarchy_root.add_node(
            EntityNode(child.entity_id, child.entity_type),
            path=child.parent_entity_ids,
        )

    logger.warning(
        f'[dividing-possible] Ad Account {reality_claim.ad_account_id} Dividing possible: {is_dividing_possible}'
    )

    signature = JobSignature(
        generate_id(
            ad_account_id=reality_claim.ad_account_id,
            report_type=ReportType.lifetime,
            report_variant=entity_type,
        ))
    attached_hierarchy = hierarchy_root if is_dividing_possible else None

    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.lifetime,
        entity_type,
        signature,
        ad_account_id=reality_claim.ad_account_id,
        entity_hierarchy=attached_hierarchy,
        timezone=reality_claim.timezone,
    )
def test_lifetime_score(dt, expected_score):
    """``lifetime_skew`` returns ``expected_score`` when "now" is ``dt``.

    ``ScoreSkewHandlers.get_now`` is patched to return ``dt``, and the test
    also checks that the patched clock was actually consulted.
    """
    claim = ScorableClaim(
        'A1',
        Entity.Ad,
        ReportType.lifetime,
        Entity.Ad,
        JobSignature('jobid'),
        None,
    )

    clock_patch = patch.object(ScoreSkewHandlers, 'get_now', return_value=dt)
    with clock_patch as get_now_mock:
        actual_score = ScoreSkewHandlers.lifetime_skew(claim=claim)

    assert get_now_mock.called
    assert actual_score == pytest.approx(expected_score, abs=0.01)
def test_generate_child_claims():
    """Each child node of the hierarchy becomes its own expectation claim.

    The child claims keep the parent's report type, variant, account,
    timezone and range start. Their job signature and entity hierarchy
    refer to the child node.
    """
    first_ad_node = EntityNode('ad-id1', Entity.Ad)
    second_ad_node = EntityNode('ad-id2', Entity.Ad)
    adset_node = EntityNode('adset-id', Entity.AdSet)
    for ad_node in (first_ad_node, second_ad_node):
        adset_node.add_node(ad_node)

    def lifetime_claim(entity_id, entity_type, hierarchy):
        # The parent and child claims differ only in these three values.
        return ExpectationClaim(
            entity_id,
            entity_type,
            ReportType.lifetime,
            Entity.Ad,
            JobSignature(
                f'fb|ad-account-id|A|{entity_id}|lifetime|A|2019-02-20'),
            ad_account_id='ad-account-id',
            timezone='timezone',
            entity_hierarchy=hierarchy,
            range_start=date(2019, 2, 20),
        )

    parent_claim = lifetime_claim('adset-id', Entity.AdSet, adset_node)

    produced = list(generate_child_claims(parent_claim))

    assert produced == [
        lifetime_claim('ad-id1', Entity.Ad, first_ad_node),
        lifetime_claim('ad-id2', Entity.Ad, second_ad_node),
    ]
def test_lifetime_metrics_per_entity_under_ad_account_is_divisible(
        mock_iter_reality_per_ad_account):
    """The lifetime claim carries the full hierarchy when parent ids are set.

    A single ad with campaign and ad set ids is fed in. The expected claim
    holds an account -> campaign -> ad set -> ad tree.
    """
    account_claim = RealityClaim(
        ad_account_id='ad-account-id',
        entity_id='ad-account-id',
        entity_type=Entity.AdAccount,
        timezone='America/Los_Angeles',
    )
    ad_claim = RealityClaim(
        entity_type=Entity.Ad,
        campaign_id='campaign-1',
        adset_id='adset-1',
        entity_id='ad-1',
        bol=datetime(2019, 1, 1, 12, 0),
        eol=datetime(2019, 1, 1, 12, 0),
    )
    mock_iter_reality_per_ad_account.return_value = [ad_claim]

    # Build the expected tree bottom-up, leaf first.
    ad_node = EntityNode('ad-1', Entity.Ad)
    adset_node = EntityNode('adset-1', Entity.AdSet, children=[ad_node])
    campaign_node = EntityNode(
        'campaign-1', Entity.Campaign, children=[adset_node])
    account_node = EntityNode(
        'ad-account-id', Entity.AdAccount, children=[campaign_node])

    expected_claim = ExpectationClaim(
        'ad-account-id',
        Entity.AdAccount,
        ReportType.lifetime,
        Entity.Ad,
        JobSignature('fb|ad-account-id|||lifetime|A'),
        ad_account_id='ad-account-id',
        timezone='America/Los_Angeles',
        entity_hierarchy=account_node,
    )

    produced = list(
        lifetime_metrics_per_entity_under_ad_account(Entity.Ad, account_claim))

    assert produced == [expected_claim]
def test_generate_scorable_job_report_none(mock_generate_child_claims,
                                           mock_prefer_job_breakdown,
                                           mock_fetch_job_report):
    """With no stored job report, the claim passes through undivided.

    Exactly one scorable claim is produced, and neither the breakdown
    preference check nor the child-claim generator is called.
    """
    mock_fetch_job_report.return_value = None
    ad_claim = Mock(
        entity_id='entity_id',
        entity_type=Entity.Ad,
        report_type=ReportType.lifetime,
        job_signature=JobSignature('job_id'),
        is_divisible=True,
    )

    scorable_claims = list(generate_scorable(ad_claim))

    assert len(scorable_claims) == 1
    assert not mock_prefer_job_breakdown.called
    assert not mock_generate_child_claims.called
def page_entity(
        reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield the entity-report expectation claim for a Page reality claim.

    Asserts that the incoming claim describes a Page.
    """
    assert reality_claim.entity_type == Entity.Page, 'Page expectation should be triggered only by page reality claims'

    job_id = generate_id(
        ad_account_id=reality_claim.ad_account_id,
        entity_id=reality_claim.entity_id,
        report_type=ReportType.entity,
        report_variant=Entity.Page,
    )
    page_claim = ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        Entity.Page,
        JobSignature(job_id),
        ad_account_id=reality_claim.ad_account_id,
    )
    yield page_claim
def entities_per_page(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """
    Yield the "fetch EntityType entities metadata per given Page" claim.

    ``entity_type`` must be one of ``Entity.NON_AA_SCOPED``.
    """
    assert entity_type in Entity.NON_AA_SCOPED

    # The job id is built without an entity id.
    job_id = generate_id(
        ad_account_id=reality_claim.ad_account_id,
        report_type=ReportType.entity,
        report_variant=entity_type,
    )
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        entity_type,
        JobSignature(job_id),
        ad_account_id=reality_claim.ad_account_id,
    )
def lifetime_page_metrics_per_entity(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield the lifetime-metrics expectation claim for one entity.

    The job id includes the claim's own entity type and id, with
    ``entity_type`` as the report variant. ``entity_type`` must be one of
    ``Entity.ALL``.
    """
    assert entity_type in Entity.ALL

    id_parts = dict(
        ad_account_id=reality_claim.ad_account_id,
        entity_type=reality_claim.entity_type,
        entity_id=reality_claim.entity_id,
        report_type=ReportType.lifetime,
        report_variant=entity_type,
    )
    signature = JobSignature(generate_id(**id_parts))

    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.lifetime,
        entity_type,
        signature,
        ad_account_id=reality_claim.ad_account_id,
    )
def ad_account_entity(
        reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """Yield the entity-report expectation claim for an ad account.

    Asserts that the incoming claim describes an ad account. The claim's
    timezone is carried over to the expectation claim.
    """
    is_ad_account = reality_claim.entity_type == Entity.AdAccount
    assert is_ad_account, 'Ad account expectation should be triggered only by ad account reality claims'

    job_id = generate_id(
        ad_account_id=reality_claim.ad_account_id,
        entity_id=reality_claim.entity_id,
        report_type=ReportType.entity,
        report_variant=Entity.AdAccount,
    )
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        Entity.AdAccount,
        JobSignature(job_id),
        ad_account_id=reality_claim.ad_account_id,
        timezone=reality_claim.timezone,
    )
def generate_child_claims(
        claim: ExpectationClaim) -> Generator[ExpectationClaim, None, None]:
    """Yield one expectation claim per direct child in the claim's hierarchy.

    Each child claim keeps the parent's report type, report variant, ad
    account, timezone and range start. Its entity id and type, job
    signature and entity hierarchy come from the child node.

    A claim with no hierarchy (``entity_hierarchy is None``) yields nothing.
    """
    hierarchy = claim.entity_hierarchy
    if hierarchy is None:
        # Claims may be built without a hierarchy, so there is nothing to
        # divide into; previously this raised AttributeError.
        return

    for child_entity_node in hierarchy.children:
        child_job_id = generate_id(
            ad_account_id=claim.ad_account_id,
            range_start=claim.range_start,
            report_type=claim.report_type,
            report_variant=claim.report_variant,
            entity_id=child_entity_node.entity_id,
            entity_type=child_entity_node.entity_type,
        )
        yield ExpectationClaim(
            child_entity_node.entity_id,
            child_entity_node.entity_type,
            claim.report_type,
            claim.report_variant,
            JobSignature(child_job_id),
            ad_account_id=claim.ad_account_id,
            timezone=claim.timezone,
            # The child node becomes the root of the child claim's hierarchy.
            entity_hierarchy=child_entity_node,
            range_start=claim.range_start,
        )
def entities_per_ad_account(
        entity_type: str, reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """
    Yield the "fetch EntityType entities metadata per given AA" claim.

    ``entity_type`` must be one of ``Entity.ALL``.
    """
    # Design note:
    # This generator is meant to be registered in the inventory of
    # *AdAccount-level* job signature generators, NOT in a per-EntityType
    # inventory (where it would be invoked once for every EntityType).
    #
    # Unlike the metrics report types, there is no effective "fetch a single
    # EntityType entity's data by its ID" task (yet). So rather than emitting
    # many job signatures per EntityType, a single one is created per parent
    # AA and treated as the "normative_job_signature" at the AA level.
    #
    # Once an atomic per-EntityType entity collection celery task is needed,
    # the atomic per-entity job signature would go into the normative column
    # of each separate EntityType's ExpectationClaim, the per-parent-AA job
    # signature would go into the "effective_job_signatures" list on those
    # claims, AND this function must move from the AA-level to the
    # EntityType-level signature generators inventory.
    assert entity_type in Entity.ALL

    job_id = generate_id(
        ad_account_id=reality_claim.ad_account_id,
        report_type=ReportType.entity,
        report_variant=entity_type,
    )
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.entity,
        entity_type,
        JobSignature(job_id),
        ad_account_id=reality_claim.ad_account_id,
        timezone=reality_claim.timezone,
    )
def pages_per_scope(
        reality_claim: RealityClaim
) -> Generator[ExpectationClaim, None, None]:
    """
    Yield the "fetch Pages active entity metadata per given scope" claim.

    Intended for Scope-level RealityClaim / ExpectationClaim.
    """
    # No Page id is passed: this is the "all Pages per scope X" job, so the
    # id is keyed by the scope entity itself.
    job_id = generate_id(
        namespace=config.application.UNIVERSAL_ID_SYSTEM_NAMESPACE,
        entity_id=reality_claim.entity_id,
        entity_type=reality_claim.entity_type,
        report_type=ReportType.import_pages,
        report_variant=Entity.Page,
    )
    yield ExpectationClaim(
        reality_claim.entity_id,
        reality_claim.entity_type,
        ReportType.import_pages,
        Entity.Page,
        JobSignature(job_id),
    )
def test_persister_saves_job_scope_auxiliary_data_to_data_flower(self):
    """Persister stores claim context that does not fit in the job id.

    A single prioritized claim with a timezone is persisted. The test then
    checks that the timezone is (a) present in the data read back from the
    sorted jobs queue, and (b) applied to the JobScope pre-assembled by
    ``TaskProducer.iter_tasks``.
    """
    # There is a need to save some context data that does not fit on JobIS
    # Persister should store that on the Data Flower.
    sweep_id = random.gen_string_id()
    entity_id = random.gen_string_id()
    ad_account_id = random.gen_string_id()

    expected_job_id = generate_id(
        ad_account_id=ad_account_id,
        report_type=ReportType.lifetime,
        report_variant=Entity.Campaign,
    )

    prioritized_iter = [
        PrioritizationClaim(
            entity_id,
            Entity.Campaign,
            ReportType.lifetime,
            JobSignature(expected_job_id),
            100,
            ad_account_id=ad_account_id,
            timezone='Europe/London',
            # TODO: contemplate moving auxiliary data formation to
            # place where JobSignatures are generated and use that
            # data for Data Flower (as it was originally intended
            # but not implemented because saving each job's data
            # individually to Data Flower was too slow)
        )
    ]

    persisted = persister.iter_persist_prioritized(sweep_id, prioritized_iter)
    # Just need to spin the generator so it does all the saving it needs
    # to do per item.
    assert len(list(persisted)) == 1

    # Now, finally, the testing:
    with SortedJobsQueue(sweep_id).JobsReader() as jobs_iter:
        # Loop names must differ from `expected_job_id`: rebinding it here
        # would make the comparison below check the queued id against itself.
        jobs_queued_actual = [
            (queued_job_id, queued_scope_data, queued_score)
            for queued_job_id, queued_scope_data, queued_score in jobs_iter
        ]

    jobs_queued_should_be = [(
        expected_job_id,
        # Contents of this dict is what we are testing here
        dict(
            # comes from Persister code
            # manually peeled off *Claim and injected into Data Flower
            ad_account_timezone_name='Europe/London'),
        100,
    )]

    assert jobs_queued_actual == jobs_queued_should_be

    # And, another way of looking at it
    # looper.iter_tasks preassembles JobScope and should apply aux data to it.
    produced_tasks = list(TaskProducer(sweep_id).iter_tasks())
    assert len(produced_tasks) == 1
    _celery_task, job_scope, _job_context, _score = produced_tasks[0]

    job_scope_should_be = JobScope(
        sweep_id=sweep_id,
        namespace='fb',
        ad_account_id=ad_account_id,
        report_type=ReportType.lifetime,
        report_variant=Entity.Campaign,
        # \/ This is what we are testing \/
        # comes from Persister code
        # manually peeled off *Claim and injected into Data Flower
        ad_account_timezone_name='Europe/London',
        score=100,
    )

    assert job_scope.to_dict() == job_scope_should_be.to_dict()