def test_px4_choiceset_filter(self, classifier_mock):
    """px4 applies a choice-set topics filter that px3 filtering leaves unapplied"""
    # Null global filter, so only the choice set below can exclude anyone:
    self.campaign.campaignglobalfilters.create(filter=self.default_filter, rand_cdf=1)

    # Choice set whose filter demands a 'Weather' topic score of at least 0.1:
    weather_filter = self.client.filters.create()
    weather_filter.filterfeatures.create(
        feature_type=models.relational.FilterFeatureType.objects.get_topics(),
        feature='topics[Weather]',
        operator=models.FilterFeature.Operator.MIN,
        value=0.1,
    )
    weather_choices = self.client.choicesets.create()
    weather_choices.choicesetfilters.create(filter=weather_filter)
    self.campaign.campaignchoicesets.create(choice_set=weather_choices, rand_cdf=1)

    # px3: everything that was ranked also survives filtering
    px3_edges = targeting.px3_crawl(self.token)
    # Both code paths receive exactly the same request parameters.
    request = dict(
        fbid=self.token.fbid,
        campaign_id=self.campaign.pk,
        content_id=self.content.pk,
        visit_id=self.visit.pk,
        num_faces=1,
    )
    px3_result = targeting.perform_filtering(px3_edges, **request)
    self.assertTrue(px3_result.ranked)
    self.assertTrue(px3_result.filtered)
    self.assertEqual(len(px3_result.filtered), len(px3_result.ranked))  # no filtering

    # px4: topics get classified and stored, and the filter removes some friends
    stream, px4_edges = targeting.px4_crawl(self.token)
    self.assertTrue(stream)
    self.assertEqual(models.dynamo.PostTopics.items.count(), 0)

    px4_result = targeting.px4_rank(
        targeting.px4_filter(stream, px4_edges, **request)
    )
    self.assertTrue(px4_result.ranked)
    self.assertTrue(px4_result.filtered)
    self.assertTrue(classifier_mock.called)
    self.assertGreater(models.dynamo.PostTopics.items.count(), 0)
    self.assertLess(len(px4_result.filtered), len(px4_result.ranked))

    # Nobody below the configured threshold may remain after filtering.
    below_threshold = [
        friend for friend in px4_result.filtered.secondaries
        if friend.topics['Weather'] < 0.1
    ]
    self.assertFalse(below_threshold)
def test_px4_filtering(self, classifier_mock):
    """px4 can filter by topic-interest"""
    stream, crawled_edges = targeting.px4_crawl(self.token)
    self.assertTrue(stream)

    # Ensure "closest" friend has low ranking:
    crawled_edges[0].interactions.clear()

    # Rank on the 'Weather' topic score (reverse=True):
    weather_ranking = self.client.rankingkeys.create()
    weather_ranking.campaignrankingkeys.create(campaign=self.campaign)
    weather_ranking.rankingkeyfeatures.create(
        feature='topics[Weather]',
        feature_type=models.relational.RankingFeatureType.objects.get_topics(),
        reverse=True,
    )

    # Prevent TooFewFriendsError
    self.campaign.campaignproperties.update(min_friends=1)

    # Global filter demanding a 'Weather' topic score of at least 0.1:
    weather_filter = self.client.filters.create()
    weather_filter.filterfeatures.create(
        feature_type=models.relational.FilterFeatureType.objects.get_topics(),
        feature='topics[Weather]',
        operator=models.FilterFeature.Operator.MIN,
        value=0.1,
    )
    self.campaign.campaignglobalfilters.create(filter=weather_filter, rand_cdf=1)

    # No topics have been stored before filtering runs.
    self.assertEqual(models.dynamo.PostTopics.items.count(), 0)

    visitor = models.relational.Visitor.objects.create(fbid=self.token.fbid)
    visit = visitor.visits.create(session_id='123', app_id=123, ip='127.0.0.1')

    result = targeting.px4_rank(
        targeting.px4_filter(
            stream,
            crawled_edges,
            fbid=self.token.fbid,
            campaign_id=self.campaign.pk,
            content_id=self.content.pk,
            visit_id=visit.pk,
            num_faces=1,
        )
    )

    self.assertTrue(all(result))
    self.assertTrue(classifier_mock.called)
    self.assertNotEqual(result.ranked, crawled_edges)
    self.assertLess(len(result.filtered), len(result.ranked))

    # Nobody below the configured threshold may remain after filtering.
    filtered_friends = result.filtered.secondaries
    below_threshold = [
        friend for friend in filtered_friends
        if friend.topics['Weather'] < 0.1
    ]
    self.assertFalse(below_threshold)

    # The crawl order is by score; the re-ranked order is by topic interest.
    self.assertGreater(crawled_edges[0].score, crawled_edges[14].score)
    top_ranked = result.ranked[0].secondary
    self.assertNotEqual(top_ranked, crawled_edges[0].secondary)
    self.assertGreater(
        top_ranked.topics['Weather'],
        result.ranked[14].secondary.topics.get('Weather', 0),
    )
    self.assertGreater(
        filtered_friends[0].topics['Weather'],
        filtered_friends[-1].topics['Weather'],
    )

    # Filtering stored topic records as a side effect.
    self.assertGreater(models.dynamo.PostTopics.items.count(), 0)
def test_px4_filtering_dynamo(self, classifier_mock):
    """px4 can filter by topic-interest, read from DDB"""
    # The DynamoDB tables start out empty:
    self.assertEqual(models.dynamo.IncomingEdge.items.count(), 0)
    self.assertEqual(models.dynamo.PostTopics.items.count(), 0)
    self.assertEqual(models.dynamo.PostInteractions.items.count(), 0)

    # Seed friends, their edges to user 1, and the posts they interacted with:
    seen_postids = set()
    for friend_fbid in xrange(2, DB_MIN_FRIEND_COUNT + 2):
        models.User.items.create(fbid=friend_fbid)
        like_count = random.randint(0, 20)
        models.IncomingEdge.items.create(
            fbid_source=friend_fbid,
            fbid_target=1,
            post_likes=like_count,
        )
        liked_posts = [
            str(number) for number in random.sample(xrange(1, 21), like_count)
        ]
        seen_postids.update(liked_posts)
        if liked_posts:
            # batch_get_through doesn't like empty set field (#11)
            models.PostInteractionsSet.items.create(
                fbid=friend_fbid,
                postids=liked_posts,
            )
        for liked_post in liked_posts:
            models.PostInteractions.items.create(
                fbid=friend_fbid,
                postid=liked_post,
                post_likes=1,
            )

    self.assertTrue(seen_postids)

    # Store topics for every post but the first one iterated, so that a
    # missing post classification is exercised as well.
    for position, postid in enumerate(seen_postids):
        if position != 0:
            fake_topics = classify_fake("Words, words, words.", 'Weather')
            models.PostTopics.items.create(
                postid=postid,
                classifier=models.PostTopics.QD_CLASSIFIER,
                **fake_topics
            )

    stream, crawled_edges = targeting.px4_crawl(self.token)
    self.assertIsNone(stream)  # No FB stream

    # Ensure "closest" friend has low ranking:
    crawled_edges[0].interactions.clear()

    # Rank on the 'Weather' topic score (reverse=True):
    weather_ranking = self.client.rankingkeys.create()
    weather_ranking.campaignrankingkeys.create(campaign=self.campaign)
    weather_ranking.rankingkeyfeatures.create(
        feature='topics[Weather]',
        feature_type=models.relational.RankingFeatureType.objects.get_topics(),
        reverse=True,
    )

    # Prevent TooFewFriendsError
    self.campaign.campaignproperties.update(min_friends=1)

    # Global filter demanding a 'Weather' topic score of at least 0.1:
    weather_filter = self.client.filters.create()
    weather_filter.filterfeatures.create(
        feature_type=models.relational.FilterFeatureType.objects.get_topics(),
        feature='topics[Weather]',
        operator=models.FilterFeature.Operator.MIN,
        value=0.1,
    )
    self.campaign.campaignglobalfilters.create(filter=weather_filter, rand_cdf=1)

    visitor = models.relational.Visitor.objects.create(fbid=self.token.fbid)
    visit = visitor.visits.create(session_id='123', app_id=123, ip='127.0.0.1')

    result = targeting.px4_rank(
        targeting.px4_filter(
            stream,
            crawled_edges,
            fbid=self.token.fbid,
            campaign_id=self.campaign.pk,
            content_id=self.content.pk,
            visit_id=visit.pk,
            num_faces=1,
        )
    )

    self.assertTrue(all(result))
    # The classifier is expected not to have been called in this scenario.
    self.assertFalse(classifier_mock.called)
    self.assertNotEqual(result.ranked, crawled_edges)
    self.assertLess(len(result.filtered), len(result.ranked))

    # Nobody below the configured threshold may remain after filtering.
    filtered_friends = result.filtered.secondaries
    below_threshold = [
        friend for friend in filtered_friends
        if friend.topics['Weather'] < 0.1
    ]
    self.assertFalse(below_threshold)

    # The crawl order is by score; the re-ranked order is by topic interest.
    self.assertGreater(crawled_edges[0].score, crawled_edges[-1].score)
    top_ranked = result.ranked[0].secondary
    self.assertNotEqual(top_ranked, crawled_edges[0].secondary)
    self.assertGreater(
        top_ranked.topics['Weather'],
        result.ranked[-1].secondary.topics.get('Weather', 0),
    )
    self.assertGreater(
        filtered_friends[0].topics['Weather'],
        filtered_friends[-1].topics['Weather'],
    )
def crawl_and_filter(campaign, content, notification, offset,
                     end_count, num_face, error_dict, cache=False, mock=False):
    """Re-crawl a client's users through px4 and yield their targeted edges.

    The campaign client's users are ordered by FBID and sliced to
    ``[offset:end_count]``; their tokens are batch-fetched, expired tokens
    are skipped, and every remaining user goes through the px4 crawl,
    filter and rank steps.

    Args:
        campaign: Campaign whose ``client`` supplies the users to crawl;
            its ``pk`` is passed on to the filter step.
        content: Content object; its ``pk`` is passed on to the filter step.
        notification: Notification under which a notification-user record
            is fetched or created for each non-expired user.
        offset: Start index into the ordered user list.
        end_count: End index into the ordered user list; a falsy value
            means "up to the total number of users".
        num_face: Passed on to ``targeting.px4_filter``.
        error_dict: Mutable mapping of exception class name to a count,
            incremented once per failed crawl. Any mapping with ``get``
            and item assignment works (plain ``dict``, ``defaultdict``,
            ``Counter``).
        cache: Passed on to ``targeting.px4_filter`` as ``cache_match``.
        mock: Not used by this function.

    Yields:
        ``(uuid, edges)`` tuples for each user whose ranked result holds
        filtered edges. ``uuid`` is the MD5 hex digest of the concatenated
        FBID, campaign pk, content pk and notification pk.
    """
    LOG.info('Gathering list of users to crawl: offset %s, end count %s',
             offset, end_count)
    failed_fbids = []
    client = campaign.client
    ucs = client.userclients.order_by('fbid')
    end_count = end_count or ucs.count()
    ucs = ucs[offset:end_count]
    user_fbids = [{
        'fbid': Decimal(x),
        'appid': client.fb_app_id,
    } for x in ucs.values_list('fbid', flat=True)]
    user_tokens = dynamo.Token.items.batch_get(keys=user_fbids)

    for (count, ut) in enumerate(user_tokens, 1):
        if timezone.now() >= ut.expires:
            LOG.debug('FBID %s has expired token', ut.fbid)
            continue

        LOG.info('Crawling user %s of %s; FBID: %s',
                 count, end_count - offset, ut.fbid)

        # Identifier derived from the (user, campaign, content, notification)
        # combination; it is stored as the notification user's uuid.
        seed = ''.join(str(part) for part in (
            ut.fbid,
            campaign.pk,
            content.pk,
            notification.pk,
        ))
        hash_ = hashlib.md5(seed).hexdigest()
        (notification_user, _created) = notification.notificationusers.get_or_create(
            uuid=hash_, fbid=ut.fbid)

        try:
            (stream, edges) = targeting.px4_crawl(ut)
        except Exception as exc:
            # Best effort: one user's failed crawl must not stop the batch.
            LOG.exception('Failed to crawl %s', ut.fbid)
            failed_fbids.append(ut.fbid)
            # Count without relying on defaultdict semantics, so a plain
            # dict cannot raise KeyError from inside this handler.
            error_name = exc.__class__.__name__
            error_dict[error_name] = error_dict.get(error_name, 0) + 1
            continue

        filtered_result = targeting.px4_filter(
            stream,
            edges,
            campaign.pk,
            content.pk,
            ut.fbid,
            notification_user.pk,
            num_face,
            visit_type='targetshare.NotificationUser',
            cache_match=cache,
            force=True,
        )
        reranked_result = targeting.px4_rank(filtered_result)
        targeted_edges = reranked_result.filtered and reranked_result.filtered.edges
        if targeted_edges:
            yield (hash_, targeted_edges)
        else:
            LOG.warning('User %s had too few friends', ut.fbid)
            failed_fbids.append(ut.fbid)

    if failed_fbids:
        LOG.info('Failed users: %r', failed_fbids)