Ejemplo n.º 1
0
    def test_px4_choiceset_filter(self, classifier_mock):
        """A choice-set topics filter narrows px4 results but not px3 results."""
        weather_floor = 0.1

        # Global filter: only the default (null) filter is attached.
        self.campaign.campaignglobalfilters.create(filter=self.default_filter, rand_cdf=1)

        # Choice set backed by a filter requiring a minimum 'Weather' topic score.
        weather_filter = self.client.filters.create()
        weather_filter.filterfeatures.create(
            feature_type=models.relational.FilterFeatureType.objects.get_topics(),
            feature='topics[Weather]',
            operator=models.FilterFeature.Operator.MIN,
            value=weather_floor,
        )
        weather_choices = self.client.choicesets.create()
        weather_choices.choicesetfilters.create(filter=weather_filter)
        self.campaign.campaignchoicesets.create(choice_set=weather_choices, rand_cdf=1)

        # --- px3: the filtered set is expected to match the ranked set ---
        px3_edges = targeting.px3_crawl(self.token)
        # The px3 and px4 filtering calls receive the same keyword context.
        context = dict(
            fbid=self.token.fbid,
            campaign_id=self.campaign.pk,
            content_id=self.content.pk,
            visit_id=self.visit.pk,
            num_faces=1,
        )
        px3_result = targeting.perform_filtering(px3_edges, **context)
        self.assertTrue(px3_result.ranked)
        self.assertTrue(px3_result.filtered)
        # Same length on both sides means nothing was filtered out.
        self.assertEqual(len(px3_result.filtered), len(px3_result.ranked))

        # --- px4: topics get stored and the filter takes effect ---
        (stream, px4_edges) = targeting.px4_crawl(self.token)
        self.assertTrue(stream)

        # No post topics are stored before px4 filtering runs.
        self.assertEqual(models.dynamo.PostTopics.items.count(), 0)

        px4_filtered = targeting.px4_filter(stream, px4_edges, **context)
        px4_result = targeting.px4_rank(px4_filtered)

        self.assertTrue(px4_result.ranked)
        self.assertTrue(px4_result.filtered)
        self.assertTrue(classifier_mock.called)

        # Post topics have been stored by now.
        self.assertGreater(models.dynamo.PostTopics.items.count(), 0)

        # Some friends were dropped, and no survivor is below the floor.
        self.assertLess(len(px4_result.filtered), len(px4_result.ranked))
        below_floor = [
            secondary for secondary in px4_result.filtered.secondaries
            if secondary.topics['Weather'] < weather_floor
        ]
        self.assertFalse(below_floor)
Ejemplo n.º 2
0
    def test_px4_filtering(self, classifier_mock):
        """px4 filters and re-ranks friends by their 'Weather' topic score."""
        weather_floor = 0.1

        (stream, ranked_edges) = targeting.px4_crawl(self.token)
        self.assertTrue(stream)
        # Strip the interactions of the top-ranked ("closest") friend so
        # that friend ranks low under the topics ranking key below.
        ranked_edges[0].interactions.clear()

        # Ranking key on the 'Weather' topic, attached to the campaign.
        weather_ranking = self.client.rankingkeys.create()
        weather_ranking.campaignrankingkeys.create(campaign=self.campaign)
        weather_ranking.rankingkeyfeatures.create(
            feature='topics[Weather]',
            feature_type=models.relational.RankingFeatureType.objects.get_topics(),
            reverse=True,
        )

        # Lower the friend minimum to avoid TooFewFriendsError.
        self.campaign.campaignproperties.update(min_friends=1)

        # Global filter requiring a minimum 'Weather' topic score.
        weather_filter = self.client.filters.create()
        weather_filter.filterfeatures.create(
            feature_type=models.relational.FilterFeatureType.objects.get_topics(),
            feature='topics[Weather]',
            operator=models.FilterFeature.Operator.MIN,
            value=weather_floor,
        )
        self.campaign.campaignglobalfilters.create(filter=weather_filter, rand_cdf=1)

        # No post topics are stored before px4 filtering runs.
        self.assertEqual(models.dynamo.PostTopics.items.count(), 0)

        fresh_visitor = models.relational.Visitor.objects.create(fbid=self.token.fbid)
        fresh_visit = fresh_visitor.visits.create(session_id='123', app_id=123, ip='127.0.0.1')

        filter_kwargs = dict(
            fbid=self.token.fbid,
            campaign_id=self.campaign.pk,
            content_id=self.content.pk,
            visit_id=fresh_visit.pk,
            num_faces=1,
        )
        filter_outcome = targeting.px4_filter(stream, ranked_edges, **filter_kwargs)
        result = targeting.px4_rank(filter_outcome)

        self.assertTrue(all(result))
        self.assertTrue(classifier_mock.called)

        # Re-ranking must have changed the ordering.
        self.assertNotEqual(result.ranked, ranked_edges)

        # Some friends were dropped, and no survivor is below the floor.
        self.assertLess(len(result.filtered), len(result.ranked))
        below_floor = [
            secondary for secondary in result.filtered.secondaries
            if secondary.topics['Weather'] < weather_floor
        ]
        self.assertFalse(below_floor)

        # The input ordering was by score; the output ordering is by topic.
        self.assertGreater(ranked_edges[0].score, ranked_edges[14].score)
        top_secondary = result.ranked[0].secondary
        self.assertNotEqual(top_secondary, ranked_edges[0].secondary)
        top_weather = top_secondary.topics['Weather']
        later_weather = result.ranked[14].secondary.topics.get('Weather', 0)
        self.assertGreater(top_weather, later_weather)
        surviving = result.filtered.secondaries
        self.assertGreater(surviving[0].topics['Weather'],
                           surviving[-1].topics['Weather'])

        # Post topics have been stored by now.
        self.assertGreater(models.dynamo.PostTopics.items.count(), 0)
Ejemplo n.º 3
0
    def test_px4_filtering_dynamo(self, classifier_mock):
        """px4 filters and ranks on 'Weather' topics already stored in DynamoDB."""
        weather_floor = 0.1

        # Precondition: the DynamoDB tables used below start out empty.
        self.assertEqual(models.dynamo.IncomingEdge.items.count(), 0)
        self.assertEqual(models.dynamo.PostTopics.items.count(), 0)
        self.assertEqual(models.dynamo.PostInteractions.items.count(), 0)

        # Seed users, their edges into fbid 1, and per-post interactions.
        seen_postids = set()
        for friend_fbid in xrange(2, DB_MIN_FRIEND_COUNT + 2):
            models.User.items.create(fbid=friend_fbid)

            like_count = random.randint(0, 20)
            models.IncomingEdge.items.create(
                fbid_source=friend_fbid,
                fbid_target=1,
                post_likes=like_count,
            )
            liked_posts = [
                str(number) for number in random.sample(xrange(1, 21), like_count)
            ]
            seen_postids.update(liked_posts)
            if liked_posts:
                # batch_get_through doesn't like an empty set field (#11),
                # so the set item is only written when there are posts.
                models.PostInteractionsSet.items.create(
                    fbid=friend_fbid,
                    postids=liked_posts,
                )
            for liked_post in liked_posts:
                models.PostInteractions.items.create(
                    fbid=friend_fbid, postid=liked_post, post_likes=1)

        self.assertTrue(seen_postids)
        # Classify every post except the first one iterated, leaving one
        # post unclassified to exercise missing post classifications.
        for (position, postid) in enumerate(seen_postids):
            if position > 0:
                fake_topics = classify_fake("Words, words, words.", 'Weather')
                models.PostTopics.items.create(
                    postid=postid,
                    classifier=models.PostTopics.QD_CLASSIFIER,
                    **fake_topics
                )

        (stream, ranked_edges) = targeting.px4_crawl(self.token)
        # No FB stream is returned in this scenario.
        self.assertIsNone(stream)

        # Strip the interactions of the top-ranked ("closest") friend so
        # that friend ranks low under the topics ranking key below.
        ranked_edges[0].interactions.clear()

        # Ranking key on the 'Weather' topic, attached to the campaign.
        weather_ranking = self.client.rankingkeys.create()
        weather_ranking.campaignrankingkeys.create(campaign=self.campaign)
        weather_ranking.rankingkeyfeatures.create(
            feature='topics[Weather]',
            feature_type=models.relational.RankingFeatureType.objects.get_topics(),
            reverse=True,
        )

        # Lower the friend minimum to avoid TooFewFriendsError.
        self.campaign.campaignproperties.update(min_friends=1)

        # Global filter requiring a minimum 'Weather' topic score.
        weather_filter = self.client.filters.create()
        weather_filter.filterfeatures.create(
            feature_type=models.relational.FilterFeatureType.objects.get_topics(),
            feature='topics[Weather]',
            operator=models.FilterFeature.Operator.MIN,
            value=weather_floor,
        )
        self.campaign.campaignglobalfilters.create(filter=weather_filter, rand_cdf=1)

        fresh_visitor = models.relational.Visitor.objects.create(fbid=self.token.fbid)
        fresh_visit = fresh_visitor.visits.create(session_id='123', app_id=123, ip='127.0.0.1')

        filter_kwargs = dict(
            fbid=self.token.fbid,
            campaign_id=self.campaign.pk,
            content_id=self.content.pk,
            visit_id=fresh_visit.pk,
            num_faces=1,
        )
        filter_outcome = targeting.px4_filter(stream, ranked_edges, **filter_kwargs)
        result = targeting.px4_rank(filter_outcome)

        self.assertTrue(all(result))
        # Topics were pre-seeded, so the classifier must not have been called.
        self.assertFalse(classifier_mock.called)

        # Re-ranking must have changed the ordering.
        self.assertNotEqual(result.ranked, ranked_edges)

        # Some friends were dropped, and no survivor is below the floor.
        self.assertLess(len(result.filtered), len(result.ranked))
        below_floor = [
            secondary for secondary in result.filtered.secondaries
            if secondary.topics['Weather'] < weather_floor
        ]
        self.assertFalse(below_floor)

        # The input ordering was by score; the output ordering is by topic.
        self.assertGreater(ranked_edges[0].score, ranked_edges[-1].score)
        top_secondary = result.ranked[0].secondary
        self.assertNotEqual(top_secondary, ranked_edges[0].secondary)
        top_weather = top_secondary.topics['Weather']
        last_weather = result.ranked[-1].secondary.topics.get('Weather', 0)
        self.assertGreater(top_weather, last_weather)
        surviving = result.filtered.secondaries
        self.assertGreater(surviving[0].topics['Weather'],
                           surviving[-1].topics['Weather'])
Ejemplo n.º 4
0
def crawl_and_filter(campaign, content, notification, offset,
                      end_count, num_face, error_dict, cache=False, mock=False):
    """Re-crawl a client's users through px4 and yield their targeted edges.

    This is a generator, so no work happens until it is iterated. It takes
    the campaign client's UserClients ordered by fbid, slices them to
    ``[offset:end_count]``, batch-loads the matching tokens from Dynamo and,
    for every token that has not expired, runs ``targeting.px4_crawl``,
    ``targeting.px4_filter`` and ``targeting.px4_rank`` in turn.

    Args:
        campaign: campaign whose client's users are crawled; its ``pk`` is
            passed to filtering and folded into the per-user hash.
        content: content object; its ``pk`` is used the same way.
        notification: notification on which a NotificationUser is
            fetched or created for every crawled user.
        offset: start index into the ordered UserClient queryset.
        end_count: end index into that queryset; a falsy value means
            "up to the total count".
        num_face: forwarded to ``targeting.px4_filter``.
        error_dict: mapping of exception class name -> occurrence count,
            updated in place whenever a crawl fails. Any dict-like object
            with ``get`` works (plain dict, defaultdict, Counter).
        cache: forwarded to ``targeting.px4_filter`` as ``cache_match``.
        mock: unused in this function.

    Yields:
        ``(uuid, edges)`` tuples, where ``uuid`` is the md5 hex digest
        identifying the NotificationUser and ``edges`` is the non-empty
        ``filtered.edges`` of the ranked result.
    """
    LOG.info('Gathering list of users to crawl: offset %s, end count %s', offset, end_count)
    failed_fbids = []
    client = campaign.client
    ucs = client.userclients.order_by('fbid')
    end_count = end_count or ucs.count()
    ucs = ucs[offset:end_count]
    user_fbids = [{
        'fbid': Decimal(x),
        'appid': client.fb_app_id,
    } for x in ucs.values_list('fbid', flat=True)]
    user_tokens = dynamo.Token.items.batch_get(keys=user_fbids)
    for (count, ut) in enumerate(user_tokens, 1):
        if timezone.now() >= ut.expires:
            # Expired tokens are skipped and not counted as failures.
            LOG.debug('FBID %s has expired token', ut.fbid)
            continue

        LOG.info('Crawling user %s of %s; FBID: %s',
                 count, end_count - offset, ut.fbid)
        seed = ''.join(str(part) for part in (
            ut.fbid, campaign.pk, content.pk, notification.pk,
        ))
        # md5 requires bytes; encoding the ASCII seed explicitly yields the
        # same digest as before and also works where str is text (Python 3).
        hash_ = hashlib.md5(seed.encode('utf-8')).hexdigest()
        (notification_user, _created) = notification.notificationusers.get_or_create(
            uuid=hash_, fbid=ut.fbid)

        try:
            (stream, edges) = targeting.px4_crawl(ut)
        except Exception as exc:
            # Best-effort batch: record the failure and move on to the next user.
            LOG.exception('Failed to crawl %s', ut.fbid)
            failed_fbids.append(ut.fbid)
            # Increment via get() so a plain dict cannot raise KeyError here
            # and mask the crawl error that is being recorded.
            error_name = exc.__class__.__name__
            error_dict[error_name] = error_dict.get(error_name, 0) + 1
            continue

        # NOTE(review): the ids are passed positionally; presumably the order
        # is (campaign_id, content_id, fbid, visit_id, num_faces) -- confirm
        # against px4_filter's signature.
        filtered_result = targeting.px4_filter(
            stream,
            edges,
            campaign.pk,
            content.pk,
            ut.fbid,
            notification_user.pk,
            num_face,
            visit_type='targetshare.NotificationUser',
            cache_match=cache,
            force=True,
        )
        reranked_result = targeting.px4_rank(filtered_result)
        # Falsy when there is no filtered result or it carries no edges.
        targeted_edges = reranked_result.filtered and reranked_result.filtered.edges

        if targeted_edges:
            yield (hash_, targeted_edges)
        else:
            LOG.warning('User %s had too few friends', ut.fbid)
            failed_fbids.append(ut.fbid)

    if failed_fbids:
        LOG.info('Failed users: %r', failed_fbids)