Beispiel #1
0
    def try_token(self, token):
        """Check `token` against Facebook and tally the outcome in the app stats.

        The outcome is counted on the stats object obtained for the codenames
        of every client linked to the token's user and app.

        Returns False, without contacting Facebook, when the stored expiration
        is unset, equal to EPOCH_ZERO, or not in the future. Otherwise the
        expiration reported by Facebook is written to the token and the result
        of `token.partial_save()` is returned.
        """
        codenames = (
            relational.UserClient.objects
            .filter(fbid=token.fbid, client__fb_app_id=token.appid)
            .values_list('client__codename', flat=True)
        )
        counters = self.stats.get_many(codenames.iterator())

        # Stored "expires" values are optimistic: a date in the past can be
        # trusted as-is, but a date in the future has to be confirmed.
        # (The field is sometimes set optimistically; and the user can
        # invalidate the token, which drops its real expiration to 0.)

        if not token.expires or token.expires == EPOCH_ZERO:
            counters.increment_invalid()
            return False

        if token.expires <= epoch.utcnow():
            counters.increment_expired()
            return False

        # App secrets are memoized in the module-level _SECRETS mapping; fall
        # back to the database only on a cache miss.
        app_id = token.appid
        try:
            app_secret = _SECRETS[app_id]
        except KeyError:
            app_secret = _SECRETS[app_id] = (
                relational.FBApp.objects.values_list('secret', flat=True).get(appid=app_id)
            )

        # Ask Facebook whether the token is still valid and when it expires.
        report = facebook.client.debug_token(app_id, app_secret, token.token)['data']
        is_valid = report['is_valid']
        reported_expires = report['expires_at']

        # Persist the confirmed expiration; whatever partial_save() returns is
        # handed back to the caller, and any error it raises propagates.
        token.expires = reported_expires
        saved = token.partial_save()

        # Classify using the value read back from the token after assignment.
        if is_valid and token.expires > epoch.utcnow():
            record_outcome = counters.increment_valid
        elif token.expires > EPOCH_ZERO:
            record_outcome = counters.increment_expired
        else:
            record_outcome = counters.increment_invalid
        record_outcome()

        return saved
Beispiel #2
0
    def test_extension(self, _urllib_mock):
        # Extending a token for a user with no stored token should create one
        # whose expiration is about 60 days after the call.
        # NOTE(review): the expected token value 'tok1' presumably comes from
        # the injected urllib mock, which is configured outside this method.
        fbid = 100
        appid = 471727162864364

        models.relational.FBApp.objects.create(appid=appid, name='Share!', secret='sekret')

        # Nothing is stored for this user/app pair before the call.
        stored = models.Token.items.filter(fbid__eq=fbid, appid__eq=appid)
        self.assertEqual(stored.query_count(), 0)

        # Take the reference time before the call so the tolerance below
        # covers the time the call itself takes.
        started = epoch.utcnow()
        facebook.extend_token(fbid, appid, 'xyz', '1.0')

        extended = stored.filter_get()
        self.assertEqual(extended.token, 'tok1')

        expected_expires = started + timedelta(days=60)
        self.assertAlmostEqual(extended.expires, expected_expires,
                               delta=timedelta(seconds=1))
Beispiel #3
0
    def handle(self, server, campaign_id, client_content_id, fbidfile, limit, debug, **options):
        """Poll the faces endpoint once per selected token and print timings.

        Tokens are selected in one of three ways:

        * `fbidfile` given: fbids are read from that file (or from stdin when
          it is '-'), separated by any whitespace, and their tokens are
          fetched in one batch for the campaign client's Facebook app;
        * `options['random']` set: fbids are drawn in random order from the
          app's users (ten times `limit` of them when `debug` is set);
        * otherwise: tokens of the app whose `expires` is in the future are
          scanned (unlimited when `debug` is set, else up to `limit`).

        When `debug` is set the tokens are additionally passed through
        `self.debug_tokens`. One daemon thread running `self.do_poll` is then
        started per token, and each `(fbid, result_time)` pair collected in the
        shared results list is written to stdout.

        Parameters:
            server: host (optionally with scheme) prefixed to the faces URL;
                'https://' is prepended when it does not start with 'http'.
            campaign_id: primary key of the Campaign to use.
            client_content_id: primary key of the ClientContent to use.
            fbidfile: path of a file of fbids, '-' for stdin, or a false value.
            limit: number of tokens to poll; defaults to 100 when None and no
                `fbidfile` is given. Overwritten by the number of fbids read
                when `fbidfile` is given.
            debug: whether to filter tokens through `self.debug_tokens`.
            options: must provide 'random', 'timeout' and 'verbosity'.

        Raises:
            CommandError: if `fbidfile` is combined with `limit` or
                `options['random']`, or if the campaign and the client content
                belong to different clients.
        """
        # Init and validate:
        if fbidfile and (limit or options['random']):
            raise CommandError("option --fbidfile incompatible with options --limit and --random")

        self.campaign = relational.Campaign.objects.get(pk=campaign_id)
        self.client_content = relational.ClientContent.objects.get(pk=client_content_id)
        client = self.campaign.client
        fb_app_id = client.fb_app_id
        if client != self.client_content.client:
            raise CommandError("Mismatched campaign and client content")

        self.uri = server + reverse('targetshare:faces')
        if not self.uri.startswith('http'):
            self.uri = 'https://' + self.uri

        self.timeout = options['timeout']
        self.verbosity = int(options['verbosity'])

        if fbidfile:
            fh = sys.stdin if fbidfile == '-' else open(fbidfile)
            try:
                # Grab fbids separated by any space (newline, space, tab):
                fbids = chain.from_iterable(line.split() for line in fh)
                # Materialize the keys now, while the file is still open:
                keys = tuple({'fbid': int(fbid), 'appid': fb_app_id} for fbid in fbids)
            finally:
                # Close what we opened here, but never close stdin:
                if fh is not sys.stdin:
                    fh.close()
            tokens = dynamo.Token.items.batch_get(keys)
            limit = len(keys)
        else:
            if limit is None:
                limit = 100

            if options['random']:
                app_users = relational.UserClient.objects.filter(client__fb_app_id=fb_app_id)
                random_fbids = app_users.values_list('fbid', flat=True).distinct().order_by('?')
                # A lot of these might be no good when debugged, so get 10x
                # TODO: Use scan and filter by expires?
                # It'd be nice if boto allowed you to specify an (infinite)
                # iterator of keys....
                num_fbids = (limit * 10) if debug else limit
                tokens = dynamo.Token.items.batch_get([{'fbid': fbid, 'appid': fb_app_id}
                                                       for fbid in random_fbids[:num_fbids].iterator()])
            else:
                tokens = dynamo.Token.items.filter(appid__eq=fb_app_id, expires__gt=epoch.utcnow())
                if debug:
                    # Debugging may discard tokens, so don't cap the scan here:
                    tokens = tokens.scan()
                else:
                    tokens = tokens.scan(limit=limit)

        tokens = tokens.iterable

        if debug:
            self._secrets = dict(relational.FBApp.objects.values_list('appid', 'secret').iterator())
            tokens = self.debug_tokens(tokens, limit)

        # Do one thread per primary for now:
        queue = Queue()
        results = []
        # Pre-bind the counter: the loop below never assigns it when there
        # are no tokens, and it is read unconditionally afterwards.
        count = 0
        for (count, token) in enumerate(tokens, 1):
            thread = Thread(target=self.do_poll, args=(queue, results))
            thread.setDaemon(True)
            thread.start()
            queue.put(token)

        queue.join()
        if len(results) < count:
            self.perr("Completed polling of {} / {} tokens"
                      .format(len(results), count), 1)

        for (fbid, result_time) in results:
            self.stdout.write("{}: {:.1f}".format(fbid, result_time))
Beispiel #4
0
def crawl_user(fbid, retry_delay=0):
    """Enqueue crawl tasks for the user (`fbid`) and the users of his/her network.

    The user's tokens are tried in descending `expires_safe` order until one
    is confirmed by Facebook as valid and unexpired. That token is used to
    crawl the user's edges, and a crawl task is scheduled for every resulting
    sync map whose status is WAITING (initial crawl) or COMPLETE (incremental
    crawl). Returns None in all cases; returns early when no usable token is
    found or the crawl reports an invalid token.

    `retry_delay` is the countdown used if the Facebook check has to be
    retried; each such retry passes along a larger value.
    """
    # Find a valid token for the user, trying the latest-expiring first:
    candidates = sorted(models.Token.items.query(fbid__eq=fbid),
                        key=expires_safe, reverse=True)
    for candidate in candidates:
        # Stored "expires" values are optimistic: a date in the past can be
        # trusted as-is, but a date in the future has to be confirmed.
        # (The field is sometimes set optimistically; and the user can
        # invalidate the token, which drops its real expiration to 0.)

        if not candidate.expires or candidate.expires <= epoch.utcnow():
            # Given the ordering, this one and all that follow are unusable.
            return

        # Confirm token expiration (and validity) with Facebook:
        app_secret = models.FBApp.objects.values_list('secret', flat=True).get(appid=candidate.appid)
        try:
            report = facebook.client.debug_token(candidate.appid, app_secret, candidate.token)['data']
            is_valid = report['is_valid']
            reported_expires = report['expires_at']
        except (KeyError, IOError, RuntimeError) as exc:
            # Facebook is being difficult; retry later, waiting longer each
            # time. (self.request.retries is not used for this because the
            # countdown should not grow for *all* kinds of retry.)
            # NOTE(review): retry() is expected to raise here, so execution
            # does not continue past this call -- confirm against Celery.
            next_delay = 2 * (retry_delay + 60)
            crawl_user.retry((fbid, next_delay), {},
                             countdown=retry_delay, exc=exc)

        # Store the confirmed expiration; if the save is rejected because the
        # token changed underneath us, start over immediately.
        candidate.expires = reported_expires
        try:
            candidate.partial_save()
        except ConditionalCheckFailedException as exc:
            crawl_user.retry((fbid, retry_delay), {}, countdown=0, exc=exc)

        if is_valid and candidate.expires > epoch.utcnow():
            # This token is confirmed good; stop searching.
            break
    else:
        # Every token was checked and none turned out to be usable.
        return

    try:
        edges = _bg_px4_crawl(candidate)
    except urllib2.HTTPError as exc:
        auth_challenge = exc.headers.get('www-authenticate', '')
        if 'invalid_token' not in auth_challenge:
            raise
        return # dead token

    sync_maps = _get_sync_maps(edges, candidate)

    countdown = 0
    for (position, sync_map) in enumerate(sync_maps, 1):
        # Pick the task by current status; other statuses are left alone.
        if sync_map.status == models.FBSyncMap.WAITING:
            crawl_task = initial_crawl
        elif sync_map.status == models.FBSyncMap.COMPLETE:
            crawl_task = incremental_crawl
        else:
            crawl_task = None

        if crawl_task is not None:
            sync_map.save_status(models.FBSyncMap.QUEUED)
            crawl_task.apply_async(
                args=[sync_map.fbid_primary, sync_map.fbid_secondary],
                countdown=countdown
            )

        # Push the start time back after every 100 sync maps:
        if position % 100 == 0:
            countdown += DELAY_INCREMENT