def urlopen(self, url, **kwargs):
   """Wraps urllib2.urlopen() and passes through the access token.
   """
   log_url = url
   if self.access_token:
     log_url = util.add_query_params(url, [('access_token',
                                            self.access_token[:4] + '...')])
     url = util.add_query_params(url, [('access_token', self.access_token)])
   logging.info('Fetching %s, kwargs %s', log_url, kwargs)
   return urllib2.urlopen(urllib2.Request(url, **kwargs),
                          timeout=appengine_config.HTTP_TIMEOUT)
Example #2
 def urlopen(self, url, **kwargs):
   """Wraps urllib2.urlopen() and passes through the access token.
   """
   log_url = url
   if self.access_token:
     log_url = util.add_query_params(url, [('access_token',
                                            self.access_token[:4] + '...')])
     # TODO add access_token to the data parameter for POST requests
     url = util.add_query_params(url, [('access_token', self.access_token)])
   logging.info('Fetching %s, kwargs %s', log_url, kwargs)
   resp = urllib2.urlopen(urllib2.Request(url, **kwargs),
                          timeout=appengine_config.HTTP_TIMEOUT)
   return resp if kwargs.get('data') else json.loads(resp.read()).get('data')
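The last line splits behavior by HTTP method: when kwargs includes data, urllib2 issues a POST and the raw response object is returned; otherwise the body is parsed as JSON and the 'data' envelope field is unwrapped. A small illustration of that unwrapping, with a made-up payload:

import json

body = '{"data": [{"id": "1"}, {"id": "2"}], "paging": {}}'  # hypothetical API response
items = json.loads(body).get('data')
# items == [{'id': '1'}, {'id': '2'}]; the 'paging' envelope is dropped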
Example #4
  def urlopen(self, relative_url, parse_response=True, **kwargs):
    """Wraps urllib2.urlopen() and passes through the access token.

    Returns: decoded JSON dict if parse_response is True, otherwise urlopen
      response object
    """
    url = API_BASE + relative_url
    log_url = url
    if self.access_token:
      log_url = util.add_query_params(url, [('access_token',
                                             self.access_token[:4] + '...')])
      url = util.add_query_params(url, [('access_token', self.access_token)])
    logging.info('Fetching %s, kwargs %s', log_url, kwargs)
    resp = urllib2.urlopen(urllib2.Request(url, **kwargs),
                           timeout=appengine_config.HTTP_TIMEOUT)
    return json.loads(resp.read()) if parse_response else resp
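This variant prefixes API_BASE and adds a parse_response flag so callers that need status or headers (e.g. the ETag handling in Example #12 below) can get the raw response object instead of a parsed dict. Hypothetical usage, assuming src is an instance of this class:

user = src.urlopen('/me')                        # parsed JSON dict, the common case
resp = src.urlopen('/me', parse_response=False)  # raw response, for headers
etag = resp.info().get('ETag')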
Example #5
  def fetch_replies(self, activities, min_id=None):
    """Fetches and injects Twitter replies into a list of activities, in place.

    Includes indirect replies ie reply chains, not just direct replies. Searches
    for @-mentions, matches them to the original tweets with
    in_reply_to_status_id_str, and recurses until it's walked the entire tree.

    Args:
      activities: list of activity dicts

    Returns:
      same activities list
    """

    # cache searches for @-mentions for individual users. maps username to dict
    # mapping tweet id to ActivityStreams reply object dict.
    mentions = {}

    # find replies
    for activity in activities:
      # list of ActivityStreams reply object dict and set of seen activity ids
      # (tag URIs). seed with the original tweet; we'll filter it out later.
      replies = [activity]
      _, id = util.parse_tag_uri(activity['id'])
      seen_ids = set([id])

      for reply in replies:
        # get mentions of this tweet's author so we can search them for replies to
        # this tweet. can't use statuses/mentions_timeline because i'd need to
        # auth as the user being mentioned.
        # https://dev.twitter.com/docs/api/1.1/get/statuses/mentions_timeline
        #
        # note that these HTTP requests are synchronous. you can make async
        # requests by using urlfetch.fetch() directly, but not with urllib2.
        # https://developers.google.com/appengine/docs/python/urlfetch/asynchronousrequests
        author = reply['actor']['username']
        if author not in mentions:
          url = API_SEARCH % {
            'q': urllib.quote_plus('@' + author.encode('utf-8')),
            'count': 100,
          }
          if min_id is not None:
            url = util.add_query_params(url, {'since_id': min_id})
          mentions[author] = self.urlopen(url)['statuses']

        # look for replies. add any we find to the end of replies. this makes us
        # recursively follow reply chains to their end. (python supports
        # appending to a sequence while you're iterating over it.)
        for mention in mentions[author]:
          id = mention['id_str']
          if (mention.get('in_reply_to_status_id_str') in seen_ids and
              id not in seen_ids):
            replies.append(self.tweet_to_activity(mention))
            seen_ids.add(id)

      items = [r['object'] for r in replies[1:]]  # filter out seed activity
      activity['object']['replies'] = {
        'items': items,
        'totalItems': len(items),
        }
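The traversal leans on the fact that Python happily iterates over a list while it grows: each newly found reply is appended to replies, so the for loop naturally walks the whole reply tree. The same idiom in isolation, on a made-up reply graph:

replies = [{'id': '1'}]                   # seed: the original tweet
children = {'1': ['2', '3'], '2': ['4']}  # hypothetical parent id -> reply ids
seen = {'1'}
for node in replies:                      # appending below extends this loop
    for child in children.get(node['id'], []):
        if child not in seen:
            replies.append({'id': child})
            seen.add(child)
# replies now covers ids 1, 2, 3, 4 -- the entire chain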
Example #6
  def fetch_replies(self, activities, min_id=None):
    """Fetches and injects Twitter replies into a list of activities, in place.

    Includes indirect replies ie reply chains, not just direct replies. Searches
    for @-mentions, matches them to the original tweets with
    in_reply_to_status_id_str, and recurses until it's walked the entire tree.

    Args:
      activities: list of activity dicts

    Returns:
      same activities list
    """

    # cache searches for @-mentions for individual users. maps username to dict
    # mapping tweet id to ActivityStreams reply object dict.
    mentions = {}

    # find replies
    for activity in activities:
      # list of ActivityStreams reply object dict and set of seen activity ids
      # (tag URIs). seed with the original tweet; we'll filter it out later.
      replies = [activity]
      _, id = util.parse_tag_uri(activity['id'])
      seen_ids = set([id])

      for reply in replies:
        # get mentions of this tweet's author so we can search them for replies to
        # this tweet. can't use statuses/mentions_timeline because i'd need to
        # auth as the user being mentioned.
        # https://dev.twitter.com/docs/api/1.1/get/statuses/mentions_timeline
        #
        # note that these HTTP requests are synchronous. you can make async
        # requests by using urlfetch.fetch() directly, but not with urllib2.
        # https://developers.google.com/appengine/docs/python/urlfetch/asynchronousrequests
        author = reply['actor']['username']
        if author not in mentions:
          url = API_SEARCH_URL % {
            'q': urllib.quote_plus('@' + author),
            'count': 100,
          }
          if min_id is not None:
            url = util.add_query_params(url, {'since_id': min_id})
          mentions[author] = self.urlopen(url)['statuses']

        # look for replies. add any we find to the end of replies. this makes us
        # recursively follow reply chains to their end. (python supports
        # appending to a sequence while you're iterating over it.)
        for mention in mentions[author]:
          id = mention['id_str']
          if (mention.get('in_reply_to_status_id_str') in seen_ids and
              id not in seen_ids):
            replies.append(self.tweet_to_activity(mention))
            seen_ids.add(id)

      items = [r['object'] for r in replies[1:]]  # filter out seed activity
      activity['object']['replies'] = {
        'items': items,
        'totalItems': len(items),
        }
Example #8
 def urlopen(self, url, **kwargs):
   """Wraps urllib2.urlopen() and passes through the access token."""
   log_url = url
   if self.access_token:
     # TODO add access_token to the data parameter for POST requests
     url = util.add_query_params(url, [('access_token', self.access_token)])
   resp = util.urlopen(urllib2.Request(url, **kwargs))
   return resp if kwargs.get('data') else json.loads(resp.read()).get('data')
Example #9
 def urlopen(self, url, **kwargs):
   """Wraps :func:`urllib2.urlopen()` and passes through the access token."""
   log_url = url
   if self.access_token:
     # TODO add access_token to the data parameter for POST requests
     url = util.add_query_params(url, [('access_token', self.access_token)])
   resp = util.urlopen(urllib2.Request(url, **kwargs))
   return (resp if kwargs.get('data')
           else source.load_json(resp.read(), url).get('data'))
Example #10
 def urlopen(self, url, **kwargs):
     """Wraps :func:`urllib2.urlopen()` and passes through the access token."""
     if self.access_token:
         # TODO add access_token to the data parameter for POST requests
         url = util.add_query_params(url,
                                     [('access_token', self.access_token)])
     resp = util.urlopen(urllib.request.Request(url, **kwargs))
     return (resp if kwargs.get('data') else source.load_json(
         resp.read(), url).get('data'))
Example #11
  def fetch_mentions(self, username, min_id=None):
    """Fetches a user's @-mentions and returns them as ActivityStreams.

    Tries to only include explicit mentions, not mentions automatically created
    by @-replying. See the get_activities() docstring for details.

    Args:
      username: string
      min_id: only return activities with ids greater than this

    Returns:
      list of activity dicts
    """
    # get mentions
    url = API_SEARCH_URL % {
      'q': urllib.quote_plus('@' + username),
      'count': 100,
    }
    if min_id is not None:
      url = util.add_query_params(url, {'since_id': min_id})
    candidates = self.urlopen(url)['statuses']

    # fetch in-reply-to tweets (if any)
    in_reply_to_ids = util.trim_nulls(
      [c.get('in_reply_to_status_id_str') for c in candidates])
    origs = {
      o.get('id_str'): o for o in
      self.urlopen(API_LOOKUP_URL % ','.join(in_reply_to_ids))
    } if in_reply_to_ids else {}  # skip the lookup request when there are no reply parents

    # filter out tweets that we don't consider mentions
    mentions = []
    for c in candidates:
      if (c.get('user', {}).get('screen_name') == username or
          c.get('retweeted_status')):
        continue

      reply_to = origs.get(c.get('in_reply_to_status_id_str'))
      if not reply_to:
        mentions.append(c)
      else:
        reply_to_user = reply_to.get('user', {}).get('screen_name')
        mentioned = [u.get('screen_name') for u in
                     reply_to.get('entities', {}).get('user_mentions', [])]
        if username != reply_to_user and username not in mentioned:
          mentions.append(c)

    return mentions
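The filtering above boils down to one predicate: a search hit counts as an explicit mention only if it isn't by the user themselves, isn't a retweet, and isn't an @-reply that merely inherited the username from the tweet it replies to. A condensed restatement as a standalone function (name and inputs are illustrative):

def is_explicit_mention(candidate, username, origs):
    if candidate.get('user', {}).get('screen_name') == username:
        return False  # the user mentioning themselves doesn't count
    if candidate.get('retweeted_status'):
        return False  # retweets aren't mentions
    reply_to = origs.get(candidate.get('in_reply_to_status_id_str'))
    if not reply_to:
        return True   # not a reply, so the @-mention must be deliberate
    mentioned = [u.get('screen_name') for u in
                 reply_to.get('entities', {}).get('user_mentions', [])]
    return (username != reply_to.get('user', {}).get('screen_name') and
            username not in mentioned)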
Example #12
class Twitter(source.Source):
    """Implements the ActivityStreams API for Twitter.
  """

    DOMAIN = 'twitter.com'
    NAME = 'Twitter'
    FRONT_PAGE_TEMPLATE = 'templates/twitter_index.html'

    # HTML snippet for embedding a tweet.
    # https://dev.twitter.com/docs/embedded-tweets
    EMBED_POST = """
  <script async defer src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
  <br />
  <blockquote class="twitter-tweet" lang="en" data-dnt="true">
  <p><a href="%(url)s">%(content)s</a></p>
  </blockquote>
  """

    def __init__(self, access_token_key, access_token_secret):
        """Constructor.

    Twitter now requires authentication in v1.1 of their API. You can get an
    OAuth access token by creating an app here: https://dev.twitter.com/apps/new

    Args:
      access_token_key: string, OAuth access token key
      access_token_secret: string, OAuth access token secret
    """
        self.access_token_key = access_token_key
        self.access_token_secret = access_token_secret

    def get_actor(self, screen_name=None):
        """Returns a user as a JSON ActivityStreams actor dict.

    Args:
      screen_name: string username. Defaults to the current user.
    """
        if screen_name is None:
            url = API_CURRENT_USER_URL
        else:
            url = API_USER_URL % screen_name
        return self.user_to_actor(self.urlopen(url))

    def get_activities_response(self,
                                user_id=None,
                                group_id=None,
                                app_id=None,
                                activity_id=None,
                                start_index=0,
                                count=0,
                                etag=None,
                                min_id=None,
                                cache=None,
                                fetch_replies=False,
                                fetch_likes=False,
                                fetch_shares=False,
                                fetch_events=False,
                                search_query=None):
        """Fetches posts and converts them to ActivityStreams activities.

    XXX HACK: this is currently hacked for bridgy to NOT pass min_id to the
    request for fetching activity tweets themselves, but to pass it to all of
    the requests for filling in replies, retweets, etc. That's because we want
    to find new replies and retweets of older initial tweets.
    TODO: find a better way.

    See method docstring in source.py for details. app_id is ignored.
    min_id is translated to Twitter's since_id.

    The code for handling ETags (and 304 Not Modified responses and setting
    If-None-Match) is here, but unused right now since Twitter evidently doesn't
    support ETags. From https://dev.twitter.com/discussions/5800 :
    "I've confirmed with our team that we're not explicitly supporting this
    family of features."

    Likes (ie favorites) are scraped from twitter.com HTML, since Twitter's REST
    API doesn't offer a way to fetch them. You can also get them from the
    Streaming API, though, and convert them with streaming_event_to_object().
    https://dev.twitter.com/docs/streaming-apis/messages#Events_event

    Shares (ie retweets) are fetched with a separate API call per tweet:
    https://dev.twitter.com/docs/api/1.1/get/statuses/retweets/%3Aid

    However, retweets are only fetched for the first 15 tweets that have them,
    since that's Twitter's rate limit per 15 minute window. :(
    https://dev.twitter.com/docs/rate-limiting/1.1/limits

    Use the group_id @self to retrieve a user_id's timeline. If user_id is None
    or @me, it will return tweets for the current API user.

    group_id can be used to specify the slug of a list for which to return tweets.
    By default the current API user's lists will be used, but lists owned by other
    users can be fetched by explicitly passing a username to user_id, e.g. to
    fetch tweets from the list @exampleuser/example-list you would call
    get_activities(user_id='exampleuser', group_id='example-list').
    """
        activities = []
        if activity_id:
            tweets = [self.urlopen(API_STATUS_URL % activity_id)]
            total_count = len(tweets)
        else:
            if group_id == source.SELF:
                if user_id in (None, source.ME):
                    url = API_SELF_TIMELINE_URL % (count + start_index)
                else:
                    url = API_USER_TIMELINE_URL % {
                        'count': count + start_index,
                        'screen_name': user_id,
                    }

                if fetch_likes:
                    liked = self.urlopen(API_FAVORITES_URL % (user_id or ''))
                    if liked:
                        user = self.urlopen(API_USER_URL % user_id
                                            if user_id else API_CURRENT_USER_URL)
                        activities += [
                            self._make_like(tweet, user) for tweet in liked
                        ]
            elif group_id == source.SEARCH:
                url = API_SEARCH_URL % {
                    'q': urllib.quote_plus(search_query),
                    'count': count + start_index,
                }
            elif group_id in (None, source.FRIENDS, source.ALL):
                url = API_TIMELINE_URL % (count + start_index)
            else:
                url = API_LIST_TIMELINE_URL % {
                    'count': count + start_index,
                    'slug': group_id,
                    'owner_screen_name': user_id or self.get_actor().get('username'),
                }

            headers = {'If-None-Match': etag} if etag else {}
            total_count = None
            try:
                resp = self.urlopen(url, headers=headers, parse_response=False)
                etag = resp.info().get('ETag')
                tweet_obj = json.loads(resp.read())
                if group_id == source.SEARCH:
                    tweet_obj = tweet_obj.get('statuses', [])
                tweets = tweet_obj[start_index:]
            except urllib2.HTTPError, e:
                if e.code == 304:  # Not Modified, from a matching ETag
                    tweets = []
                else:
                    raise

        # batch get memcached counts of favorites and retweets for all tweets
        cached = {}
        if cache is not None:
            keys = itertools.product(('ATR', 'ATF'),
                                     [t['id_str'] for t in tweets])
            cached = cache.get_multi('%s %s' % (prefix, id)
                                     for prefix, id in keys)
        # only update the cache at the end, in case we hit an error before then
        cache_updates = {}

        if fetch_shares:
            retweet_calls = 0
            for tweet in tweets:
                if tweet.get('retweeted'):  # this tweet is itself a retweet
                    continue
                elif retweet_calls >= RETWEET_LIMIT:
                    logging.warning(
                        "Hit Twitter's retweet rate limit (%d) with more to "
                        "fetch! Results will be incomplete!" % RETWEET_LIMIT)
                    break

                # store retweets in the 'retweets' field, which is handled by
                # tweet_to_activity().
                # TODO: make these HTTP requests asynchronous. not easy since we don't
                # (yet) require threading support or use a non-blocking HTTP library.
                #
                # twitter limits this API endpoint to one call per minute per user,
                # which is easy to hit, so we stop before we hit that.
                # https://dev.twitter.com/docs/rate-limiting/1.1/limits
                #
                # can't use the statuses/retweets_of_me endpoint because it only
                # returns the original tweets, not the retweets or their authors.
                id = tweet['id_str']
                count = tweet.get('retweet_count')
                if count and count != cached.get('ATR ' + id):
                    url = API_RETWEETS_URL % id
                    if min_id is not None:
                        url = util.add_query_params(url, {'since_id': min_id})
                    tweet['retweets'] = self.urlopen(url)
                    retweet_calls += 1
                    cache_updates['ATR ' + id] = count

        tweet_activities = [self.tweet_to_activity(t) for t in tweets]

        if fetch_replies:
            self.fetch_replies(tweet_activities, min_id=min_id)

        if fetch_likes:
            for tweet, activity in zip(tweets, tweet_activities):
                id = tweet['id_str']
                count = tweet.get('favorite_count')
                if count and count != cached.get('ATF ' + id):
                    url = HTML_FAVORITES_URL % id
                    logging.debug('Fetching %s', url)
                    try:
                        html = json.loads(
                            urllib2.urlopen(url,
                                            timeout=HTTP_TIMEOUT).read()).get(
                                                'htmlUsers', '')
                    except urllib2.URLError, e:
                        util.interpret_http_exception(e)  # just log it
                        continue
                    likes = self.favorites_html_to_likes(tweet, html)
                    activity['object'].setdefault('tags', []).extend(likes)
                    cache_updates['ATF ' + id] = count
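The etag/If-None-Match logic above is the standard HTTP conditional-GET pattern: send the previously seen ETag, and treat a 304 as "nothing changed". Extracted into a standalone Python 3 sketch (urllib.request in place of the urllib2 used above; the URL is whatever endpoint you're polling):

import urllib.request, urllib.error

def fetch_if_changed(url, etag=None):
    headers = {'If-None-Match': etag} if etag else {}
    try:
        resp = urllib.request.urlopen(urllib.request.Request(url, headers=headers))
        return resp.read(), resp.headers.get('ETag')
    except urllib.error.HTTPError as e:
        if e.code == 304:  # Not Modified: the cached copy is still current
            return None, etag
        raise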
Example #13
  def maybe_add_or_delete_source(self, source_cls, auth_entity, state, **kwargs):
    """Adds or deletes a source if auth_entity is not None.

    Used in each source's oauth-dropins :meth:`CallbackHandler.finish()` and
    :meth:`CallbackHandler.get()` methods, respectively.

    Args:
      source_cls: source class, e.g. :class:`instagram.Instagram`
      auth_entity: oauth-dropins auth entity
      state: string, OAuth callback state parameter. a JSON serialized dict
        with operation, feature, and an optional callback URL. For deletes,
        it will also include the source key
      kwargs: passed through to the source_cls constructor

    Returns:
      source entity if it was created or updated, otherwise None
    """
    state_obj = util.decode_oauth_state(state)
    operation = state_obj.get('operation', 'add')
    feature = state_obj.get('feature')
    callback = state_obj.get('callback')
    user_url = state_obj.get('user_url')

    logging.debug(
      'maybe_add_or_delete_source with operation=%s, feature=%s, callback=%s',
      operation, feature, callback)

    if operation == 'add':  # this is an add/update
      if not auth_entity:
        if not self.messages:
          self.messages.add("OK, you're not signed up. Hope you reconsider!")
        if callback:
          callback = util.add_query_params(callback, {'result': 'declined'})
          logging.debug(
            'user declined adding source, redirect to external callback %s',
            callback)
          # call super.redirect so the callback url is unmodified
          super(Handler, self).redirect(callback.encode('utf-8'))
        else:
          self.redirect('/')
        return

      CachedPage.invalidate('/users')
      logging.info('%s.create_new with %s', source_cls.__class__.__name__,
                   (auth_entity.key, state, kwargs))
      source = source_cls.create_new(self, auth_entity=auth_entity,
                                     features=feature.split(',') if feature else [],
                                     user_url=user_url, **kwargs)

      if source:
        # add to login cookie
        logins = self.get_logins()
        logins.append(Login(path=source.bridgy_path(), site=source.SHORT_NAME,
                            name=source.label_name()))
        self.set_logins(logins)

      if callback:
        callback = util.add_query_params(callback, {
          'result': 'success',
          'user': source.bridgy_url(self),
          'key': source.key.urlsafe(),
        } if source else {'result': 'failure'})
        logging.debug(
          'finished adding source, redirect to external callback %s', callback)
        # call super.redirect so the callback url is unmodified
        super(Handler, self).redirect(callback.encode('utf-8'))

      elif source and not source.domains:
        self.redirect('/edit-websites?' + urllib.urlencode({
          'source_key': source.key.urlsafe(),
        }))

      else:
        self.redirect(source.bridgy_url(self) if source else '/')

      return source

    else:  # this is a delete
      if auth_entity:
        self.redirect('/delete/finish?auth_entity=%s&state=%s' %
                      (auth_entity.key.urlsafe(), state))
      else:
        self.messages.add('If you want to disable, please approve the %s prompt.' %
                          source_cls.GR_CLASS.NAME)
        source_key = state_obj.get('source')
        if source_key:
          source = ndb.Key(urlsafe=source_key).get()
          if source:
            return self.redirect(source.bridgy_url(self))

        self.redirect('/')
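Everything this handler does is driven by the state parameter, which per the docstring is a JSON-serialized dict carried through the OAuth redirect. A plausible round trip of that encoding (util.decode_oauth_state's exact behavior is assumed, not documented here):

import json
from urllib.parse import quote_plus, unquote_plus

state = quote_plus(json.dumps({'operation': 'add', 'feature': 'listen',
                               'callback': 'https://example.com/done'}))
state_obj = json.loads(unquote_plus(state))
assert state_obj.get('operation', 'add') == 'add'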
Example #14
          resp = self.urlopen(id)
          if resp.get('error'):
            logging.warning("Couldn't fetch object %s: %s", id, resp)
          else:
            posts = [resp]
            break
        except urllib2.URLError, e:
          logging.warning("Couldn't fetch object %s: %s", id, e)
      else:
        posts = []

    else:
      url = API_SELF_POSTS if group_id == source.SELF else API_HOME
      url = url % (user_id if user_id else 'me', start_index)
      if count:
        url = util.add_query_params(url, {'limit': count})
      headers = {'If-None-Match': etag} if etag else {}
      try:
        resp = self.urlopen(url, headers=headers, parse_response=False)
        etag = resp.info().get('ETag')
        posts = json.loads(resp.read()).get('data', [])
      except urllib2.HTTPError, e:
        if e.code == 304:  # Not Modified, from a matching ETag
          posts = []
        else:
          raise

    activities = [self.post_to_activity(p) for p in posts]

    if fetch_shares:
      id_to_activity = {}
Example #15
  def maybe_add_or_delete_source(self, source_cls, auth_entity, state, **kwargs):
    """Adds or deletes a source if auth_entity is not None.

    Used in each source's oauth-dropins :meth:`CallbackHandler.finish()` and
    :meth:`CallbackHandler.get()` methods, respectively.

    Args:
      source_cls: source class, e.g. :class:`instagram.Instagram`
      auth_entity: oauth-dropins auth entity
      state: string, OAuth callback state parameter. a JSON serialized dict
        with operation, feature, and an optional callback URL. For deletes,
        it will also include the source key
      kwargs: passed through to the source_cls constructor

    Returns:
      source entity if it was created or updated, otherwise None
    """
    state_obj = util.decode_oauth_state(state)
    operation = state_obj.get('operation', 'add')
    feature = state_obj.get('feature')
    callback = state_obj.get('callback')
    user_url = state_obj.get('user_url')

    logging.debug(
      'maybe_add_or_delete_source with operation=%s, feature=%s, callback=%s',
      operation, feature, callback)

    if operation == 'add':  # this is an add/update
      if not auth_entity:
        if not self.messages:
          self.messages.add("OK, you're not signed up. Hope you reconsider!")
        if callback:
          callback = util.add_query_params(callback, {'result': 'declined'})
          logging.debug(
            'user declined adding source, redirect to external callback %s',
            callback)
          # call super.redirect so the callback url is unmodified
          super(Handler, self).redirect(callback)
        else:
          self.redirect('/')
        return

      CachedPage.invalidate('/users')
      logging.info('%s.create_new with %s', source_cls.__class__.__name__,
                   (auth_entity.key, state, kwargs))
      source = source_cls.create_new(self, auth_entity=auth_entity,
                                     features=feature.split(',') if feature else [],
                                     user_url=user_url, **kwargs)

      if source:
        # add to login cookie
        logins = self.get_logins()
        logins.append(Login(path=source.bridgy_path(), site=source.SHORT_NAME,
                            name=source.label_name()))
        self.set_logins(logins)

      if callback:
        callback = util.add_query_params(callback, {
          'result': 'success',
          'user': source.bridgy_url(self),
          'key': source.key.urlsafe().decode(),
        } if source else {'result': 'failure'})
        logging.debug(
          'finished adding source, redirect to external callback %s', callback)
        # call super.redirect so the callback url is unmodified
        super(Handler, self).redirect(callback)

      elif source and not source.domains:
        self.redirect('/edit-websites?' + urllib.parse.urlencode({
          'source_key': source.key.urlsafe().decode(),
        }))

      else:
        self.redirect(source.bridgy_url(self) if source else '/')

      return source

    else:  # this is a delete
      if auth_entity:
        self.redirect('/delete/finish?auth_entity=%s&state=%s' %
                      (auth_entity.key.urlsafe().decode(), state))
      else:
        self.messages.add('If you want to disable, please approve the %s prompt.' %
                          source_cls.GR_CLASS.NAME)
        source_key = state_obj.get('source')
        if source_key:
          source = ndb.Key(urlsafe=source_key).get()
          if source:
            return self.redirect(source.bridgy_url(self))

        self.redirect('/')
Example #16
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None,
                              scrape=False, cookie=None, **kwargs):
    """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Mentions are never fetched or included because the API doesn't support
    searching for them.
    https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

    Shares are never fetched or included since there is no share feature.

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    May populate a custom 'ig_like_count' property in media objects. (Currently
    only when scraping.)

    Args:
      scrape: if True, scrapes HTML from instagram.com instead of using the API.
        Populates the user's actor object in the 'actor' response field.
        Useful for apps that haven't yet been approved in the new permissions
        approval process. Currently only supports group_id=SELF. Also supports
        passing a shortcode as activity_id as well as the internal API id.
        http://developers.instagram.com/post/133424514006/instagram-platform-update
      cookie: string, only used if scrape=True
      **: see :meth:`Source.get_activities_response`

    Raises:
      InstagramAPIError
    """
    if scrape or self.scrape:
      if not (activity_id or
              (group_id == source.SELF and user_id) or
              (group_id == source.FRIENDS and cookie)):
        raise NotImplementedError(
          'Scraping only supports activity_id, user_id and group_id=@self, or cookie and group_id=@friends.')
      return self._scrape(user_id=user_id, activity_id=activity_id, cookie=cookie,
                          fetch_extras=fetch_replies or fetch_likes, cache=cache)

    if user_id is None:
      user_id = 'self'
    if group_id is None:
      group_id = source.FRIENDS

    if search_query:
      if search_query.startswith('#'):
        search_query = search_query[1:]
      else:
        raise NotImplementedError(
          'Instagram only supports search over hashtags, so search_query must '
          'begin with the # character.')

    # TODO: paging
    media = []
    kwargs = {}
    if min_id is not None:
      kwargs['min_id'] = min_id

    activities = []
    try:
      media_url = (API_MEDIA_URL % activity_id if activity_id else
                   API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                   API_MEDIA_POPULAR_URL if group_id == source.ALL else
                   API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                   API_USER_FEED_URL if group_id == source.FRIENDS else None)
      assert media_url
      media = self.urlopen(util.add_query_params(media_url, kwargs))
      if media:
        if activity_id:
          media = [media]
        activities += [self.media_to_activity(m) for m in util.trim_nulls(media)]

      if group_id == source.SELF and fetch_likes:
        # add the user's own likes
        liked = self.urlopen(
          util.add_query_params(API_USER_LIKES_URL % user_id, kwargs))
        if liked:
          user = self.urlopen(API_USER_URL % user_id)
          activities += [self.like_to_object(user, l['id'], l['link'])
                         for l in liked]

    except urllib2.HTTPError, e:
      code, body = util.interpret_http_exception(e)
      # instagram api should give us back a json block describing the
      # error. but if it's an error for some other reason, it probably won't
      # be properly formatted json.
      try:
        body_obj = json.loads(body) if body else {}
      except ValueError:
        body_obj = {}

      if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
        logging.exception(body_obj.get('meta', {}).get('error_message'))
      else:
        raise e
Example #17
        try:
          posts = [json.loads(self.urlopen(API_OBJECT_URL % id).read())]
          break
        except urllib2.URLError, e:
          logging.warning("Couldn't fetch object %s: %s", id, e)
      else:
        posts = []

      if posts == [False]:  # FB returns false for "not found"
        posts = []

    else:
      url = API_SELF_POSTS_URL if group_id == source.SELF else API_HOME_URL
      url = url % (user_id if user_id else 'me', start_index)
      if count:
        url = util.add_query_params(url, {'limit': count})
      headers = {'If-None-Match': etag} if etag else {}
      try:
        resp = self.urlopen(url, headers=headers)
        etag = resp.info().get('ETag')
        posts = json.loads(resp.read()).get('data', [])
      except urllib2.HTTPError, e:
        if e.code == 304:  # Not Modified, from a matching ETag
          posts = []
        else:
          raise

    activities = [self.post_to_activity(p) for p in posts]
    response = self._make_activities_base_response(activities)
    response['etag'] = etag
    return response
Example #18
  def fetch_mentions(self, username, tweets, min_id=None):
    """Fetches a user's @-mentions and returns them as ActivityStreams.

    Tries to only include explicit mentions, not mentions automatically created
    by @-replying. See the get_activities() docstring for details.

    Args:
      username: string
      tweets: list of Twitter API objects. used to find quote tweets quoting them.
      min_id: only return activities with ids greater than this

    Returns:
      list of activity dicts
    """
    # get @-name mentions
    url = API_SEARCH % {
      'q': urllib.quote_plus('@' + username.encode('utf-8')),
      'count': 100,
    }
    if min_id is not None:
      url = util.add_query_params(url, {'since_id': min_id})
    candidates = self.urlopen(url)['statuses']

    # fetch in-reply-to tweets (if any)
    in_reply_to_ids = util.trim_nulls(
      [c.get('in_reply_to_status_id_str') for c in candidates])
    origs = {
      o.get('id_str'): o for o in
      self.urlopen(API_LOOKUP % ','.join(in_reply_to_ids))
    } if in_reply_to_ids else {}

    # filter out tweets that we don't consider mentions
    mentions = []
    for c in candidates:
      if (c.get('user', {}).get('screen_name') == username or
          c.get('retweeted_status')):
        continue
      reply_to = origs.get(c.get('in_reply_to_status_id_str'))
      if not reply_to:
        mentions.append(c)
      else:
        reply_to_user = reply_to.get('user', {}).get('screen_name')
        mentioned = [u.get('screen_name') for u in
                     reply_to.get('entities', {}).get('user_mentions', [])]
        if username != reply_to_user and username not in mentioned:
          mentions.append(c)

    # search for quote tweets
    # Guideline ("Limit your searches to 10 keywords and operators.")
    # implies fewer, but 20 IDs seems to work in practice.
    # https://dev.twitter.com/rest/public/search
    for batch in [
        tweets[i:i + QUOTE_SEARCH_BATCH_SIZE]
        for i in xrange(0, len(tweets), QUOTE_SEARCH_BATCH_SIZE)
    ]:
      batch_ids = [t['id_str'] for t in batch]
      url = API_SEARCH % {
        'q': urllib.quote_plus(' OR '.join(batch_ids)),
        'count': 100,
      }
      if min_id is not None:
        url = util.add_query_params(url, {'since_id': min_id})
      candidates = self.urlopen(url)['statuses']
      for c in candidates:
        quoted_status_id = c.get('quoted_status_id_str')
        if (quoted_status_id and quoted_status_id in batch_ids and
            not c.get('retweeted_status')):
          mentions.append(c)

    return mentions
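The quote-tweet search slices tweet ids into batches of QUOTE_SEARCH_BATCH_SIZE and ORs each batch into a single search query, per the comment about Twitter's keyword guideline. The slicing idiom in isolation (Python 3 range instead of the xrange above; the batch size of 20 is taken from the comment):

QUOTE_SEARCH_BATCH_SIZE = 20
ids = [str(n) for n in range(1, 45)]  # 44 hypothetical tweet ids
batches = [ids[i:i + QUOTE_SEARCH_BATCH_SIZE]
           for i in range(0, len(ids), QUOTE_SEARCH_BATCH_SIZE)]
queries = [' OR '.join(batch) for batch in batches]
# three batches of 20, 20, and 4 ids, each becoming one search query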
Example #19
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None):
    """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Mentions are never fetched or included because the API doesn't support
    searching for them.
    https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

    Shares are never fetched or included since there is no share feature.

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    Raises: InstagramAPIError
    """
    if user_id is None:
      user_id = 'self'
    if group_id is None:
      group_id = source.FRIENDS

    if search_query:
      if search_query.startswith('#'):
        search_query = search_query[1:]
      else:
        raise NotImplementedError(
          'Instagram only supports search over hashtags, so search_query must '
          'begin with the # character.')

    # TODO: paging
    media = []
    kwargs = {}
    if min_id is not None:
      kwargs['min_id'] = min_id

    activities = []
    try:
      media_url = (API_MEDIA_URL % activity_id if activity_id else
                   API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                   API_MEDIA_POPULAR_URL if group_id == source.ALL else
                   API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                   API_USER_FEED_URL if group_id == source.FRIENDS else None)
      assert media_url
      media = self.urlopen(util.add_query_params(media_url, kwargs))
      if media:
        if activity_id:
          media = [media]
        activities += [self.media_to_activity(m) for m in util.trim_nulls(media)]

      if group_id == source.SELF and fetch_likes:
        # add the user's own likes
        liked = self.urlopen(
          util.add_query_params(API_USER_LIKES_URL % user_id, kwargs))
        if liked:
          user = self.urlopen(API_USER_URL % user_id)
          activities += [self.like_to_object(user, l['id'], l['link'])
                         for l in liked]

    except urllib2.HTTPError, e:
      code, body = util.interpret_http_exception(e)
      # instagram api should give us back a json block describing the
      # error. but if it's an error for some other reason, it probably won't
      # be properly formatted json.
      try:
        body_obj = json.loads(body) if body else {}
      except ValueError:
        body_obj = {}

      if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
        logging.exception(body_obj.get('meta', {}).get('error_message'))
      else:
        raise e
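The except clause assumes the API returns a JSON body describing the error, but guards against non-JSON bodies such as proxy or gateway error pages. That defensive parse, as a standalone sketch with made-up bodies:

import json

def parse_error_body(body):
    # tolerate empty or non-JSON error bodies instead of masking the real error
    try:
        return json.loads(body) if body else {}
    except ValueError:
        return {}

err = parse_error_body('{"meta": {"error_type": "APINotFoundError"}}')
assert err['meta']['error_type'] == 'APINotFoundError'
assert parse_error_body('<html>502 Bad Gateway</html>') == {}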
Example #20
  def fetch_mentions(self, username, tweets, min_id=None):
    """Fetches a user's @-mentions and returns them as ActivityStreams.

    Tries to only include explicit mentions, not mentions automatically created
    by @-replying. See the get_activities() docstring for details.

    Args:
      username: string
      tweets: list of Twitter API objects. used to find quote tweets quoting them.
      min_id: only return activities with ids greater than this

    Returns:
      list of activity dicts
    """
    # get @-name mentions
    url = API_SEARCH % {
      'q': urllib.quote_plus('@' + username),
      'count': 100,
    }
    if min_id is not None:
      url = util.add_query_params(url, {'since_id': min_id})
    candidates = self.urlopen(url)['statuses']

    # fetch in-reply-to tweets (if any)
    in_reply_to_ids = util.trim_nulls(
      [c.get('in_reply_to_status_id_str') for c in candidates])
    origs = {
      o.get('id_str'): o for o in
      self.urlopen(API_LOOKUP % ','.join(in_reply_to_ids))
    } if in_reply_to_ids else {}

    # filter out tweets that we don't consider mentions
    mentions = []
    for c in candidates:
      if (c.get('user', {}).get('screen_name') == username or
          c.get('retweeted_status')):
        continue
      reply_to = origs.get(c.get('in_reply_to_status_id_str'))
      if not reply_to:
        mentions.append(c)
      else:
        reply_to_user = reply_to.get('user', {}).get('screen_name')
        mentioned = [u.get('screen_name') for u in
                     reply_to.get('entities', {}).get('user_mentions', [])]
        if username != reply_to_user and username not in mentioned:
          mentions.append(c)

    # search for quote tweets
    # Guideline ("Limit your searches to 10 keywords and operators.")
    # implies fewer, but 20 IDs seems to work in practice.
    # https://dev.twitter.com/rest/public/search
    for batch in [
        tweets[i:i + QUOTE_SEARCH_BATCH_SIZE]
        for i in xrange(0, len(tweets), QUOTE_SEARCH_BATCH_SIZE)
    ]:
      batch_ids = [t['id_str'] for t in batch]
      url = API_SEARCH % {
        'q': urllib.quote_plus(' OR '.join(batch_ids)),
        'count': 100,
      }
      if min_id is not None:
        url = util.add_query_params(url, {'since_id': min_id})
      candidates = self.urlopen(url)['statuses']
      for c in candidates:
        quoted_status_id = c.get('quoted_status_id_str')
        if quoted_status_id and quoted_status_id in batch_ids:
          mentions.append(c)

    return mentions
Example #21
    def get_activities_response(self,
                                user_id=None,
                                group_id=None,
                                app_id=None,
                                activity_id=None,
                                start_index=0,
                                count=0,
                                etag=None,
                                min_id=None,
                                cache=None,
                                fetch_replies=False,
                                fetch_likes=False,
                                fetch_shares=False,
                                fetch_events=False,
                                fetch_mentions=False,
                                search_query=None,
                                scrape=False,
                                cookie=None,
                                ignore_rate_limit=False,
                                **kwargs):
        """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Mentions are never fetched or included because the API doesn't support
    searching for them.
    https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

    Shares are never fetched or included since there is no share feature.

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    May populate a custom 'ig_like_count' property in media objects. (Currently
    only when scraping.)

    Args:
      scrape: if True, scrapes HTML from instagram.com instead of using the API.
        Populates the user's actor object in the 'actor' response field.
        Useful for apps that haven't yet been approved in the new permissions
        approval process. Currently only supports group_id=SELF. Also supports
        passing a shortcode as activity_id as well as the internal API id.
        http://developers.instagram.com/post/133424514006/instagram-platform-update
      cookie: string, only used if scrape=True
      ignore_rate_limit: boolean, for scraping, always make an HTTP request,
        even if we've been rate limited recently
      **: see :meth:`Source.get_activities_response`

    Raises:
      InstagramAPIError
    """
        if group_id is None:
            group_id = source.FRIENDS

        if scrape or self.scrape:
            if not (activity_id or (group_id == source.SELF and user_id) or
                    (group_id == source.FRIENDS and cookie)):
                raise NotImplementedError(
                    'Scraping only supports activity_id, user_id and group_id=@self, or cookie and group_id=@friends.'
                )
            elif fetch_likes and not cookie and not self.cookie:
                raise NotImplementedError('Scraping likes requires a cookie.')

            # cache rate limited responses and short circuit
            global _last_rate_limited, _last_rate_limited_exc
            now = datetime.datetime.now()
            if not ignore_rate_limit and _last_rate_limited:
                retry = _last_rate_limited + RATE_LIMIT_BACKOFF
                if now < retry:
                    logging.info(
                        'Remembered rate limit at %s, waiting until %s to try again.',
                        _last_rate_limited, retry)
                    assert _last_rate_limited_exc
                    raise _last_rate_limited_exc

            try:
                return self._scrape(user_id=user_id,
                                    group_id=group_id,
                                    activity_id=activity_id,
                                    count=count,
                                    cookie=cookie,
                                    fetch_extras=fetch_replies or fetch_likes,
                                    cache=cache)
            except Exception as e:
                code, body = util.interpret_http_exception(e)
                if not ignore_rate_limit and code in ('429', '503'):
                    logging.info('Got rate limited! Remembering for %s',
                                 str(RATE_LIMIT_BACKOFF))
                    _last_rate_limited = now
                    _last_rate_limited_exc = e
                raise

        if user_id is None:
            user_id = 'self'

        if search_query:
            if search_query.startswith('#'):
                search_query = search_query[1:]
            else:
                raise ValueError(
                    'Instagram only supports search over hashtags, so search_query must '
                    'begin with the # character.')

        # TODO: paging
        media = []
        kwargs = {}
        if min_id is not None:
            kwargs['min_id'] = min_id

        activities = []
        try:
            media_url = (
                API_MEDIA_URL % activity_id if activity_id else
                API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                API_MEDIA_POPULAR_URL if group_id == source.ALL else
                API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                API_USER_FEED_URL if group_id == source.FRIENDS else None)
            assert media_url
            media = self.urlopen(util.add_query_params(media_url, kwargs))
            if media:
                if activity_id:
                    media = [media]
                activities += [
                    self.media_to_activity(m) for m in util.trim_nulls(media)
                ]

            if group_id == source.SELF and fetch_likes:
                # add the user's own likes
                liked = self.urlopen(
                    util.add_query_params(API_USER_LIKES_URL % user_id,
                                          kwargs))
                if liked:
                    user = self.urlopen(API_USER_URL % user_id)
                    activities += [
                        self.like_to_object(user, l['id'], l['link'])
                        for l in liked
                    ]

        except urllib_error.HTTPError as e:
            code, body = util.interpret_http_exception(e)
            # instagram api should give us back a json block describing the
            # error. but if it's an error for some other reason, it probably won't
            # be properly formatted json.
            try:
                body_obj = json.loads(body) if body else {}
            except ValueError:
                body_obj = {}

            if body_obj.get('meta',
                            {}).get('error_type') == 'APINotFoundError':
                logging.warning(body_obj.get('meta', {}).get('error_message'),
                                exc_info=True)
            else:
                raise e

        return self.make_activities_base_response(activities)
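The scraping path memoizes rate-limit hits in module globals: after a 429 or 503, every call within RATE_LIMIT_BACKOFF re-raises the remembered exception instead of touching the network again. The mechanism reduced to a sketch (the names come from the code above; the backoff duration is an assumption):

import datetime

RATE_LIMIT_BACKOFF = datetime.timedelta(minutes=30)  # assumed duration
_last_rate_limited = None
_last_rate_limited_exc = None

def remember_rate_limit(exc):
    global _last_rate_limited, _last_rate_limited_exc
    _last_rate_limited = datetime.datetime.now()
    _last_rate_limited_exc = exc

def check_rate_limit():
    # re-raise the remembered exception while the backoff window is still open
    if (_last_rate_limited and
            datetime.datetime.now() < _last_rate_limited + RATE_LIMIT_BACKOFF):
        raise _last_rate_limited_exc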
Example #22
    def get_activities_response(self,
                                user_id=None,
                                group_id=None,
                                app_id=None,
                                activity_id=None,
                                start_index=0,
                                count=0,
                                etag=None,
                                min_id=None,
                                cache=None,
                                fetch_replies=False,
                                fetch_likes=False,
                                fetch_shares=False,
                                fetch_events=False,
                                search_query=None):
        """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Instagram doesn't have a reshare feature, so shares are never included
    since they don't exist. :P

    Raises: InstagramAPIError
    """
        if user_id is None:
            user_id = 'self'
        if group_id is None:
            group_id = source.FRIENDS

        # TODO: paging
        media = []
        kwargs = {}
        if min_id is not None:
            kwargs['min_id'] = min_id

        activities = []
        try:
            media_url = (
                API_MEDIA_URL % activity_id if activity_id else
                API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                API_MEDIA_POPULAR_URL if group_id == source.ALL else
                API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                API_USER_FEED_URL if group_id == source.FRIENDS else None)
            assert media_url
            media = self.urlopen(util.add_query_params(media_url, kwargs))
            if media:
                if activity_id:
                    media = [media]
                activities += [
                    self.media_to_activity(m) for m in util.trim_nulls(media)
                ]

            if group_id == source.SELF and fetch_likes:
                # add the user's own likes
                liked = self.urlopen(
                    util.add_query_params(API_USER_LIKES_URL % user_id,
                                          kwargs))
                if liked:
                    user = self.urlopen(API_USER_URL % user_id)
                    activities += [
                        self.like_to_object(user, l['id'], l['link'])
                        for l in liked
                    ]

        except urllib2.HTTPError, e:
            code, body = oauth_handlers.interpret_http_exception(e)
            # instagram api should give us back a json block describing the
            # error. but if it's an error for some other reason, it probably won't
            # be properly formatted json.
            try:
                body_obj = json.loads(body) if body else {}
            except ValueError:
                body_obj = {}

            if body_obj.get('meta',
                            {}).get('error_type') == 'APINotFoundError':
                logging.exception(
                    body_obj.get('meta', {}).get('error_message'))
            else:
                raise e
Example #23
def maybe_add_or_delete_source(source_cls, auth_entity, state, **kwargs):
    """Adds or deletes a source if auth_entity is not None.

  Used in each source's oauth-dropins :meth:`Callback.finish()` and
  :meth:`Callback.get()` methods, respectively.

  Args:
    source_cls: source class, e.g. :class:`instagram.Instagram`
    auth_entity: oauth-dropins auth entity
    state: string, OAuth callback state parameter. a JSON serialized dict
      with operation, feature, and an optional callback URL. For deletes,
      it will also include the source key
    kwargs: passed through to the source_cls constructor

  Returns:
    source entity if it was created or updated, otherwise None
  """
    state_obj = util.decode_oauth_state(state)
    operation = state_obj.get('operation', 'add')
    feature = state_obj.get('feature')
    callback = state_obj.get('callback')
    user_url = state_obj.get('user_url')

    logger.debug(
        'maybe_add_or_delete_source with operation=%s, feature=%s, callback=%s',
        operation, feature, callback)
    logins = None

    if operation == 'add':  # this is an add/update
        if not auth_entity:
            # TODO: only show if we haven't already flashed another message?
            # get_flashed_messages() caches so it's dangerous to call to check;
            # use eg session.get('_flashes', []) instead.
            # https://stackoverflow.com/a/17243946/186123
            flash("OK, you're not signed up. Hope you reconsider!")
            if callback:
                callback = util.add_query_params(callback,
                                                 {'result': 'declined'})
                logger.debug(
                    f'user declined adding source, redirect to external callback {callback}'
                )
                redirect(callback)
            else:
                redirect('/')

        logger.info(
            f'{source_cls.__name__}.create_new with {auth_entity.key}, {state}, {kwargs}'
        )
        source = source_cls.create_new(
            auth_entity=auth_entity,
            features=feature.split(',') if feature else [],
            user_url=user_url,
            **kwargs)

        if source:
            # if we're normalizing username case to lower case to make the key
            # id, check if there's an old Source with a capitalized key id, and
            # if so, disable it
            # https://github.com/snarfed/bridgy/issues/884
            if source.USERNAME_KEY_ID and source.username != source.key_id():

                @ndb.transactional()
                def maybe_disable_original():
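                    # (editor's note) running inside an ndb transaction keeps
                    # this read-check-disable sequence atomic, so concurrent
                    # signups can't both leave the old entity enabled.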
                    orig = source_cls.get_by_id(source.username)
                    if orig:
                        logger.info(
                            f'Disabling {orig.bridgy_url()} for lower case {source.bridgy_url()}'
                        )
                        orig.features = []
                        orig.put()

                maybe_disable_original()

            # add to login cookie
            logins = get_logins()
            logins.append(
                Login(path=source.bridgy_path(),
                      site=source.SHORT_NAME,
                      name=source.label_name()))

            if callback:
                callback = util.add_query_params(
                    callback, {
                        'result': 'success',
                        'user': source.bridgy_url(),
                        'key': source.key.urlsafe().decode(),
                    } if source else {'result': 'failure'})
                logger.debug(
                    'finished adding source, redirect to external callback %s',
                    callback)
                redirect(callback, logins=logins)
            elif not source.domains:
                redirect('/edit-websites?' + urllib.parse.urlencode({
                    'source_key': source.key.urlsafe().decode(),
                }), logins=logins)
            else:
                redirect(source.bridgy_url(), logins=logins)

        # no source
        redirect('/')

    else:  # this is a delete
        if auth_entity:
            # TODO: remove from logins cookie
            redirect(
                f'/delete/finish?auth_entity={auth_entity.key.urlsafe().decode()}&state={state}'
            )
        else:
            flash(
                f'If you want to disable, please approve the {source_cls.GR_CLASS.NAME} prompt.'
            )
            source_key = state_obj.get('source')
            if source_key:
                source = ndb.Key(urlsafe=source_key).get()
                if source:
                    redirect(source.bridgy_url())

            redirect('/')
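
For context, here is a minimal sketch of how the OAuth state parameter above
might be built and read back, assuming it is plain JSON as the docstring says.
encode_oauth_state here is a hypothetical counterpart to
util.decode_oauth_state; bridgy's real helpers may differ.

import json

def encode_oauth_state(state_obj):
    # hypothetical inverse of util.decode_oauth_state: serialize the state
    # dict that gets carried through the OAuth redirect
    return json.dumps(state_obj, sort_keys=True)

def decode_oauth_state(state):
    # tolerant decode: an empty or missing param becomes an empty dict
    return json.loads(state) if state else {}

state = encode_oauth_state({
    'operation': 'add',                      # or 'delete'
    'feature': 'listen,publish',             # comma-separated features
    'callback': 'https://example.com/done',  # optional external callback
})
assert decode_oauth_state(state)['operation'] == 'add'
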
Example #24
class Twitter(source.Source):
  """Implements the ActivityStreams API for Twitter.
  """

  DOMAIN = 'twitter.com'
  BASE_URL = 'https://twitter.com/'
  NAME = 'Twitter'
  FRONT_PAGE_TEMPLATE = 'templates/twitter_index.html'

  # HTML snippet for embedding a tweet.
  # https://dev.twitter.com/docs/embedded-tweets
  EMBED_POST = """
  <script async defer src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
  <br />
  <blockquote class="twitter-tweet" lang="en" data-dnt="true">
  <p>%(content)s
  <a href="%(url)s">#</a></p>
  </blockquote>
  """

  def __init__(self, access_token_key, access_token_secret, username=None):
    """Constructor.

    Twitter now requires authentication in v1.1 of their API. You can get an
    OAuth access token by creating an app here: https://dev.twitter.com/apps/new

    Args:
      access_token_key: string, OAuth access token key
      access_token_secret: string, OAuth access token secret
      username: string, optional, the current user. Used in e.g. preview/create.
    """
    self.access_token_key = access_token_key
    self.access_token_secret = access_token_secret
    self.username = username

  def get_actor(self, screen_name=None):
    """Returns a user as a JSON ActivityStreams actor dict.

    Args:
      screen_name: string username. Defaults to the current user.
    """
    if screen_name is None:
      url = API_CURRENT_USER
    else:
      url = API_USER % screen_name
    return self.user_to_actor(self.urlopen(url))
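  # (editor's note) e.g. get_actor() hits API_CURRENT_USER for the signed-in
  # user, while get_actor('example') hits API_USER % 'example'; 'example' is
  # just an illustrative screen name.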

  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None, **kwargs):
    """Fetches posts and converts them to ActivityStreams activities.

    XXX HACK: this is currently hacked for bridgy to NOT pass min_id to the
    request for fetching activity tweets themselves, but to pass it to all of
    the requests for filling in replies, retweets, etc. That's because we want
    to find new replies and retweets of older initial tweets.
    TODO: find a better way.

    See :meth:`source.Source.get_activities_response()` for details. app_id is
    ignored. min_id is translated to Twitter's since_id.

    The code for handling ETags (and 304 Not Modified responses and setting
    If-None-Match) is here, but unused right now since Twitter evidently doesn't
    support ETags. From https://dev.twitter.com/discussions/5800 :
    "I've confirmed with our team that we're not explicitly supporting this
    family of features."

    Likes (ie favorites) are scraped from twitter.com HTML, since Twitter's REST
    API doesn't offer a way to fetch them. You can also get them from the
    Streaming API, though, and convert them with streaming_event_to_object().
    https://dev.twitter.com/docs/streaming-apis/messages#Events_event

    Shares (ie retweets) are fetched with a separate API call per tweet:
    https://dev.twitter.com/docs/api/1.1/get/statuses/retweets/%3Aid

    However, retweets are only fetched for the first 15 tweets that have them,
    since that's Twitter's rate limit per 15 minute window. :(
    https://dev.twitter.com/docs/rate-limiting/1.1/limits

    Quote tweets are fetched by searching for the possibly quoted tweet's ID,
    using the OR operator to search up to 5 IDs at a time, and then checking
    the quoted_status_id_str field:
    https://dev.twitter.com/overview/api/tweets#quoted_status_id_str

    Use the group_id @self to retrieve a user_id’s timeline. If user_id is None
    or @me, it will return tweets for the current API user.

    group_id can be used to specify the slug of a list for which to return tweets.
    By default the current API user’s lists will be used, but lists owned by other
    users can be fetched by explicitly passing a username to user_id, e.g. to
    fetch tweets from the list @exampleuser/example-list you would call
    get_activities(user_id='exampleuser', group_id='example-list').

    Twitter replies default to including a mention of the user they're replying
    to, which overloads mentions a bit. When fetch_mentions is True, we determine
    that a tweet mentions the current user if it @-mentions their username and:

    * it's not a reply, OR
    * it's a reply, but not to the current user, AND
      * the tweet it's replying to doesn't @-mention the current user
    """
    if group_id is None:
      group_id = source.FRIENDS

    # nested function for lazily fetching the user object if we need it
    user = []
    def _user():
      if not user:
        user.append(self.urlopen(API_USER % user_id if user_id else API_CURRENT_USER))
      return user[0]
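    # (editor's note) the single-element list above stands in for `nonlocal`,
    # which Python 2 lacks: the closure mutates `user` in place, caching the
    # fetched user object across _user() calls.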

    if count:
      count += start_index

    activities = []
    if activity_id:
      tweets = [self.urlopen(API_STATUS % activity_id)]
      total_count = len(tweets)
    else:
      if group_id == source.SELF:
        if user_id in (None, source.ME):
          user_id = ''
        url = API_USER_TIMELINE % {
          'count': count,
          'screen_name': user_id,
        }

        if fetch_likes:
          liked = self.urlopen(API_FAVORITES % user_id)
          if liked:
            activities += [self._make_like(tweet, _user()) for tweet in liked]
      elif group_id == source.SEARCH:
        url = API_SEARCH % {
          'q': urllib.quote_plus(search_query.encode('utf-8')),
          'count': count,
        }
      elif group_id in (source.FRIENDS, source.ALL):
        url = API_TIMELINE % count
      else:
        if not user_id:
          user_id = _user().get('screen_name')
        url = API_LIST_TIMELINE % {
          'count': count,
          'slug': group_id,
          'owner_screen_name': user_id,
        }

      headers = {'If-None-Match': etag} if etag else {}
      total_count = None
      try:
        resp = self.urlopen(url, headers=headers, parse_response=False)
        etag = resp.info().get('ETag')
        tweet_obj = source.load_json(resp.read(), url)
        if group_id == source.SEARCH:
          tweet_obj = tweet_obj.get('statuses', [])
        tweets = tweet_obj[start_index:]
      except urllib2.HTTPError, e:
        if e.code == 304:  # Not Modified, from a matching ETag
          tweets = []
        else:
          raise

    # batch get memcached counts of favorites and retweets for all tweets
    cached = {}
    if cache is not None:
      keys = itertools.product(('ATR', 'ATF'), [t['id_str'] for t in tweets])
      cached = cache.get_multi('%s %s' % (prefix, id) for prefix, id in keys)
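    # (editor's note) keys look like 'ATR 1234567890' / 'ATF 1234567890';
    # ATR holds a tweet's last-seen retweet count and ATF presumably its
    # favorite count, so an unchanged count lets us skip refetching.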
    # only update the cache at the end, in case we hit an error before then
    cache_updates = {}

    if fetch_shares:
      retweet_calls = 0
      for tweet in tweets:
        # don't fetch retweets if the tweet is itself a retweet or if the
        # author's account is protected. /statuses/retweets 403s with error
        # code 200 (?!) for protected accounts.
        # https://github.com/snarfed/bridgy/issues/688
        if tweet.get('retweeted') or tweet.get('user', {}).get('protected'):
          continue
        elif retweet_calls >= RETWEET_LIMIT:
          logging.warning("Hit Twitter's retweet rate limit (%d) with more to "
                          "fetch! Results will be incomplete!" % RETWEET_LIMIT)
          break

        # store retweets in the 'retweets' field, which is handled by
        # tweet_to_activity().
        # TODO: make these HTTP requests asynchronous. not easy since we don't
        # (yet) require threading support or use a non-blocking HTTP library.
        #
        # twitter limits this API endpoint to one call per minute per user,
        # which is easy to hit, so we stop before we hit that.
        # https://dev.twitter.com/docs/rate-limiting/1.1/limits
        #
        # can't use the statuses/retweets_of_me endpoint because it only
        # returns the original tweets, not the retweets or their authors.
        id = tweet['id_str']
        count = tweet.get('retweet_count')
        if count and count != cached.get('ATR ' + id):
          url = API_RETWEETS % id
          if min_id is not None:
            url = util.add_query_params(url, {'since_id': min_id})

          try:
            tweet['retweets'] = self.urlopen(url)
          except urllib2.URLError, e:
            code, _ = util.interpret_http_exception(e)
            if code != '404':  # 404 means the original tweet was deleted
              raise

          retweet_calls += 1
          cache_updates['ATR ' + id] = count