Example #1
  def urlopen(self, url, parse_response=True, **kwargs):
    """Wraps urllib2.urlopen() and adds an OAuth signature.
    """
    if not url.startswith('http'):
      url = API_BASE + url

    def request():
      resp = twitter_auth.signed_urlopen(
        url, self.access_token_key, self.access_token_secret, **kwargs)
      if parse_response:
        try:
          return json.loads(resp.read())
        except (ValueError, TypeError):
          msg = 'Non-JSON response! (Synthetic HTTP error generated by Bridgy.)'
          logging.exception(msg)
          raise urllib2.HTTPError(url, 503, msg, {}, None)
      else:
        return resp

    if ('data' not in kwargs and not
        (isinstance(url, urllib2.Request) and url.get_method() == 'POST')):
      # this is a GET. retry up to 3x if we deadline.
      for attempt in range(RETRIES):
        try:
          return request()
        except httplib.HTTPException, e:
          if not str(e).startswith('Deadline exceeded'):
            raise
        except socket.error, e:
          pass
        except urllib2.HTTPError, e:
          code, body = util.interpret_http_exception(e)
          if code is None or int(code) / 100 != 5:
            raise
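
All of these examples call util.interpret_http_exception() from the oauth-dropins webutil package. Its implementation isn't shown on this page; as a rough sketch of the contract the snippets rely on, it normalizes an HTTP-related exception into a (status code, body) pair, with the status code as a string (hence checks like code in ('404', '500')), and logs the details as a side effect:

# A minimal sketch (assumed, not the real implementation) of the helper
# contract these examples rely on. The real util.interpret_http_exception()
# lives in oauth-dropins' webutil and handles many more exception types.
import logging
import urllib.error

def interpret_http_exception(e):
    """Returns (status code as a string or None, response body or None)."""
    code = body = None
    if isinstance(e, urllib.error.HTTPError):
        code = str(e.code)
        try:
            body = e.read().decode('utf-8')
        except Exception:
            body = None
    if code:
        logging.warning('Got HTTP %s: %s', code, body)
    return code, body
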
Example #2
 def set_tags(req_id, resp, exc):
     if exc is None:
         tags = obj.setdefault('tags', [])
         for person in resp.get('items', []):
             person_id = person['id']
             person['id'] = self.tag_uri(person['id'])
             tags.append(self.postprocess_object({
                 'id': self.tag_uri('%s_%sd_by_%s' % (id, verb, person_id)),
                 'objectType': 'activity',
                 'verb': verb,
                 'url': obj.get('url') + '#%sd-by-%s' % (verb, person_id),
                 'object': {'url': obj.get('url')},
                 'author': person,
             }))
         cache_updates[cache_key] = count
     else:
         obj.pop(collection, None)
         code, body = util.interpret_http_exception(exc)
         if code not in ('404', '500', '502', '504'):  # these happen; ignore them
             raise exc
Example #3
  def urlopen(self, url, parse_response=True, **kwargs):
    """Wraps urllib2.urlopen() and adds an OAuth signature.
    """
    if not url.startswith('http'):
      url = API_BASE + url

    def request():
      resp = twitter_auth.signed_urlopen(
        url, self.access_token_key, self.access_token_secret, **kwargs)
      if parse_response:
        try:
          return json.loads(resp.read())
        except (ValueError, TypeError):
          msg = 'Non-JSON response! (Synthetic HTTP error generated by Bridgy.)'
          logging.exception(msg)
          raise urllib2.HTTPError(url, 503, msg, {}, None)
      else:
        return resp

    if ('data' not in kwargs and not
        (isinstance(url, urllib2.Request) and url.get_method() == 'POST')):
      # this is a GET. retry up to 3x if we deadline.
      for attempt in range(RETRIES):
        try:
          return request()
        except httplib.HTTPException, e:
          if not str(e).startswith('Deadline exceeded'):
            raise
        except socket.error, e:
          pass
        except urllib2.HTTPError, e:
          code, body = util.interpret_http_exception(e)
          if code is None or int(code) / 100 != 5:
            raise
Example #4
  def urlopen(self, url, parse_response=True, **kwargs):
    """Wraps :func:`urllib2.urlopen()` and adds an OAuth signature."""
    if not url.startswith('http'):
      url = API_BASE + url

    def request():
      resp = twitter_auth.signed_urlopen(
        url, self.access_token_key, self.access_token_secret, **kwargs)
      return source.load_json(resp.read(), url) if parse_response else resp

    if ('data' not in kwargs and not
        (isinstance(url, urllib2.Request) and url.get_method() == 'POST')):
      # this is a GET. retry up to 3x if we deadline.
      for attempt in range(RETRIES):
        try:
          return request()
        except httplib.HTTPException, e:
          if not str(e).startswith('Deadline exceeded'):
            raise
        except socket.error, e:
          pass
        except urllib2.HTTPError, e:
          code, body = util.interpret_http_exception(e)
          if code is None or int(code) / 100 != 5:
            raise
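
The retry loop in Examples #1, #3, and #4 only retries idempotent GETs, and only on App Engine deadline errors, socket errors, and HTTP 5xx responses. The same policy can be written as a standalone helper; this is a hypothetical sketch, not part of granary:

# Hypothetical standalone version of the retry policy above: retry a
# no-argument callable up to RETRIES times on deadlines, socket errors,
# and HTTP 5xx, and re-raise everything else immediately.
import http.client
import socket
import urllib.error

RETRIES = 3

def retry_transient(request):
    for attempt in range(RETRIES):
        try:
            return request()
        except http.client.HTTPException as e:
            if not str(e).startswith('Deadline exceeded'):
                raise
        except socket.error:
            pass  # transient; try again
        except urllib.error.HTTPError as e:
            if e.code // 100 != 5:
                raise  # 4xx etc. are permanent; don't retry
    return request()  # final attempt; let any exception propagate
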
Example #5
    def user_to_actor(self, resp):
        """Convert a Flickr user dict into an ActivityStreams actor.
    """
        person = resp.get('person', {})
        username = person.get('username', {}).get('_content')
        obj = util.trim_nulls({
            'objectType': 'person',
            'displayName': person.get('realname', {}).get('_content') or username,
            'image': {
                'url': self.get_user_image(person.get('iconfarm'),
                                           person.get('iconserver'),
                                           person.get('nsid')),
            },
            'id': self.tag_uri(username),
            # numeric_id is our own custom field that always has the source's
            # numeric user id, if available.
            'numeric_id': person.get('nsid'),
            'location': {
                'displayName': person.get('location', {}).get('_content'),
            },
            'username': username,
            'description': person.get('description', {}).get('_content'),
        })

        # fetch profile page to get url(s)
        profile_url = person.get('profileurl', {}).get('_content')
        if profile_url:
            try:
                profile_json = util.fetch_mf2(url=profile_url)
                urls = profile_json.get('rels', {}).get('me', [])
                if urls:
                    obj['url'] = urls[0]
                if len(urls) > 1:
                    obj['urls'] = [{'value': u} for u in urls]
            except requests.RequestException as e:
                util.interpret_http_exception(e)
                logging.warning('could not fetch user homepage %s',
                                profile_url)

        return self.postprocess_object(obj)
Example #6
    def _api(self, fn, path, return_json=True, *args, **kwargs):
        headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = 'Bearer ' + self.access_token

        url = urllib.parse.urljoin(self.instance, path)
        resp = fn(url, *args, **kwargs)
        try:
            resp.raise_for_status()
        except BaseException as e:
            util.interpret_http_exception(e)
            raise

        if not return_json:
            return resp
        if fn == util.requests_delete:
            return {}
        else:
            return json_loads(resp.text)
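
A typical call site for this _api() wrapper, from inside another method of the same class, might look like the following; the paths are made up, and util.requests_get is assumed to be a thin wrapper around the requests library, as in the other examples:

# Hypothetical usage of the _api() wrapper above, inside the same class.
status = self._api(util.requests_get, '/api/v1/statuses/12345')  # parsed JSON
resp = self._api(util.requests_get, '/api/v1/timelines/home',
                 return_json=False)                               # raw response
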
Example #7
 def set_comments(req_id, resp, exc, activity=None):
   obj = activity.get('object', {})
   if exc is None:
     obj['replies']['items'] = [
       self.postprocess_comment(c) for c in resp['items']]
     cache_updates['AGC ' + activity['id']] = obj['replies']['totalItems']
   else:
     obj.pop('replies', None)
     code, body = util.interpret_http_exception(exc)
     if code not in ('404', '500'):  # these happen; ignore them
       raise exc
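
The (req_id, resp, exc) signature in Examples #2, #7, and #9 is a batch-request callback: the client issues several HTTP requests at once and invokes the callback once per response, passing any exception as an argument instead of raising it. A hedged sketch of the driver side of that pattern:

# Illustrative driver for (req_id, resp, exc) callbacks like set_comments()
# above; a real batch HTTP client would fire the requests concurrently.
def run_batch(fetchers_by_id, callback, **kwargs):
    for req_id, fetch in fetchers_by_id.items():
        try:
            resp = fetch()
        except Exception as e:
            callback(req_id, None, e, **kwargs)
        else:
            callback(req_id, resp, None, **kwargs)
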
Example #8
  def handle_exception(self, e, debug):
    code, text = util.interpret_http_exception(e)
    if code in ('401', '403'):
      self.response.headers['Content-Type'] = 'application/atom+xml'
      host_url = self.request.host_url + '/'
      self.response.out.write(atom.activities_to_atom([{
        'object': {
          'url': self.request.url,
          'content': 'Your twitter-atom login isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
          },
        }], {}, title='twitter-atom', host_url=host_url,
        request_url=self.request.path_url))
      return

    return handlers.handle_exception(self, e, debug)
Example #9
 def set_comments(req_id, resp, exc, activity=None):
     obj = activity.get('object', {})
     if exc is None:
          obj['replies']['items'] = [
              self.postprocess_comment(c) for c in resp['items']]
          cache_updates['AGC ' + activity['id']] = obj['replies']['totalItems']
     else:
         obj.pop('replies', None)
         code, body = util.interpret_http_exception(exc)
          if code not in ('404', '500', '502', '504'):  # these happen; ignore them
             raise exc
Example #10
  def get(self):
    cookie = 'sessionid=%s' % urllib.quote(
      util.get_required_param(self, 'sessionid').encode('utf-8'))
    logging.info('Fetching with Cookie: %s', cookie)

    host_url = self.request.host_url + '/'
    ig = instagram.Instagram()
    try:
      resp = ig.get_activities_response(group_id=source.FRIENDS, scrape=True,
                                        cookie=cookie)
    except Exception as e:
      status, text = util.interpret_http_exception(e)
      if status in ('401', '403'):
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(atom.activities_to_atom([{
          'object': {
            'url': self.request.url,
            'content': 'Your instagram-atom cookie isn\'t working. <a href="%s">Click here to regenerate your feed!</a>' % host_url,
            },
          }], {}, title='instagram-atom', host_url=host_url,
          request_url=self.request.path_url))
        return
      elif status:
        self.response.status = 502 if int(status) // 100 == 5 else status
      elif util.is_connection_failure(e):
        self.response.status = 504  # HTTP 504 Gateway Timeout
      else:
        logging.exception('oops!')
        self.response.status = 500

      if isinstance(text, str):
        text = text.decode('utf-8')
      self.response.text = text or u'Unknown error.'
      return

    actor = resp.get('actor')
    if actor:
      logging.info('Logged in as %s (%s)',
                   actor.get('username'), actor.get('displayName'))
    else:
      logging.warning("Couldn't determine Instagram user!")

    title = 'instagram-atom feed for %s' % ig.actor_name(actor)
    self.response.headers['Content-Type'] = 'application/atom+xml'
    self.response.out.write(atom.activities_to_atom(
      resp.get('items', []), actor, title=title, host_url=host_url,
      request_url=self.request.path_url, xml_base='https://www.instagram.com/'))
Example #11
def cookie():
    cookie = 'sessionid=%s' % urllib.parse.quote(
        request.args['sessionid'].encode('utf-8'))
    logging.info('Fetching with Cookie: %s', cookie)

    ig = instagram.Instagram()
    try:
        resp = ig.get_activities_response(group_id=source.FRIENDS,
                                          scrape=True,
                                          cookie=cookie)
    except Exception as e:
        status, text = util.interpret_http_exception(e)
        if status in ('403', ):
            data = atom.activities_to_atom([{
                'object': {
                    'url': request.url,
                    'content': 'Your instagram-atom cookie isn\'t working. '
                               '<a href="%s">Click here to regenerate your feed!</a>'
                               % request.host_url,
                },
            }], {}, title='instagram-atom', host_url=request.host_url,
                request_url=request.url)
            return data, {'Content-Type': 'application/atom+xml'}
        elif status == '401':
            # IG returns 401 sometimes as a form of rate limiting or bot detection
            return 'Sorry, Instagram is rate limiting us', 429
        elif status:
            return text, status
        else:
            logging.exception('oops!')
            return '', 500

    actor = resp.get('actor')
    if actor:
        logging.info('Logged in as %s (%s)', actor.get('username'),
                     actor.get('displayName'))
    else:
        logging.warning("Couldn't determine Instagram user!")

    activities = resp.get('items', [])
    return render(activities, actor=actor)
Example #12
 def set_tags(req_id, resp, exc):
   if exc is None:
     tags = obj.setdefault('tags', [])
     for person in resp.get('items', []):
       person_id = person['id']
       person['id'] = self.tag_uri(person['id'])
       tags.append(self.postprocess_object({
         'id': self.tag_uri('%s_%sd_by_%s' % (id, verb, person_id)),
         'objectType': 'activity',
         'verb': verb,
         'url': obj.get('url'),
         'object': {'url': obj.get('url')},
         'author': person,
         }))
     cache_updates[cache_key] = count
   else:
     obj.pop(collection, None)
     code, body = util.interpret_http_exception(exc)
     if code not in ('404', '500'):  # these happen; ignore them
       raise exc
Example #13
    def get(self):
        cookie = 'sessionid=%s' % urllib.quote(
            util.get_required_param(self, 'sessionid').encode('utf-8'))
        logging.info('Fetching with Cookie: %s', cookie)

        ig = instagram.Instagram()
        try:
            resp = ig.get_activities_response(group_id=source.FRIENDS,
                                              scrape=True,
                                              cookie=cookie)
        except Exception as e:
            status, text = util.interpret_http_exception(e)
            if status:
                self.response.status = 502 if status == '500' else status
            elif util.is_connection_failure(e):
                self.response.status = 504  # HTTP 504 Gateway Timeout
            else:
                logging.exception('oops!')
                self.response.status = 500

            if isinstance(text, str):
                text = text.decode('utf-8')
            self.response.text = text or u'Unknown error.'
            return

        actor = resp.get('actor')
        if actor:
            logging.info('Logged in as %s (%s)', actor.get('username'),
                         actor.get('displayName'))
        else:
            logging.warning("Couldn't determine Instagram user!")

        title = 'instagram-atom feed for %s' % ig.actor_name(actor)
        self.response.headers['Content-Type'] = 'application/atom+xml'
        self.response.out.write(
            atom.activities_to_atom(resp.get('items', []),
                                    actor,
                                    title=title,
                                    host_url=self.request.host_url + '/',
                                    request_url=self.request.path_url,
                                    xml_base='https://www.instagram.com/'))
Example #14
def background_handle_exception(handler, e, debug):
  """Common exception handler for background tasks.

  Catches failed outbound HTTP requests and returns HTTP 304.

  Install with eg:

  class MyHandler(webapp2.RequestHandler):
    handle_exception = util.background_handle_exception
    ...
  """
  transients = getattr(handler, 'TRANSIENT_ERROR_HTTP_CODES', ())
  source = getattr(handler, 'source', None)
  if source:
    transients += source.RATE_LIMIT_HTTP_CODES + source.TRANSIENT_ERROR_HTTP_CODES

  code, body = util.interpret_http_exception(e)
  if ((code and int(code) // 100 == 5) or code in transients or
      util.is_connection_failure(e)):
    logging.error('Marking as error and finishing. %s: %s\n%s', code, body, e)
    handler.abort(ERROR_HTTP_RETURN_CODE)
  else:
    raise
Example #15
    def _create(self, obj, preview=False, include_link=source.OMIT_LINK, ignore_formatting=False):
        if preview not in (False, True):
            return self.return_error('Invalid Preview parameter, must be True or False')
        verb = source.object_type(obj)
        response = None
        if verb == 'rsvp-yes':
            response = 'yes'
        elif verb == 'rsvp-no':
            response = 'no'
        elif verb == 'rsvp-maybe' or verb == 'rsvp-interested':
            return self.return_error('Meetup.com does not support %(verb)s' % {'verb': verb})
        else:
            return self.return_error('Meetup.com syndication does not support %(verb)s' % {'verb': verb})

        # parse the in-reply-to out
        url_containers = self.base_object(obj)
        if not url_containers:
            return self.return_error('RSVP not to Meetup.com or missing in-reply-to')
        if 'url' not in url_containers:
            return self.return_error('missing an in-reply-to')

        event_url = url_containers['url']
        if not event_url:
            return self.return_error('missing an in-reply-to')

        event_url = self.URL_CANONICALIZER(event_url)
        if not event_url:
            return self.return_error('Invalid Meetup.com event URL')

        parsed_url_part = EVENT_URL_RE.match(event_url)
        if not parsed_url_part:
            return self.return_error('Invalid Meetup.com event URL')

        urlname = parsed_url_part.group(2)
        event_id = parsed_url_part.group(3)

        if preview:
            return source.creation_result(description=Meetup.embed_post(obj))

        post_url = obj.get('url')
        if not post_url:
            return self.return_error('Missing the post\'s url')

        create_resp = {
                'url': '%(event_url)s#rsvp-by-%(url)s' % {
                    'event_url': event_url,
                    'url': urllib.parse.quote_plus(post_url),
                    },
                'type': 'rsvp'
                }

        try:
            resp = self.post_rsvp(urlname, event_id, response)
            logging.debug('Response: %s %s', resp.getcode(), resp.read())
            return source.creation_result(create_resp)
        except urllib.error.HTTPError as e:
            code, body = util.interpret_http_exception(e)
            try:
                msg = json_loads(body)['errors'][0]['message']
            except BaseException:
                msg = body
            return self.return_error(f'From Meetup: {code} error: {msg}')
Example #16
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None,
                              scrape=False, cookie=None, **kwargs):
    """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Mentions are never fetched or included because the API doesn't support
    searching for them.
    https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

    Shares are never fetched or included, since there is no share feature.

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    May populate a custom 'ig_like_count' property in media objects. (Currently
    only when scraping.)

    Args:
      scrape: if True, scrapes HTML from instagram.com instead of using the API.
        Populates the user's actor object in the 'actor' response field.
        Useful for apps that haven't yet been approved in the new permissions
        approval process. Currently only supports group_id=SELF. Also supports
        passing a shortcode as activity_id as well as the internal API id.
        http://developers.instagram.com/post/133424514006/instagram-platform-update
      cookie: string, only used if scrape=True
      **: see :meth:`Source.get_activities_response`

    Raises:
      InstagramAPIError
    """
    if scrape or self.scrape:
      if not (activity_id or
              (group_id == source.SELF and user_id) or
              (group_id == source.FRIENDS and cookie)):
        raise NotImplementedError(
          'Scraping only supports activity_id, user_id and group_id=@self, or cookie and group_id=@friends.')
      return self._scrape(user_id=user_id, activity_id=activity_id, cookie=cookie,
                          fetch_extras=fetch_replies or fetch_likes, cache=cache)

    if user_id is None:
      user_id = 'self'
    if group_id is None:
      group_id = source.FRIENDS

    if search_query:
      if search_query.startswith('#'):
        search_query = search_query[1:]
      else:
        raise NotImplementedError(
          'Instagram only supports search over hashtags, so search_query must '
          'begin with the # character.')

    # TODO: paging
    media = []
    kwargs = {}
    if min_id is not None:
      kwargs['min_id'] = min_id

    activities = []
    try:
      media_url = (API_MEDIA_URL % activity_id if activity_id else
                   API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                   API_MEDIA_POPULAR_URL if group_id == source.ALL else
                   API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                   API_USER_FEED_URL if group_id == source.FRIENDS else None)
      assert media_url
      media = self.urlopen(util.add_query_params(media_url, kwargs))
      if media:
        if activity_id:
          media = [media]
        activities += [self.media_to_activity(m) for m in util.trim_nulls(media)]

      if group_id == source.SELF and fetch_likes:
        # add the user's own likes
        liked = self.urlopen(
          util.add_query_params(API_USER_LIKES_URL % user_id, kwargs))
        if liked:
          user = self.urlopen(API_USER_URL % user_id)
          activities += [self.like_to_object(user, l['id'], l['link'])
                         for l in liked]

    except urllib2.HTTPError, e:
      code, body = util.interpret_http_exception(e)
      # instagram api should give us back a json block describing the
      # error. but if it's an error for some other reason, it probably won't
      # be properly formatted json.
      try:
        body_obj = json.loads(body) if body else {}
      except ValueError:
        body_obj = {}

      if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
        logging.exception(body_obj.get('meta', {}).get('error_message'))
      else:
        raise e
Example #17
    def get_activities_response(self,
                                user_id=None,
                                group_id=None,
                                app_id=None,
                                activity_id=None,
                                start_index=0,
                                count=0,
                                etag=None,
                                min_id=None,
                                cache=None,
                                fetch_replies=False,
                                fetch_likes=False,
                                fetch_shares=False,
                                fetch_events=False,
                                fetch_mentions=False,
                                search_query=None,
                                scrape=False,
                                cookie=None,
                                ignore_rate_limit=False,
                                **kwargs):
        """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Mentions are never fetched or included because the API doesn't support
    searching for them.
    https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

    Shares are never fetched or included, since there is no share feature.

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    May populate a custom 'ig_like_count' property in media objects. (Currently
    only when scraping.)

    Args:
      scrape: if True, scrapes HTML from instagram.com instead of using the API.
        Populates the user's actor object in the 'actor' response field.
        Useful for apps that haven't yet been approved in the new permissions
        approval process. Currently only supports group_id=SELF. Also supports
        passing a shortcode as activity_id as well as the internal API id.
        http://developers.instagram.com/post/133424514006/instagram-platform-update
      cookie: string, only used if scrape=True
      ignore_rate_limit: boolean, for scraping, always make an HTTP request,
        even if we've been rate limited recently
      **: see :meth:`Source.get_activities_response`

    Raises:
      InstagramAPIError
    """
        if group_id is None:
            group_id = source.FRIENDS

        if scrape or self.scrape:
            if not (activity_id or (group_id == source.SELF and user_id) or
                    (group_id == source.FRIENDS and cookie)):
                raise NotImplementedError(
                    'Scraping only supports activity_id, user_id and group_id=@self, or cookie and group_id=@friends.'
                )
            elif fetch_likes and not cookie and not self.cookie:
                raise NotImplementedError('Scraping likes requires a cookie.')

            # cache rate limited responses and short circuit
            global _last_rate_limited, _last_rate_limited_exc
            now = datetime.datetime.now()
            if not ignore_rate_limit and _last_rate_limited:
                retry = _last_rate_limited + RATE_LIMIT_BACKOFF
                if now < retry:
                    logging.info(
                        'Remembered rate limit at %s, waiting until %s to try again.',
                        _last_rate_limited, retry)
                    assert _last_rate_limited_exc
                    raise _last_rate_limited_exc

            try:
                return self._scrape(user_id=user_id,
                                    group_id=group_id,
                                    activity_id=activity_id,
                                    count=count,
                                    cookie=cookie,
                                    fetch_extras=fetch_replies or fetch_likes,
                                    cache=cache)
            except Exception as e:
                code, body = util.interpret_http_exception(e)
                if not ignore_rate_limit and code in ('429', '503'):
                    logging.info('Got rate limited! Remembering for %s',
                                 str(RATE_LIMIT_BACKOFF))
                    _last_rate_limited = now
                    _last_rate_limited_exc = e
                raise

        if user_id is None:
            user_id = 'self'

        if search_query:
            if search_query.startswith('#'):
                search_query = search_query[1:]
            else:
                raise ValueError(
                    'Instagram only supports search over hashtags, so search_query must '
                    'begin with the # character.')

        # TODO: paging
        media = []
        kwargs = {}
        if min_id is not None:
            kwargs['min_id'] = min_id

        activities = []
        try:
            media_url = (API_MEDIA_URL % activity_id if activity_id else
                         API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                         API_MEDIA_POPULAR_URL if group_id == source.ALL else
                         API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                         API_USER_FEED_URL if group_id == source.FRIENDS else None)
            assert media_url
            media = self.urlopen(util.add_query_params(media_url, kwargs))
            if media:
                if activity_id:
                    media = [media]
                activities += [
                    self.media_to_activity(m) for m in util.trim_nulls(media)
                ]

            if group_id == source.SELF and fetch_likes:
                # add the user's own likes
                liked = self.urlopen(
                    util.add_query_params(API_USER_LIKES_URL % user_id,
                                          kwargs))
                if liked:
                    user = self.urlopen(API_USER_URL % user_id)
                    activities += [
                        self.like_to_object(user, l['id'], l['link'])
                        for l in liked
                    ]

        except urllib_error.HTTPError as e:
            code, body = util.interpret_http_exception(e)
            # instagram api should give us back a json block describing the
            # error. but if it's an error for some other reason, it probably won't
            # be properly formatted json.
            try:
                body_obj = json.loads(body) if body else {}
            except ValueError:
                body_obj = {}

            if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
                logging.warning(body_obj.get('meta', {}).get('error_message'),
                                exc_info=True)
            else:
                raise e

        return self.make_activities_base_response(activities)
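
The module-level _last_rate_limited / _last_rate_limited_exc pair in this example memoizes a rate-limited response so later scrape attempts short-circuit until the backoff window passes. Stripped of the Instagram specifics, the pattern looks roughly like this (illustrative names, not granary's API):

# Generic sketch of the rate-limit memoization used above.
import datetime

RATE_LIMIT_BACKOFF = datetime.timedelta(minutes=60)  # assumed backoff window
_last_rate_limited = None      # when we last got rate limited
_last_rate_limited_exc = None  # exception to re-raise while backing off

def fetch_with_backoff(fetch, is_rate_limited):
    """fetch: no-arg callable. is_rate_limited: predicate on an exception."""
    global _last_rate_limited, _last_rate_limited_exc
    now = datetime.datetime.now()
    if _last_rate_limited and now < _last_rate_limited + RATE_LIMIT_BACKOFF:
        raise _last_rate_limited_exc  # short-circuit inside the backoff window
    try:
        return fetch()
    except Exception as e:
        if is_rate_limited(e):
            _last_rate_limited = now
            _last_rate_limited_exc = e
        raise
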
Example #18
    if fetch_replies:
      self.fetch_replies(tweet_activities, min_id=min_id)

    if fetch_likes:
      for tweet, activity in zip(tweets, tweet_activities):
        id = tweet['id_str']
        count = tweet.get('favorite_count')
        if count and count != cached.get('ATF ' + id):
          url = HTML_FAVORITES_URL % id
          logging.debug('Fetching %s', url)
          try:
            html = json.loads(urllib2.urlopen(url, timeout=HTTP_TIMEOUT).read()
                              ).get('htmlUsers', '')
          except urllib2.URLError, e:
            util.interpret_http_exception(e)  # just log it
            continue
          likes = self.favorites_html_to_likes(tweet, html)
          activity['object'].setdefault('tags', []).extend(likes)
          cache_updates['ATF ' + id] = count

    activities += tweet_activities
    response = self.make_activities_base_response(activities)
    response.update({'total_count': total_count, 'etag': etag})
    if cache_updates and cache is not None:
      cache.set_multi(cache_updates)
    return response

  def fetch_replies(self, activities, min_id=None):
    """Fetches and injects Twitter replies into a list of activities, in place.
Example #19
class Twitter(source.Source):
    """Implements the ActivityStreams API for Twitter.
  """

    DOMAIN = 'twitter.com'
    NAME = 'Twitter'
    FRONT_PAGE_TEMPLATE = 'templates/twitter_index.html'

    # HTML snippet for embedding a tweet.
    # https://dev.twitter.com/docs/embedded-tweets
    EMBED_POST = """
  <script async defer src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
  <br />
  <blockquote class="twitter-tweet" lang="en" data-dnt="true">
  <p><a href="%(url)s">%(content)s</a></p>
  </blockquote>
  """

    def __init__(self, access_token_key, access_token_secret):
        """Constructor.

    Twitter now requires authentication in v1.1 of their API. You can get an
    OAuth access token by creating an app here: https://dev.twitter.com/apps/new

    Args:
      access_token_key: string, OAuth access token key
      access_token_secret: string, OAuth access token secret
    """
        self.access_token_key = access_token_key
        self.access_token_secret = access_token_secret

    def get_actor(self, screen_name=None):
        """Returns a user as a JSON ActivityStreams actor dict.

    Args:
      screen_name: string username. Defaults to the current user.
    """
        if screen_name is None:
            url = API_CURRENT_USER_URL
        else:
            url = API_USER_URL % screen_name
        return self.user_to_actor(self.urlopen(url))

    def get_activities_response(self,
                                user_id=None,
                                group_id=None,
                                app_id=None,
                                activity_id=None,
                                start_index=0,
                                count=0,
                                etag=None,
                                min_id=None,
                                cache=None,
                                fetch_replies=False,
                                fetch_likes=False,
                                fetch_shares=False,
                                fetch_events=False,
                                search_query=None):
        """Fetches posts and converts them to ActivityStreams activities.

    XXX HACK: this is currently hacked for bridgy to NOT pass min_id to the
    request for fetching activity tweets themselves, but to pass it to all of
    the requests for filling in replies, retweets, etc. That's because we want
    to find new replies and retweets of older initial tweets.
    TODO: find a better way.

    See method docstring in source.py for details. app_id is ignored.
    min_id is translated to Twitter's since_id.

    The code for handling ETags (and 304 Not Changed responses and setting
    If-None-Match) is here, but unused right now since Twitter evidently doesn't
    support ETags. From https://dev.twitter.com/discussions/5800 :
    "I've confirmed with our team that we're not explicitly supporting this
    family of features."

    Likes (ie favorites) are scraped from twitter.com HTML, since Twitter's REST
    API doesn't offer a way to fetch them. You can also get them from the
    Streaming API, though, and convert them with streaming_event_to_object().
    https://dev.twitter.com/docs/streaming-apis/messages#Events_event

    Shares (ie retweets) are fetched with a separate API call per tweet:
    https://dev.twitter.com/docs/api/1.1/get/statuses/retweets/%3Aid

    However, retweets are only fetched for the first 15 tweets that have them,
    since that's Twitter's rate limit per 15 minute window. :(
    https://dev.twitter.com/docs/rate-limiting/1.1/limits

    Use the group_id @self to retrieve a user_id’s timeline. If user_id is None
    or @me, it will return tweets for the current API user.

    group_id can be used to specify the slug of a list for which to return tweets.
    By default the current API user’s lists will be used, but lists owned by other
    users can be fetched by explicitly passing a username to user_id, e.g. to
    fetch tweets from the list @exampleuser/example-list you would call
    get_activities(user_id='exampleuser', group_id='example-list').
    """
        activities = []
        if activity_id:
            tweets = [self.urlopen(API_STATUS_URL % activity_id)]
            total_count = len(tweets)
        else:
            if group_id == source.SELF:
                if user_id in (None, source.ME):
                    url = API_SELF_TIMELINE_URL % (count + start_index)
                else:
                    url = API_USER_TIMELINE_URL % {
                        'count': count + start_index,
                        'screen_name': user_id,
                    }

                if fetch_likes:
                    liked = self.urlopen(API_FAVORITES_URL % (user_id or ''))
                    if liked:
                        user = self.urlopen(API_USER_URL % user_id if user_id
                                            else API_CURRENT_USER_URL)
                        activities += [
                            self._make_like(tweet, user) for tweet in liked
                        ]
            elif group_id == source.SEARCH:
                url = API_SEARCH_URL % {
                    'q': urllib.quote_plus(search_query),
                    'count': count + start_index,
                }
            elif group_id in (None, source.FRIENDS, source.ALL):
                url = API_TIMELINE_URL % (count + start_index)
            else:
                url = API_LIST_TIMELINE_URL % {
                    'count': count + start_index,
                    'slug': group_id,
                    'owner_screen_name': user_id or self.get_actor().get('username'),
                }

            headers = {'If-None-Match': etag} if etag else {}
            total_count = None
            try:
                resp = self.urlopen(url, headers=headers, parse_response=False)
                etag = resp.info().get('ETag')
                tweet_obj = json.loads(resp.read())
                if group_id == source.SEARCH:
                    tweet_obj = tweet_obj.get('statuses', [])
                tweets = tweet_obj[start_index:]
            except urllib2.HTTPError, e:
                if e.code == 304:  # Not Modified, from a matching ETag
                    tweets = []
                else:
                    raise

        # batch get memcached counts of favorites and retweets for all tweets
        cached = {}
        if cache is not None:
            keys = itertools.product(('ATR', 'ATF'),
                                     [t['id_str'] for t in tweets])
            cached = cache.get_multi('%s %s' % (prefix, id)
                                     for prefix, id in keys)
        # only update the cache at the end, in case we hit an error before then
        cache_updates = {}

        if fetch_shares:
            retweet_calls = 0
            for tweet in tweets:
                if tweet.get('retweeted'):  # this tweet is itself a retweet
                    continue
                elif retweet_calls >= RETWEET_LIMIT:
                    logging.warning(
                        "Hit Twitter's retweet rate limit (%d) with more to "
                        "fetch! Results will be incomplete!" % RETWEET_LIMIT)
                    break

                # store retweets in the 'retweets' field, which is handled by
                # tweet_to_activity().
                # TODO: make these HTTP requests asynchronous. not easy since we don't
                # (yet) require threading support or use a non-blocking HTTP library.
                #
                # twitter limits this API endpoint to one call per minute per user,
                # which is easy to hit, so we stop before we hit that.
                # https://dev.twitter.com/docs/rate-limiting/1.1/limits
                #
                # can't use the statuses/retweets_of_me endpoint because it only
                # returns the original tweets, not the retweets or their authors.
                id = tweet['id_str']
                count = tweet.get('retweet_count')
                if count and count != cached.get('ATR ' + id):
                    url = API_RETWEETS_URL % id
                    if min_id is not None:
                        url = util.add_query_params(url, {'since_id': min_id})
                    tweet['retweets'] = self.urlopen(url)
                    retweet_calls += 1
                    cache_updates['ATR ' + id] = count

        tweet_activities = [self.tweet_to_activity(t) for t in tweets]

        if fetch_replies:
            self.fetch_replies(tweet_activities, min_id=min_id)

        if fetch_likes:
            for tweet, activity in zip(tweets, tweet_activities):
                id = tweet['id_str']
                count = tweet.get('favorite_count')
                if count and count != cached.get('ATF ' + id):
                    url = HTML_FAVORITES_URL % id
                    logging.debug('Fetching %s', url)
                    try:
                        html = json.loads(urllib2.urlopen(
                            url, timeout=HTTP_TIMEOUT).read()).get('htmlUsers', '')
                    except urllib2.URLError, e:
                        util.interpret_http_exception(e)  # just log it
                        continue
                    likes = self.favorites_html_to_likes(tweet, html)
                    activity['object'].setdefault('tags', []).extend(likes)
                    cache_updates['ATF ' + id] = count
Example #20
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None,
                              public_only=True, **kwargs):
    """Fetches issues and comments and converts them to ActivityStreams activities.

    See :meth:`Source.get_activities_response` for details.

    *Not comprehensive!* Uses the notifications API (v3 REST).

    Also note that start_index and count are not currently supported.

    https://developer.github.com/v3/activity/notifications/
    https://developer.github.com/v3/issues/
    https://developer.github.com/v3/issues/comments/

    fetch_likes determines whether emoji reactions are fetched:
    https://help.github.com/articles/about-conversations-on-github#reacting-to-ideas-in-comments

    The notifications API call supports Last-Modified/If-Modified-Since headers
    and 304 Not Changed responses. If provided, etag should be an RFC2822
    timestamp, usually the exact value returned in a Last-Modified header. It
    will also be passed to the comments API endpoint as the since= value
    (converted to ISO 8601).
    """
    if fetch_shares or fetch_events or fetch_mentions or search_query:
      raise NotImplementedError()

    since = None
    etag_parsed = email.utils.parsedate(etag)
    if etag_parsed:
      since = datetime.datetime(*etag_parsed[:6])

    activities = []

    if activity_id:
      parts = tuple(activity_id.split(':'))
      if len(parts) != 3:
        raise ValueError('GitHub activity ids must be of the form USER:REPO:ISSUE_OR_PR')
      try:
        issue = self.rest(REST_API_ISSUE % parts).json()
        activities = [self.issue_to_object(issue)]
      except BaseException as e:
        code, body = util.interpret_http_exception(e)
        if code in ('404', '410'):
          activities = []
        else:
          raise

    else:
      resp = self.rest(REST_API_NOTIFICATIONS,
                       headers={'If-Modified-Since': etag} if etag else None)
      etag = resp.headers.get('Last-Modified')
      notifs = [] if resp.status_code == 304 else resp.json()

      for notif in notifs:
        id = notif.get('id')
        subject_url = notif.get('subject').get('url')
        if not subject_url:
          logging.info('Skipping thread %s, missing subject!', id)
          continue
        split = subject_url.split('/')
        if len(split) <= 2 or split[-2] not in ('issues', 'pulls'):
          logging.info(
            'Skipping thread %s with subject %s, only issues and PRs right now',
            id, subject_url)
          continue

        try:
          issue = self.rest(subject_url).json()
        except requests.HTTPError as e:
          if e.response.status_code in (404, 410):
            util.interpret_http_exception(e)
            continue  # the issue/PR or repo was (probably) deleted
          raise

        obj = self.issue_to_object(issue)

        private = notif.get('repository', {}).get('private')
        if private is not None:
          obj['to'] = [{
            'objectType': 'group',
            'alias': '@private' if private else '@public',
          }]

        comments_url = issue.get('comments_url')
        if fetch_replies and comments_url:
          if since:
            comments_url += '?since=%sZ' % since.isoformat()
          comments = self.rest(comments_url).json()
          comment_objs = list(util.trim_nulls(
            self.comment_to_object(c) for c in comments))
          obj['replies'] = {
            'items': comment_objs,
            'totalItems': len(comment_objs),
          }

        if fetch_likes:
          issue_url = issue['url'].replace('pulls', 'issues')
          reactions = self.rest(issue_url + '/reactions').json()
          obj.setdefault('tags', []).extend(
            self.reaction_to_object(r, obj) for r in reactions)

        activities.append(obj)

    response = self.make_activities_base_response(util.trim_nulls(activities))
    response['etag'] = etag
    return response
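
The etag handling in this example reuses GitHub's Last-Modified timestamp as an opaque etag: send it back as If-Modified-Since, treat a 304 as "nothing new", and store the fresh Last-Modified for next time. A minimal sketch of that round trip with the requests library (token handling is a placeholder):

# Minimal sketch of the Last-Modified / If-Modified-Since round trip above.
import requests

def fetch_notifications(token, last_modified=None):
    headers = {'Authorization': 'token %s' % token}
    if last_modified:
        headers['If-Modified-Since'] = last_modified
    resp = requests.get('https://api.github.com/notifications', headers=headers)
    resp.raise_for_status()
    if resp.status_code == 304:
        return [], last_modified  # nothing changed since last_modified
    return resp.json(), resp.headers.get('Last-Modified')
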
Example #21
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              search_query=None):
    """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Instagram doesn't have a reshare feature, so shares are never included
    since they don't exist. :P

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    Raises: InstagramAPIError
    """
    if user_id is None:
      user_id = 'self'
    if group_id is None:
      group_id = source.FRIENDS

    if search_query:
      if search_query.startswith('#'):
        search_query = search_query[1:]
      else:
        raise NotImplementedError(
          'Instagram only supports search over hashtags, so search_query must '
          'begin with the # character.')

    # TODO: paging
    media = []
    kwargs = {}
    if min_id is not None:
      kwargs['min_id'] = min_id

    activities = []
    try:
      media_url = (API_MEDIA_URL % activity_id if activity_id else
                   API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                   API_MEDIA_POPULAR_URL if group_id == source.ALL else
                   API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                   API_USER_FEED_URL if group_id == source.FRIENDS else None)
      assert media_url
      media = self.urlopen(util.add_query_params(media_url, kwargs))
      if media:
        if activity_id:
          media = [media]
        activities += [self.media_to_activity(m) for m in util.trim_nulls(media)]

      if group_id == source.SELF and fetch_likes:
        # add the user's own likes
        liked = self.urlopen(
          util.add_query_params(API_USER_LIKES_URL % user_id, kwargs))
        if liked:
          user = self.urlopen(API_USER_URL % user_id)
          activities += [self.like_to_object(user, l['id'], l['link'])
                         for l in liked]

    except urllib2.HTTPError, e:
      code, body = util.interpret_http_exception(e)
      # instagram api should give us back a json block describing the
      # error. but if it's an error for some other reason, it probably won't
      # be properly formatted json.
      try:
        body_obj = json.loads(body) if body else {}
      except ValueError:
        body_obj = {}

      if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
        logging.exception(body_obj.get('meta', {}).get('error_message'))
      else:
        raise e
Example #22
class Twitter(source.Source):
  """Implements the ActivityStreams API for Twitter.
  """

  DOMAIN = 'twitter.com'
  BASE_URL = 'https://twitter.com/'
  NAME = 'Twitter'
  FRONT_PAGE_TEMPLATE = 'templates/twitter_index.html'

  # HTML snippet for embedding a tweet.
  # https://dev.twitter.com/docs/embedded-tweets
  EMBED_POST = """
  <script async defer src="//platform.twitter.com/widgets.js" charset="utf-8"></script>
  <br />
  <blockquote class="twitter-tweet" lang="en" data-dnt="true">
  <p>%(content)s
  <a href="%(url)s">#</a></p>
  </blockquote>
  """

  def __init__(self, access_token_key, access_token_secret, username=None):
    """Constructor.

    Twitter now requires authentication in v1.1 of their API. You can get an
    OAuth access token by creating an app here: https://dev.twitter.com/apps/new

    Args:
      access_token_key: string, OAuth access token key
      access_token_secret: string, OAuth access token secret
      username: string, optional, the current user. Used in e.g. preview/create.
    """
    self.access_token_key = access_token_key
    self.access_token_secret = access_token_secret
    self.username = username

  def get_actor(self, screen_name=None):
    """Returns a user as a JSON ActivityStreams actor dict.

    Args:
      screen_name: string username. Defaults to the current user.
    """
    if screen_name is None:
      url = API_CURRENT_USER
    else:
      url = API_USER % screen_name
    return self.user_to_actor(self.urlopen(url))

  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None, **kwargs):
    """Fetches posts and converts them to ActivityStreams activities.

    XXX HACK: this is currently hacked for bridgy to NOT pass min_id to the
    request for fetching activity tweets themselves, but to pass it to all of
    the requests for filling in replies, retweets, etc. That's because we want
    to find new replies and retweets of older initial tweets.
    TODO: find a better way.

    See :meth:`source.Source.get_activities_response()` for details. app_id is
    ignored. min_id is translated to Twitter's since_id.

    The code for handling ETags (and 304 Not Changed responses and setting
    If-None-Match) is here, but unused right now since Twitter evidently doesn't
    support ETags. From https://dev.twitter.com/discussions/5800 :
    "I've confirmed with our team that we're not explicitly supporting this
    family of features."

    Likes (ie favorites) are scraped from twitter.com HTML, since Twitter's REST
    API doesn't offer a way to fetch them. You can also get them from the
    Streaming API, though, and convert them with streaming_event_to_object().
    https://dev.twitter.com/docs/streaming-apis/messages#Events_event

    Shares (ie retweets) are fetched with a separate API call per tweet:
    https://dev.twitter.com/docs/api/1.1/get/statuses/retweets/%3Aid

    However, retweets are only fetched for the first 15 tweets that have them,
    since that's Twitter's rate limit per 15 minute window. :(
    https://dev.twitter.com/docs/rate-limiting/1.1/limits

    Quote tweets are fetched by searching for the possibly quoted tweet's ID,
    using the OR operator to search up to 5 IDs at a time, and then checking
    the quoted_status_id_str field
    https://dev.twitter.com/overview/api/tweets#quoted_status_id_str

    Use the group_id @self to retrieve a user_id’s timeline. If user_id is None
    or @me, it will return tweets for the current API user.

    group_id can be used to specify the slug of a list for which to return tweets.
    By default the current API user’s lists will be used, but lists owned by other
    users can be fetched by explicitly passing a username to user_id, e.g. to
    fetch tweets from the list @exampleuser/example-list you would call
    get_activities(user_id='exampleuser', group_id='example-list').

    Twitter replies default to including a mention of the user they're replying
    to, which overloads mentions a bit. When fetch_shares is True, we determine
    that a tweet mentions the current user if it @-mentions their username and:

    * it's not a reply, OR
    * it's a reply, but not to the current user, AND
      * the tweet it's replying to doesn't @-mention the current user
    """
    if group_id is None:
      group_id = source.FRIENDS

    # nested function for lazily fetching the user object if we need it
    user = []
    def _user():
      if not user:
        user.append(self.urlopen(API_USER % user_id if user_id else API_CURRENT_USER))
      return user[0]

    if count:
      count += start_index

    activities = []
    if activity_id:
      tweets = [self.urlopen(API_STATUS % activity_id)]
      total_count = len(tweets)
    else:
      if group_id == source.SELF:
        if user_id in (None, source.ME):
          user_id = ''
        url = API_USER_TIMELINE % {
          'count': count,
          'screen_name': user_id,
        }

        if fetch_likes:
          liked = self.urlopen(API_FAVORITES % user_id)
          if liked:
            activities += [self._make_like(tweet, _user()) for tweet in liked]
      elif group_id == source.SEARCH:
        url = API_SEARCH % {
          'q': urllib.quote_plus(search_query.encode('utf-8')),
          'count': count,
        }
      elif group_id in (source.FRIENDS, source.ALL):
        url = API_TIMELINE % (count)
      else:
        if not user_id:
          user_id = _user().get('screen_name')
        url = API_LIST_TIMELINE % {
          'count': count,
          'slug': group_id,
          'owner_screen_name': user_id,
        }

      headers = {'If-None-Match': etag} if etag else {}
      total_count = None
      try:
        resp = self.urlopen(url, headers=headers, parse_response=False)
        etag = resp.info().get('ETag')
        tweet_obj = source.load_json(resp.read(), url)
        if group_id == source.SEARCH:
          tweet_obj = tweet_obj.get('statuses', [])
        tweets = tweet_obj[start_index:]
      except urllib2.HTTPError, e:
        if e.code == 304:  # Not Modified, from a matching ETag
          tweets = []
        else:
          raise

    # batch get memcached counts of favorites and retweets for all tweets
    cached = {}
    if cache is not None:
      keys = itertools.product(('ATR', 'ATF'), [t['id_str'] for t in tweets])
      cached = cache.get_multi('%s %s' % (prefix, id) for prefix, id in keys)
    # only update the cache at the end, in case we hit an error before then
    cache_updates = {}

    if fetch_shares:
      retweet_calls = 0
      for tweet in tweets:
        # don't fetch retweets if the tweet is itself a retweet or if the
        # author's account is protected. /statuses/retweets 403s with error
        # code 200 (?!) for protected accounts.
        # https://github.com/snarfed/bridgy/issues/688
        if tweet.get('retweeted') or tweet.get('user', {}).get('protected'):
          continue
        elif retweet_calls >= RETWEET_LIMIT:
          logging.warning("Hit Twitter's retweet rate limit (%d) with more to "
                          "fetch! Results will be incomplete!" % RETWEET_LIMIT)
          break

        # store retweets in the 'retweets' field, which is handled by
        # tweet_to_activity().
        # TODO: make these HTTP requests asynchronous. not easy since we don't
        # (yet) require threading support or use a non-blocking HTTP library.
        #
        # twitter limits this API endpoint to one call per minute per user,
        # which is easy to hit, so we stop before we hit that.
        # https://dev.twitter.com/docs/rate-limiting/1.1/limits
        #
        # can't use the statuses/retweets_of_me endpoint because it only
        # returns the original tweets, not the retweets or their authors.
        id = tweet['id_str']
        count = tweet.get('retweet_count')
        if count and count != cached.get('ATR ' + id):
          url = API_RETWEETS % id
          if min_id is not None:
            url = util.add_query_params(url, {'since_id': min_id})

          try:
            tweet['retweets'] = self.urlopen(url)
          except urllib2.URLError, e:
            code, _ = util.interpret_http_exception(e)
            if code != '404':  # 404 means the original tweet was deleted
              raise

          retweet_calls += 1
          cache_updates['ATR ' + id] = count
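
The mention rule described in this example's docstring isn't visible in the
truncated code above, so here is a minimal standalone sketch of it. The tweet
fields (entities.user_mentions, in_reply_to_screen_name,
in_reply_to_status_id_str) are standard Twitter v1.1 status fields; get_tweet
is a hypothetical fetcher (e.g. a thin wrapper over
self.urlopen(API_STATUS % id)), not a helper this class actually defines.

def mentions_current_user(tweet, username, get_tweet):
  """A sketch: True if tweet counts as an @-mention of username."""
  def mentioned_in(t):
    return username in [m.get('screen_name') for m in
                        t.get('entities', {}).get('user_mentions', [])]

  if not mentioned_in(tweet):
    return False
  reply_to = tweet.get('in_reply_to_screen_name')
  if not reply_to:
    return True  # not a reply: a plain @-mention
  if reply_to == username:
    return False  # a reply directly to the user doesn't count
  # a reply to someone else only counts if the tweet it's replying to
  # doesn't already @-mention the user
  parent = get_tweet(tweet.get('in_reply_to_status_id_str')) or {}
  return not mentioned_in(parent)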
Example #23
  def get_activities_response(self, user_id=None, group_id=None, app_id=None,
                              activity_id=None, start_index=0, count=0,
                              etag=None, min_id=None, cache=None,
                              fetch_replies=False, fetch_likes=False,
                              fetch_shares=False, fetch_events=False,
                              fetch_mentions=False, search_query=None):
    """Fetches posts and converts them to ActivityStreams activities.

    See method docstring in source.py for details. app_id is ignored.
    Supports min_id, but not ETag, since Instagram doesn't support it.

    http://instagram.com/developer/endpoints/users/#get_users_feed
    http://instagram.com/developer/endpoints/users/#get_users_media_recent

    Likes are always included, regardless of the fetch_likes kwarg. They come
    bundled in the 'likes' field of the API Media object:
    http://instagram.com/developer/endpoints/media/#

    Mentions are never fetched or included because the API doesn't support
    searching for them.
    https://github.com/snarfed/bridgy/issues/523#issuecomment-155523875

    Shares are never fetched or included, since Instagram has no share feature.

    Instagram only supports search over hashtags, so if search_query is set, it
    must begin with #.

    Raises: InstagramAPIError
    """
    if user_id is None:
      user_id = 'self'
    if group_id is None:
      group_id = source.FRIENDS

    if search_query:
      if search_query.startswith('#'):
        search_query = search_query[1:]
      else:
        raise NotImplementedError(
          'Instagram only supports search over hashtags, so search_query must '
          'begin with the # character.')

    # TODO: paging
    media = []
    kwargs = {}
    if min_id is not None:
      kwargs['min_id'] = min_id

    activities = []
    try:
      media_url = (API_MEDIA_URL % activity_id if activity_id else
                   API_USER_MEDIA_URL % user_id if group_id == source.SELF else
                   API_MEDIA_POPULAR_URL if group_id == source.ALL else
                   API_MEDIA_SEARCH_URL % search_query if group_id == source.SEARCH else
                   API_USER_FEED_URL if group_id == source.FRIENDS else None)
      assert media_url
      media = self.urlopen(util.add_query_params(media_url, kwargs))
      if media:
        if activity_id:
          media = [media]
        activities += [self.media_to_activity(m) for m in util.trim_nulls(media)]

      if group_id == source.SELF and fetch_likes:
        # add the user's own likes
        liked = self.urlopen(
          util.add_query_params(API_USER_LIKES_URL % user_id, kwargs))
        if liked:
          user = self.urlopen(API_USER_URL % user_id)
          activities += [self.like_to_object(user, l['id'], l['link'])
                         for l in liked]

    except urllib2.HTTPError, e:
      code, body = util.interpret_http_exception(e)
      # instagram api should give us back a json block describing the
      # error. but if it's an error for some other reason, it probably won't
      # be properly formatted json.
      try:
        body_obj = json.loads(body) if body else {}
      except ValueError:
        body_obj = {}

      if body_obj.get('meta', {}).get('error_type') == 'APINotFoundError':
        logging.exception(body_obj.get('meta', {}).get('error_message'))
      else:
        raise
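
The except block above encodes a defensive pattern worth isolating: the
Instagram API usually returns a JSON error envelope, but errors raised by
other layers (proxies, HTML error pages) won't be valid JSON. A minimal
sketch of that pattern as a standalone helper, assuming the same json,
logging, and util imports as the example, and that only APINotFoundError
should be swallowed, as above:

def swallow_not_found(e):
  """A sketch: True if e is Instagram's 'not found' error (safe to ignore)."""
  code, body = util.interpret_http_exception(e)
  try:
    body_obj = json.loads(body) if body else {}
  except ValueError:
    body_obj = {}  # error body wasn't JSON, e.g. an HTML error page
  meta = body_obj.get('meta', {})
  if meta.get('error_type') == 'APINotFoundError':
    logging.exception(meta.get('error_message'))
    return True   # caller can treat the media as deleted
  return False    # caller should re-raise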
Example #24
      # fetch mentions *after* replies so that we don't get replies to mentions
      # https://github.com/snarfed/bridgy/issues/631
      mentions = self.fetch_mentions(_user().get('screen_name'), tweets,
                                     min_id=min_id)
      tweet_activities += [self.tweet_to_activity(m) for m in mentions]

    if fetch_likes:
      for tweet, activity in zip(tweets, tweet_activities):
        id = tweet['id_str']
        count = tweet.get('favorite_count')
        if self.is_public(activity) and count and count != cached.get('ATF ' + id):
          url = HTML_FAVORITES % id
          try:
            html = json.loads(util.urlopen(url).read()).get('htmlUsers', '')
          except urllib2.URLError, e:
            util.interpret_http_exception(e)  # just log it
            continue
          likes = self.favorites_html_to_likes(tweet, html)
          activity['object'].setdefault('tags', []).extend(likes)
          cache_updates['ATF ' + id] = count

    activities += tweet_activities
    response = self.make_activities_base_response(activities)
    response.update({'total_count': total_count, 'etag': etag})
    if cache_updates and cache is not None:
      cache.set_multi(cache_updates)
    return response

  def fetch_replies(self, activities, min_id=None):
    """Fetches and injects Twitter replies into a list of activities, in place.
Example #25
def send_webmentions(activity_wrapped, proxy=None, **response_props):
    """Sends webmentions for an incoming Salmon slap or ActivityPub inbox delivery.
    Args:
      activity_wrapped: dict, AS1 activity
      response_props: passed through to the newly created Responses
    """
    activity = redirect_unwrap(activity_wrapped)

    verb = activity.get('verb')
    if verb and verb not in SUPPORTED_VERBS:
        error(f'{verb} activities are not supported yet.')

    # extract source and targets
    source = activity.get('url') or activity.get('id')
    obj = activity.get('object')
    obj_url = util.get_url(obj)

    targets = util.get_list(activity, 'inReplyTo')
    if isinstance(obj, dict):
        if not source or verb in ('create', 'post', 'update'):
            source = obj_url or obj.get('id')
        targets.extend(util.get_list(obj, 'inReplyTo'))

    tags = util.get_list(activity_wrapped, 'tags')
    obj_wrapped = activity_wrapped.get('object')
    if isinstance(obj_wrapped, dict):
        tags.extend(util.get_list(obj_wrapped, 'tags'))
    for tag in tags:
        if tag.get('objectType') == 'mention':
            url = tag.get('url')
            if url and url.startswith(request.host_url):
                targets.append(redirect_unwrap(url))

    if verb in ('follow', 'like', 'share'):
        targets.append(obj_url)

    targets = util.dedupe_urls(util.get_url(t) for t in targets)
    if not source:
        error("Couldn't find original post URL")
    if not targets:
        error(
            "Couldn't find any target URLs in inReplyTo, object, or mention tags"
        )

    # send webmentions and store Responses
    errors = []  # stores (code, body) tuples
    for target in targets:
        if util.domain_from_link(target) == util.domain_from_link(source):
            logging.info(
                f'Skipping same-domain webmention from {source} to {target}')
            continue

        response = Response(source=source,
                            target=target,
                            direction='in',
                            **response_props)
        response.put()
        wm_source = (response.proxy_url() if
                     verb in ('follow', 'like', 'share') or proxy else source)
        logging.info(f'Sending webmention from {wm_source} to {target}')

        try:
            endpoint = webmention.discover(target, headers=HEADERS).endpoint
            if endpoint:
                webmention.send(endpoint, wm_source, target, headers=HEADERS)
                response.status = 'complete'
                logging.info('Success!')
            else:
                response.status = 'ignored'
                logging.info('Ignoring.')
        except BaseException as e:
            errors.append(util.interpret_http_exception(e))
        response.put()

    if errors:
        msg = 'Errors: ' + ', '.join(f'{code} {body}' for code, body in errors)
        error(msg, status=int(errors[0][0] or 502))
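
A hypothetical invocation of send_webmentions() with a bare AS1 like,
assuming 'like' is in SUPPORTED_VERBS (the follow/like/share branch above
handles it); the fed.example and orig.example URLs are placeholders. Because
the verb is 'like', the object URL becomes the webmention target and the
Response's proxy URL is used as the source:

send_webmentions({
    'objectType': 'activity',
    'verb': 'like',
    'id': 'https://fed.example/likes/1',
    'object': 'https://orig.example/post',
})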
Example #26
    def get(self):
        cookie = 'sessionid=%s' % urllib.parse.quote(
            util.get_required_param(self, 'sessionid').encode('utf-8'))
        logging.info('Fetching with Cookie: %s', cookie)

        host_url = self.request.host_url + '/'
        ig = instagram.Instagram()
        try:
            resp = ig.get_activities_response(group_id=source.FRIENDS,
                                              scrape=True,
                                              cookie=cookie)
        except Exception as e:
            status, text = util.interpret_http_exception(e)
            if status in ('403',):
                self.response.headers['Content-Type'] = 'application/atom+xml'
                self.response.out.write(atom.activities_to_atom(
                    [{
                        'object': {
                            'url': self.request.url,
                            'content': 'Your instagram-atom cookie isn\'t '
                                       'working. <a href="%s">Click here to '
                                       'regenerate your feed!</a>' % host_url,
                        },
                    }],
                    {},
                    title='instagram-atom',
                    host_url=host_url,
                    request_url=self.request.path_url))
                return
            elif status == '401':
                # IG returns 401 sometimes as a form of rate limiting or bot detection
                self.response.status = '429'
            elif status:
                self.response.status = status
            else:
                logging.exception('oops!')
                self.response.status = 500

            self.response.text = text or 'Unknown error.'
            return

        actor = resp.get('actor')
        if actor:
            logging.info('Logged in as %s (%s)', actor.get('username'),
                         actor.get('displayName'))
        else:
            logging.warning("Couldn't determine Instagram user!")

        activities = resp.get('items', [])
        format = self.request.get('format', 'atom')
        if format == 'atom':
            title = 'instagram-atom feed for %s' % ig.actor_name(actor)
            self.response.headers['Content-Type'] = 'application/atom+xml'
            self.response.out.write(
                atom.activities_to_atom(activities,
                                        actor,
                                        title=title,
                                        host_url=host_url,
                                        request_url=self.request.path_url,
                                        xml_base='https://www.instagram.com/'))
        elif format == 'html':
            self.response.headers['Content-Type'] = 'text/html'
            self.response.out.write(
                microformats2.activities_to_html(activities))
        else:
            self.abort(400,
                       'format must be either atom or html; got %s' % format)
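
For reference, here is the error branch above condensed into a tiny mapping
sketch. It restates the handler's logic rather than a helper the module
actually defines; the 403 case is handled separately with the atom error
feed:

def map_scrape_status(status):
    """A sketch of the handler's non-403 status mapping."""
    if status == '401':
        return '429'   # IG returns 401 as rate limiting / bot detection
    elif status:
        return status  # pass other HTTP error codes straight through
    return 500         # non-HTTP exception: generic server error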