Exemple #1
0
  def test_update_scheme(self):
    for orig in 'http', 'https':
      for new in 'http', 'https':
        self.handler.request.scheme = new
        updated = util.update_scheme(orig + '://foo', self.handler)
        self.assertEqual(new + '://foo', updated)

    self.handler.request.scheme = 'https'
    self.assertEqual(
      'https://distillery.s3.amazonaws.com/profiles/xyz.jpg',
      util.update_scheme('http://images.ak.instagram.com/profiles/xyz.jpg',
                         self.handler))
    self.assertEqual(
      'https://igcdn-photos-e-a.akamaihd.net/hphotos-ak-xpf1/123_a.jpg',
      util.update_scheme('http://photos-e.ak.instagram.com/hphotos-ak-xpf1/123_a.jpg',
                         self.handler))
Exemple #2
0
  def test_update_scheme(self):
    for orig in 'http', 'https':
      for new in 'http', 'https':
        self.handler.request.scheme = new
        updated = util.update_scheme(orig + '://foo', self.handler)
        self.assertEqual(new + '://foo', updated)

    self.handler.request.scheme = 'https'
    self.assertEqual(
      'https://distillery.s3.amazonaws.com/profiles/xyz.jpg',
      util.update_scheme('http://images.ak.instagram.com/profiles/xyz.jpg',
                         self.handler))
    self.assertEqual(
      'https://igcdn-photos-e-a.akamaihd.net/hphotos-ak-xpf1/123_a.jpg',
      util.update_scheme('http://photos-e.ak.instagram.com/hphotos-ak-xpf1/123_a.jpg',
                         self.handler))
Exemple #3
0
  def template_vars(self):
    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'EPOCH': util.EPOCH,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
        })
    if not self.source:
      return vars

    if isinstance(self.source, instagram.Instagram):
      auth = self.source.auth_entity
      vars['indieauth_me'] = (
        auth.id if isinstance(auth, indieauth.IndieAuth)
        else self.source.domain_urls[0] if self.source.domain_urls
        else None)

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (domain.endswith('tumblr.com') and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          elif (domain.endswith('wordpress.com') and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
      vars['responses'] = []
      query = Response.query().filter(Response.source == self.source.key)

      # if there's a paging param (responses_before or responses_after), update
      # query with it
      def get_paging_param(param):
        val = self.request.get(param)
        try:
          return util.parse_iso8601(val) if val else None
        except:
          msg = "Couldn't parse %s %r as ISO8601" % (param, val)
          logging.exception(msg)
          self.abort(400, msg)

      before = get_paging_param('responses_before')
      after = get_paging_param('responses_after')
      if before and after:
        self.abort(400, "can't handle both responses_before and responses_after")
      elif after:
        query = query.filter(Response.updated > after).order(Response.updated)
      elif before:
        query = query.filter(Response.updated < before).order(-Response.updated)
      else:
        query = query.order(-Response.updated)

      query_iter = query.iter()
      for i, r in enumerate(query_iter):
        r.response = json.loads(r.response_json)
        r.activities = [json.loads(a) for a in r.activities_json]

        if (not self.source.is_activity_public(r.response) or
            not all(self.source.is_activity_public(a) for a in r.activities)):
          continue
        elif r.type == 'post':
          r.activities = []

        r.actor = r.response.get('author') or r.response.get('actor', {})

        for a in r.activities + [r.response]:
          if not a.get('content'):
            a['content'] = a.get('object', {}).get('content')

        if not r.response.get('content'):
          phrases = {
            'like': 'liked this',
            'repost': 'reposted this',
            'rsvp-yes': 'is attending',
            'rsvp-no': 'is not attending',
            'rsvp-maybe': 'might attend',
            'rsvp-interested': 'is interested',
            'invite': 'is invited',
          }
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '',
            phrases.get(r.type) or phrases.get(r.response.get('verb')))

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)
        r.original_links = [util.pretty_link(url, new_tab=True)
                            for url in r.original_posts]

        vars['responses'].append(r)
        if len(vars['responses']) >= 10 or i > 200:
          break

      vars['responses'].sort(key=lambda r: r.updated, reverse=True)

      # calculate new paging param(s)
      new_after = (
        before if before else
        vars['responses'][0].updated if
          vars['responses'] and query_iter.probably_has_next() and (before or after)
        else None)
      if new_after:
        vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                         new_after.isoformat())

      new_before = (
        after if after else
        vars['responses'][-1].updated if
          vars['responses'] and query_iter.probably_has_next()
        else None)
      if new_before:
        vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                         new_before.isoformat())

      vars['next_poll'] = max(
        self.source.last_poll_attempt + self.source.poll_period(),
        # lower bound is 1 minute from now
        util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        p.pretty_page = util.pretty_link(
          p.key.parent().id().decode('utf-8'),
          attrs={'class': 'original-post u-url u-name'},
          new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          text = None
        b.pretty_url = util.pretty_link(
          b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
          max_length=40, new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(
          w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
        try:
          target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except BaseException:
          target_is_source = False
        w.pretty_target = util.pretty_link(
          w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
          keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Exemple #4
0
  def template_vars(self):
    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'EPOCH': util.EPOCH,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
        })
    if not self.source:
      return vars

    if isinstance(self.source, instagram.Instagram):
      auth = self.source.auth_entity
      vars['indieauth_me'] = (
        auth.id if isinstance(auth, indieauth.IndieAuth)
        else self.source.domain_urls[0] if self.source.domain_urls
        else None)

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (domain.endswith('tumblr.com') and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          elif (domain.endswith('wordpress.com') and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
      vars['responses'] = []
      query = Response.query().filter(Response.source == self.source.key)

      # if there's a paging param (responses_before or responses_after), update
      # query with it
      def get_paging_param(param):
        val = self.request.get(param)
        try:
          return util.parse_iso8601(val) if val else None
        except:
          msg = "Couldn't parse %s %r as ISO8601" % (param, val)
          logging.exception(msg)
          self.abort(400, msg)

      before = get_paging_param('responses_before')
      after = get_paging_param('responses_after')
      if before and after:
        self.abort(400, "can't handle both responses_before and responses_after")
      elif after:
        query = query.filter(Response.updated > after).order(Response.updated)
      elif before:
        query = query.filter(Response.updated < before).order(-Response.updated)
      else:
        query = query.order(-Response.updated)

      query_iter = query.iter()
      for i, r in enumerate(query_iter):
        r.response = json.loads(r.response_json)
        r.activities = [json.loads(a) for a in r.activities_json]

        if (not self.source.is_activity_public(r.response) or
            not all(self.source.is_activity_public(a) for a in r.activities)):
          continue
        elif r.type == 'post':
          r.activities = []

        r.actor = r.response.get('author') or r.response.get('actor', {})

        for a in r.activities + [r.response]:
          if not a.get('content'):
            a['content'] = a.get('object', {}).get('content')

        if not r.response.get('content'):
          phrases = {
            'like': 'liked this',
            'repost': 'reposted this',
            'rsvp-yes': 'is attending',
            'rsvp-no': 'is not attending',
            'rsvp-maybe': 'might attend',
            'rsvp-interested': 'is interested',
            'invite': 'is invited',
          }
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '',
            phrases.get(r.type) or phrases.get(r.response.get('verb')))

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)
        r.original_links = [util.pretty_link(url, new_tab=True)
                            for url in r.original_posts]

        vars['responses'].append(r)
        if len(vars['responses']) >= 10 or i > 200:
          break

      vars['responses'].sort(key=lambda r: r.updated, reverse=True)

      # calculate new paging param(s)
      new_after = (
        before if before else
        vars['responses'][0].updated if
          vars['responses'] and query_iter.probably_has_next() and (before or after)
        else None)
      if new_after:
        vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                         new_after.isoformat())

      new_before = (
        after if after else
        vars['responses'][-1].updated if
          vars['responses'] and query_iter.probably_has_next()
        else None)
      if new_before:
        vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                         new_before.isoformat())

      vars['next_poll'] = max(
        self.source.last_poll_attempt + self.source.poll_period(),
        # lower bound is 1 minute from now
        util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        p.pretty_page = util.pretty_link(
          p.key.parent().id(), attrs={'class': 'original-post u-url u-name'},
          new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          text = None
        b.pretty_url = util.pretty_link(
          b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
          max_length=40, new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(
          w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
        try:
          target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except BaseException:
          target_is_source = False
        w.pretty_target = util.pretty_link(
          w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
          keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Exemple #5
0
    def get(self, type, source_short_name, string_id, *ids):
        source_cls = models.sources.get(source_short_name)
        if not source_cls:
            self.abort(
                400, "Source type '%s' not found. Known sources: %s" %
                (source_short_name, filter(None, models.sources.keys())))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(
                400, 'Source %s %s not found' % (source_short_name, string_id))
        elif (self.source.status == 'disabled'
              or ('listen' not in self.source.features
                  and 'email' not in self.source.features)):
            self.abort(
                400, 'Source %s is disabled for backfeed' %
                self.source.bridgy_path())

        format = self.request.get('format', 'html')
        if format not in ('html', 'json'):
            self.abort(400,
                       'Invalid format %s, expected html or json' % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, 'Invalid id %s' % id)

        label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
        cache_key = 'H ' + label
        obj = memcache.get(cache_key)
        if obj and not appengine_config.DEBUG:
            logging.info('Using cached object for %s', label)
        else:
            logging.info('Fetching %s', label)
            try:
                obj = self.get_item(*ids)
            except models.DisableSource as e:
                self.abort(
                    401,
                    "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!"
                )
            except ValueError as e:
                self.abort(400,
                           '%s error:\n%s' % (self.source.GR_CLASS.NAME, e))
            except Exception as e:
                # pass through all API HTTP errors if we can identify them
                code, body = util.interpret_http_exception(e)
                # temporary, trying to debug a flaky test failure
                # eg https://circleci.com/gh/snarfed/bridgy/769
                if code:
                    self.response.status_int = int(code)
                    self.response.headers['Content-Type'] = 'text/plain'
                    self.response.write('%s error:\n%s' %
                                        (self.source.GR_CLASS.NAME, body))
                    return
                else:
                    raise
            memcache.set(cache_key, obj, time=CACHE_TIME)

        if not obj:
            self.abort(404, label)

        if self.source.is_blocked(obj):
            self.abort(410, 'That user is currently blocked')

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get('author', {})
        image = author.get('image', {})
        url = image.get('url')
        if url:
            image['url'] = util.update_scheme(url, self)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get('properties', {})).get('author', {})
        author_uid = first_props(author.get('properties', {})).get('uid', '')
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                silo_url = self.source.gr_source.user_url(parsed[1])
                urls = author.get('properties', {}).setdefault('url', [])
                if silo_url not in microformats2.get_string_urls(urls):
                    urls.append(silo_url)

        # write the response!
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        if format == 'html':
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
            url = obj.get('url', '')
            self.response.out.write(
                TEMPLATE.substitute({
                    'refresh':
                    (('<meta http-equiv="refresh" content="0;url=%s">' %
                      url) if url else ''),
                    'url':
                    url,
                    'body':
                    microformats2.json_to_html(mf2_json),
                    'title':
                    self.get_title(obj),
                }))
        elif format == 'json':
            self.response.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            self.response.out.write(json.dumps(mf2_json, indent=2))
Exemple #6
0
  def get(self, type, source_short_name, string_id, *ids):
    source_cls = models.sources.get(source_short_name)
    if not source_cls:
      self.abort(400, "Source type '%s' not found. Known sources: %s" %
                 (source_short_name, filter(None, models.sources.keys())))

    self.source = source_cls.get_by_id(string_id)
    if not self.source:
      self.abort(400, 'Source %s %s not found' % (source_short_name, string_id))
    elif self.source.status == 'disabled' or 'listen' not in self.source.features:
      self.abort(400, 'Source %s is disabled for backfeed' % self.source.bridgy_path())

    format = self.request.get('format', 'html')
    if format not in ('html', 'json'):
      self.abort(400, 'Invalid format %s, expected html or json' % format)

    for id in ids:
      if not self.VALID_ID.match(id):
        self.abort(404, 'Invalid id %s' % id)

    label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
    cache_key = 'H ' + label
    obj = memcache.get(cache_key)
    if obj:
      logging.info('Using cached object for %s', label)
    else:
      logging.info('Fetching %s', label)
      try:
        obj = self.get_item(*ids)
      except models.DisableSource as e:
        self.abort(401, "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!")
      except Exception as e:
        # pass through all API HTTP errors if we can identify them
        code, body = util.interpret_http_exception(e)
        if not code and util.is_connection_failure(e):
          code = 503
          body = str(e)
        if code:
          self.response.status_int = int(code)
          self.response.headers['Content-Type'] = 'text/plain'
          self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
          return
        else:
          raise
      memcache.set(cache_key, obj, time=CACHE_TIME)

    if not obj:
      self.abort(404, label)

    # use https for profile pictures so we don't cause SSL mixed mode errors
    # when serving over https.
    author = obj.get('author', {})
    image = author.get('image', {})
    url = image.get('url')
    if url:
      image['url'] = util.update_scheme(url, self)

    mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

    # try to include the author's silo profile url
    author = first_props(mf2_json.get('properties', {})).get('author', {})
    author_uid = first_props(author.get('properties', {})).get('uid', '')
    if author_uid:
      parsed = util.parse_tag_uri(author_uid)
      if parsed:
        silo_url = self.source.gr_source.user_url(parsed[1])
        urls = author.get('properties', {}).setdefault('url', [])
        if silo_url not in microformats2.get_string_urls(urls):
          urls.append(silo_url)

    # write the response!
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    if format == 'html':
      self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
      self.response.out.write(TEMPLATE.substitute({
            'url': obj.get('url', ''),
            'body': microformats2.json_to_html(mf2_json),
            'title': self.get_title(obj),
            }))
    elif format == 'json':
      self.response.headers['Content-Type'] = 'application/json; charset=utf-8'
      self.response.out.write(json.dumps(mf2_json, indent=2))
Exemple #7
0
  def template_vars(self):
    if not self.source:
      return {}

    vars = super(UserHandler, self).template_vars()
    vars.update({
        'source': self.source,
        'epoch': util.EPOCH,
        })

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (domain.endswith('tumblr.com') and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          elif (domain.endswith('wordpress.com') and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features:
      vars['responses'] = []
      for i, r in enumerate(Response.query()
                              .filter(Response.source == self.source.key)\
                              .order(-Response.updated)):
        r.response = json.loads(r.response_json)
        if r.activity_json:  # handle old entities
          r.activities_json.append(r.activity_json)
        r.activities = [json.loads(a) for a in r.activities_json]

        if (not gr_source.Source.is_public(r.response) or
            not all(gr_source.Source.is_public(a) for a in r.activities)):
          continue

        r.actor = r.response.get('author') or r.response.get('actor', {})
        if not r.response.get('content'):
          phrases = {
            'like': 'liked this',
            'repost': 'reposted this',
            'rsvp-yes': 'is attending',
            'rsvp-no': 'is not attending',
            'rsvp-maybe': 'might attend',
            'invite': 'is invited',
          }
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '',
            phrases.get(r.type) or phrases.get(r.response.get('verb')))

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)

        vars['responses'].append(r)
        if len(vars['responses']) >= 10 or i > 200:
          break

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        p.pretty_page = util.pretty_link(
          p.key.parent().id(), a_class='original-post', new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          text = None
        b.pretty_url = util.pretty_link(b.key.id(), text=text,
                                        a_class='original-post', max_length=40,
                                        new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(w.source_url(), a_class='original-post',
                                           new_tab=True)
        try:
          target_is_source = (urlparse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except BaseException:
          target_is_source = False
        w.pretty_target = util.pretty_link(w.target_url(), a_class='original-post',
                                           new_tab=True, keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Exemple #8
0
class ItemHandler(webapp2.RequestHandler):
    """Fetches a post, repost, like, or comment and serves it as mf2 HTML or JSON.
  """
    handle_exception = handlers.handle_exception
    source = None

    VALID_ID = re.compile(r'^[\w.+:@-]+$')

    def head(self, *args):
        """Return an empty 200 with no caching directives."""

    def get_item(self, id):
        """Fetches and returns an object from the given source.

    To be implemented by subclasses.

    Args:
      source: bridgy.Source subclass
      id: string

    Returns: ActivityStreams object dict
    """
        raise NotImplementedError()

    def get_title(self, obj):
        """Returns the string to be used in the <title> tag.

    Args:
      obj: ActivityStreams object
    """
        return obj.get('title') or obj.get('content') or 'Bridgy Response'

    def get_post(self, id, **kwargs):
        """Fetch a post.

    Args:
      id: string, site-specific post id
      is_event: bool
      kwargs: passed through to get_activities

    Returns: ActivityStreams object dict
    """
        try:
            posts = self.source.get_activities(activity_id=id,
                                               user_id=self.source.key.id(),
                                               **kwargs)
            if posts:
                return posts[0]
            logging.warning('Source post %s not found', id)
        except Exception as e:
            util.interpret_http_exception(e)

    def get(self, type, source_short_name, string_id, *ids):
        source_cls = models.sources.get(source_short_name)
        if not source_cls:
            self.abort(
                400, "Source type '%s' not found. Known sources: %s" %
                (source_short_name, filter(None, models.sources.keys())))

        self.source = source_cls.get_by_id(string_id)
        if not self.source:
            self.abort(
                400, 'Source %s %s not found' % (source_short_name, string_id))

        format = self.request.get('format', 'html')
        if format not in ('html', 'json'):
            self.abort(400,
                       'Invalid format %s, expected html or json' % format)

        for id in ids:
            if not self.VALID_ID.match(id):
                self.abort(404, 'Invalid id %s' % id)

        label = '%s:%s %s %s' % (source_short_name, string_id, type, ids)
        cache_key = 'H ' + label
        obj = memcache.get(cache_key)
        if obj:
            logging.info('Using cached object for %s', label)
        else:
            logging.info('Fetching %s', label)
            try:
                obj = self.get_item(*ids)
            except Exception, e:
                # pass through all API HTTP errors if we can identify them
                code, body = util.interpret_http_exception(e)
                if not code and util.is_connection_failure(e):
                    code = 503
                    body = str(e)
                if code:
                    self.response.status_int = int(code)
                    self.response.headers['Content-Type'] = 'text/plain'
                    self.response.write('%s error:\n%s' %
                                        (self.source.GR_CLASS.NAME, body))
                    return
                else:
                    raise
            memcache.set(cache_key, obj, time=CACHE_TIME)

        if not obj:
            self.abort(404, label)

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get('author', {})
        image = author.get('image', {})
        url = image.get('url')
        if url:
            image['url'] = util.update_scheme(url, self)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get('properties', {})).get('author', {})
        author_uid = first_props(author.get('properties', {})).get('uid', '')
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                silo_url = self.source.gr_source.user_url(parsed[1])
                urls = author.get('properties', {}).setdefault('url', [])
                if silo_url not in microformats2.get_string_urls(urls):
                    urls.append(silo_url)

        # write the response!
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        if format == 'html':
            self.response.headers['Content-Type'] = 'text/html; charset=utf-8'
            self.response.out.write(
                TEMPLATE.substitute({
                    'url':
                    obj.get('url', ''),
                    'body':
                    microformats2.json_to_html(mf2_json),
                    'title':
                    self.get_title(obj),
                }))
        elif format == 'json':
            self.response.headers[
                'Content-Type'] = 'application/json; charset=utf-8'
            self.response.out.write(json.dumps(mf2_json, indent=2))
Exemple #9
0
        self.response.headers['Content-Type'] = 'text/plain'
        self.response.write('%s error:\n%s' % (self.source.GR_CLASS.NAME, body))
        return
      else:
        raise

    if not obj:
      self.abort(404, label)

    # use https for profile pictures so we don't cause SSL mixed mode errors
    # when serving over https.
    author = obj.get('author', {})
    image = author.get('image', {})
    url = image.get('url')
    if url:
      image['url'] = util.update_scheme(url, self)

    mf2_json = microformats2.object_to_json(obj)

    # try to include the author's silo profile url
    author = first_props(mf2_json.get('properties', {})).get('author', {})
    author_uid = first_props(author.get('properties', {})).get('uid', '')
    if author_uid:
      parsed = util.parse_tag_uri(author_uid)
      if parsed:
        silo_url = self.source.gr_source.user_url(parsed[1])
        urls = author.get('properties', {}).setdefault('url', [])
        if silo_url not in microformats2.get_string_urls(urls):
          urls.append(silo_url)

    # write the response!
Exemple #10
0
    def dispatch_request(self, site, key_id, **kwargs):
        """Handle HTTP request."""
        source_cls = models.sources.get(site)
        if not source_cls:
            error(
                f"Source type '{site}' not found. Known sources: {[s for s in models.sources.keys() if s]}"
            )

        self.source = source_cls.get_by_id(key_id)
        if not self.source:
            error(f'Source {site} {key_id} not found')
        elif (self.source.status == 'disabled'
              or 'listen' not in self.source.features):
            error(
                f'Source {self.source.bridgy_path()} is disabled for backfeed')

        format = request.values.get('format', 'html')
        if format not in ('html', 'json'):
            error(f'Invalid format {format}, expected html or json')

        for id in kwargs.values():
            if not self.VALID_ID.match(id):
                error(f'Invalid id {id}', 404)

        try:
            obj = self.get_item(**kwargs)
        except models.DisableSource:
            error(
                "Bridgy's access to your account has expired. Please visit https://brid.gy/ to refresh it!",
                401)
        except ValueError as e:
            error(f'{self.source.GR_CLASS.NAME} error: {e}')

        if not obj:
            error(f'Not found: {site}:{key_id} {kwargs}', 404)

        if self.source.is_blocked(obj):
            error('That user is currently blocked', 410)

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get('author', {})
        image = author.get('image', {})
        url = image.get('url')
        if url:
            image['url'] = util.update_scheme(url, request)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get('properties', {})).get('author', {})
        author_uid = first_props(author.get('properties', {})).get('uid', '')
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                urls = author.get('properties', {}).setdefault('url', [])
                try:
                    silo_url = self.source.gr_source.user_url(parsed[1])
                    if silo_url not in microformats2.get_string_urls(urls):
                        urls.append(silo_url)
                except NotImplementedError:  # from gr_source.user_url()
                    pass

        # write the response!
        if format == 'html':
            url = obj.get('url', '')
            return TEMPLATE.substitute({
                'refresh':
                (f'<meta http-equiv="refresh" content="0;url={url}">'
                 if url else ''),
                'url':
                url,
                'body':
                microformats2.json_to_html(mf2_json),
                'title':
                obj.get('title') or obj.get('content') or 'Bridgy Response',
            })
        elif format == 'json':
            return mf2_json
Exemple #11
0
def user(site, id):
    """View for a user page."""
    cls = models.sources.get(site)
    if not cls:
        return render_template('user_not_found.html'), 404

    source = cls.lookup(id)

    if not source:
        key = cls.query(
            ndb.OR(*[
                ndb.GenericProperty(prop) == id
                for prop in ('domains', 'inferred_username', 'name',
                             'username')
            ])).get(keys_only=True)
        if key:
            return redirect(cls(key=key).bridgy_path(), code=301)

    if not source or not source.features:
        return render_template('user_not_found.html'), 404

    source.verify()
    source = util.preprocess_source(source)

    vars = {
        'source': source,
        'logs': logs,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    }

    # Blog webmention promos
    if 'webmention' not in source.features:
        if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
            vars[source.SHORT_NAME + '_promo'] = True
        else:
            for domain in source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                        not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (util.domain_or_parent_in(domain, ['tumblr.com'])
                      and not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                elif (util.domain_or_parent_in(domain, 'wordpress.com') and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in source.features or 'email' in source.features:
        vars['responses'] = []
        query = Response.query().filter(Response.source == source.key)

        # if there's a paging param (responses_before or responses_after), update
        # query with it
        def get_paging_param(param):
            val = request.values.get(param)
            try:
                return util.parse_iso8601(val.replace(' ',
                                                      '+')) if val else None
            except BaseException:
                error(f"Couldn't parse {param}, {val!r} as ISO8601")

        before = get_paging_param('responses_before')
        after = get_paging_param('responses_after')
        if before and after:
            error("can't handle both responses_before and responses_after")
        elif after:
            query = query.filter(Response.updated > after).order(
                Response.updated)
        elif before:
            query = query.filter(
                Response.updated < before).order(-Response.updated)
        else:
            query = query.order(-Response.updated)

        query_iter = query.iter()
        for i, r in enumerate(query_iter):
            r.response = json_loads(r.response_json)
            r.activities = [json_loads(a) for a in r.activities_json]

            if (not source.is_activity_public(r.response) or not all(
                    source.is_activity_public(a) for a in r.activities)):
                continue
            elif r.type == 'post':
                r.activities = []

            verb = r.response.get('verb')
            r.actor = (r.response.get('object')
                       if verb == 'invite' else r.response.get('author')
                       or r.response.get('actor')) or {}

            activity_content = ''
            for a in r.activities + [r.response]:
                if not a.get('content'):
                    obj = a.get('object', {})
                    a['content'] = activity_content = (
                        obj.get('content') or obj.get('displayName') or
                        # historical, from a Reddit bug fixed in granary@4f9df7c
                        obj.get('name') or '')

            response_content = r.response.get('content')
            phrases = {
                'like': 'liked this',
                'repost': 'reposted this',
                'rsvp-yes': 'is attending',
                'rsvp-no': 'is not attending',
                'rsvp-maybe': 'might attend',
                'rsvp-interested': 'is interested',
                'invite': 'is invited',
            }
            phrase = phrases.get(r.type) or phrases.get(verb)
            if phrase and (r.type != 'repost'
                           or activity_content.startswith(response_content)):
                r.response[
                    'content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(
                    image_url, request)

            # generate original post links
            r.links = process_webmention_links(r)
            r.original_links = [
                util.pretty_link(url, new_tab=True) for url in r.original_posts
            ]

            vars['responses'].append(r)
            if len(vars['responses']) >= 10 or i > 200:
                break

        vars['responses'].sort(key=lambda r: r.updated, reverse=True)

        # calculate new paging param(s)
        new_after = (before if before else vars['responses'][0].updated if
                     vars['responses'] and query_iter.probably_has_next() and
                     (before or after) else None)
        if new_after:
            vars[
                'responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

        new_before = (after if after else
                      vars['responses'][-1].updated if vars['responses']
                      and query_iter.probably_has_next() else None)
        if new_before:
            vars[
                'responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

        vars['next_poll'] = max(
            source.last_poll_attempt + source.poll_period(),
            # lower bound is 1 minute from now
            util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in source.features:
        publishes = Publish.query().filter(Publish.source == source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            p.pretty_page = util.pretty_link(
                p.key.parent().id(),
                attrs={'class': 'original-post u-url u-name'},
                new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                text = None
            b.pretty_url = util.pretty_link(
                b.key.id(),
                text=text,
                attrs={'class': 'original-post u-url u-name'},
                max_length=40,
                new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(
                w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
            try:
                target_is_source = (urllib.parse.urlparse(
                    w.target_url()).netloc in source.domains)
            except BaseException:
                target_is_source = False
            w.pretty_target = util.pretty_link(
                w.target_url(),
                attrs={'class': 'original-post'},
                new_tab=True,
                keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return render_template(f'{source.SHORT_NAME}_user.html', **vars)
Exemple #12
0
                self.response.headers["Content-Type"] = "text/plain"
                self.response.write("%s error:\n%s" % (self.source.GR_CLASS.NAME, body))
                return
            else:
                raise

        if not obj:
            self.abort(404, label)

        # use https for profile pictures so we don't cause SSL mixed mode errors
        # when serving over https.
        author = obj.get("author", {})
        image = author.get("image", {})
        url = image.get("url")
        if url:
            image["url"] = util.update_scheme(url, self)

        mf2_json = microformats2.object_to_json(obj, synthesize_content=False)

        # try to include the author's silo profile url
        author = first_props(mf2_json.get("properties", {})).get("author", {})
        author_uid = first_props(author.get("properties", {})).get("uid", "")
        if author_uid:
            parsed = util.parse_tag_uri(author_uid)
            if parsed:
                silo_url = self.source.gr_source.user_url(parsed[1])
                urls = author.get("properties", {}).setdefault("url", [])
                if silo_url not in microformats2.get_string_urls(urls):
                    urls.append(silo_url)

        # write the response!