Exemple #1
0
  def post(self):
    """Handles a manual "discover" request for a single URL.

    Reads the ``source_key`` and ``url`` request parameters. If the URL is on
    the source's silo domain, a discover task is queued for that silo post. If
    it is on one of the source's own domains, original post discovery is run on
    it and a discover task is queued for each syndication link found. Either
    way, a status message is added and the user is redirected to the source's
    page.

    Aborts with HTTP 400 if ``source_key`` can't be decoded or doesn't resolve
    to a stored source.
    """
    # load source
    try:
      source = ndb.Key(urlsafe=util.get_required_param(self, 'source_key')).get()
      if not source:
        self.abort(400, 'Source key not found')
    except ProtocolBufferDecodeError:
      logging.exception('Bad value for source_key')
      self.abort(400, 'Bad value for source_key')

    # validate URL, find silo post
    url = util.get_required_param(self, 'url')
    domain = util.domain_from_link(url)
    msg = 'Discovering now. Refresh in a minute to see the results!'

    if domain == source.GR_CLASS.DOMAIN:
      post_id = source.GR_CLASS.post_id(url)
      if post_id:
        util.add_discover_task(source, post_id)
      else:
        # the URL is on the silo's domain but no post id could be extracted
        # from it; don't queue a task with an empty id
        msg = "Sorry, that doesn't look like a %s post URL." % source.GR_CLASS.NAME
    elif util.domain_or_parent_in(domain, source.domains):
      synd_links = original_post_discovery.process_entry(source, url, {}, False, [])
      if synd_links:
        for link in synd_links:
          util.add_discover_task(source, source.GR_CLASS.post_id(link))
      else:
        msg = 'Failed to fetch %s or find a %s syndication link.' % (
          util.pretty_link(url), source.GR_CLASS.NAME)
    else:
      msg = 'Please enter a URL on either your web site or %s.' % source.GR_CLASS.NAME

    self.messages.add(msg)
    self.redirect(source.bridgy_url(self))
Exemple #2
0
def discover():
    """Queues discovery for a single URL submitted in the ``url`` form field.

    Three cases, chosen by the URL's domain:

    * the silo's own domain: queue a discover task for the silo post id, typed
      as ``'event'`` when the path starts with ``/events/``
    * one of the source's domains: run original post discovery on the URL and
      queue a discover task per syndication link, then record
      ``last_syndication_url`` on the source
    * anything else: only report an error message

    Always flashes a status message and redirects to the source's page.
    """
    source = util.load_source()

    # validate URL, find silo post
    url = request.form['url']
    domain = util.domain_from_link(url)
    path = urllib.parse.urlparse(url).path
    silo = source.gr_source

    # optimistic default; replaced below whenever nothing gets queued
    message = 'Discovering now. Refresh in a minute to see the results!'

    if domain == silo.DOMAIN:
        silo_post_id = silo.post_id(url)
        if not silo_post_id:
            message = f"Sorry, that doesn't look like a {silo.NAME} post URL."
        else:
            task_type = 'event' if path.startswith('/events/') else None
            util.add_discover_task(source, silo_post_id, type=task_type)

    elif not util.domain_or_parent_in(domain, source.domains):
        message = f'Please enter a URL on either your web site or {silo.NAME}.'

    else:
        syndicated = original_post_discovery.process_entry(
            source, url, {}, False, [])
        if not syndicated:
            message = f'Failed to fetch {util.pretty_link(url)} or find a {silo.NAME} syndication link.'
        else:
            for synd_url in syndicated:
                util.add_discover_task(source, silo.post_id(synd_url))
            source.updates = {'last_syndication_url': util.now_fn()}
            models.Source.put_updates(source)

    flash(message)
    return redirect(source.bridgy_url())
Exemple #3
0
    def post(self):
        """Queues discovery for the URL given in the ``url`` request parameter.

        A URL on the silo's domain gets a discover task for its post id (typed
        ``'event'`` when the path starts with ``/events/``). A URL on one of
        the source's own domains is run through original post discovery; each
        syndication link found gets a discover task and the source's
        ``last_syndication_url`` is updated. Any other URL only produces an
        error message.

        Always adds a status message and redirects to the source's page.
        """
        source = self.load_source()

        # validate URL, find silo post
        url = util.get_required_param(self, 'url')
        domain = util.domain_from_link(url)
        path = urllib.parse.urlparse(url).path
        silo = source.GR_CLASS

        # optimistic default; replaced below whenever nothing gets queued
        message = 'Discovering now. Refresh in a minute to see the results!'

        if domain == silo.DOMAIN:
            silo_post_id = silo.post_id(url)
            if not silo_post_id:
                message = "Sorry, that doesn't look like a %s post URL." % silo.NAME
            else:
                task_type = 'event' if path.startswith('/events/') else None
                util.add_discover_task(source, silo_post_id, type=task_type)

        elif not util.domain_or_parent_in(domain, source.domains):
            message = 'Please enter a URL on either your web site or %s.' % silo.NAME

        else:
            syndicated = original_post_discovery.process_entry(
                source, url, {}, False, [])
            if not syndicated:
                message = 'Failed to fetch %s or find a %s syndication link.' % (
                    util.pretty_link(url), silo.NAME)
            else:
                for synd_url in syndicated:
                    util.add_discover_task(source, silo.post_id(synd_url))
                source.updates = {'last_syndication_url': util.now_fn()}
                models.Source.put_updates(source)

        self.messages.add(message)
        self.redirect(source.bridgy_url(self))
Exemple #4
0
  def post(self):
    """Queues discovery for the URL given in the ``url`` request parameter.

    A URL on the silo's domain gets a discover task for its post id (typed
    ``'event'`` when the path starts with ``/events/``). A URL on one of the
    source's own domains is run through original post discovery; each
    syndication link found gets a discover task and the source's
    ``last_syndication_url`` is updated. Any other URL only produces an error
    message.

    Always adds a status message and redirects to the source's page.
    """
    source = self.load_source()

    # validate URL, find silo post
    url = util.get_required_param(self, 'url')
    domain = util.domain_from_link(url)
    path = urlparse.urlparse(url).path
    silo = source.GR_CLASS

    # optimistic default; replaced below whenever nothing gets queued
    message = 'Discovering now. Refresh in a minute to see the results!'

    if domain == silo.DOMAIN:
      silo_post_id = silo.post_id(url)
      if not silo_post_id:
        message = "Sorry, that doesn't look like a %s post URL." % silo.NAME
      else:
        task_type = 'event' if path.startswith('/events/') else None
        util.add_discover_task(source, silo_post_id, type=task_type)

    elif not util.domain_or_parent_in(domain, source.domains):
      message = 'Please enter a URL on either your web site or %s.' % silo.NAME

    else:
      syndicated = original_post_discovery.process_entry(source, url, {}, False, [])
      if not syndicated:
        message = 'Failed to fetch %s or find a %s syndication link.' % (
          util.pretty_link(url), silo.NAME)
      else:
        for synd_url in syndicated:
          util.add_discover_task(source, silo.post_id(synd_url))
        source.updates = {'last_syndication_url': util.now_fn()}
        models.Source.put_updates(source)

    self.messages.add(message)
    self.redirect(source.bridgy_url(self))
Exemple #5
0
    def _find_source(self, source_cls, url, domain):
        """Returns the source that should publish a post URL, or None if not found.

        Queries up to 100 ``source_cls`` entities whose ``domains`` include
        ``domain``. Among those that are not disabled and have the ``publish``
        feature, picks the one with the longest ``domain_urls`` entry that is a
        scheme-insensitive, case-insensitive prefix of ``url``.

        When no source qualifies, ``self.error()`` is called with an
        explanatory message. NOTE(review): whether ``self.error()`` raises or
        returns is not visible here; the early exits return its result, the
        final one does not.

        Args:
          source_cls: :class:`models.Source` subclass for this silo
          url: string
          domain: string, url's domain

        Returns: :class:`models.Source`
        """
        domain = domain.lower()
        if util.domain_or_parent_in(domain, util.DOMAINS):
            return self.error(
                f'Source URL should be on your own site, not {domain}')

        sources = source_cls.query().filter(
            source_cls.domains == domain).fetch(100)
        if not sources:
            msg = f'Could not find <b>{source_cls.GR_CLASS.NAME}</b> account for <b>{domain}</b>. Check that your {source_cls.GR_CLASS.NAME} profile has {domain} in its <em>web site</em> or <em>link</em> field, then try signing up again.'
            return self.error(msg, html=msg)

        # normalized once up front; it doesn't depend on the source being
        # examined
        schemeless_url = util.schemeless(url.lower()).strip('/')

        current_url = ''
        sources_ready = []
        best_match = None
        for source in sources:
            logging.info(
                f'Source: {source.bridgy_url()} , features {source.features}, status {source.status}, poll status {source.poll_status}'
            )
            if source.status != 'disabled' and 'publish' in source.features:
                # use a source that has a domain_url matching the url provided,
                # including path. find the source with the closest match.
                sources_ready.append(source)
                for domain_url in source.domain_urls:
                    schemeless_domain_url = util.schemeless(
                        domain_url.lower()).strip('/')
                    if (schemeless_url.startswith(schemeless_domain_url)
                            and len(domain_url) > len(current_url)):
                        current_url = domain_url
                        best_match = source

        if best_match:
            return best_match

        # distinguish "publish enabled, but no profile URL matches" from
        # "publish not enabled on any account for this domain"
        if sources_ready:
            msg = f'No account found that matches {util.pretty_link(url)}. Check that <a href="{util.host_url("/about#profile-link")}">the web site URL is in your silo profile</a>, then <a href="{request.host_url}">sign up again</a>.'
        else:
            msg = f'Publish is not enabled for your account. <a href="{request.host_url}">Try signing up!</a>'
        self.error(msg, html=msg)
Exemple #6
0
 def test_domain_or_parent_in(self):
   """Checks util.domain_or_parent_in() against a table of cases.

   Each case is an (input domain, list of domains) pair grouped under the
   boolean the call is expected to return. The failure message is the repr
   of (input, domains, expected) so the failing case is identifiable.
   """
   for expected, inputs in (
       (False, [
         ('', []), ('', ['']), ('x', []), ('x', ['']), ('x.y', []),
         ('x.y', ['']), ('', ['x', 'y']), ('', ['x.y']), ('x', ['y']),
         ('xy', ['y', 'x']), ('x', ['yx']), ('v.w.x', ['v.w', 'x.w']),
         ('x', ['', 'y', 'xy', 'yx', 'xx', 'xxx']),
       ]),
       (True, [
         ('x', ['x']), ('x', ['x', 'y']), ('x', ['y', 'x']),
         ('w.x', ['x']), ('u.v.w.x', ['y', 'v.w.x']),
       ])):
     for domain, domains in inputs:
       # repr() replaces the Python 2-only backtick syntax; assertEqual
       # replaces the deprecated assertEquals alias
       self.assertEqual(expected, util.domain_or_parent_in(domain, domains),
                        repr((domain, domains, expected)))
Exemple #7
0
  def template_vars(self):
    """Returns the dict of template variables for rendering a user page.

    Starts with the superclass's variables, then, depending on which features
    ``self.source`` has enabled, adds:

    * ``*_promo`` flags suggesting blog webmention signup
    * ``responses`` (at most 10, newest first), paging links, and ``next_poll``
    * ``publishes``, ``blogposts``, and ``webmentions`` (up to 10 each)

    If ``self.source`` is unset, only the base variables are returned.

    Aborts with HTTP 400 if a paging parameter can't be parsed as ISO 8601 or
    if both ``responses_before`` and ``responses_after`` are given.

    Returns: dict
    """
    vars = super(UserHandler, self).template_vars()
    vars.update({
      'source': self.source,
      'sources': models.sources,
      'logs': logs,
      'EPOCH': util.EPOCH,
      'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
      'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    })
    if not self.source:
      return vars

    # Blog webmention promos
    if 'webmention' not in self.source.features:
      if self.source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
        vars[self.source.SHORT_NAME + '_promo'] = True
      else:
        for domain in self.source.domains:
          if ('.blogspot.' in domain and  # Blogger uses country TLDs
              not Blogger.query(Blogger.domains == domain).get()):
            vars['blogger_promo'] = True
          elif (util.domain_or_parent_in(domain, ['tumblr.com']) and
                not Tumblr.query(Tumblr.domains == domain).get()):
            vars['tumblr_promo'] = True
          # domains must be passed as a list, like the tumblr check above
          elif (util.domain_or_parent_in(domain, ['wordpress.com']) and
                not WordPress.query(WordPress.domains == domain).get()):
            vars['wordpress_promo'] = True

    # Responses
    if 'listen' in self.source.features or 'email' in self.source.features:
      vars['responses'] = []
      query = Response.query().filter(Response.source == self.source.key)

      # if there's a paging param (responses_before or responses_after), update
      # query with it
      def get_paging_param(param):
        """Returns the named request param parsed as ISO 8601, or None if unset."""
        val = self.request.get(param)
        try:
          return util.parse_iso8601(val) if val else None
        except Exception:
          msg = "Couldn't parse %s %r as ISO8601" % (param, val)
          logging.warning(msg, stack_info=True)
          self.abort(400, msg)

      before = get_paging_param('responses_before')
      after = get_paging_param('responses_after')
      if before and after:
        self.abort(400, "can't handle both responses_before and responses_after")
      elif after:
        # ascending here so we get the responses immediately after the cursor;
        # they're re-sorted newest first below
        query = query.filter(Response.updated > after).order(Response.updated)
      elif before:
        query = query.filter(Response.updated < before).order(-Response.updated)
      else:
        query = query.order(-Response.updated)

      query_iter = query.iter()
      for i, r in enumerate(query_iter):
        r.response = json_loads(r.response_json)
        r.activities = [json_loads(a) for a in r.activities_json]

        # skip responses that aren't entirely public
        if (not self.source.is_activity_public(r.response) or
            not all(self.source.is_activity_public(a) for a in r.activities)):
          continue
        elif r.type == 'post':
          r.activities = []

        verb = r.response.get('verb')
        r.actor = (r.response.get('object') if verb == 'invite'
                   else r.response.get('author') or r.response.get('actor')
                  ) or {}

        # backfill missing content from each activity's (or the response's)
        # object. activity_content ends up as the last value backfilled.
        activity_content = ''
        for a in r.activities + [r.response]:
          if not a.get('content'):
            obj = a.get('object', {})
            a['content'] = activity_content = (
              obj.get('content') or obj.get('displayName') or
              # historical, from a Reddit bug fixed in granary@4f9df7c
              obj.get('name') or '')

        response_content = r.response.get('content')
        phrases = {
          'like': 'liked this',
          'repost': 'reposted this',
          'rsvp-yes': 'is attending',
          'rsvp-no': 'is not attending',
          'rsvp-maybe': 'might attend',
          'rsvp-interested': 'is interested',
          'invite': 'is invited',
        }
        phrase = phrases.get(r.type) or phrases.get(verb)
        # replace content with a canned phrase, except for reposts whose
        # content isn't a prefix of the backfilled activity content
        if phrase and (r.type != 'repost' or
                       activity_content.startswith(response_content)):
          r.response['content'] = '%s %s.' % (
            r.actor.get('displayName') or '', phrase)

        # convert image URL to https if we're serving over SSL
        image_url = r.actor.setdefault('image', {}).get('url')
        if image_url:
          r.actor['image']['url'] = util.update_scheme(image_url, self)

        # generate original post links
        r.links = self.process_webmention_links(r)
        r.original_links = [util.pretty_link(url, new_tab=True)
                            for url in r.original_posts]

        vars['responses'].append(r)
        # stop at a full page of 10, or once we're past 200 query results.
        # this is only checked after a response is appended.
        if len(vars['responses']) >= 10 or i > 200:
          break

      vars['responses'].sort(key=lambda r: r.updated, reverse=True)

      # calculate new paging param(s)
      new_after = (
        before if before else
        vars['responses'][0].updated if
          vars['responses'] and query_iter.probably_has_next() and (before or after)
        else None)
      if new_after:
        vars['responses_after_link'] = ('?responses_after=%s#responses' %
                                         new_after.isoformat())

      new_before = (
        after if after else
        vars['responses'][-1].updated if
          vars['responses'] and query_iter.probably_has_next()
        else None)
      if new_before:
        vars['responses_before_link'] = ('?responses_before=%s#responses' %
                                         new_before.isoformat())

      vars['next_poll'] = max(
        self.source.last_poll_attempt + self.source.poll_period(),
        # lower bound is 90 seconds from now
        util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in self.source.features:
      publishes = Publish.query().filter(Publish.source == self.source.key)\
                                 .order(-Publish.updated)\
                                 .fetch(10)
      for p in publishes:
        p.pretty_page = util.pretty_link(
          p.key.parent().id(),
          attrs={'class': 'original-post u-url u-name'},
          new_tab=True)

      vars['publishes'] = publishes

    if 'webmention' in self.source.features:
      # Blog posts
      blogposts = BlogPost.query().filter(BlogPost.source == self.source.key)\
                                  .order(-BlogPost.created)\
                                  .fetch(10)
      for b in blogposts:
        b.links = self.process_webmention_links(b)
        try:
          text = b.feed_item.get('title')
        except ValueError:
          text = None
        b.pretty_url = util.pretty_link(
          b.key.id(), text=text, attrs={'class': 'original-post u-url u-name'},
          max_length=40, new_tab=True)

      # Blog webmentions
      webmentions = BlogWebmention.query()\
          .filter(BlogWebmention.source == self.source.key)\
          .order(-BlogWebmention.updated)\
          .fetch(10)
      for w in webmentions:
        w.pretty_source = util.pretty_link(
          w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
        # best effort: if the target URL can't be parsed, treat it as not on
        # one of the source's domains
        try:
          target_is_source = (urllib.parse.urlparse(w.target_url()).netloc in
                              self.source.domains)
        except Exception:
          target_is_source = False
        w.pretty_target = util.pretty_link(
          w.target_url(), attrs={'class': 'original-post'}, new_tab=True,
          keep_host=target_is_source)

      vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return vars
Exemple #8
0
def user(site, id):
    """View for a user page.

    Looks up the source class for ``site`` and the source with the given
    ``id``. If no source has that id, tries to find one whose ``domains``,
    ``inferred_username``, ``name``, or ``username`` equals ``id`` and
    redirects to its page.

    Depending on which features the source has enabled, the template gets:

    * ``*_promo`` flags suggesting blog webmention signup
    * ``responses`` (at most 10, newest first), paging links, and ``next_poll``
    * ``publishes``, ``blogposts``, and ``webmentions`` (up to 10 each)

    Args:
      site: string, key into ``models.sources``
      id: string, source id

    Returns: the rendered ``<SHORT_NAME>_user.html`` template; a
      ``(rendered user_not_found.html, 404)`` tuple when the site or source
      isn't found or the source has no features; or a 301 redirect when ``id``
      matched one of the alternate properties.
    """
    cls = models.sources.get(site)
    if not cls:
        return render_template('user_not_found.html'), 404

    source = cls.lookup(id)

    if not source:
        # fall back to matching id against other identifying properties
        key = cls.query(
            ndb.OR(*[
                ndb.GenericProperty(prop) == id
                for prop in ('domains', 'inferred_username', 'name',
                             'username')
            ])).get(keys_only=True)
        if key:
            return redirect(cls(key=key).bridgy_path(), code=301)

    if not source or not source.features:
        return render_template('user_not_found.html'), 404

    source.verify()
    source = util.preprocess_source(source)

    vars = {
        'source': source,
        'logs': logs,
        'REFETCH_HFEED_TRIGGER': models.REFETCH_HFEED_TRIGGER,
        'RECENT_PRIVATE_POSTS_THRESHOLD': RECENT_PRIVATE_POSTS_THRESHOLD,
    }

    # Blog webmention promos
    if 'webmention' not in source.features:
        if source.SHORT_NAME in ('blogger', 'medium', 'tumblr', 'wordpress'):
            vars[source.SHORT_NAME + '_promo'] = True
        else:
            for domain in source.domains:
                if ('.blogspot.' in domain and  # Blogger uses country TLDs
                        not Blogger.query(Blogger.domains == domain).get()):
                    vars['blogger_promo'] = True
                elif (util.domain_or_parent_in(domain, ['tumblr.com'])
                      and not Tumblr.query(Tumblr.domains == domain).get()):
                    vars['tumblr_promo'] = True
                # domains must be passed as a list, like the tumblr check above
                elif (util.domain_or_parent_in(domain, ['wordpress.com']) and
                      not WordPress.query(WordPress.domains == domain).get()):
                    vars['wordpress_promo'] = True

    # Responses
    if 'listen' in source.features or 'email' in source.features:
        vars['responses'] = []
        query = Response.query().filter(Response.source == source.key)

        # if there's a paging param (responses_before or responses_after), update
        # query with it
        def get_paging_param(param):
            """Returns the named request value parsed as ISO 8601, or None if unset."""
            val = request.values.get(param)
            try:
                # NOTE(review): presumably restores a '+' (e.g. in a UTC
                # offset) that URL decoding turned into a space - confirm
                return util.parse_iso8601(val.replace(' ',
                                                      '+')) if val else None
            except Exception:
                error(f"Couldn't parse {param}, {val!r} as ISO8601")

        before = get_paging_param('responses_before')
        after = get_paging_param('responses_after')
        if before and after:
            error("can't handle both responses_before and responses_after")
        elif after:
            # ascending here so we get the responses immediately after the
            # cursor; they're re-sorted newest first below
            query = query.filter(Response.updated > after).order(
                Response.updated)
        elif before:
            query = query.filter(
                Response.updated < before).order(-Response.updated)
        else:
            query = query.order(-Response.updated)

        query_iter = query.iter()
        for i, r in enumerate(query_iter):
            r.response = json_loads(r.response_json)
            r.activities = [json_loads(a) for a in r.activities_json]

            # skip responses that aren't entirely public
            if (not source.is_activity_public(r.response) or not all(
                    source.is_activity_public(a) for a in r.activities)):
                continue
            elif r.type == 'post':
                r.activities = []

            verb = r.response.get('verb')
            r.actor = (r.response.get('object')
                       if verb == 'invite' else r.response.get('author')
                       or r.response.get('actor')) or {}

            # backfill missing content from each activity's (or the
            # response's) object. activity_content ends up as the last value
            # backfilled.
            activity_content = ''
            for a in r.activities + [r.response]:
                if not a.get('content'):
                    obj = a.get('object', {})
                    a['content'] = activity_content = (
                        obj.get('content') or obj.get('displayName') or
                        # historical, from a Reddit bug fixed in granary@4f9df7c
                        obj.get('name') or '')

            response_content = r.response.get('content')
            phrases = {
                'like': 'liked this',
                'repost': 'reposted this',
                'rsvp-yes': 'is attending',
                'rsvp-no': 'is not attending',
                'rsvp-maybe': 'might attend',
                'rsvp-interested': 'is interested',
                'invite': 'is invited',
            }
            phrase = phrases.get(r.type) or phrases.get(verb)
            # replace content with a canned phrase, except for reposts whose
            # content isn't a prefix of the backfilled activity content
            if phrase and (r.type != 'repost'
                           or activity_content.startswith(response_content)):
                r.response[
                    'content'] = f'{r.actor.get("displayName") or ""} {phrase}.'

            # convert image URL to https if we're serving over SSL
            image_url = r.actor.setdefault('image', {}).get('url')
            if image_url:
                r.actor['image']['url'] = util.update_scheme(
                    image_url, request)

            # generate original post links
            r.links = process_webmention_links(r)
            r.original_links = [
                util.pretty_link(url, new_tab=True) for url in r.original_posts
            ]

            vars['responses'].append(r)
            # stop at a full page of 10, or once we're past 200 query results.
            # this is only checked after a response is appended.
            if len(vars['responses']) >= 10 or i > 200:
                break

        vars['responses'].sort(key=lambda r: r.updated, reverse=True)

        # calculate new paging param(s)
        new_after = (before if before else vars['responses'][0].updated if
                     vars['responses'] and query_iter.probably_has_next() and
                     (before or after) else None)
        if new_after:
            vars[
                'responses_after_link'] = f'?responses_after={new_after.isoformat()}#responses'

        new_before = (after if after else
                      vars['responses'][-1].updated if vars['responses']
                      and query_iter.probably_has_next() else None)
        if new_before:
            vars[
                'responses_before_link'] = f'?responses_before={new_before.isoformat()}#responses'

        vars['next_poll'] = max(
            source.last_poll_attempt + source.poll_period(),
            # lower bound is 90 seconds from now
            util.now_fn() + datetime.timedelta(seconds=90))

    # Publishes
    if 'publish' in source.features:
        publishes = Publish.query().filter(Publish.source == source.key)\
                                   .order(-Publish.updated)\
                                   .fetch(10)
        for p in publishes:
            p.pretty_page = util.pretty_link(
                p.key.parent().id(),
                attrs={'class': 'original-post u-url u-name'},
                new_tab=True)

        vars['publishes'] = publishes

    if 'webmention' in source.features:
        # Blog posts
        blogposts = BlogPost.query().filter(BlogPost.source == source.key)\
                                    .order(-BlogPost.created)\
                                    .fetch(10)
        for b in blogposts:
            b.links = process_webmention_links(b)
            try:
                text = b.feed_item.get('title')
            except ValueError:
                text = None
            b.pretty_url = util.pretty_link(
                b.key.id(),
                text=text,
                attrs={'class': 'original-post u-url u-name'},
                max_length=40,
                new_tab=True)

        # Blog webmentions
        webmentions = BlogWebmention.query()\
            .filter(BlogWebmention.source == source.key)\
            .order(-BlogWebmention.updated)\
            .fetch(10)
        for w in webmentions:
            w.pretty_source = util.pretty_link(
                w.source_url(), attrs={'class': 'original-post'}, new_tab=True)
            # best effort: if the target URL can't be parsed, treat it as not
            # on one of the source's domains
            try:
                target_is_source = (urllib.parse.urlparse(
                    w.target_url()).netloc in source.domains)
            except Exception:
                target_is_source = False
            w.pretty_target = util.pretty_link(
                w.target_url(),
                attrs={'class': 'original-post'},
                new_tab=True,
                keep_host=target_is_source)

        vars.update({'blogposts': blogposts, 'webmentions': webmentions})

    return render_template(f'{source.SHORT_NAME}_user.html', **vars)