Ejemplo n.º 1
0
    def test_in_webmention_blocklist(self):
        """Exercises util.in_webmention_blocklist on blocked and allowed domains.

        Also checks that the result for 'localhost' flips with util.LOCAL.
        """
        blocked_domains = ('t.co', 'x.t.co', 'X.Y.T.CO', 'abc.onion')
        for domain in blocked_domains:
            self.assertTrue(util.in_webmention_blocklist(domain), domain)

        allowed_domains = ('snarfed.org', 'www.snarfed.org', 't.co.com')
        for domain in allowed_domains:
            self.assertFalse(util.in_webmention_blocklist(domain), domain)

        # Register LOCAL with mox before overwriting it by hand (presumably so
        # the original value comes back when stubs are unset - TODO confirm).
        self.mox.StubOutWithMock(util, 'LOCAL')
        localhost_cases = (
            (False, self.assertTrue),
            (True, self.assertFalse),
        )
        for local_flag, check in localhost_cases:
            util.LOCAL = local_flag
            check(util.in_webmention_blocklist('localhost'))
Ejemplo n.º 2
0
  def verify(self, force=False):
    """Checks that this source is ready to be used.

    Runs webmention discovery against the first author URL whose domain is not
    in the webmention blocklist. On success, the discovered endpoint and the
    response text are recorded on this entity; on any error, the endpoint is
    reset to None. Either way, the entity is then stored with :meth:`put`.

    Returns without doing anything if the source is already verified (and
    ``force`` is not set), is disabled, has no features, or has no usable
    author URL.

    May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

    Args:
      force: if True, re-runs webmention discovery even if we already think
        this source is verified.
    """
    candidates = []
    for url, domain in zip(self.get_author_urls(), self.domains):
      if not util.in_webmention_blocklist(domain):
        candidates.append(url)

    if self.verified() and not force:
      return
    if self.status == 'disabled' or not self.features or not candidates:
      return

    target = candidates[0]
    try:
      discovered = webmention.discover(target, timeout=util.HTTP_TIMEOUT)
      self.webmention_endpoint = discovered.endpoint
      self._fetched_html = discovered.response.text
    except BaseException as e:
      # Best effort: a failed discovery is logged and clears the endpoint.
      logger.info('Error discovering webmention endpoint', exc_info=e)
      self.webmention_endpoint = None

    self.put()
Ejemplo n.º 3
0
    def verify(self, force=False):
        """Checks that this source is ready to be used.

        Attempts webmention endpoint discovery on the first author URL whose
        domain is not in the webmention blocklist, records the fetched HTML and
        the endpoint (or None if discovery failed or found nothing) on this
        entity, then stores it with :meth:`put`.

        Returns without doing anything if the source is already verified (and
        ``force`` is not set), is disabled, has no features, or has no usable
        author URL.

        May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

        Args:
          force: if True, re-runs webmention discovery even if we already
            think this source is verified.
        """
        usable_urls = []
        for url, domain in zip(self.get_author_urls(), self.domains):
            if not util.in_webmention_blocklist(domain):
                usable_urls.append(url)

        if self.verified() and not force:
            return
        if self.status == 'disabled' or not self.features or not usable_urls:
            return

        target = usable_urls[0]
        logging.info('Attempting to discover webmention endpoint on %s', target)
        mention = send.WebmentionSend('https://brid.gy/', target)
        mention.requests_kwargs = dict(timeout=util.HTTP_TIMEOUT,
                                       headers=util.REQUEST_HEADERS)
        try:
            mention._discoverEndpoint()
        except BaseException as e:
            # Best effort: record the failure on the mention object so the
            # checks below treat it like any other discovery error.
            logging.info('Error discovering webmention endpoint', exc_info=e)
            mention.error = {'code': 'EXCEPTION'}

        self._fetched_html = getattr(mention, 'html', None)
        error = getattr(mention, 'error', None)
        endpoint = getattr(mention, 'receiver_endpoint', None)

        if not error and endpoint:
            logging.info("Discovered webmention endpoint %s", endpoint)
            self.webmention_endpoint = endpoint
        else:
            logging.info("No webmention endpoint found: %s %r", error, endpoint)
            self.webmention_endpoint = None

        self.put()
Ejemplo n.º 4
0
  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Each of this source's domain URLs is stripped of its fragment and scheme;
    URLs whose domain is in the webmention blocklist are skipped. The remaining
    URLs are combined into a single OR query that matches each one as either a
    ``site:`` or a ``selftext:`` term.

    Returns:
      sequence of ActivityStreams activity dicts; empty list if there are no
      usable URLs
    """
    urls = {util.schemeless(util.fragmentless(url), slashes=False)
            for url in self.domain_urls
            if not util.in_webmention_blocklist(util.domain_from_link(url))}
    if not urls:
      return []

    # Search syntax: https://www.reddit.com/wiki/search
    #
    # Iterate in sorted order so the query string is identical on every run;
    # iteration order of a set of strings is not stable across processes.
    url_query = ' OR '.join(f'site:"{u}" OR selftext:"{u}"'
                            for u in sorted(urls))
    return self.get_activities(
      search_query=url_query, group_id=gr_source.SEARCH, etag=self.last_activities_etag,
      fetch_replies=False, fetch_likes=False, fetch_shares=False, count=50)
Ejemplo n.º 5
0
    def search_for_links(self):
        """Searches for activities with links to any of this source's web sites.

        Each of this source's domain URLs is stripped of its fragment and
        scheme; URLs whose domain is in the webmention blocklist are skipped.
        The remaining URLs are double-quoted and joined with OR into a single
        search query.

        Returns:
          sequence of ActivityStreams activity dicts; empty list if there are
          no usable URLs
        """
        urls = {
            util.schemeless(util.fragmentless(url), slashes=False)
            for url in self.domain_urls
            if not util.in_webmention_blocklist(util.domain_from_link(url))
        }
        if not urls:
            return []

        # Iterate in sorted order so the query string is identical on every
        # run; iteration order of a set of strings is not stable across
        # processes.
        url_query = ' OR '.join(f'"{u}"' for u in sorted(urls))
        return self.get_activities(search_query=url_query,
                                   group_id=gr_source.SEARCH,
                                   etag=self.last_activities_etag,
                                   fetch_replies=True,
                                   fetch_likes=False,
                                   fetch_shares=False,
                                   count=50)
Ejemplo n.º 6
0
  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Twitter search supports OR:
    https://dev.twitter.com/rest/public/search

    ...but it only returns complete(ish) results if we strip scheme from URLs,
    ie search for example.com instead of http://example.com/, and that also
    returns false positives, so we check that the returned tweets actually have
    matching links. https://github.com/snarfed/bridgy/issues/565

    Returns:
      sequence of ActivityStreams activity dicts; empty list if there are no
      usable URLs. Shares (retweets) and activities without a tag or attachment
      URL that starts with one of our URLs are filtered out.
    """
    urls = {util.schemeless(util.fragmentless(url), slashes=False)
            for url in self.domain_urls
            if not util.in_webmention_blocklist(util.domain_from_link(url))}
    if not urls:
      return []

    query = ' OR '.join(sorted(urls))
    candidates = self.get_activities(
      search_query=query, group_id=gr_source.SEARCH, etag=self.last_activities_etag,
      fetch_replies=False, fetch_likes=False, fetch_shares=False, count=50)

    # str.startswith accepts a tuple of prefixes, so each link only needs to be
    # normalized and checked once instead of once per URL.
    prefixes = tuple(urls)

    # filter out retweets and search false positives that don't actually link to us
    results = []
    for candidate in candidates:
      if candidate.get('verb') == 'share':
        continue
      obj = candidate['object']
      links = obj.get('tags', []) + obj.get('attachments', [])
      if any(util.schemeless(link.get('url', ''), slashes=False).startswith(prefixes)
             for link in links):
        results.append(candidate)

    return results
Ejemplo n.º 7
0
    def test_in_webmention_blocklist(self):
        """Exercises util.in_webmention_blocklist on blocked and allowed domains."""
        blocked_domains = ('t.co', 'x.t.co', 'x.y.t.co', 'abc.onion')
        for domain in blocked_domains:
            self.assertTrue(util.in_webmention_blocklist(domain), domain)

        allowed_domains = ('snarfed.org', 'www.snarfed.org', 't.co.com')
        for domain in allowed_domains:
            self.assertFalse(util.in_webmention_blocklist(domain), domain)