Beispiel #1
0
    def search_for_links(self):
        """Finds activities that link to any of this source's web sites.

        Only root domain URLs are searched, i.e. ones whose path is empty or
        '/'. URLs with paths tend to generate false positive results in G+'s
        search; the cause isn't known yet. URLs whose domain is in the
        webmention blacklist are skipped, and at most models.MAX_AUTHOR_URLS
        of the remaining URLs are used.

        G+ search supports OR:
        https://developers.google.com/+/api/latest/activities/search

        Returns:
          sequence of ActivityStreams activity dicts; an empty list if no URL
          qualifies for searching
        """
        quoted = []
        for url in self.domain_urls:
            if util.in_webmention_blacklist(util.domain_from_link(url)):
                continue
            if urlparse.urlparse(url).path not in ('', '/'):
                # skip anything that isn't a bare domain URL
                continue
            # drop the fragment and wrap the URL in double quotes
            quoted.append('"%s"' % util.fragmentless(url))

        terms = quoted[:models.MAX_AUTHOR_URLS]
        if not terms:
            return []

        return self.get_activities(
            search_query=' OR '.join(terms),
            group_id=gr_source.SEARCH,
            etag=self.last_activities_etag,
            fetch_replies=False,
            fetch_likes=False,
            fetch_shares=False,
            count=50)
Beispiel #2
0
 def test_fragmentless(self):
   """Checks util.fragmentless() against URLs with and without fragments.

   Inputs without a '#fragment' are expected back unchanged; inputs with one
   are expected back with the '#...' suffix removed.
   """
   cases = (
     # (expected result, input URL)
     ('', ''),
     ('/path', '/path'),
     ('http://foo', 'http://foo'),
     ('http://foo', 'http://foo#bar'),
     ('http://foo/bar?baz', 'http://foo/bar?baz#baj'),
   )
   for want, given in cases:
     self.assertEqual(want, util.fragmentless(given))
Beispiel #3
0
  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    URLs whose domain is in the webmention blacklist are skipped. Each
    remaining URL has its fragment removed and is wrapped in double quotes,
    and the terms are joined with OR into a single search query.

    If no URLs remain, returns an empty list without calling get_activities,
    rather than issuing a search with an empty query string.

    G+ search supports OR:
    https://developers.google.com/+/api/latest/activities/search

    Returns: sequence of ActivityStreams activity dicts
    """
    urls = ['"%s"' % util.fragmentless(url) for url in self.domain_urls
            if not util.in_webmention_blacklist(util.domain_from_link(url))]
    if not urls:
      # nothing to search for; don't send an empty query
      return []

    return self.get_activities(
      search_query=' OR '.join(urls), group_id=gr_source.SEARCH,
      etag=self.last_activities_etag, fetch_replies=False, fetch_likes=False,
      fetch_shares=False, count=50)
Beispiel #4
0
    def search_for_links(self):
        """Searches for activities with links to any of this source's web sites.

        Twitter search supports OR:
        https://dev.twitter.com/rest/public/search

        ...but it only returns complete(ish) results if we strip scheme from
        URLs, ie search for example.com instead of http://example.com/, and
        that also returns false positives, so we check that the returned
        tweets actually have matching links.
        https://github.com/snarfed/bridgy/issues/565

        URLs whose domain is in the webmention blacklist are skipped. If no
        URLs remain, no search is performed.

        Returns:
          sequence of ActivityStreams activity dicts
        """
        urls = set(
            util.fragmentless(url) for url in self.domain_urls
            if not util.in_webmention_blacklist(util.domain_from_link(url)))
        if not urls:
            return []

        # the query uses scheme-less URLs; the filter below matches against the
        # full (fragment-less) URLs
        query = ' OR '.join('"%s"' % util.schemeless(url, slashes=False)
                            for url in urls)
        candidates = self.get_activities(search_query=query,
                                         group_id=gr_source.SEARCH,
                                         etag=self.last_activities_etag,
                                         fetch_replies=False,
                                         fetch_likes=False,
                                         fetch_shares=False,
                                         count=50)

        # filter out retweets and search false positives that don't actually
        # link to us
        results = []
        for candidate in candidates:
            if candidate.get('verb') == 'share':
                continue
            obj = candidate['object']
            content = obj.get('content', '')
            # tags and attachments can both carry URLs; combine them once per
            # candidate instead of once per URL
            linked = obj.get('tags', []) + obj.get('attachments', [])
            if any(url in content or
                   any(t.get('url', '').startswith(url) for t in linked)
                   for url in urls):
                results.append(candidate)

        return results
Beispiel #5
0
  def search_for_links(self):
    """Finds activities that link to any of this source's web sites.

    Each of this source's domain URLs has its fragment and scheme stripped.
    URLs whose domain is in the webmention blocklist are left out. Every
    remaining URL contributes a site:"..." and a selftext:"..." term, and all
    terms are joined with OR.

    Returns:
      sequence of ActivityStreams activity dicts; an empty list if there are
      no URLs to search for
    """
    targets = set()
    for url in self.domain_urls:
      if util.in_webmention_blocklist(util.domain_from_link(url)):
        continue
      targets.add(util.schemeless(util.fragmentless(url), slashes=False))

    if not targets:
      return []

    # Search syntax: https://www.reddit.com/wiki/search
    clauses = [f'site:"{t}" OR selftext:"{t}"' for t in targets]
    return self.get_activities(
      search_query=' OR '.join(clauses),
      group_id=gr_source.SEARCH,
      etag=self.last_activities_etag,
      fetch_replies=False,
      fetch_likes=False,
      fetch_shares=False,
      count=50)
Beispiel #6
0
    def search_for_links(self):
        """Finds activities that link to any of this source's web sites.

        Each of this source's domain URLs has its fragment and scheme
        stripped and is wrapped in double quotes; URLs whose domain is in the
        webmention blocklist are left out. The quoted terms are joined with
        OR into one search query. Replies are fetched along with the results;
        likes and shares are not.

        Returns:
          sequence of ActivityStreams activity dicts; an empty list if there
          are no URLs to search for
        """
        targets = {
            util.schemeless(util.fragmentless(url), slashes=False)
            for url in self.domain_urls
            if not util.in_webmention_blocklist(util.domain_from_link(url))
        }

        if targets:
            return self.get_activities(
                search_query=' OR '.join(f'"{t}"' for t in targets),
                group_id=gr_source.SEARCH,
                etag=self.last_activities_etag,
                fetch_replies=True,
                fetch_likes=False,
                fetch_shares=False,
                count=50)

        return []
Beispiel #7
0
  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Twitter search supports OR:
    https://dev.twitter.com/rest/public/search

    ...but it only returns complete(ish) results if we strip scheme from URLs,
    ie search for example.com instead of http://example.com/, and that also
    returns false positives, so we check that the returned tweets actually have
    matching links. https://github.com/snarfed/bridgy/issues/565

    URLs whose domain is in the webmention blacklist are skipped. If no URLs
    remain, no search is performed.

    Returns:
      sequence of ActivityStreams activity dicts
    """
    urls = set(util.fragmentless(url) for url in self.domain_urls
               if not util.in_webmention_blacklist(util.domain_from_link(url)))
    if not urls:
      return []

    # the query uses scheme-less URLs; the filter below matches against the
    # full (fragment-less) URLs
    query = ' OR '.join('"%s"' % util.schemeless(url, slashes=False) for url in urls)
    candidates = self.get_activities(
      search_query=query, group_id=gr_source.SEARCH, etag=self.last_activities_etag,
      fetch_replies=False, fetch_likes=False, fetch_shares=False, count=50)

    # filter out retweets and search false positives that don't actually link to us
    results = []
    for candidate in candidates:
      if candidate.get('verb') == 'share':
        continue
      obj = candidate['object']
      content = obj.get('content', '')
      # tags and attachments can both carry URLs; combine them once per
      # candidate instead of once per URL
      linked = obj.get('tags', []) + obj.get('attachments', [])
      if any(url in content or
             any(t.get('url', '').startswith(url) for t in linked)
             for url in urls):
        results.append(candidate)

    return results
Beispiel #8
0
  def search_for_links(self):
    """Finds activities that link to any of this source's web sites.

    Only root domain URLs are searched, i.e. ones whose path is empty or '/'.
    URLs with paths tend to generate false positive results in G+'s search;
    the cause isn't known yet. URLs whose domain is in the webmention
    blacklist are skipped, and at most models.MAX_AUTHOR_URLS of the
    remaining URLs are used.

    G+ search supports OR:
    https://developers.google.com/+/api/latest/activities/search

    Returns: sequence of ActivityStreams activity dicts; an empty list if no
    URL qualifies for searching
    """
    def searchable(url):
      # True for root domain URLs whose domain isn't blacklisted
      return (not util.in_webmention_blacklist(util.domain_from_link(url))
              and urlparse.urlparse(url).path in ('', '/'))

    terms = ['"%s"' % util.fragmentless(url)
             for url in self.domain_urls if searchable(url)]
    terms = terms[:models.MAX_AUTHOR_URLS]
    if not terms:
      return []

    return self.get_activities(
      search_query=' OR '.join(terms),
      group_id=gr_source.SEARCH,
      etag=self.last_activities_etag,
      fetch_replies=False,
      fetch_likes=False,
      fetch_shares=False,
      count=50)