Ejemplo n.º 1
0
  def preprocess_for_publish(self, obj):
    """Rewrites person tag URLs to silo profile URLs ahead of publishing.

    For every tag whose objectType is 'person', the tag's candidate URLs (from
    microformats2.object_urls) are run through self.infer_profile_url in order.
    The first truthy result replaces the tag's 'url'; if none is found, the tag
    is left alone. Everything under the 'object' field is then processed the
    same way, recursively.

    The object is modified in place.

    Args:
      obj: ActivityStreams activity or object dict
    """
    for tag in obj.get('tags', []):
      if tag.get('objectType') != 'person':
        continue

      # lazily evaluated, so inference stops at the first URL that maps to a
      # profile on this silo
      inferred = (url and self.infer_profile_url(url)
                  for url in microformats2.object_urls(tag))
      profile_url = next((found for found in inferred if found), None)
      if profile_url:
        tag['url'] = profile_url

    # recurse on contained object(s)
    for inner in util.get_list(obj, 'object'):
      self.preprocess_for_publish(inner)
Ejemplo n.º 2
0
    def preprocess_for_publish(self, obj):
        """Points person tags at their profile on this service, in place.

        A person tag's "url" may be the person's homepage. This walks the
        tag's URLs and, as soon as self.infer_profile_url returns something
        truthy for one of them, stores that result as the tag's "url". Tags
        that aren't people, or that have no inferable profile URL, are not
        touched. Nested objects in the 'object' field get the same treatment.

        Args:
          obj: ActivityStreams activity or object dict, modified in place
        """
        for tag in obj.get('tags', []):
            if tag.get('objectType') != 'person':
                continue

            for candidate in microformats2.object_urls(tag):
                profile_url = candidate and self.infer_profile_url(candidate)
                if profile_url:
                    # first match wins
                    tag['url'] = profile_url
                    break

        # recurse on contained object(s)
        for child in util.get_list(obj, 'object'):
            self.preprocess_for_publish(child)
Ejemplo n.º 3
0
  def _urls_and_domains(self, auth_entity, user_url):
    """Returns the user's web site URLs and their domains.

    Candidates are user_url plus the URLs of the ActivityStreams actor
    converted from the auth entity's user_json. Each candidate is passed to
    util.get_webmention_target; only the ones it flags as OK to send to (ie
    not webmention-blacklisted) are kept. Only the first MAX_AUTHOR_URLS
    candidates are passed resolve=True. May be overridden by subclasses.

    Args:
      auth_entity: oauth_dropins.models.BaseAuth
      user_url: string, optional URL passed in when authorizing

    Returns: ([string url, ...], [string domain, ...])
    """
    actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    profile_links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(profile_links))

    if len(candidates) > MAX_AUTHOR_URLS:
      logging.warning('Too many profile links! Only resolving the first %s: %s',
                      MAX_AUTHOR_URLS, candidates)

    sendable = []
    for index, candidate in enumerate(candidates):
      target, _, send = util.get_webmention_target(
        candidate, resolve=index < MAX_AUTHOR_URLS)
      if send:
        sendable.append(target)

    # dedupe_urls also normalizes domains to lower case
    urls = util.dedupe_urls(sendable)
    return urls, [util.domain_from_link(link) for link in urls]
Ejemplo n.º 4
0
 def test_object_urls(self):
     # Each case pairs the URL list expected from microformats2.object_urls()
     # with the actor dict passed to it. The last case expects 'url' first,
     # then the 'urls' values in order, with the repeated http://foo dropped.
     no_urls = []
     just_foo = ['http://foo']
     cases = [
         (no_urls, {}),
         (no_urls, {'displayName': 'foo'}),
         (no_urls, {'url': None, 'urls': []}),
         (just_foo, {'url': 'http://foo'}),
         (just_foo, {'urls': [{'value': 'http://foo'}]}),
         (
             ['http://foo', 'https://bar', 'http://baz'],
             {
                 'url': 'http://foo',
                 'urls': [
                     {'value': 'https://bar'},
                     {'value': 'http://foo'},
                     {'value': 'http://baz'},
                 ],
             },
         ),
     ]

     for want, actor in cases:
         got = microformats2.object_urls(actor)
         self.assertEqual(want, got)
Ejemplo n.º 5
0
    def _urls_and_domains(self, auth_entity, user_url):
        """Returns this user's valid (not webmention-blacklisted) URLs and domains.

        Converts the auth entity's user_json to an ActivityStreams actor and
        combines its URLs with user_url. Every candidate is checked with
        util.get_webmention_target, and only candidates it reports as
        sendable are returned. Candidates past the first MAX_AUTHOR_URLS are
        still checked, but with resolve=False. May be overridden by
        subclasses.

        Args:
          auth_entity: :class:`oauth_dropins.models.BaseAuth`
          user_url: string, optional URL passed in when authorizing

        Returns:
          ([string url, ...], [string domain, ...])
        """
        actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
        logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

        combined = [user_url] + microformats2.object_urls(actor)
        candidates = util.trim_nulls(util.uniquify(combined))

        if len(candidates) > MAX_AUTHOR_URLS:
            logging.warning(
                'Too many profile links! Only resolving the first %s: %s',
                MAX_AUTHOR_URLS, candidates)

        kept = []
        for position, candidate in enumerate(candidates):
            within_limit = position < MAX_AUTHOR_URLS
            resolved, _, sendable = util.get_webmention_target(
                candidate, resolve=within_limit)
            if not sendable:
                continue
            kept.append(resolved)

        # normalizes domains to lower case
        urls = util.dedupe_urls(kept)
        domains = [util.domain_from_link(link) for link in urls]
        return urls, domains
Ejemplo n.º 6
0
    def check_token_for_actor(self, actor):
        """Checks that the given actor is public and matches the request's token.

        The domains of the actor's URLs, minus the silo's own domain, are
        looked up as :class:`Domain` entities. The check passes as soon as one
        of those entities has the request's ``token`` parameter in its tokens.

        Args:
          actor: dict, ActivityStreams actor

        Aborts via ``self.abort`` (which is expected to raise) with:
          HTTP 400 if the actor is missing or is not public
          HTTP 403 if no domain entity lists the token
        """
        if not actor:
            self.abort(400, 'Missing actor!')

        if not gr_source.Source.is_public(actor):
            silo_name = self.gr_source().NAME
            self.abort(
                400,
                f'Your {silo_name} account is private. Bridgy only supports public accounts.'
            )

        token = util.get_required_param(self, 'token')
        domains = {
            util.domain_from_link(util.replace_test_domains_with_localhost(url))
            for url in microformats2.object_urls(actor)
        }
        # links back to the silo itself don't count as the user's own domains
        domains.discard(self.source_class().GR_CLASS.DOMAIN)

        logging.info(f'Checking token against domains {domains}')
        entities = ndb.get_multi(ndb.Key(Domain, d) for d in domains)
        # entities may contain falsy entries, which are skipped
        if any(entity and token in entity.tokens for entity in entities):
            return

        self.abort(403,
                   f'Token {token} is not authorized for any of: {domains}')
Ejemplo n.º 7
0
    def finish(self, auth_entity, state=None):
        """Validates the user's Instagram profile, then adds or deletes the source.

        When auth_entity is truthy, this:

        1. finds the first Instagram URL in the 'rel-me' list of the auth
           entity's user_json and takes the username from its path,
        2. fetches that user's actor from Instagram by scraping,
        3. checks that the actor's URLs include the web site in the auth
           entity's key id, and
        4. checks that the actor is public.

        If any step fails, a message is added to self.messages and the user is
        redirected to '/'. Otherwise, and also when auth_entity is falsy,
        control is handed to self.maybe_add_or_delete_source.

        Args:
          auth_entity: auth entity whose user_json has a 'rel-me' list and
            whose key id is the user's web site. May be falsy, presumably when
            the user declined the auth prompt (TODO confirm).
          state: passed through to self.maybe_add_or_delete_source

        Raises:
          whatever get_actor raises, unless util.interpret_http_exception maps
          it to one of Instagram.RATE_LIMIT_HTTP_CODES
        """
        # Bound up front: the validation below is skipped entirely when
        # auth_entity is falsy, and the final call would otherwise hit an
        # unbound local.
        actor = None

        if auth_entity:
            user_json = json.loads(auth_entity.user_json)

            # find instagram profile URL
            urls = user_json.get('rel-me', [])
            logging.info('rel-mes: %s', urls)
            for url in util.trim_nulls(urls):
                if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                    username = urllib.parse.urlparse(url).path.strip('/')
                    break
            else:
                self.messages.add(
                    'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.'
                )
                return self.redirect('/')

            # check that instagram profile links to web site
            try:
                actor = gr_instagram.Instagram(scrape=True).get_actor(
                    username, ignore_rate_limit=True)
            except Exception as e:
                code, _ = util.interpret_http_exception(e)
                if code not in Instagram.RATE_LIMIT_HTTP_CODES:
                    raise
                # rate limited: tell the user instead of failing the request
                self.messages.add(
                    '<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!'
                )
                return self.redirect('/')

            if not actor:
                self.messages.add(
                    "Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account."
                    % username)
                return self.redirect('/')

            # compare canonicalized forms so that trivial URL differences
            # don't cause a false mismatch
            canonicalize = util.UrlCanonicalizer(redirects=False)
            website = canonicalize(auth_entity.key.id())
            urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
            logging.info('Looking for %s in %s', website, urls)
            if website not in urls:
                self.messages.add(
                    "Please add %s to your Instagram profile's website or bio field and try again."
                    % website)
                return self.redirect('/')

            # check that the instagram account is public
            if not gr_source.Source.is_public(actor):
                self.messages.add(
                    'Your Instagram account is private. Bridgy only supports public accounts.'
                )
                return self.redirect('/')

        self.maybe_add_or_delete_source(Instagram,
                                        auth_entity,
                                        state,
                                        actor=actor)
Ejemplo n.º 8
0
    def _urls_and_domains(self, auth_entity, user_url):
        """Returns this user's valid (not webmention-blacklisted) URLs and domains.

        Converts the auth entity's user_json to an ActivityStreams actor and
        combines its URLs with user_url. For each candidate that
        util.get_webmention_target reports as OK:

        * the resolved URL is lower cased;
        * if the resolved URL merely extends the candidate (a redirect to a
          deeper path), the candidate itself is used instead;
        * if the result still has a path and the candidate is among the first
          MAX_AUTHOR_URLS, the site root is fetched, and if its rel=me links
          include the URL, the root is used instead.

        May be overridden by subclasses.

        Args:
          auth_entity: :class:`oauth_dropins.models.BaseAuth`
          user_url: string, optional URL passed in when authorizing

        Returns:
          ([string url, ...], [string domain, ...])

        Raises:
          whatever ``raise_for_status()`` raises when fetching a site root
          fails
        """
        actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
        logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

        candidates = util.trim_nulls(
            util.uniquify([user_url] + microformats2.object_urls(actor)))

        if len(candidates) > MAX_AUTHOR_URLS:
            logging.info(
                'Too many profile links! Only resolving the first %s: %s',
                MAX_AUTHOR_URLS, candidates)

        collected = []
        for i, original in enumerate(candidates):
            resolving = i < MAX_AUTHOR_URLS
            resolved, _, ok = util.get_webmention_target(original,
                                                         resolve=resolving)
            if not ok:
                continue

            resolved = resolved.lower()
            original_prefix = util.schemeless(original.lower())
            if util.schemeless(resolved).startswith(original_prefix):
                # redirected to a deeper path. use the original higher level URL. #652
                resolved = original

            # If the URL has a path segment, check if root has a matching rel=me.
            with_path = re.match(r'^(https?://[^/]+)/.+', resolved)
            if with_path and resolving:
                root = with_path.group(1)
                response = util.requests_get(root)
                response.raise_for_status()
                parsed = util.mf2py_parse(response.text, root)
                rel_mes = parsed.get('rels', {}).get('me', [])
                if resolved in rel_mes:
                    resolved = root

            collected.append(resolved)

        # normalizes domains to lower case
        urls = util.dedupe_urls(collected)
        domains = [util.domain_from_link(link) for link in urls]
        return urls, domains
Ejemplo n.º 9
0
  def urls_and_domains(self, auth_entity, user_url, actor=None,
                       resolve_source_domain=True):
    """Returns this user's valid (not webmention-blocklisted) URLs and domains.

    Candidate URLs are user_url plus the URLs of the user's ActivityStreams
    actor. Each one is passed to self.resolve_profile_url, and candidates it
    returns nothing for are dropped. URLs on this source's own domain are
    excluded from the result. May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`. Its user_json is
        only read when actor is not provided.
      user_url: string, optional URL passed in when authorizing
      actor: dict, optional AS actor for the user. If provided, overrides
        auth_entity
      resolve_source_domain: boolean, whether to follow redirects on URLs on
        this source's domain

    Returns:
      ([string url, ...], [string domain, ...])
    """
    if not actor:
      actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json))
    logger.debug(f'Extracting URLs and domains from actor: {json_dumps(actor, indent=2)}')

    profile_links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(profile_links))

    if len(candidates) > MAX_AUTHOR_URLS:
      logger.info(f'Too many profile links! Only resolving the first {MAX_AUTHOR_URLS}: {candidates}')

    resolved_urls = []
    for index, candidate in enumerate(candidates):
      off_source_domain = (
        util.domain_from_link(candidate) != self.gr_source.DOMAIN)
      within_limit = index < MAX_AUTHOR_URLS
      should_resolve = ((resolve_source_domain or off_source_domain)
                        and within_limit)
      resolved = self.resolve_profile_url(candidate, resolve=should_resolve)
      if not resolved:
        continue
      resolved_urls.append(resolved)

    final_urls = []
    domains = []
    # dedupe_urls normalizes domains to lower case
    for link in util.dedupe_urls(resolved_urls):
      link_domain = util.domain_from_link(link)
      if link_domain == self.gr_source.DOMAIN:
        # skip links on this source's domain itself. only currently needed for
        # Mastodon; the other silo domains are in the webmention blocklist.
        continue
      final_urls.append(link)
      domains.append(link_domain)

    return final_urls, domains
Ejemplo n.º 10
0
 def test_object_urls(self):
   # Each case pairs the URL list expected from microformats2.object_urls()
   # with the actor dict passed to it. The last case expects 'url' first, then
   # the 'urls' values in order, with the repeated http://foo dropped.
   for expected, actor in (
       ([], {}),
       ([], {'displayName': 'foo'}),
       ([], {'url': None, 'urls': []}),
       (['http://foo'], {'url': 'http://foo'}),
       (['http://foo'], {'urls': [{'value': 'http://foo'}]}),
       (['http://foo', 'https://bar', 'http://baz'], {
         'url': 'http://foo',
         'urls': [{'value': 'https://bar'},
                  {'value': 'http://foo'},
                  {'value': 'http://baz'},
         ],
       }),
   ):
     # assertEqual, not the deprecated assertEquals alias, which was removed
     # in Python 3.12
     self.assertEqual(expected, microformats2.object_urls(actor))
Ejemplo n.º 11
0
    def finish(self, auth_entity, state=None):
        """Validates the user's Instagram profile, then adds or deletes the source.

        When auth_entity is truthy, this:

        1. finds the first Instagram URL in the 'rel-me' list of the auth
           entity's user_json and takes the username from its path,
        2. fetches that user's actor from Instagram by scraping,
        3. checks that the actor's URLs include the web site in the auth
           entity's key id, and
        4. checks that the actor is public.

        If any step fails, a message is added to self.messages and the user is
        redirected to '/'. Otherwise, and also when auth_entity is falsy,
        control is handed to self.maybe_add_or_delete_source.

        Args:
          auth_entity: auth entity whose user_json has a 'rel-me' list and
            whose key id is the user's web site. May be falsy, presumably when
            the user declined the auth prompt (TODO confirm).
          state: passed through to self.maybe_add_or_delete_source
        """
        # Bound up front: the validation below is skipped entirely when
        # auth_entity is falsy, and the final call would otherwise hit an
        # unbound local.
        actor = None

        if auth_entity:
            user_json = json.loads(auth_entity.user_json)

            # find instagram profile URL
            urls = user_json.get('rel-me', [])
            logging.info('rel-mes: %s', urls)
            for url in util.trim_nulls(urls):
                if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                    username = urlparse.urlparse(url).path.strip('/')
                    break
            else:
                self.messages.add(
                    'No Instagram profile found. Please <a href="https://indieauth.com/setup">'
                    'add an Instagram rel-me link</a>, then try again.')
                return self.redirect('/')

            # check that instagram profile links to web site
            actor = gr_instagram.Instagram(scrape=True).get_actor(
                username, ignore_rate_limit=True)
            if not actor:
                self.messages.add(
                    "Couldn't find Instagram user '%s'. Please check your site's rel-me "
                    "link and your Instagram account." % username)
                return self.redirect('/')

            # compare canonicalized forms so that trivial URL differences
            # don't cause a false mismatch
            canonicalize = util.UrlCanonicalizer(redirects=False)
            website = canonicalize(auth_entity.key.id())
            urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
            logging.info('Looking for %s in %s', website, urls)
            if website not in urls:
                self.messages.add(
                    "Please add %s to your Instagram profile's website or "
                    'bio field and try again.' % website)
                return self.redirect('/')

            # check that the instagram account is public
            if not gr_source.Source.is_public(actor):
                self.messages.add('Your Instagram account is private. '
                                  'Bridgy only supports public accounts.')
                return self.redirect('/')

        self.maybe_add_or_delete_source(Instagram,
                                        auth_entity,
                                        state,
                                        actor=actor)
Ejemplo n.º 12
0
    def _urls_and_domains(self, auth_entity, user_url):
        """Returns this user's valid (not webmention-blacklisted) URLs and domains.

        The ActivityStreams actor is taken from the 'actor' field of the auth
        entity's user_json when present, otherwise it is converted from the
        user_json itself. The actor's URLs plus user_url are each passed to
        self.resolve_profile_url; candidates it returns nothing for are
        dropped, as are URLs on this source's own domain. May be overridden
        by subclasses.

        Args:
          auth_entity: :class:`oauth_dropins.models.BaseAuth`
          user_url: string, optional URL passed in when authorizing

        Returns:
          ([string url, ...], [string domain, ...])
        """
        user = json_loads(auth_entity.user_json)
        # for Instagram; its user_json is IndieAuth
        actor = user.get('actor')
        if not actor:
            actor = self.gr_source.user_to_actor(user)
        logging.debug('Extracting URLs and domains from actor: %s',
                      json_dumps(actor, indent=2))

        profile_links = [user_url] + microformats2.object_urls(actor)
        candidates = util.trim_nulls(util.uniquify(profile_links))

        if len(candidates) > MAX_AUTHOR_URLS:
            logging.info(
                'Too many profile links! Only resolving the first %s: %s',
                MAX_AUTHOR_URLS, candidates)

        resolved_urls = []
        for index, candidate in enumerate(candidates):
            within_limit = index < MAX_AUTHOR_URLS
            resolved = self.resolve_profile_url(candidate,
                                                resolve=within_limit)
            if not resolved:
                continue
            resolved_urls.append(resolved)

        # dedupe_urls normalizes domains to lower case
        pairs = [(link, util.domain_from_link(link))
                 for link in util.dedupe_urls(resolved_urls)]

        # skip links on this source's domain itself. only currently needed for
        # Mastodon; the other silo domains are in the webmention blacklist.
        kept = [(link, domain) for link, domain in pairs
                if domain != self.gr_source.DOMAIN]

        final_urls = [link for link, _ in kept]
        domains = [domain for _, domain in kept]
        return final_urls, domains
Ejemplo n.º 13
0
  def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    Builds the candidate list from user_url and the URLs of the
    ActivityStreams actor converted from the auth entity's user_json, then
    keeps each candidate that util.get_webmention_target reports as OK, after
    these adjustments:

    * the resolved URL is lower cased;
    * a resolved URL that starts with the candidate (ie a redirect to a deeper
      path) is replaced by the candidate;
    * for the first MAX_AUTHOR_URLS candidates, a URL with a path is replaced
      by its site root if the root's rel=me links include that URL. This
      fetches the root page.

    May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])

    Raises:
      whatever ``raise_for_status()`` raises when fetching a site root fails
    """
    actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    all_links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(all_links))

    if len(candidates) > MAX_AUTHOR_URLS:
      logging.info('Too many profile links! Only resolving the first %s: %s',
                   MAX_AUTHOR_URLS, candidates)

    found = []
    for index, candidate in enumerate(candidates):
      within_limit = index < MAX_AUTHOR_URLS
      target, _, sendable = util.get_webmention_target(
        candidate, resolve=within_limit)
      if not sendable:
        continue

      target = target.lower()
      deeper_redirect = util.schemeless(target).startswith(
        util.schemeless(candidate.lower()))
      if deeper_redirect:
        # redirected to a deeper path. use the original higher level URL. #652
        target = candidate

      # If target has a path segment, check if root has a matching rel=me.
      has_path = re.match(r'^(https?://[^/]+)/.+', target)
      if has_path and within_limit:
        site_root = has_path.group(1)
        page = util.requests_get(site_root)
        page.raise_for_status()
        mf2 = util.mf2py_parse(page.text, site_root)
        if target in mf2.get('rels', {}).get('me', []):
          target = site_root

      found.append(target)

    # normalizes domains to lower case
    urls = util.dedupe_urls(found)
    return urls, [util.domain_from_link(link) for link in urls]
Ejemplo n.º 14
0
  def finish(self, auth_entity, state=None):
    """Validates the user's Instagram profile, then adds or deletes the source.

    When auth_entity is truthy, this:

    1. finds the first Instagram URL in the 'rel-me' list of the auth entity's
       user_json and takes the username from its path,
    2. fetches that user's actor from Instagram by scraping,
    3. checks that the actor's URLs include the web site in the auth entity's
       key id, and
    4. checks that the actor is public.

    If any step fails, a message is added to self.messages and the result of
    self.redirect_home_or_user_page(state) is returned. Otherwise, and also
    when auth_entity is falsy, control is handed to
    self.maybe_add_or_delete_source.

    Args:
      auth_entity: auth entity whose user_json has a 'rel-me' list and whose
        key id is the user's web site. May be falsy, presumably when the user
        declined the auth prompt (TODO confirm).
      state: passed through to self.redirect_home_or_user_page and
        self.maybe_add_or_delete_source
    """
    # Bound up front: the validation below is skipped entirely when auth_entity
    # is falsy, and the final call would otherwise hit an unbound local.
    actor = None

    if auth_entity:
      user_json = json.loads(auth_entity.user_json)

      # find instagram profile URL
      urls = user_json.get('rel-me', [])
      logging.info('rel-mes: %s', urls)
      for url in util.trim_nulls(urls):
        if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
          username = urlparse.urlparse(url).path.strip('/')
          break
      else:
        self.messages.add(
          'No Instagram profile found. Please <a href="https://indieauth.com/setup">'
          'add an Instagram rel-me link</a>, then try again.')
        return self.redirect_home_or_user_page(state)

      # check that instagram profile links to web site
      actor = gr_instagram.Instagram(scrape=True).get_actor(username)
      if not actor:
        self.messages.add(
          "Couldn't find Instagram user '%s'. Please check your site's rel-me "
          "link and your Instagram account." % username)
        return self.redirect_home_or_user_page(state)

      # compare canonicalized forms so that trivial URL differences don't
      # cause a false mismatch
      canonicalize = util.UrlCanonicalizer(redirects=False)
      website = canonicalize(auth_entity.key.id())
      urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
      logging.info('Looking for %s in %s', website, urls)
      if website not in urls:
        self.messages.add("Please add %s to your Instagram profile's website or "
                          'bio field and try again.' % website)
        return self.redirect_home_or_user_page(state)

      # check that the instagram account is public
      if not gr_source.Source.is_public(actor):
        self.messages.add('Your Instagram account is private. '
                          'Bridgy only supports public accounts.')
        return self.redirect_home_or_user_page(state)

    # the return value was previously bound to an unused local; it isn't
    # needed here
    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
Ejemplo n.º 15
0
    def new(handler, auth_entity=None, actor=None, **kwargs):
        """Creates and returns an :class:`Instagram` for the given actor.

        Before building the entity, the actor is stored under the 'actor' key
        of auth_entity's user_json and auth_entity is saved with put(). The
        returned :class:`Instagram` is not put() here.

        Despite their None defaults, auth_entity and actor are both
        dereferenced unconditionally, so callers must provide them.

        Args:
          handler: the current :class:`webapp2.RequestHandler`; not used here
          auth_entity: :class:`oauth_dropins.instagram.InstagramAuth`
          actor: dict, ActivityStreams actor. Must have a 'username' field.
          kwargs: passed through to the :class:`Instagram` constructor.
            'features' is set to ['listen'] if missing or falsy.
        """
        user_json = json.loads(auth_entity.user_json)
        user_json['actor'] = actor
        auth_entity.user_json = json.dumps(user_json)
        auth_entity.put()

        username = actor['username']
        kwargs['features'] = kwargs.get('features') or ['listen']

        domain_urls = microformats2.object_urls(actor)
        domains = [util.domain_from_link(link) for link in domain_urls]
        picture = actor.get('image', {}).get('url')

        return Instagram(id=username,
                         auth_entity=auth_entity.key,
                         name=actor.get('displayName'),
                         picture=picture,
                         url=gr_instagram.Instagram.user_url(username),
                         domain_urls=domain_urls,
                         domains=domains,
                         **kwargs)
Ejemplo n.º 16
0
  def new(handler, auth_entity=None, actor=None, **kwargs):
    """Creates and returns an Instagram entity for the given actor.

    As a side effect, embeds the actor in auth_entity's user_json (under the
    'actor' key) and saves auth_entity. The new Instagram entity itself is
    only constructed, not saved.

    Both auth_entity and actor are required in practice: they default to None
    but are used without a None check.

    Args:
      handler: the current RequestHandler; not used here
      auth_entity: oauth_dropins.instagram.InstagramAuth
      actor: dict, ActivityStreams actor with a 'username' field
      kwargs: passed through to the Instagram constructor. 'features' falls
        back to ['listen'] when missing or falsy.
    """
    stored = json.loads(auth_entity.user_json)
    stored['actor'] = actor
    auth_entity.user_json = json.dumps(stored)
    auth_entity.put()

    username = actor['username']
    if not kwargs.get('features'):
      kwargs['features'] = ['listen']

    site_urls = microformats2.object_urls(actor)
    props = dict(
      id=username,
      auth_entity=auth_entity.key,
      name=actor.get('displayName'),
      picture=actor.get('image', {}).get('url'),
      url=gr_instagram.Instagram.user_url(username),
      domain_urls=site_urls,
      domains=[util.domain_from_link(link) for link in site_urls],
    )
    return Instagram(**props, **kwargs)