def preprocess_for_publish(self, obj):
    """Rewrite person tags so that "url" points at a profile on this service.

    For each tag whose objectType is 'person', the tag's URLs (as returned by
    microformats2.object_urls) are passed through self.infer_profile_url in
    order. The first truthy result replaces the tag's "url"; if there is none,
    the tag is left untouched. Objects under the 'object' field are then
    processed recursively.

    Args:
      obj: ActivityStreams activity or object dict. Modified in place.
    """
    for tag in obj.get('tags', []):
        if tag.get('objectType') != 'person':
            continue

        # Lazy, so infer_profile_url stops being called after the first hit.
        inferred = (link and self.infer_profile_url(link)
                    for link in microformats2.object_urls(tag))
        silo_url = next((profile for profile in inferred if profile), None)
        if silo_url:
            tag['url'] = silo_url

    # recurse on contained object(s)
    for inner in util.get_list(obj, 'object'):
        self.preprocess_for_publish(inner)
def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    The auth entity's user_json is converted to an ActivityStreams actor, and
    the actor's URLs (via microformats2.object_urls) are combined with
    user_url. Only the first MAX_AUTHOR_URLS candidates are resolved. May be
    overridden by subclasses.

    Args:
      auth_entity: oauth_dropins.models.BaseAuth
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])
    """
    user = json.loads(auth_entity.user_json)
    actor = self.gr_source.user_to_actor(user)
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    profile_links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(profile_links))
    if len(candidates) > MAX_AUTHOR_URLS:
        logging.warning('Too many profile links! Only resolving the first %s: %s',
                        MAX_AUTHOR_URLS, candidates)

    # Each target is a (url, domain, send) tuple; keep only the sendable URLs.
    targets = (util.get_webmention_target(link, resolve=idx < MAX_AUTHOR_URLS)
               for idx, link in enumerate(candidates))
    sendable = [target for target, _, send in targets if send]

    urls = util.dedupe_urls(sendable)  # normalizes domains to lower case
    domains = [util.domain_from_link(link) for link in urls]
    return urls, domains
def test_object_urls(self):
    """Checks object_urls() against actors with 'url', 'urls', both, or neither."""
    cases = (
        # (actor, expected URLs)
        ({}, []),
        ({'displayName': 'foo'}, []),
        ({'url': None, 'urls': []}, []),
        ({'url': 'http://foo'}, ['http://foo']),
        ({'urls': [{'value': 'http://foo'}]}, ['http://foo']),
        # 'url' comes first and its duplicate inside 'urls' is dropped
        ({
            'url': 'http://foo',
            'urls': [
                {'value': 'https://bar'},
                {'value': 'http://foo'},
                {'value': 'http://baz'},
            ],
        }, ['http://foo', 'https://bar', 'http://baz']),
    )
    for actor, expected in cases:
        self.assertEqual(expected, microformats2.object_urls(actor))
def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    Builds an ActivityStreams actor from the auth entity's user_json and
    collects candidate links from user_url plus the actor's URLs. Candidates
    past the first MAX_AUTHOR_URLS are still checked, but not resolved. May be
    overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])
    """
    actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    combined = util.uniquify([user_url] + microformats2.object_urls(actor))
    candidates = util.trim_nulls(combined)
    too_many = len(candidates) > MAX_AUTHOR_URLS
    if too_many:
        logging.warning(
            'Too many profile links! Only resolving the first %s: %s',
            MAX_AUTHOR_URLS, candidates)

    accepted = []
    for position, candidate in enumerate(candidates):
        target, _, send = util.get_webmention_target(
            candidate, resolve=position < MAX_AUTHOR_URLS)
        if not send:
            continue
        accepted.append(target)

    urls = util.dedupe_urls(accepted)  # normalizes domains to lower case
    return urls, [util.domain_from_link(target) for target in urls]
def check_token_for_actor(self, actor):
    """Checks that the given actor is public and matches the request's token.

    The token is read from the required 'token' request parameter. It is
    accepted if it appears in the tokens of a stored Domain entity for any of
    the actor's URL domains, excluding the source class's own silo domain.

    Args:
      actor: ActivityStreams actor dict

    Raises:
      :class:`HTTPException` with HTTP 400 if the actor is missing or not
      public, or HTTP 403 if the token is not authorized for any of the
      actor's domains
    """
    if not actor:
        self.abort(400, 'Missing actor!')

    if not gr_source.Source.is_public(actor):
        self.abort(
            400,
            f'Your {self.gr_source().NAME} account is private. Bridgy only supports public accounts.'
        )

    token = util.get_required_param(self, 'token')

    domains = {
        util.domain_from_link(util.replace_test_domains_with_localhost(u))
        for u in microformats2.object_urls(actor)
    }
    # The silo's own domain never counts as one of the user's web sites.
    domains.discard(self.source_class().GR_CLASS.DOMAIN)

    logging.info(f'Checking token against domains {domains}')
    # get_multi results may be falsy for domains with no stored entity.
    for domain in ndb.get_multi(ndb.Key(Domain, d) for d in domains):
        if domain and token in domain.tokens:
            return

    self.abort(403, f'Token {token} is not authorized for any of: {domains}')
def finish(self, auth_entity, state=None):
    """Validates the user's Instagram profile, then adds or deletes the source.

    When auth_entity is truthy, this:

    1. finds an Instagram URL among the 'rel-me' links in its user_json,
    2. fetches that Instagram user's actor by scraping,
    3. checks that the actor's URLs include the auth entity's key id (the
       user's web site), after canonicalization without following redirects,
    4. checks that the actor is public.

    Any failed check adds a message for the user and redirects to '/'.

    Args:
      auth_entity: auth entity with user_json and key, or a falsy value, in
        which case validation is skipped
      state: passed through to maybe_add_or_delete_source
    """
    # Bind actor up front: the final call reads it even when auth_entity is
    # falsy and the validation block is skipped, which previously raised
    # UnboundLocalError.
    # NOTE(review): assumes maybe_add_or_delete_source accepts a falsy
    # auth_entity with actor=None - confirm.
    actor = None

    if auth_entity:
        user_json = json.loads(auth_entity.user_json)

        # find instagram profile URL
        urls = user_json.get('rel-me', [])
        logging.info('rel-mes: %s', urls)
        for url in util.trim_nulls(urls):
            if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                username = urllib.parse.urlparse(url).path.strip('/')
                break
        else:
            self.messages.add(
                'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.'
            )
            return self.redirect('/')

        # check that instagram profile links to web site
        try:
            actor = gr_instagram.Instagram(scrape=True).get_actor(
                username, ignore_rate_limit=True)
        except Exception as e:
            code, _ = util.interpret_http_exception(e)
            if code in Instagram.RATE_LIMIT_HTTP_CODES:
                self.messages.add(
                    '<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!'
                )
                return self.redirect('/')
            # anything other than rate limiting is unexpected; let it propagate
            raise

        if not actor:
            self.messages.add(
                "Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account."
                % username)
            return self.redirect('/')

        canonicalize = util.UrlCanonicalizer(redirects=False)
        website = canonicalize(auth_entity.key.id())
        urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
        logging.info('Looking for %s in %s', website, urls)
        if website not in urls:
            self.messages.add(
                "Please add %s to your Instagram profile's website or bio field and try again."
                % website)
            return self.redirect('/')

        # check that the instagram account is public
        if not gr_source.Source.is_public(actor):
            self.messages.add(
                'Your Instagram account is private. Bridgy only supports public accounts.'
            )
            return self.redirect('/')

    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    Converts the auth entity's user_json to an ActivityStreams actor and uses
    its URLs along with user_url. For each accepted candidate:

    * the resolved URL is lower cased;
    * if the resolved URL starts with the candidate (ignoring scheme), the
      candidate itself is used instead;
    * if the result has a path and is within the first MAX_AUTHOR_URLS
      candidates, the site root is fetched, and the root is used instead when
      its rel=me links include the URL.

    May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])

    Raises:
      whatever resp.raise_for_status() raises when fetching a site root fails
    """
    user = json.loads(auth_entity.user_json)
    actor = self.gr_source.user_to_actor(user)
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    candidates = util.trim_nulls(
        util.uniquify([user_url] + microformats2.object_urls(actor)))
    if len(candidates) > MAX_AUTHOR_URLS:
        logging.info(
            'Too many profile links! Only resolving the first %s: %s',
            MAX_AUTHOR_URLS, candidates)

    urls = []
    for index, candidate in enumerate(candidates):
        within_limit = index < MAX_AUTHOR_URLS
        final, _, ok = util.get_webmention_target(candidate,
                                                  resolve=within_limit)
        if not ok:
            continue

        final = final.lower()
        if util.schemeless(final).startswith(
                util.schemeless(candidate.lower())):
            # redirected to a deeper path. use the original higher level URL. #652
            final = candidate

        # If final has a path segment check if root has a matching rel=me.
        match = re.match(r'^(https?://[^/]+)/.+', final)
        if match and within_limit:
            root = match.group(1)
            resp = util.requests_get(root)
            resp.raise_for_status()
            rels = util.mf2py_parse(resp.text, root).get('rels', {})
            if final in rels.get('me', []):
                final = root

        urls.append(final)

    urls = util.dedupe_urls(urls)  # normalizes domains to lower case
    domains = [util.domain_from_link(link) for link in urls]
    return urls, domains
def urls_and_domains(self, auth_entity, user_url, actor=None,
                     resolve_source_domain=True):
    """Returns this user's valid (not webmention-blocklisted) URLs and domains.

    Candidate links are user_url plus the actor's URLs. Each one is passed to
    self.resolve_profile_url, and links on this source's own domain are
    dropped from the final result. May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing
      actor: dict, optional AS actor for the user. If provided, overrides
        auth_entity
      resolve_source_domain: boolean, whether to follow redirects on URLs on
        this source's domain

    Returns:
      ([string url, ...], [string domain, ...])
    """
    if not actor:
        user = json_loads(auth_entity.user_json)
        actor = self.gr_source.user_to_actor(user)
    logger.debug(f'Extracting URLs and domains from actor: {json_dumps(actor, indent=2)}')

    links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(links))
    if len(candidates) > MAX_AUTHOR_URLS:
        logger.info(f'Too many profile links! Only resolving the first {MAX_AUTHOR_URLS}: {candidates}')

    resolved_urls = []
    for index, candidate in enumerate(candidates):
        on_source_domain = (
            util.domain_from_link(candidate) == self.gr_source.DOMAIN)
        # Skip resolving past the limit, and for source-domain links when the
        # caller asked not to resolve those.
        skip_source_link = on_source_domain and not resolve_source_domain
        resolve = index < MAX_AUTHOR_URLS and not skip_source_link
        resolved = self.resolve_profile_url(candidate, resolve=resolve)
        if resolved:
            resolved_urls.append(resolved)

    final_urls, domains = [], []
    # dedupe_urls normalizes domains to lower case
    for link in util.dedupe_urls(resolved_urls):
        # skip links on this source's domain itself. only currently needed for
        # Mastodon; the other silo domains are in the webmention blocklist.
        domain = util.domain_from_link(link)
        if domain == self.gr_source.DOMAIN:
            continue
        final_urls.append(link)
        domains.append(domain)

    return final_urls, domains
def test_object_urls(self):
    """Checks object_urls() against actors with 'url', 'urls', both, or neither."""
    for expected, actor in (
        ([], {}),
        ([], {'displayName': 'foo'}),
        ([], {'url': None, 'urls': []}),
        (['http://foo'], {'url': 'http://foo'}),
        (['http://foo'], {'urls': [{'value': 'http://foo'}]}),
        # 'url' comes first and its duplicate inside 'urls' is dropped
        (['http://foo', 'https://bar', 'http://baz'], {
            'url': 'http://foo',
            'urls': [
                {'value': 'https://bar'},
                {'value': 'http://foo'},
                {'value': 'http://baz'},
            ],
        }),
    ):
        # assertEqual, not the deprecated assertEquals alias, which was
        # removed in Python 3.12.
        self.assertEqual(expected, microformats2.object_urls(actor))
def finish(self, auth_entity, state=None):
    """Validates the user's Instagram profile, then adds or deletes the source.

    When auth_entity is truthy, this finds an Instagram URL among the
    'rel-me' links in its user_json, fetches that Instagram user's actor by
    scraping, checks that the actor's URLs include the auth entity's key id
    (the user's web site), and checks that the actor is public. Any failed
    check adds a message for the user and redirects to '/'.

    Args:
      auth_entity: auth entity with user_json and key, or a falsy value, in
        which case validation is skipped
      state: passed through to maybe_add_or_delete_source
    """
    # Bind actor up front: the final call reads it even when auth_entity is
    # falsy and the validation block is skipped, which previously raised
    # UnboundLocalError.
    # NOTE(review): assumes maybe_add_or_delete_source accepts a falsy
    # auth_entity with actor=None - confirm.
    actor = None

    if auth_entity:
        user_json = json.loads(auth_entity.user_json)

        # find instagram profile URL
        urls = user_json.get('rel-me', [])
        logging.info('rel-mes: %s', urls)
        for url in util.trim_nulls(urls):
            if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                username = urlparse.urlparse(url).path.strip('/')
                break
        else:
            self.messages.add(
                'No Instagram profile found. Please <a href="https://indieauth.com/setup">'
                'add an Instagram rel-me link</a>, then try again.')
            return self.redirect('/')

        # check that instagram profile links to web site
        actor = gr_instagram.Instagram(scrape=True).get_actor(
            username, ignore_rate_limit=True)
        if not actor:
            self.messages.add(
                "Couldn't find Instagram user '%s'. Please check your site's rel-me "
                "link and your Instagram account." % username)
            return self.redirect('/')

        canonicalize = util.UrlCanonicalizer(redirects=False)
        website = canonicalize(auth_entity.key.id())
        urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
        logging.info('Looking for %s in %s', website, urls)
        if website not in urls:
            self.messages.add(
                "Please add %s to your Instagram profile's website or "
                'bio field and try again.' % website)
            return self.redirect('/')

        # check that the instagram account is public
        if not gr_source.Source.is_public(actor):
            self.messages.add('Your Instagram account is private. '
                              'Bridgy only supports public accounts.')
            return self.redirect('/')

    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    Uses the 'actor' stored in the auth entity's user_json when present;
    otherwise converts user_json to an ActivityStreams actor. The actor's URLs
    plus user_url are passed to self.resolve_profile_url, and links on this
    source's own domain are dropped. May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])
    """
    user = json_loads(auth_entity.user_json)
    actor = user.get('actor')  # for Instagram; its user_json is IndieAuth
    if not actor:
        actor = self.gr_source.user_to_actor(user)
    logging.debug('Extracting URLs and domains from actor: %s',
                  json_dumps(actor, indent=2))

    links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(links))
    if len(candidates) > MAX_AUTHOR_URLS:
        logging.info(
            'Too many profile links! Only resolving the first %s: %s',
            MAX_AUTHOR_URLS, candidates)

    # Only the first MAX_AUTHOR_URLS candidates are resolved.
    attempts = (
        self.resolve_profile_url(link, resolve=position < MAX_AUTHOR_URLS)
        for position, link in enumerate(candidates))
    resolved_urls = [resolved for resolved in attempts if resolved]

    final_urls, domains = [], []
    # dedupe_urls normalizes domains to lower case
    for link in util.dedupe_urls(resolved_urls):
        # skip links on this source's domain itself. only currently needed for
        # Mastodon; the other silo domains are in the webmention blacklist.
        domain = util.domain_from_link(link)
        if domain == self.gr_source.DOMAIN:
            continue
        final_urls.append(link)
        domains.append(domain)

    return final_urls, domains
def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    Converts the auth entity's user_json to an ActivityStreams actor and
    checks its URLs, plus user_url, as webmention targets. An accepted URL
    that has a path may be replaced by its site root when the root's rel=me
    links include it. That check fetches the root and is only done for the
    first MAX_AUTHOR_URLS candidates. May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])

    Raises:
      whatever resp.raise_for_status() raises when fetching a site root fails
    """
    actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    combined = util.uniquify([user_url] + microformats2.object_urls(actor))
    candidates = util.trim_nulls(combined)
    if len(candidates) > MAX_AUTHOR_URLS:
        logging.info('Too many profile links! Only resolving the first %s: %s',
                     MAX_AUTHOR_URLS, candidates)

    collected = []
    for i, original in enumerate(candidates):
        final, _, ok = util.get_webmention_target(
            original, resolve=i < MAX_AUTHOR_URLS)
        if not ok:
            continue

        final = final.lower()
        final_bare = util.schemeless(final)
        original_bare = util.schemeless(original.lower())
        if final_bare.startswith(original_bare):
            # redirected to a deeper path. use the original higher level URL. #652
            final = original

        # If final has a path segment check if root has a matching rel=me.
        match = re.match(r'^(https?://[^/]+)/.+', final)
        if match and i < MAX_AUTHOR_URLS:
            root = match.group(1)
            resp = util.requests_get(root)
            resp.raise_for_status()
            data = util.mf2py_parse(resp.text, root)
            if final in data.get('rels', {}).get('me', []):
                final = root

        collected.append(final)

    urls = util.dedupe_urls(collected)  # normalizes domains to lower case
    return urls, [util.domain_from_link(link) for link in urls]
def finish(self, auth_entity, state=None):
    """Validates the user's Instagram profile, then adds or deletes the source.

    When auth_entity is truthy, this finds an Instagram URL among the
    'rel-me' links in its user_json, fetches that Instagram user's actor by
    scraping, checks that the actor's URLs include the auth entity's key id
    (the user's web site), and checks that the actor is public. Any failed
    check adds a message for the user and returns
    self.redirect_home_or_user_page(state).

    Args:
      auth_entity: auth entity with user_json and key, or a falsy value, in
        which case validation is skipped
      state: passed through to redirect_home_or_user_page and
        maybe_add_or_delete_source
    """
    # Bind actor up front: the final call reads it even when auth_entity is
    # falsy and the validation block is skipped, which previously raised
    # UnboundLocalError.
    # NOTE(review): assumes maybe_add_or_delete_source accepts a falsy
    # auth_entity with actor=None - confirm.
    actor = None

    if auth_entity:
        user_json = json.loads(auth_entity.user_json)

        # find instagram profile URL
        urls = user_json.get('rel-me', [])
        logging.info('rel-mes: %s', urls)
        for url in util.trim_nulls(urls):
            if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
                username = urlparse.urlparse(url).path.strip('/')
                break
        else:
            self.messages.add(
                'No Instagram profile found. Please <a href="https://indieauth.com/setup">'
                'add an Instagram rel-me link</a>, then try again.')
            return self.redirect_home_or_user_page(state)

        # check that instagram profile links to web site
        actor = gr_instagram.Instagram(scrape=True).get_actor(username)
        if not actor:
            self.messages.add(
                "Couldn't find Instagram user '%s'. Please check your site's rel-me "
                "link and your Instagram account." % username)
            return self.redirect_home_or_user_page(state)

        canonicalize = util.UrlCanonicalizer(redirects=False)
        website = canonicalize(auth_entity.key.id())
        urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
        logging.info('Looking for %s in %s', website, urls)
        if website not in urls:
            self.messages.add("Please add %s to your Instagram profile's website or "
                              'bio field and try again.' % website)
            return self.redirect_home_or_user_page(state)

        # check that the instagram account is public
        if not gr_source.Source.is_public(actor):
            self.messages.add('Your Instagram account is private. '
                              'Bridgy only supports public accounts.')
            return self.redirect_home_or_user_page(state)

    # The return value was previously bound to an unused local; it is not
    # needed here.
    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
def new(handler, auth_entity=None, actor=None, **kwargs):
    """Creates and returns an :class:`Instagram` for the logged in user.

    Stores actor under the 'actor' key of the auth entity's user_json and
    saves the auth entity. The new entity's id is the actor's username, and
    its domain_urls and domains come from the actor's URLs. 'features'
    defaults to ['listen'] when missing or empty.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.instagram.InstagramAuth`
      actor: ActivityStreams actor dict; must have a 'username'
      kwargs: extra properties passed through to :class:`Instagram`
    """
    # Persist the actor alongside the rest of the stored user JSON.
    stored = json.loads(auth_entity.user_json)
    stored['actor'] = actor
    auth_entity.user_json = json.dumps(stored)
    auth_entity.put()

    username = actor['username']
    if not kwargs.get('features'):
        kwargs['features'] = ['listen']

    domain_urls = microformats2.object_urls(actor)
    return Instagram(
        id=username,
        auth_entity=auth_entity.key,
        name=actor.get('displayName'),
        picture=actor.get('image', {}).get('url'),
        url=gr_instagram.Instagram.user_url(username),
        domain_urls=domain_urls,
        domains=[util.domain_from_link(link) for link in domain_urls],
        **kwargs)
def new(handler, auth_entity=None, actor=None, **kwargs):
    """Creates and returns an Instagram for the logged in user.

    The actor is saved into the auth entity's user_json under 'actor' before
    the Instagram entity is built. If no 'features' are given, ['listen'] is
    used.

    Args:
      handler: the current RequestHandler
      auth_entity: oauth_dropins.instagram.InstagramAuth
      actor: ActivityStreams actor dict; its 'username' becomes the entity id
      kwargs: extra properties passed through to Instagram
    """
    user = json.loads(auth_entity.user_json)
    user['actor'] = actor
    auth_entity.user_json = json.dumps(user)
    auth_entity.put()

    username = actor['username']
    features = kwargs.get('features')
    if not features:
        kwargs['features'] = ['listen']

    links = microformats2.object_urls(actor)
    return Instagram(id=username,
                     auth_entity=auth_entity.key,
                     name=actor.get('displayName'),
                     picture=actor.get('image', {}).get('url'),
                     url=gr_instagram.Instagram.user_url(username),
                     domain_urls=links,
                     domains=[util.domain_from_link(link) for link in links],
                     **kwargs)