class Instagram(browser.BrowserSource):
  """An Instagram account.

  The key name is the username. Instagram usernames may have ASCII letters
  (case insensitive), numbers, periods, and underscores:
  https://stackoverflow.com/questions/15470180
  """
  GR_CLASS = gr_instagram.Instagram
  SHORT_NAME = 'instagram'
  OAUTH_START_HANDLER = oauth_instagram.StartHandler
  # canonical post URL form: https://www.instagram.com/p/SHORTCODE/
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    subdomain='www',
    approve=r'https://www.instagram.com/p/[^/?]+/$',
    trailing_slash=True,
    headers=util.REQUEST_HEADERS)
  # no reject regexp; non-private Instagram post URLs just 404

  # blank granary Instagram object, shared across all instances
  gr_source = gr_instagram.Instagram()

  @classmethod
  def key_id_from_actor(cls, actor):
    """Returns the actor's username field to be used as this entity's key id."""
    return actor['username']

  def silo_url(self):
    """Returns the Instagram account URL, e.g. https://instagram.com/foo."""
    return self.gr_source.user_url(self.key.id())

  def label_name(self):
    """Returns the username."""
    return self.key_id()
def finish(self, auth_entity, state=None):
  """Finishes the sign-up flow: finds and validates the Instagram account.

  Args:
    auth_entity: oauth-dropins auth entity whose user_json carries a 'rel-me'
      URL list, or None — presumably IndieAuth; TODO confirm against caller
    state: string, optional, passed through to maybe_add_or_delete_source
  """
  # NOTE(review): when auth_entity is None this method does nothing — confirm
  # the declined/failed-auth case is handled upstream.
  if auth_entity:
    user_json = json.loads(auth_entity.user_json)

    # find instagram profile URL among the site's rel-me links
    urls = user_json.get('rel-me', [])
    logging.info('rel-mes: %s', urls)
    for url in util.trim_nulls(urls):
      if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
        # the profile URL's path component is the username
        username = urllib.parse.urlparse(url).path.strip('/')
        break
    else:
      self.messages.add(
        'No Instagram profile found. Please <a href="https://indieauth.com/setup">add an Instagram rel-me link</a>, then try again.'
      )
      return self.redirect('/')

    # check that instagram profile links to web site
    try:
      actor = gr_instagram.Instagram(scrape=True).get_actor(
        username, ignore_rate_limit=True)
    except Exception as e:
      code, _ = util.interpret_http_exception(e)
      if code in Instagram.RATE_LIMIT_HTTP_CODES:
        # Instagram is rate limiting/blocking us; surface a friendly message
        # instead of an error page
        self.messages.add(
          '<a href="https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427">Apologies, Instagram is temporarily blocking us.</a> Please try again later!'
        )
        return self.redirect('/')
      else:
        raise

    if not actor:
      self.messages.add(
        "Couldn't find Instagram user '%s'. Please check your site's rel-me link and your Instagram account."
        % username)
      return self.redirect('/')

    # compare canonicalized URLs; redirects=False so we compare the URLs as
    # written, without resolving them
    canonicalize = util.UrlCanonicalizer(redirects=False)
    website = canonicalize(auth_entity.key.id())
    urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
    logging.info('Looking for %s in %s', website, urls)
    if website not in urls:
      self.messages.add(
        "Please add %s to your Instagram profile's website or bio field and try again."
        % website)
      return self.redirect('/')

    # check that the instagram account is public
    if not gr_source.Source.is_public(actor):
      self.messages.add(
        'Your Instagram account is private. Bridgy only supports public accounts.'
      )
      return self.redirect('/')

    # all checks passed; create (or delete) the source
    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
class GitHub(Source):
  """A GitHub user.

  The key name is the GitHub username.
  """
  GR_CLASS = gr_github.GitHub
  OAUTH_START_HANDLER = oauth_github.StartHandler
  SHORT_NAME = 'github'
  TYPE_LABELS = {
    'post': 'issue',
    'like': 'star',
  }
  BACKFEED_REQUIRES_SYNDICATION_LINK = True
  # a 403 from GitHub disables the source, in addition to the base class codes
  DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ('403', )
  CAN_PUBLISH = True
  # fragment=True: GitHub comment permalinks live in the URL fragment
  URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN,
                                            headers=util.REQUEST_HEADERS,
                                            fragment=True)
  # This makes us backfeed issue/PR comments to previous comments on the same
  # issue/PR.
  IGNORE_SYNDICATION_LINK_FRAGMENTS = True

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`GitHub` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.github.GitHubAuth`
      kwargs: property values
    """
    user = json_loads(auth_entity.user_json)
    gr_source = gr_github.GitHub(access_token=auth_entity.access_token())
    actor = gr_source.user_to_actor(user)
    return GitHub(id=auth_entity.key_id(),
                  auth_entity=auth_entity.key,
                  name=actor.get('displayName'),
                  picture=actor.get('image', {}).get('url'),
                  url=actor.get('url'),
                  **kwargs)

  def silo_url(self):
    """Returns the GitHub account URL, e.g. https://github.com/foo."""
    return self.gr_source.user_url(self.key_id())

  def label_name(self):
    """Returns the username."""
    return self.key_id()

  def get_activities_response(self, *args, **kwargs):
    """Drop kwargs that granary doesn't currently support for github."""
    kwargs.update({
      'fetch_shares': None,
      'fetch_mentions': None,
    })
    return self.gr_source.get_activities_response(*args, **kwargs)
class FakeSource(Source):
  """Fake silo source type for unit tests."""
  GR_CLASS = FakeGrSource
  OAUTH_START_HANDLER = OAuthStartHandler
  SHORT_NAME = 'fake'
  TYPE_LABELS = {'post': 'FakeSource post label'}
  RATE_LIMITED_POLL = datetime.timedelta(hours=30)
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN, headers=util.REQUEST_HEADERS)
  PATH_BLACKLIST = (re.compile('^/blocklisted/.*'),)
  HAS_BLOCKS = True

  # next auto-assigned entity key id
  string_id_counter = 1
  gr_source = FakeGrSource()
  username = ndb.StringProperty()
  # flips to True once put() has been called
  is_saved = False

  def is_beta_user(self):
    """Every fake user counts as a beta user."""
    return True

  def silo_url(self):
    """Fixed fake profile URL."""
    return 'http://fa.ke/profile/url'

  def feed_url(self):
    """Fixed fake feed URL."""
    return 'fake feed url'

  def search_for_links(self):
    """Returns a deep copy of the canned search results."""
    return copy.deepcopy(FakeGrSource.search_results)

  @classmethod
  def new(cls, handler, **props):
    """Builds a FakeSource, deriving id and name from the auth entity if any."""
    props.setdefault('url', 'http://fake/url')

    key_id = None
    auth_entity = props.get('auth_entity')
    if auth_entity:
      props['auth_entity'] = auth_entity.key
      if auth_entity.user_json:
        user = json_loads(auth_entity.user_json)
        props.setdefault('name', user.get('name'))
        key_id = user.get('id')

    if not props.get('name'):
      props['name'] = 'fake'

    if not key_id:
      key_id = cls.string_id_counter
      cls.string_id_counter += 1

    return cls(id=str(key_id), **props)

  def put(self, **kwargs):
    """Records that a save happened, then defers to the real put()."""
    self.is_saved = True
    return super(FakeSource, self).put(**kwargs)

  @classmethod
  def next_key(cls):
    """Returns the key the next auto-assigned entity will get."""
    return ndb.Key(cls, str(cls.string_id_counter))
class GitHub(models.Source):
  """A GitHub user.

  The key name is the GitHub username.
  """
  GR_CLASS = gr_github.GitHub
  SHORT_NAME = 'github'
  TYPE_LABELS = {
    'post': 'issue',
    'like': 'star',
  }
  URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN,
                                            headers=util.REQUEST_HEADERS)

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`GitHub` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.github.GitHubAuth`
      kwargs: property values
    """
    user = json.loads(auth_entity.user_json)
    gr_source = gr_github.GitHub(access_token=auth_entity.access_token())
    actor = gr_source.user_to_actor(user)
    # temporary! notify on each new user.
    # user_json is already a serialized JSON string; dumping the parsed dict
    # (not the raw string) avoids double encoding and makes indent=2 effective.
    util.email_me(subject='New Bridgy GitHub user!',
                  body=json.dumps(user, indent=2))
    return GitHub(id=auth_entity.key.id(),
                  auth_entity=auth_entity.key,
                  name=actor.get('displayName'),
                  picture=actor.get('image', {}).get('url'),
                  url=actor.get('url'),
                  **kwargs)

  def silo_url(self):
    """Returns the GitHub account URL, e.g. https://github.com/foo."""
    return self.gr_source.user_url(self.key.id())

  def label_name(self):
    """Returns the username."""
    return self.key.id()

  def get_activities_response(self, *args, **kwargs):
    """Drop kwargs that granary doesn't currently support for github."""
    kwargs.update({
      'fetch_shares': None,
      'fetch_mentions': None,
    })
    return self.gr_source.get_activities_response(*args, **kwargs)
class FakeSource(Source):
  """Fake silo source type for unit tests."""
  GR_CLASS = FakeGrSource
  SHORT_NAME = 'fake'
  TYPE_LABELS = {'post': 'FakeSource post label'}
  RATE_LIMITED_POLL = datetime.timedelta(hours=30)
  URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN,
                                            headers=util.REQUEST_HEADERS)

  # next auto-assigned entity key id
  string_id_counter = 1
  gr_source = FakeGrSource()
  username = ndb.StringProperty()
  # flips to True once put() has been called
  is_saved = False

  def is_beta_user(self):
    """Every fake user counts as a beta user."""
    return True

  def silo_url(self):
    """Fixed fake profile URL."""
    return 'http://fa.ke/profile/url'

  def feed_url(self):
    """Fixed fake feed URL."""
    return 'fake feed url'

  def poll_period(self):
    """Uses the rate-limited period when rate limited, else the base period."""
    if self.rate_limited:
      return self.RATE_LIMITED_POLL
    return super(FakeSource, self).poll_period()

  def search_for_links(self):
    """Returns a deep copy of the canned search results."""
    return copy.deepcopy(FakeGrSource.search_results)

  @classmethod
  def new(cls, handler, **props):
    """Builds a FakeSource, deriving id and name from the auth entity if any."""
    props.setdefault('url', 'http://fake/url')

    key_id = None
    auth_entity = props.get('auth_entity')
    if auth_entity:
      props['auth_entity'] = auth_entity.key
      if auth_entity.user_json:
        user = json.loads(auth_entity.user_json)
        props.setdefault('name', user.get('name'))
        key_id = user.get('id')

    if not props.get('name'):
      props['name'] = 'fake'

    if not key_id:
      key_id = str(cls.string_id_counter)
      cls.string_id_counter += 1

    return cls(id=key_id, **props)

  def put(self, **kwargs):
    """Records that a save happened, then defers to the real put()."""
    self.is_saved = True
    return super(FakeSource, self).put(**kwargs)
def finish(self, auth_entity, state=None):
  """Finishes the sign-up flow: finds and validates the Instagram account.

  Args:
    auth_entity: oauth-dropins auth entity whose user_json carries a 'rel-me'
      URL list, or None — presumably IndieAuth; TODO confirm against caller
    state: string, optional, passed through to maybe_add_or_delete_source
  """
  # NOTE(review): when auth_entity is None this method does nothing — confirm
  # the declined/failed-auth case is handled upstream.
  if auth_entity:
    user_json = json.loads(auth_entity.user_json)

    # find instagram profile URL among the site's rel-me links
    urls = user_json.get('rel-me', [])
    logging.info('rel-mes: %s', urls)
    for url in util.trim_nulls(urls):
      if util.domain_from_link(url) == gr_instagram.Instagram.DOMAIN:
        # the profile URL's path component is the username
        username = urlparse.urlparse(url).path.strip('/')
        break
    else:
      self.messages.add(
        'No Instagram profile found. Please <a href="https://indieauth.com/setup">'
        'add an Instagram rel-me link</a>, then try again.')
      return self.redirect('/')

    # check that instagram profile links to web site
    actor = gr_instagram.Instagram(scrape=True).get_actor(
      username, ignore_rate_limit=True)
    if not actor:
      self.messages.add(
        "Couldn't find Instagram user '%s'. Please check your site's rel-me "
        "link and your Instagram account." % username)
      return self.redirect('/')

    # compare canonicalized URLs; redirects=False so we compare the URLs as
    # written, without resolving them
    canonicalize = util.UrlCanonicalizer(redirects=False)
    website = canonicalize(auth_entity.key.id())
    urls = [canonicalize(u) for u in microformats2.object_urls(actor)]
    logging.info('Looking for %s in %s', website, urls)
    if website not in urls:
      self.messages.add(
        "Please add %s to your Instagram profile's website or "
        'bio field and try again.' % website)
      return self.redirect('/')

    # check that the instagram account is public
    if not gr_source.Source.is_public(actor):
      self.messages.add('Your Instagram account is private. '
                        'Bridgy only supports public accounts.')
      return self.redirect('/')

    # all checks passed; create (or delete) the source
    self.maybe_add_or_delete_source(Instagram, auth_entity, state, actor=actor)
class Meetup(Source):
  """A Meetup account. Publish-only: listening is disabled."""
  GR_CLASS = gr_meetup.Meetup
  OAUTH_START_HANDLER = oauth_meetup.StartHandler
  SHORT_NAME = 'meetup'
  BACKFEED_REQUIRES_SYNDICATION_LINK = True
  CAN_LISTEN = False
  CAN_PUBLISH = True
  URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN,
                                            headers=util.REQUEST_HEADERS)

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`Meetup` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.meetup.MeetupAuth`
      kwargs: property values
    """
    meetup = gr_meetup.Meetup(access_token=auth_entity.access_token())
    actor = meetup.user_to_actor(json_loads(auth_entity.user_json))
    return Meetup(id=auth_entity.key.id(),
                  auth_entity=auth_entity.key,
                  name=actor.get('displayName'),
                  picture=actor.get('image', {}).get('url'),
                  url=actor.get('url'),
                  **kwargs)

  def silo_url(self):
    """Returns the Meetup account URL, e.g. https://meetup.com/members/....."""
    return self.gr_source.user_url(self.key.id())

  def label_name(self):
    """Returns the user's display name."""
    return self.name
def URL_CANONICALIZER(self):
  """Build the canonicalizer on demand so it picks up this instance's domain."""
  canonicalizer_kwargs = {
    'domain': self.gr_source.DOMAIN,
    'headers': util.REQUEST_HEADERS,
  }
  return util.UrlCanonicalizer(**canonicalizer_kwargs)
class Flickr(models.Source):
  """A Flickr account.

  The key name is the nsid.
  """
  # Fetching comments and likes is extremely request-intensive, so let's dial
  # back the frequency for now.
  FAST_POLL = datetime.timedelta(minutes=60)

  GR_CLASS = gr_flickr.Flickr
  OAUTH_START_HANDLER = oauth_flickr.StartHandler
  SHORT_NAME = 'flickr'
  # treat 400s from Flickr as transient, not permanent, errors
  TRANSIENT_ERROR_HTTP_CODES = ('400', )
  CAN_PUBLISH = True
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    approve=r'https://www\.flickr\.com/(photos|people)/[^/?]+/([^/?]+/)?$',
    reject=r'https://login\.yahoo\.com/.*',
    subdomain='www',
    trailing_slash=True,
    headers=util.REQUEST_HEADERS)

  # unique name optionally used in URLs instead of nsid (e.g.,
  # flickr.com/photos/username)
  username = ndb.StringProperty()

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`Flickr` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.flickr.FlickrAuth`
    """
    person = json_loads(auth_entity.user_json).get('person', {})
    return Flickr(
      id=person.get('nsid'),
      auth_entity=auth_entity.key,
      name=person.get('realname', {}).get('_content'),
      # path_alias, if it exists, is the actual thing that shows up in the url.
      # I think this is an artifact of the conversion to Yahoo.
      username=(person.get('path_alias')
                or person.get('username', {}).get('_content')),
      picture='https://farm{}.staticflickr.com/{}/buddyicons/{}.jpg'.format(
        person.get('iconfarm'), person.get('iconserver'), person.get('nsid')),
      url=person.get('profileurl', {}).get('_content'),
      **kwargs)

  def silo_url(self):
    """Returns the Flickr account URL, e.g. https://www.flickr.com/people/foo/."""
    return self.url

  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. 'tag:flickr.com:123456'."""
    return self.gr_source.tag_uri(self.username)

  def label_name(self):
    """Human-readable name, username, or id for this source."""
    return self.name or self.username or self.key_id()

  def get_activities_response(self, *args, **kwargs):
    """Discard min_id because we still want new comments/likes on old photos."""
    kwargs.setdefault('group_id', SELF)
    if 'min_id' in kwargs:
      del kwargs['min_id']
    return self.gr_source.get_activities_response(*args, **kwargs)

  def canonicalize_url(self, url, activity=None, **kwargs):
    """Canonicalizes a Flickr URL, mapping username paths to the nsid key id."""
    # ensure trailing slash so the replacements below match
    if not url.endswith('/'):
      url = url + '/'
    # normalize URLs that use the username path alias to use the nsid instead
    if self.username:
      url = url.replace('flickr.com/photos/%s/' % self.username,
                        'flickr.com/photos/%s/' % self.key_id())
      url = url.replace('flickr.com/people/%s/' % self.username,
                        'flickr.com/people/%s/' % self.key_id())
    # NOTE(review): `activity` is accepted but not forwarded to super() —
    # confirm that's intentional.
    return super(Flickr, self).canonicalize_url(url, **kwargs)
class Source(StringIdModel):
  """A silo account, e.g. a Facebook or Google+ account.

  Each concrete silo class should subclass this class.
  """
  __metaclass__ = SourceMeta

  # Turn off NDB instance and memcache caching.
  # https://developers.google.com/appengine/docs/python/ndb/cache
  # https://github.com/snarfed/bridgy/issues/558
  # https://github.com/snarfed/bridgy/issues/68
  _use_cache = False

  STATUSES = ('enabled', 'disabled', 'error')  # 'error' is deprecated
  POLL_STATUSES = ('ok', 'error', 'polling')
  FEATURES = ('listen', 'publish', 'webmention')

  # short name for this site type. used in URLs, etc.
  SHORT_NAME = None
  # the corresponding granary class
  GR_CLASS = None

  # how often to poll for responses
  FAST_POLL = datetime.timedelta(minutes=30)
  # how often to poll sources that have never sent a webmention
  SLOW_POLL = datetime.timedelta(days=1)
  # how often to poll sources that are currently rate limited by their silo
  RATE_LIMITED_POLL = SLOW_POLL
  # how long to wait after signup for a successful webmention before dropping to
  # the lower frequency poll
  FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7)
  # how often refetch author url to look for updated syndication links
  FAST_REFETCH = datetime.timedelta(hours=6)
  # refetch less often (this often) if it's been >2w since the last synd link
  SLOW_REFETCH = datetime.timedelta(days=2)

  # Maps Publish.type (e.g. 'like') to source-specific human readable type label
  # (e.g. 'favorite'). Subclasses should override this.
  TYPE_LABELS = {}

  # subclasses should override this
  URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS)

  created = ndb.DateTimeProperty(auto_now_add=True, required=True)
  url = ndb.StringProperty()
  status = ndb.StringProperty(choices=STATUSES, default='enabled')
  poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok')
  rate_limited = ndb.BooleanProperty(default=False)
  name = ndb.StringProperty()  # full human-readable name
  picture = ndb.StringProperty()
  domains = ndb.StringProperty(repeated=True)
  domain_urls = ndb.StringProperty(repeated=True)
  features = ndb.StringProperty(repeated=True, choices=FEATURES)
  superfeedr_secret = ndb.StringProperty()
  webmention_endpoint = ndb.StringProperty()

  # points to an oauth-dropins auth entity. The model class should be a subclass
  # of oauth_dropins.BaseAuth.
  # the token should be generated with the offline_access scope so that it
  # doesn't expire. details: http://developers.facebook.com/docs/authentication/
  auth_entity = ndb.KeyProperty()

  #
  # listen-only properties
  #
  last_polled = ndb.DateTimeProperty(default=util.EPOCH)
  last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH)
  last_webmention_sent = ndb.DateTimeProperty()
  last_public_post = ndb.DateTimeProperty()
  recent_private_posts = ndb.IntegerProperty()

  # the last time we re-fetched the author's url looking for updated
  # syndication links
  last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH)

  # the last time we've seen a rel=syndication link for this Source.
  # we won't spend the time to re-fetch and look for updates if there's
  # never been one
  last_syndication_url = ndb.DateTimeProperty()
  # the last time we saw a syndication link in an h-feed, as opposed to just on
  # permalinks. background: https://github.com/snarfed/bridgy/issues/624
  last_feed_syndication_url = ndb.DateTimeProperty()

  last_activity_id = ndb.StringProperty()
  last_activities_etag = ndb.StringProperty()
  last_activities_cache_json = ndb.TextProperty()
  seen_responses_cache_json = ndb.TextProperty(compressed=True)

  # maps updated property names to values that put_updates() writes back to the
  # datastore transactionally. set this to {} before beginning.
  updates = None

  # gr_source is *not* set to None by default here, since it needs to be unset
  # for __getattr__ to run when it's accessed.

  @classmethod
  def new(cls, handler, **kwargs):
    """Factory method. Creates and returns a new instance for the current user.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  def __getattr__(self, name):
    """Lazily load the auth entity and instantiate :attr:`self.gr_source`.

    Once :attr:`self.gr_source` is set, this method will *not* be called;
    :attr:`gr_source` will be returned normally.
    """
    if name == 'gr_source' and self.auth_entity:
      auth_entity = self.auth_entity.get()
      token = auth_entity.access_token()
      # normalize single-token silos to a tuple so *token works uniformly
      if not isinstance(token, tuple):
        token = (token, )

      # per-silo constructor kwargs
      kwargs = {}
      if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user':
        kwargs = {'user_id': self.key.id()}
      elif self.key.kind() == 'Instagram':
        kwargs = {'scrape': True}
      elif self.key.kind() == 'Twitter':
        kwargs = {'username': self.key.id()}

      self.gr_source = self.GR_CLASS(*token, **kwargs)
      return self.gr_source

    return getattr(super(Source, self), name)

  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id.

    By default, interprets id as just the key id. Subclasses may extend this to
    support usernames, etc.
    """
    return ndb.Key(cls, id).get()

  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. 'tag:plus.google.com:123456'."""
    return self.gr_source.tag_uri(self.key.id())

  def bridgy_path(self):
    """Returns the Bridgy page URL path for this source."""
    return '/%s/%s' % (self.SHORT_NAME, self.key.string_id())

  def bridgy_url(self, handler):
    """Returns the Bridgy page URL for this source."""
    return handler.request.host_url + self.bridgy_path()

  def silo_url(self, handler):
    """Returns the silo account URL, e.g. https://twitter.com/foo."""
    raise NotImplementedError()

  def label(self):
    """Human-readable label for this source."""
    return '%s (%s)' % (self.label_name(), self.GR_CLASS.NAME)

  def label_name(self):
    """Human-readable name or username for this source, whichever is preferred."""
    return self.name

  @classmethod
  @ndb.transactional
  def put_updates(cls, source):
    """Writes source.updates to the datastore transactionally.

    Args:
      source: :class:`Source`

    Returns: the updated :class:`Source`
    """
    if not source.updates:
      return source

    # don't log *_json blobs; they're big and noisy
    logging.info('Updating %s %s : %r', source.label(), source.bridgy_path(),
                 {k: v for k, v in source.updates.items()
                  if not k.endswith('_json')})

    updates = source.updates
    # re-fetch inside the transaction so we update the latest version
    source = source.key.get()
    source.updates = updates  # because FacebookPage._pre_put_hook uses it
    for name, val in updates.items():
      setattr(source, name, val)

    if source.status == 'error':  # deprecated
      logging.warning('Resetting status from error to enabled')
      source.status = 'enabled'

    source.put()
    return source

  def poll_period(self):
    """Returns the poll frequency for this source, as a :class:`datetime.timedelta`.

    Defaults to ~15m, depending on silo. If we've never sent a webmention for
    this source, or the last one we sent was over a month ago, we drop them
    down to ~1d after a week long grace period.
    """
    now = datetime.datetime.now()
    if self.rate_limited:
      return self.RATE_LIMITED_POLL
    elif now < self.created + self.FAST_POLL_GRACE_PERIOD:
      return self.FAST_POLL
    elif not self.last_webmention_sent:
      return self.SLOW_POLL
    elif self.last_webmention_sent > now - datetime.timedelta(days=7):
      return self.FAST_POLL
    elif self.last_webmention_sent > now - datetime.timedelta(days=30):
      return self.FAST_POLL * 10
    else:
      return self.SLOW_POLL

  def should_refetch(self):
    """Returns True if we should run OPD refetch on this source now."""
    now = datetime.datetime.now()
    if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
      return True
    elif not self.last_syndication_url:
      return False

    # refetch faster if we've seen a syndication link in the last two weeks
    period = (self.FAST_REFETCH
              if self.last_syndication_url > now - datetime.timedelta(days=14)
              else self.SLOW_REFETCH)
    return self.last_poll_attempt >= self.last_hfeed_refetch + period

  @classmethod
  def bridgy_webmention_endpoint(cls, domain='brid.gy'):
    """Returns the Bridgy webmention endpoint for this source type."""
    return 'https://%s/webmention/%s' % (domain, cls.SHORT_NAME)

  def has_bridgy_webmention_endpoint(self):
    """Returns True if this source uses Bridgy's webmention endpoint."""
    return self.webmention_endpoint in (
      self.bridgy_webmention_endpoint(),
      self.bridgy_webmention_endpoint(domain='www.brid.gy'))

  def get_author_urls(self):
    """Determine the author urls for a particular source.

    In debug mode, replace test domains with localhost.

    Return: a list of string URLs, possibly empty
    """
    return [util.replace_test_domains_with_localhost(u)
            for u in self.domain_urls]

  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    https://github.com/snarfed/bridgy/issues/456
    https://github.com/snarfed/bridgy/issues/565

    Returns: sequence of ActivityStreams activity dicts
    """
    return []

  def get_activities_response(self, **kwargs):
    """Returns recent posts and embedded comments for this source.

    May be overridden by subclasses.
    """
    kwargs.setdefault('group_id', gr_source.SELF)
    resp = self.gr_source.get_activities_response(**kwargs)
    for activity in resp['items']:
      self._inject_user_urls(activity)
    return resp

  def get_activities(self, **kwargs):
    """Convenience wrapper that returns just the activities list."""
    return self.get_activities_response(**kwargs)['items']

  def get_comment(self, comment_id, **kwargs):
    """Returns a comment from this source.

    Passes through to granary by default. May be overridden by subclasses.

    Args:
      comment_id: string, site-specific comment id
      kwargs: passed to :meth:`granary.source.Source.get_comment`

    Returns: dict, decoded ActivityStreams comment object, or None
    """
    comment = self.gr_source.get_comment(comment_id, **kwargs)
    if comment:
      self._inject_user_urls(comment)
    return comment

  def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
    """Returns an ActivityStreams 'like' activity object.

    Passes through to granary by default. May be overridden by subclasses.

    Args:
      activity_user_id: string id of the user who posted the original activity
      activity_id: string activity id
      like_user_id: string id of the user who liked the activity
      kwargs: passed to granary.Source.get_comment
    """
    return self.gr_source.get_like(activity_user_id, activity_id, like_user_id,
                                   **kwargs)

  def _inject_user_urls(self, activity):
    """Adds this user's web site URLs to their user mentions (in tags), in place."""
    obj = activity.get('object') or activity
    user_tag_id = self.user_tag_id()
    for tag in obj.get('tags', []):
      if tag.get('id') == user_tag_id:
        tag.setdefault('urls', []).extend(
          [{'value': u} for u in self.domain_urls])

  def create_comment(self, post_url, author_name, author_url, content):
    """Creates a new comment in the source silo.

    Must be implemented by subclasses.

    Args:
      post_url: string
      author_name: string
      author_url: string
      content: string

    Returns: response dict with at least 'id' field
    """
    raise NotImplementedError()

  def feed_url(self):
    """Returns the RSS or Atom (or similar) feed URL for this source.

    Must be implemented by subclasses. Currently only implemented by
    :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.

    Returns: string URL
    """
    raise NotImplementedError()

  def edit_template_url(self):
    """Returns the URL for editing this blog's template HTML.

    Must be implemented by subclasses. Currently only implemented by
    :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.

    Returns: string URL
    """
    raise NotImplementedError()

  @classmethod
  def create_new(cls, handler, user_url=None, **kwargs):
    """Creates and saves a new :class:`Source` and adds a poll task for it.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to :meth:`new()`
    """
    source = cls.new(handler, **kwargs)
    if source is None:
      return None

    new_features = source.features or ['listen']

    if not source.domain_urls:  # defer to the source if it already set this
      auth_entity = kwargs.get('auth_entity')
      if auth_entity and hasattr(auth_entity, 'user_json'):
        source.domain_urls, source.domains = source._urls_and_domains(
          auth_entity, user_url)
    logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)

    # publish sources must have at least one valid web site
    if ('publish' in new_features
        and (not source.domain_urls or not source.domains)):
      handler.messages = {'No valid web sites found in your %s profile. '
                          'Please update it and try again!' % cls.GR_CLASS.NAME}
      return None

    # check if this source already exists
    existing = source.key.get()
    if existing:
      # merge some fields
      source.features = set(source.features + existing.features)
      source.populate(**existing.to_dict(include=(
        'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled',
        'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret')))
      verb = 'Updated'
    else:
      verb = 'Added'

    author_urls = source.get_author_urls()
    link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
            if author_urls else 'http://indiewebify.me/#send-webmentions')
    blurb = '%s %s. %s' % (
      verb, source.label(),
      {'listen': "Refresh in a minute to see what we've found!",
       'publish': 'Try previewing a post from your web site!',
       'webmention': '<a href="%s">Try a webmention!</a>' % link,
       }.get(new_features[0], ''))
    logging.info('%s %s', blurb, source.bridgy_url(handler))
    # uncomment to send email notification for each new user
    # if not existing:
    #   util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
      handler.messages = {blurb}

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'webmention' in source.features:
      superfeedr.subscribe(source, handler)

    if 'listen' in source.features:
      util.add_poll_task(source, now=True)
      util.add_poll_task(source,
                         countdown=source.poll_period().total_seconds())

    return source

  def verified(self):
    """Returns True if this source is ready to be used, false otherwise.

    See :meth:`verify()` for details. May be overridden by subclasses, e.g.
    :class:`tumblr.Tumblr`.
    """
    if not self.domains or not self.domain_urls:
      return False
    if 'webmention' in self.features and not self.webmention_endpoint:
      return False
    if ('listen' in self.features and
        not (self.webmention_endpoint or self.last_webmention_sent)):
      return False
    return True

  def verify(self, force=False):
    """Checks that this source is ready to be used.

    For blog and listen sources, this fetches their front page HTML and
    discovers their webmention endpoint. For publish sources, this checks
    that they have a domain.

    May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

    Args:
      force: if True, fully verifies (e.g. re-fetches the blog's HTML and
        performs webmention discovery) even we already think this source is
        verified.
    """
    author_urls = self.get_author_urls()
    if ((self.verified() and not force) or self.status == 'disabled'
        or not self.features or not author_urls):
      return

    author_url = author_urls[0]
    logging.info('Attempting to discover webmention endpoint on %s',
                 author_url)
    mention = send.WebmentionSend('https://brid.gy/', author_url)
    mention.requests_kwargs = {'timeout': HTTP_TIMEOUT,
                               'headers': util.REQUEST_HEADERS}
    try:
      mention._discoverEndpoint()
    except BaseException:
      # discovery is best effort; record the failure and continue
      logging.info('Error discovering webmention endpoint', exc_info=True)
      mention.error = {'code': 'EXCEPTION'}

    self._fetched_html = getattr(mention, 'html', None)
    error = getattr(mention, 'error', None)
    endpoint = getattr(mention, 'receiver_endpoint', None)
    if error or not endpoint:
      logging.info("No webmention endpoint found: %s %r", error, endpoint)
      self.webmention_endpoint = None
    else:
      logging.info("Discovered webmention endpoint %s", endpoint)
      self.webmention_endpoint = endpoint

    self.put()

  def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blacklisted) URLs and domains.

    Converts the auth entity's user_json to an ActivityStreams actor and uses
    its 'urls' and 'url' fields. May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns: ([string url, ...], [string domain, ...])
    """
    actor = self.gr_source.user_to_actor(json.loads(auth_entity.user_json))
    logging.debug('Converted to actor: %s', json.dumps(actor, indent=2))

    candidates = util.trim_nulls(util.uniquify(
      [user_url] + microformats2.object_urls(actor)))

    if len(candidates) > MAX_AUTHOR_URLS:
      logging.info('Too many profile links! Only resolving the first %s: %s',
                   MAX_AUTHOR_URLS, candidates)

    urls = []
    for i, url in enumerate(candidates):
      # only resolve redirects for the first MAX_AUTHOR_URLS candidates
      final, domain, ok = util.get_webmention_target(
        url, resolve=i < MAX_AUTHOR_URLS)
      if ok:
        final = final.lower()
        if util.schemeless(final).startswith(util.schemeless(url.lower())):
          # redirected to a deeper path. use the original higher level URL. #652
          final = url
        # If final has a path segment check if root has a matching rel=me.
        match = re.match(r'^(https?://[^/]+)/.+', final)
        if match and i < MAX_AUTHOR_URLS:
          root = match.group(1)
          resp = util.requests_get(root)
          resp.raise_for_status()
          data = util.mf2py_parse(resp.text, root)
          me_urls = data.get('rels', {}).get('me', [])
          if final in me_urls:
            final = root
        urls.append(final)

    urls = util.dedupe_urls(urls)  # normalizes domains to lower case
    domains = [util.domain_from_link(url) for url in urls]
    return urls, domains

  def canonicalize_url(self, url, activity=None, **kwargs):
    """Canonicalizes a post or object URL.

    Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`.
    """
    return self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER else url

  def infer_profile_url(self, url):
    """Given an arbitrary URL representing a person, try to find their
    profile URL for *this* service.

    Queries Bridgy's registered accounts for users with a particular
    domain in their silo profile.

    Args:
      url: string, a person's URL

    Return: a string URL for their profile on this service (or None)
    """
    domain = util.domain_from_link(url)
    if domain == self.gr_source.DOMAIN:
      return url
    user = self.__class__.query(self.__class__.domains == domain).get()
    if user:
      return self.gr_source.user_url(user.key.id())

  def preprocess_for_publish(self, obj):
    """Preprocess an object before trying to publish it.

    By default this tries to massage person tags so that the tag's "url" points
    to the person's profile on this service (as opposed to a person's
    homepage).

    The object is modified in place.

    Args:
      obj: ActivityStreams activity or object dict
    """
    for tag in obj.get('tags', []):
      if tag.get('objectType') == 'person':
        silo_url = None
        for url in microformats2.object_urls(tag):
          silo_url = url and self.infer_profile_url(url)
          if silo_url:
            break
        if silo_url:
          tag['url'] = silo_url

    # recurse on contained object(s)
    for obj in util.get_list(obj, 'object'):
      self.preprocess_for_publish(obj)

  def on_new_syndicated_post(self, syndpost):
    """Called when a new :class:`SyndicatedPost` is stored for this source.

    Args:
      syndpost: :class:`SyndicatedPost`
    """
    pass

  def is_private(self):
    """Returns True if this source is private aka protected.

    ...ie their posts are not public.
    """
    return False

  def is_activity_public(self, activity):
    """Returns True if the given activity is public, False otherwise.

    Just wraps :meth:`granary.source.Source.is_public`. Subclasses may
    override.
    """
    return gr_source.Source.is_public(activity)

  def is_beta_user(self):
    """Returns True if this is a "beta" user opted into new features.

    Beta users come from beta_users.txt.
    """
    return self.bridgy_path() in util.BETA_USER_PATHS

  def is_blocked(self, obj):
    """Returns True if an object's author is being blocked.

    ...ie they're in this user's block list.
    """
    return False
class Instagram(Source):
  """An Instagram account.

  The key name is the username. Instagram usernames may have ASCII letters
  (case insensitive), numbers, periods, and underscores:
  https://stackoverflow.com/questions/15470180
  """
  GR_CLASS = gr_instagram.Instagram
  SHORT_NAME = 'instagram'
  FAST_POLL = datetime.timedelta(minutes=120)
  RATE_LIMITED_POLL = Source.SLOW_POLL
  # Instagram also returns HTTP 503 (in addition to the standard 429) when it
  # rate limits us. This was misspelled RATE_HTTP_LIMIT_CODES, which meant the
  # base class override never took effect and 503s weren't treated as rate
  # limiting (finish() checks Instagram.RATE_LIMIT_HTTP_CODES).
  RATE_LIMIT_HTTP_CODES = Source.RATE_LIMIT_HTTP_CODES + ('503',)
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    subdomain='www',
    approve=r'https://www.instagram.com/p/[^/?]+/$',
    trailing_slash=True,
    headers=util.REQUEST_HEADERS)
  # no reject regexp; non-private Instagram post URLs just 404

  @staticmethod
  def new(handler, auth_entity=None, actor=None, **kwargs):
    """Creates and returns an :class:`Instagram` for the logged in user.

    Stashes the scraped actor into the auth entity's user_json so that
    :meth:`user_tag_id` can read the silo user id later.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.instagram.InstagramAuth`
      actor: AS actor dict for the Instagram user, from scraping
      kwargs: property values
    """
    user = json.loads(auth_entity.user_json)
    user['actor'] = actor
    auth_entity.user_json = json.dumps(user)
    auth_entity.put()

    username = actor['username']
    if not kwargs.get('features'):
      kwargs['features'] = ['listen']

    urls = microformats2.object_urls(actor)
    return Instagram(id=username,
                     auth_entity=auth_entity.key,
                     name=actor.get('displayName'),
                     picture=actor.get('image', {}).get('url'),
                     url=gr_instagram.Instagram.user_url(username),
                     domain_urls=urls,
                     domains=[util.domain_from_link(url) for url in urls],
                     **kwargs)

  def silo_url(self):
    """Returns the Instagram account URL, e.g. https://instagram.com/foo."""
    return self.url

  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. 'tag:instagram.com:123456'.

    Prefers the scraped actor's id stashed by :meth:`new`; falls back to the
    auth entity's user id or the key id (username).
    """
    user = json.loads(self.auth_entity.get().user_json)
    return (user.get('actor', {}).get('id') or
            self.gr_source.tag_uri(user.get('id') or self.key.id()))

  def label_name(self):
    """Returns the username."""
    return self.key.id()

  def get_activities_response(self, *args, **kwargs):
    """Sets group_id and user_id because scraping requires them."""
    kwargs.setdefault('group_id', gr_source.SELF)
    kwargs.setdefault('user_id', self.key.id())
    return self.gr_source.get_activities_response(*args, **kwargs)
class FacebookPage(models.Source):
  """A Facebook profile or page.

  The key name is the Facebook id.
  """
  GR_CLASS = gr_facebook.Facebook
  SHORT_NAME = 'facebook'
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    subdomain='www',
    query=True,
    approve=r'https://www\.facebook\.com/[^/?]+/posts/[^/?]+$',
    headers=util.REQUEST_HEADERS)
  # no reject regexp; non-private FB post URLs just 404

  # unique name used in FB URLs, e.g. facebook.com/[username]
  username = ndb.StringProperty()
  # inferred from syndication URLs if username isn't available
  inferred_username = ndb.StringProperty()
  # inferred application-specific user IDs (from other applications)
  inferred_user_ids = ndb.StringProperty(repeated=True)
  # maps string FB post id to string FB object id or None. background:
  # https://github.com/snarfed/bridgy/pull/513#issuecomment-149312879
  resolved_object_ids_json = ndb.TextProperty(compressed=True)
  # maps string FB post id to True or False for whether the post is public
  # or private. only contains posts with *known* privacy. background:
  # https://github.com/snarfed/bridgy/issues/633#issuecomment-198806909
  post_publics_json = ndb.TextProperty(compressed=True)

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`FacebookPage` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.facebook.FacebookAuth`
      kwargs: property values
    """
    user = json.loads(auth_entity.user_json)
    gr_source = gr_facebook.Facebook(access_token=auth_entity.access_token())
    actor = gr_source.user_to_actor(user)
    return FacebookPage(id=user['id'],
                        auth_entity=auth_entity.key,
                        name=actor.get('displayName'),
                        username=actor.get('username'),
                        picture=actor.get('image', {}).get('url'),
                        url=actor.get('url'),
                        **kwargs)

  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id or username."""
    return ndb.Key(cls, id).get() or cls.query(cls.username == id).get()

  def silo_url(self):
    """Returns the Facebook account URL, e.g. https://facebook.com/foo.

    Facebook profile URLS with app-scoped user ids (eg www.facebook.com/ID) no
    longer work as of April 2018, so if that's all we have, return None instead.
    https://developers.facebook.com/blog/post/2018/04/19/facebook-login-changes-address-abuse/
    """
    if self.username or self.inferred_username:
      return self.gr_source.user_url(self.username or self.inferred_username)

    # fall back to a numeric id, but only if it's below the app-scoped range.
    for id in [self.key.id()] + self.inferred_user_ids:
      if util.is_int(id) and int(id) < MIN_APP_SCOPED_ID:
        return self.gr_source.user_url(id)

  def get_activities_response(self, **kwargs):
    """Fetches activities, plus dead-token detection and cache maintenance.

    On HTTP 401, asks the user to reauthenticate (unless the token is known
    dead) and raises :class:`models.DisableSource`. On success, updates the
    resolved_object_ids and post_publics caches from the fetched activities.
    """
    type = self.auth_entity.get().type
    kwargs.setdefault('fetch_events', True)
    kwargs.setdefault('fetch_news', type == 'user')
    kwargs.setdefault('event_owner_id', self.key.id())

    try:
      activities = super(FacebookPage, self).get_activities_response(**kwargs)
    except urllib2.HTTPError as e:
      code, body = util.interpret_http_exception(e)
      # use a function so any new exceptions (JSON decoding, missing keys) don't
      # clobber the original exception so we can re-raise it below.
      def dead_token():
        try:
          err = json.loads(body)['error']
          return (err.get('code') in DEAD_TOKEN_ERROR_CODES or
                  err.get('error_subcode') in DEAD_TOKEN_ERROR_SUBCODES or
                  err.get('message') in DEAD_TOKEN_ERROR_MESSAGES)
        except:
          logging.warning("Couldn't determine whether token is still valid",
                          exc_info=True)
          return False

      if code == '401':
        if not dead_token() and type == 'user':
          # ask the user to reauthenticate. if this API call fails, it will raise
          # urllib2.HTTPError instead of DisableSource, so that we don't disable
          # the source without notifying.
          #
          # TODO: for pages, fetch the owners/admins and notify them.
          self.gr_source.create_notification(
            self.key.id(),
            "Bridgy's access to your account has expired. Click here to renew it now!",
            'https://brid.gy/facebook/start')
        raise models.DisableSource()

      raise

    # update the resolved_object_ids and post_publics caches
    def parsed_post_id(id):
      # normalize a full USER_POST style id down to just the post part.
      parsed = gr_facebook.Facebook.parse_id(id)
      return parsed.post if parsed.post else id

    resolved = self._load_cache('resolved_object_ids')
    for activity in activities['items']:
      obj = activity.get('object', {})
      obj_id = parsed_post_id(obj.get('fb_id'))
      ids = obj.get('fb_object_for_ids')
      if obj_id and ids:
        resolved[obj_id] = obj_id
        for id in ids:
          resolved[parsed_post_id(id)] = obj_id

    # populate the post_publics cache as a side effect of is_activity_public().
    for activity in activities['items']:
      self.is_activity_public(activity)

    return activities

  def canonicalize_url(self, url, activity=None, **kwargs):
    """Facebook-specific standardization of syndicated urls.

    Canonical form is https://www.facebook.com/USERID/posts/POSTID

    Args:
      url: a string, the url of the syndicated content
      activity: the activity this URL came from. If it has an fb_object_id,
        we'll use that instead of fetching the post from Facebook
      kwargs: unused

    Return:
      a string, the canonical form of the syndication url
    """
    if util.domain_from_link(url) != self.gr_source.DOMAIN:
      return None

    def post_url(id):
      return 'https://www.facebook.com/%s/posts/%s' % (self.key.id(), id)

    parsed = urlparse.urlparse(url)
    params = urlparse.parse_qs(parsed.query)
    url_id = self.gr_source.post_id(url)
    ids = params.get('story_fbid') or params.get('fbid')

    if ids:
      url = post_url(ids[0])
    elif url_id:
      if parsed.path.startswith('/notes/'):
        url = post_url(url_id)
      else:
        object_id = self.cached_resolve_object_id(url_id, activity=activity)
        if object_id:
          url = post_url(object_id)

    # replace alternate user ids/usernames in the URL with our canonical key id.
    for alternate_id in util.trim_nulls(itertools.chain(
        (self.username or self.inferred_username,), self.inferred_user_ids)):
      url = url.replace('facebook.com/%s/' % alternate_id,
                        'facebook.com/%s/' % self.key.id())

    return super(FacebookPage, self).canonicalize_url(url)

  def cached_resolve_object_id(self, post_id, activity=None):
    """Resolve a post id to its Facebook object id, if any.

    Wraps :meth:`granary.facebook.Facebook.resolve_object_id()` and uses
    self.resolved_object_ids_json as a cache.

    Args:
      post_id: string Facebook post id
      activity: optional AS activity representation of Facebook post

    Returns:
      string Facebook object id or None
    """
    parsed = gr_facebook.Facebook.parse_id(post_id)
    if parsed.post:
      post_id = parsed.post

    resolved = self._load_cache('resolved_object_ids')
    if post_id not in resolved:
      resolved[post_id] = self.gr_source.resolve_object_id(
        self.key.id(), post_id, activity=activity)

    return resolved[post_id]

  def is_activity_public(self, activity):
    """Returns True if the given activity is public, False otherwise.

    Uses the :attr:`post_publics_json` cache if we can't tell otherwise.
    """
    obj = activity.get('object', {})
    fb_id = activity.get('fb_id') or obj.get('fb_id')
    if fb_id and gr_source.object_type(activity) not in ('comment', 'like', 'share'):
      fb_id = self.cached_resolve_object_id(fb_id, activity=activity)

    post_publics = self._load_cache('post_publics')
    public = gr_source.Source.is_public(activity)

    if not fb_id:
      return public
    elif public is not None:
      post_publics[fb_id] = public  # write cache
      return public
    else:
      return post_publics.get(fb_id)  # read cache

  def _load_cache(self, name):
    """Loads resolved_object_ids_json or post_publics_json into self.updates.

    Returns the (mutable) cache dict; mutations are persisted later by
    :meth:`_save_cache` via :meth:`_pre_put_hook`.
    """
    assert name in ('resolved_object_ids', 'post_publics')
    field = getattr(self, name + '_json')

    if self.updates is None:
      self.updates = {}
    loaded = self.updates.setdefault(name, {})

    if not loaded and field:
      loaded = self.updates[name] = json.loads(field)

    return loaded

  def _save_cache(self, name):
    """Writes resolved_object_ids or post_publics from self.updates to _json."""
    if self.updates is None:
      return

    assert name in ('resolved_object_ids', 'post_publics')
    # NOTE(review): `max` shadows the builtin here; it holds the
    # MAX_RESOLVED_OBJECT_IDS / MAX_POST_PUBLICS cap.
    max = globals()['MAX_' + name.upper()]
    val = self.updates.get(name)
    if val:
      # keep the numerically largest ids, assuming ids roughly increase
      # over time, so we retain the most recent entries.
      keep = heapq.nlargest(max,
        (int(id) if util.is_int(id) else str(id) for id in val.keys()))
      setattr(self, name + '_json',
              json.dumps({str(id): val[str(id)] for id in keep}))

  def _pre_put_hook(self):
    """Encode the resolved_object_ids and post_publics fields from updates.

    ...and cap them at MAX_RESOLVED_OBJECT_IDS and MAX_POST_PUBLICS.

    Tries to keep the latest ones by assuming that ids are roughly
    monotonically increasing.
    """
    self._save_cache('resolved_object_ids')
    self._save_cache('post_publics')

  def infer_profile_url(self, url):
    """Find a Facebook profile URL (ideally the one with the user's numeric ID)

    Looks up existing sources by username, inferred username, and domain.

    Args:
      url: string, a person's URL

    Return:
      a string URL for their Facebook profile (or None)
    """
    domain = util.domain_from_link(url)
    if domain == self.gr_source.DOMAIN:
      # it's a facebook.com URL; try to match its path against known usernames.
      username = urlparse.urlparse(url).path.strip('/')
      if '/' not in username:
        user = FacebookPage.query(ndb.OR(
          FacebookPage.username == username,
          FacebookPage.inferred_username == username)).get()
        if user:
          return self.gr_source.user_url(user.key.id())
    return super(FacebookPage, self).infer_profile_url(url)

  @ndb.transactional
  def on_new_syndicated_post(self, syndpost):
    """If this source has no username, try to infer one from a syndication URL.

    Args:
      syndpost: :class:`models.SyndicatedPost`
    """
    url = syndpost.syndication
    if self.username or not url:
      return

    # FB usernames only have letters, numbers, and periods:
    # https://www.facebook.com/help/105399436216001
    author_id = self.gr_source.base_object({'object': {'url': url}})\
        .get('author', {}).get('id')
    if author_id:
      if author_id != self.inferred_username and not util.is_int(author_id):
        # non-numeric, so treat it as an inferred username.
        logging.info('Inferring username %s from syndication url %s',
                     author_id, url)
        self.inferred_username = author_id
        self.put()
        syndpost.syndication = self.canonicalize_url(syndpost.syndication)
      elif author_id != self.key.id() and author_id not in self.inferred_user_ids:
        # numeric, so treat it as an app-scoped user id.
        logging.info('Inferring app-scoped user id %s from syndication url %s',
                     author_id, url)
        self.inferred_user_ids = util.uniquify(self.inferred_user_ids + [author_id])
        self.put()
        syndpost.syndication = self.canonicalize_url(syndpost.syndication)
class GitHub(Source):
  """A GitHub user.

  The key name is the GitHub username.
  """
  GR_CLASS = gr_github.GitHub
  OAUTH_START = oauth_github.Start
  SHORT_NAME = 'github'
  TYPE_LABELS = {
    'post': 'issue',
    'like': 'star',
  }
  BACKFEED_REQUIRES_SYNDICATION_LINK = True
  DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ('403',)
  CAN_PUBLISH = True
  URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN, fragment=True)
  # This makes us backfeed issue/PR comments to previous comments on the same
  # issue/PR.
  IGNORE_SYNDICATION_LINK_FRAGMENTS = True
  USERNAME_KEY_ID = True

  @staticmethod
  def new(auth_entity=None, **kwargs):
    """Creates and returns a :class:`GitHub` for the logged in user.

    Args:
      auth_entity: :class:`oauth_dropins.github.GitHubAuth`
      kwargs: property values
    """
    assert 'username' not in kwargs
    assert 'id' not in kwargs

    user_json = json_loads(auth_entity.user_json)
    actor = gr_github.GitHub(
      access_token=auth_entity.access_token()).user_to_actor(user_json)
    return GitHub(username=auth_entity.key_id(),
                  auth_entity=auth_entity.key,
                  name=actor.get('displayName'),
                  picture=actor.get('image', {}).get('url'),
                  url=actor.get('url'),
                  **kwargs)

  def silo_url(self):
    """Returns the GitHub account URL, e.g. https://github.com/foo."""
    return self.gr_source.user_url(self.username)

  def label_name(self):
    """Returns the GitHub username."""
    return self.username

  def user_tag_id(self):
    """Returns this user's tag URI, eg 'tag:github.com:2013,MDQ6VXNlcjc3OD='."""
    user = json_loads(self.auth_entity.get().user_json)
    return self.gr_source.tag_uri(user['id'])

  def get_activities_response(self, *args, **kwargs):
    """Override/drop a few kwargs before delegating to granary."""
    kwargs['fetch_shares'] = None
    kwargs['fetch_mentions'] = None
    kwargs['count'] = min(10, kwargs.get('count', 0))
    return self.gr_source.get_activities_response(*args, **kwargs)
class GooglePlusPage(models.Source):
  """A Google+ profile or page.

  The key name is the user id.
  """
  GR_CLASS = gr_googleplus.GooglePlus
  SHORT_NAME = 'googleplus'
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    approve=r'https://plus\.google\.com/[^/?]+/posts/[^/?]+$',
    headers=util.REQUEST_HEADERS)
  # no reject regexp; non-private G+ post URLs just 404

  # We're currently close to the G+ API's daily limit of 10k requests per day.
  # So low! :/ Usage history:
  # QPS: https://cloud.google.com/console/project/1029605954231
  # Today's quota usage: https://code.google.com/apis/console/b/0/?noredirect#project:1029605954231:quotas
  # Daily total usage: https://code.google.com/apis/console/b/0/?pli=1#project:1029605954231:stats
  # API quotas are refilled daily. Use 30h to make sure we're over a day even
  # after the randomized task ETA.
  RATE_LIMITED_POLL = datetime.timedelta(hours=30)

  # 'user' for a personal profile, 'page' for a G+ page.
  type = ndb.StringProperty(choices=('user', 'page'))

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`GooglePlusPage` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.googleplus.GooglePlusAuth`
    """
    # Google+ Person resource
    # https://developers.google.com/+/api/latest/people#resource
    user = json.loads(auth_entity.user_json)
    type = 'user' if user.get('objectType', 'person') == 'person' else 'page'

    # override the sz param to ask for a 128x128 image. if there's an existing
    # sz query param (there usually is), the new one will come afterward and
    # override it.
    picture = user.get('image', {}).get('url')
    picture = util.add_query_params(picture, {'sz': '128'})

    return GooglePlusPage(id=user['id'],
                          auth_entity=auth_entity.key,
                          url=user.get('url'),
                          name=user.get('displayName'),
                          picture=picture,
                          type=type,
                          **kwargs)

  def silo_url(self):
    """Returns the Google+ account URL, e.g. https://plus.google.com/+Foo."""
    return self.url

  def __getattr__(self, name):
    """Overridden to pass auth_entity to :class:`granary.googleplus.GooglePlus`.

    Only intercepts the first 'gr_source' lookup; once self.gr_source is set,
    normal attribute lookup finds it and this method isn't called for it again.
    """
    if name == 'gr_source' and self.auth_entity:
      self.gr_source = gr_googleplus.GooglePlus(
        auth_entity=self.auth_entity.get())
      return self.gr_source

    return getattr(super(GooglePlusPage, self), name)

  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Only searches for root domain web site URLs! Skips URLs with paths; they
    tend to generate false positive results in G+'s search. Not sure why yet.

    G+ search supports OR:
    https://developers.google.com/+/api/latest/activities/search

    Returns:
      sequence of ActivityStreams activity dicts
    """
    urls = ['"%s"' % util.fragmentless(url) for url in self.domain_urls
            if not util.in_webmention_blacklist(util.domain_from_link(url))
            and urlparse.urlparse(url).path in ('', '/')
           ][:models.MAX_AUTHOR_URLS]

    if urls:
      return self.get_activities(
        search_query=' OR '.join(urls), group_id=gr_source.SEARCH,
        etag=self.last_activities_etag, fetch_replies=False,
        fetch_likes=False, fetch_shares=False, count=50)

    return []
class Instagram(models.Source):
  """An Instagram account.

  The key name is the username.
  """
  GR_CLASS = gr_instagram.Instagram
  SHORT_NAME = 'instagram'
  FAST_POLL = datetime.timedelta(minutes=60)
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    subdomain='www',
    approve=r'https://www.instagram.com/p/[^/?]+/',
    trailing_slash=True,
    headers=util.USER_AGENT_HEADER)
  # no reject regexp; non-private Instagram post URLs just 404

  @staticmethod
  def new(handler, auth_entity=None, actor=None, **kwargs):
    """Creates and returns an Instagram source for the logged in user.

    Args:
      handler: the current RequestHandler
      auth_entity: oauth_dropins.instagram.InstagramAuth
      actor: AS actor dict for the Instagram user
    """
    # stash the actor in the auth entity's user_json for later use.
    user_info = json.loads(auth_entity.user_json)
    user_info['actor'] = actor
    auth_entity.user_json = json.dumps(user_info)
    auth_entity.put()

    kwargs['features'] = kwargs.get('features') or ['listen']

    username = actor['username']
    profile_urls = microformats2.object_urls(actor)
    return Instagram(id=username,
                     auth_entity=auth_entity.key,
                     name=actor.get('displayName'),
                     picture=actor.get('image', {}).get('url'),
                     url=gr_instagram.Instagram.user_url(username),
                     domain_urls=profile_urls,
                     domains=[util.domain_from_link(u) for u in profile_urls],
                     **kwargs)

  def silo_url(self):
    """Returns the Instagram account URL, e.g. https://instagram.com/foo."""
    return self.url

  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. 'tag:instagram.com:123456'."""
    user_info = json.loads(self.auth_entity.get().user_json)
    silo_id = user_info.get('id') or self.key.id()
    return self.gr_source.tag_uri(silo_id)

  def label_name(self):
    """Returns the username."""
    return self.key.id()

  def get_activities_response(self, *args, **kwargs):
    """Sets group_id and user_id, since scraping requires them."""
    if 'group_id' not in kwargs:
      kwargs['group_id'] = gr_source.SELF
    if 'user_id' not in kwargs:
      kwargs['user_id'] = self.key.id()
    return self.gr_source.get_activities_response(*args, **kwargs)
class Source(StringIdModel, metaclass=SourceMeta):
  """A silo account, e.g. a Facebook or Google+ account.

  Each concrete silo class should subclass this class.
  """

  # Turn off NDB instance and memcache caching.
  # https://developers.google.com/appengine/docs/python/ndb/cache
  # https://github.com/snarfed/bridgy/issues/558
  # https://github.com/snarfed/bridgy/issues/68
  _use_cache = False

  STATUSES = ('enabled', 'disabled')
  POLL_STATUSES = ('ok', 'error', 'polling')
  FEATURES = ('listen', 'publish', 'webmention', 'email')

  # short name for this site type. used in URLs, etc.
  SHORT_NAME = None
  # the corresponding granary class
  GR_CLASS = None
  # oauth-dropins Start class
  OAUTH_START = None
  # whether Bridgy supports listen for this silo - this is unlikely, so we default to True
  CAN_LISTEN = True
  # whether Bridgy supports publish for this silo
  CAN_PUBLISH = None
  # whether this source should poll automatically, or only when triggered
  # (eg Instagram)
  AUTO_POLL = True
  # how often to poll for responses
  FAST_POLL = timedelta(minutes=30)
  # how often to poll sources that have never sent a webmention
  SLOW_POLL = timedelta(days=1)
  # how often to poll sources that are currently rate limited by their silo
  RATE_LIMITED_POLL = SLOW_POLL
  # how long to wait after signup for a successful webmention before dropping to
  # the lower frequency poll
  FAST_POLL_GRACE_PERIOD = timedelta(days=7)
  # how often refetch author url to look for updated syndication links
  FAST_REFETCH = timedelta(hours=6)
  # refetch less often (this often) if it's been >2w since the last synd link
  SLOW_REFETCH = timedelta(days=2)

  # rate limiting HTTP status codes returned by this silo. e.g. twitter returns
  # 429, instagram 503, google+ 403.
  RATE_LIMIT_HTTP_CODES = ('429',)
  DISABLE_HTTP_CODES = ('401',)
  TRANSIENT_ERROR_HTTP_CODES = ()
  # whether granary supports fetching block lists
  HAS_BLOCKS = False
  # whether to require a u-syndication link for backfeed
  BACKFEED_REQUIRES_SYNDICATION_LINK = False
  # ignore fragments when comparing syndication links in OPD
  IGNORE_SYNDICATION_LINK_FRAGMENTS = False
  # convert username to all lower case to use as key name
  USERNAME_KEY_ID = False

  # Maps Publish.type (e.g. 'like') to source-specific human readable type label
  # (e.g. 'favorite'). Subclasses should override this.
  TYPE_LABELS = {}

  # subclasses should override this
  URL_CANONICALIZER = util.UrlCanonicalizer()
  # Regexps for URL paths that don't accept incoming webmentions. Currently used
  # by Blogger.
  PATH_BLOCKLIST = ()

  created = ndb.DateTimeProperty(auto_now_add=True, required=True,
                                 tzinfo=timezone.utc)
  url = ndb.StringProperty()
  username = ndb.StringProperty()
  status = ndb.StringProperty(choices=STATUSES, default='enabled')
  poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok')
  rate_limited = ndb.BooleanProperty(default=False)
  name = ndb.StringProperty()  # full human-readable name
  picture = ndb.StringProperty()
  domains = ndb.StringProperty(repeated=True)
  domain_urls = ndb.StringProperty(repeated=True)
  features = ndb.StringProperty(repeated=True, choices=FEATURES)
  superfeedr_secret = ndb.StringProperty()
  webmention_endpoint = ndb.StringProperty()

  # points to an oauth-dropins auth entity. The model class should be a subclass
  # of oauth_dropins.BaseAuth. the token should be generated with the
  # offline_access scope so that it doesn't expire.
  auth_entity = ndb.KeyProperty()

  #
  # listen-only properties
  #
  last_polled = ndb.DateTimeProperty(default=util.EPOCH, tzinfo=timezone.utc)
  last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH, tzinfo=timezone.utc)
  last_webmention_sent = ndb.DateTimeProperty(tzinfo=timezone.utc)
  last_public_post = ndb.DateTimeProperty(tzinfo=timezone.utc)
  recent_private_posts = ndb.IntegerProperty(default=0)

  # the last time we re-fetched the author's url looking for updated
  # syndication links
  last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH, tzinfo=timezone.utc)

  # the last time we've seen a rel=syndication link for this Source.
  # we won't spend the time to re-fetch and look for updates if there's
  # never been one
  last_syndication_url = ndb.DateTimeProperty(tzinfo=timezone.utc)
  # the last time we saw a syndication link in an h-feed, as opposed to just on
  # permalinks. background: https://github.com/snarfed/bridgy/issues/624
  last_feed_syndication_url = ndb.DateTimeProperty(tzinfo=timezone.utc)

  last_activity_id = ndb.StringProperty()
  last_activities_etag = ndb.StringProperty()
  last_activities_cache_json = ndb.TextProperty()
  seen_responses_cache_json = ndb.TextProperty(compressed=True)

  # populated in Poll.poll(), used by handlers
  blocked_ids = ndb.JsonProperty(compressed=True)

  # maps updated property names to values that put_updates() writes back to the
  # datastore transactionally. set this to {} before beginning.
  updates = None

  # gr_source is *not* set to None by default here, since it needs to be unset
  # for __getattr__ to run when it's accessed.

  def __init__(self, *args, id=None, **kwargs):
    """Constructor. Escapes the key string id if it starts with `__`."""
    username = kwargs.get('username')
    if self.USERNAME_KEY_ID and username and not id:
      id = username.lower()
    if id and id.startswith('__'):
      # prefix with a backslash so the id can't collide with NDB's reserved
      # __*__ key names; key_id() strips it back off.
      id = '\\' + id
    super().__init__(*args, id=id, **kwargs)

  def key_id(self):
    """Returns the key's unescaped string id."""
    id = self.key.id()
    return id[1:] if id[0] == '\\' else id

  @classmethod
  def new(cls, **kwargs):
    """Factory method. Creates and returns a new instance for the current user.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  def __getattr__(self, name):
    """Lazily load the auth entity and instantiate :attr:`self.gr_source`.

    Once :attr:`self.gr_source` is set, this method will *not* be called;
    :attr:`gr_source` will be returned normally.
    """
    if name != 'gr_source':
      return getattr(super(), name)

    super_attr = getattr(super(), name, None)
    if super_attr:
      return super_attr
    elif not self.auth_entity:
      return None

    auth_entity = self.auth_entity.get()
    try:
      # auth entities with a refresh token (eg Google) construct the granary
      # source from it directly.
      refresh_token = auth_entity.refresh_token
      self.gr_source = self.GR_CLASS(refresh_token)
      return self.gr_source
    except AttributeError:
      logger.info('no refresh_token')

    args = auth_entity.access_token()
    if not isinstance(args, tuple):
      args = (args,)

    # per-silo constructor kwargs.
    kwargs = {}
    if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user':
      kwargs = {'user_id': self.key_id()}
    elif self.key.kind() == 'Instagram':
      kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE}
    elif self.key.kind() == 'Mastodon':
      args = (auth_entity.instance(),) + args
      inst = auth_entity.app.get().instance_info
      kwargs = {
        'user_id': json_loads(auth_entity.user_json).get('id'),
        # https://docs-develop.pleroma.social/backend/API/differences_in_mastoapi_responses/#instance
        'truncate_text_length':
          json_loads(inst).get('max_toot_chars') if inst else None,
      }
    elif self.key.kind() == 'Twitter':
      kwargs = {'username': self.key_id(), 'scrape_headers': TWITTER_SCRAPE_HEADERS}

    self.gr_source = self.GR_CLASS(*args, **kwargs)
    return self.gr_source

  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id.

    By default, interprets id as just the key id. Subclasses may extend this to
    support usernames, etc.

    TODO: if USERNAME_KEY_ID, normalize to lower case before looking up. Need to
    wait until we've backfilled all existing entities with upper case key ids.
    """
    if id and id.startswith('__'):
      id = '\\' + id
    return ndb.Key(cls, id).get()

  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. 'tag:plus.google.com:123456'."""
    return self.gr_source.tag_uri(self.key_id())

  def bridgy_path(self):
    """Returns the Bridgy page URL path for this source."""
    return f'/{self.SHORT_NAME}/{self.key_id()}'

  def bridgy_url(self):
    """Returns the Bridgy page URL for this source."""
    return util.host_url(self.bridgy_path())

  def silo_url(self, handler):
    """Returns the silo account URL, e.g. https://twitter.com/foo."""
    raise NotImplementedError()

  def label(self):
    """Human-readable label for this source."""
    return f'{self.label_name()} ({self.GR_CLASS.NAME})'

  def label_name(self):
    """Human-readable name or username for this source, whichever is preferred."""
    return self.name or self.key_id()

  @classmethod
  @ndb.transactional()
  def put_updates(cls, source):
    """Writes source.updates to the datastore transactionally.

    Args:
      source: :class:`Source`

    Returns: the updated :class:`Source`
    """
    if not source.updates:
      return source

    to_log = {k: v for k, v in source.updates.items() if not k.endswith('_json')}
    logger.info(f'Updating {source.label()} {source.bridgy_path()} : {to_log!r}')

    updates = source.updates
    # re-fetch inside the transaction so we apply updates to the latest version.
    source = source.key.get()
    source.updates = updates
    for name, val in updates.items():
      setattr(source, name, val)

    source.put()
    return source

  def poll_period(self):
    """Returns the poll frequency for this source, as a :class:`datetime.timedelta`.

    Defaults to ~15m, depending on silo. If we've never sent a webmention for
    this source, or the last one we sent was over a month ago, we drop them down
    to ~1d after a week long grace period.
    """
    now = util.now_fn()
    if self.rate_limited:
      return self.RATE_LIMITED_POLL
    elif now < self.created + self.FAST_POLL_GRACE_PERIOD:
      return self.FAST_POLL
    elif not self.last_webmention_sent:
      return self.SLOW_POLL
    elif self.last_webmention_sent > now - timedelta(days=7):
      return self.FAST_POLL
    elif self.last_webmention_sent > now - timedelta(days=30):
      return self.FAST_POLL * 10
    else:
      return self.SLOW_POLL

  def should_refetch(self):
    """Returns True if we should run OPD refetch on this source now."""
    now = util.now_fn()
    if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
      # explicitly triggered refetch
      return True
    elif not self.last_syndication_url:
      return False

    period = (self.FAST_REFETCH
              if self.last_syndication_url > now - timedelta(days=14)
              else self.SLOW_REFETCH)
    return self.last_poll_attempt >= self.last_hfeed_refetch + period

  @classmethod
  def bridgy_webmention_endpoint(cls, domain='brid.gy'):
    """Returns the Bridgy webmention endpoint for this source type."""
    return f'https://{domain}/webmention/{cls.SHORT_NAME}'

  def has_bridgy_webmention_endpoint(self):
    """Returns True if this source uses Bridgy's webmention endpoint."""
    return self.webmention_endpoint in (
      self.bridgy_webmention_endpoint(),
      self.bridgy_webmention_endpoint(domain='www.brid.gy'))

  def get_author_urls(self):
    """Determine the author urls for a particular source.

    In debug mode, replace test domains with localhost.

    Return: a list of string URLs, possibly empty
    """
    return [util.replace_test_domains_with_localhost(u) for u in self.domain_urls]

  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    No-op by default; subclasses override.

    https://github.com/snarfed/bridgy/issues/456
    https://github.com/snarfed/bridgy/issues/565

    Returns: sequence of ActivityStreams activity dicts
    """
    return []

  def get_activities_response(self, **kwargs):
    """Returns recent posts and embedded comments for this source. May be overridden by subclasses. 
""" kwargs.setdefault('group_id', gr_source.SELF) resp = self.gr_source.get_activities_response(**kwargs) for activity in resp['items']: self._inject_user_urls(activity) return resp def get_activities(self, **kwargs): return self.get_activities_response(**kwargs)['items'] def get_comment(self, comment_id, **kwargs): """Returns a comment from this source. Passes through to granary by default. May be overridden by subclasses. Args: comment_id: string, site-specific comment id kwargs: passed to :meth:`granary.source.Source.get_comment` Returns: dict, decoded ActivityStreams comment object, or None """ comment = self.gr_source.get_comment(comment_id, **kwargs) if comment: self._inject_user_urls(comment) return comment def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs): """Returns an ActivityStreams 'like' activity object. Passes through to granary by default. May be overridden by subclasses. Args: activity_user_id: string id of the user who posted the original activity activity_id: string activity id like_user_id: string id of the user who liked the activity kwargs: passed to granary.Source.get_comment """ return self.gr_source.get_like(activity_user_id, activity_id, like_user_id, **kwargs) def _inject_user_urls(self, activity): """Adds this user's web site URLs to their user mentions (in tags), in place.""" obj = activity.get('object') or activity user_tag_id = self.user_tag_id() for tag in obj.get('tags', []): if tag.get('id') == user_tag_id: tag.setdefault('urls', []).extend([{'value': u} for u in self.domain_urls]) def create_comment(self, post_url, author_name, author_url, content): """Creates a new comment in the source silo. Must be implemented by subclasses. Args: post_url: string author_name: string author_url: string content: string Returns: response dict with at least 'id' field """ raise NotImplementedError() def feed_url(self): """Returns the RSS or Atom (or similar) feed URL for this source. Must be implemented by subclasses. 
Currently only implemented by :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`. Returns: string URL """ raise NotImplementedError() def edit_template_url(self): """Returns the URL for editing this blog's template HTML. Must be implemented by subclasses. Currently only implemented by :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`. Returns: string URL """ raise NotImplementedError() @classmethod def button_html(cls, feature, **kwargs): """Returns an HTML string with a login form and button for this site. Mostly just passes through to :meth:`oauth_dropins.handlers.Start.button_html`. Returns: string, HTML """ assert set(feature.split(',')) <= set(cls.FEATURES) form_extra = (kwargs.pop('form_extra', '') + f'<input name="feature" type="hidden" value="{feature}" />') source = kwargs.pop('source', None) if source: form_extra += f'\n<input name="id" type="hidden" value="{source.key_id()}" />' if cls.OAUTH_START: return cls.OAUTH_START.button_html( f'/{cls.SHORT_NAME}/start', form_extra=form_extra, image_prefix='/oauth_dropins_static/', **kwargs) return '' @classmethod @ndb.transactional() def create_new(cls, user_url=None, **kwargs): """Creates and saves a new :class:`Source` and adds a poll task for it. Args: user_url: a string, optional. 
if provided, supersedes other urls when determining the author_url **kwargs: passed to :meth:`new()` Returns: newly created :class:`Source` """ source = cls.new(**kwargs) if source is None: return None if not source.domain_urls: # defer to the source if it already set this auth_entity = kwargs.get('auth_entity') if auth_entity and hasattr(auth_entity, 'user_json'): source.domain_urls, source.domains = source.urls_and_domains( auth_entity, user_url) logger.debug(f'URLs/domains: {source.domain_urls} {source.domains}') # check if this source already exists existing = source.key.get() if existing: # merge some fields source.features = set(source.features + existing.features) source.populate(**existing.to_dict(include=( 'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled', 'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret', 'webmention_endpoint'))) verb = 'Updated' else: verb = 'Added' author_urls = source.get_author_urls() link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0] if author_urls else 'http://indiewebify.me/#send-webmentions') feature = source.features[0] if source.features else 'listen' blurb = '%s %s. %s' % ( verb, source.label(), 'Try previewing a post from your web site!' if feature == 'publish' else '<a href="%s">Try a webmention!</a>' % link if feature == 'webmention' else "Refresh in a minute to see what we've found!") logger.info(f'{blurb} {source.bridgy_url()}') source.verify() if source.verified(): flash(blurb) source.put() if 'webmention' in source.features: superfeedr.subscribe(source) if 'listen' in source.features and source.AUTO_POLL: util.add_poll_task(source, now=True) util.add_poll_task(source) return source def verified(self): """Returns True if this source is ready to be used, false otherwise. See :meth:`verify()` for details. May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`. 
""" if not self.domains or not self.domain_urls: return False if 'webmention' in self.features and not self.webmention_endpoint: return False if ('listen' in self.features and not (self.webmention_endpoint or self.last_webmention_sent)): return False return True def verify(self, force=False): """Checks that this source is ready to be used. For blog and listen sources, this fetches their front page HTML and discovers their webmention endpoint. For publish sources, this checks that they have a domain. May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`. Args: force: if True, fully verifies (e.g. re-fetches the blog's HTML and performs webmention discovery) even we already think this source is verified. """ author_urls = [u for u, d in zip(self.get_author_urls(), self.domains) if not util.in_webmention_blocklist(d)] if ((self.verified() and not force) or self.status == 'disabled' or not self.features or not author_urls): return author_url = author_urls[0] try: got = webmention.discover(author_url, timeout=util.HTTP_TIMEOUT) self.webmention_endpoint = got.endpoint self._fetched_html = got.response.text except BaseException as e: logger.info('Error discovering webmention endpoint', exc_info=e) self.webmention_endpoint = None self.put() def urls_and_domains(self, auth_entity, user_url, actor=None, resolve_source_domain=True): """Returns this user's valid (not webmention-blocklisted) URLs and domains. Converts the auth entity's user_json to an ActivityStreams actor and uses its 'urls' and 'url' fields. May be overridden by subclasses. Args: auth_entity: :class:`oauth_dropins.models.BaseAuth` user_url: string, optional URL passed in when authorizing actor: dict, optional AS actor for the user. 
If provided, overrides auth_entity resolve_source_domain: boolean, whether to follow redirects on URLs on this source's domain Returns: ([string url, ...], [string domain, ...]) """ if not actor: actor = self.gr_source.user_to_actor(json_loads(auth_entity.user_json)) logger.debug(f'Extracting URLs and domains from actor: {json_dumps(actor, indent=2)}') candidates = util.trim_nulls(util.uniquify( [user_url] + microformats2.object_urls(actor))) if len(candidates) > MAX_AUTHOR_URLS: logger.info(f'Too many profile links! Only resolving the first {MAX_AUTHOR_URLS}: {candidates}') urls = [] for i, url in enumerate(candidates): on_source_domain = util.domain_from_link(url) == self.gr_source.DOMAIN resolve = ((resolve_source_domain or not on_source_domain) and i < MAX_AUTHOR_URLS) resolved = self.resolve_profile_url(url, resolve=resolve) if resolved: urls.append(resolved) final_urls = [] domains = [] for url in util.dedupe_urls(urls): # normalizes domains to lower case # skip links on this source's domain itself. only currently needed for # Mastodon; the other silo domains are in the webmention blocklist. domain = util.domain_from_link(url) if domain != self.gr_source.DOMAIN: final_urls.append(url) domains.append(domain) return final_urls, domains @staticmethod def resolve_profile_url(url, resolve=True): """Resolves a profile URL to be added to a source. Args: url: string resolve: boolean, whether to make HTTP requests to follow redirects, etc. Returns: string, resolved URL, or None """ final, _, ok = util.get_webmention_target(url, resolve=resolve) if not ok: return None final = final.lower() if util.schemeless(final).startswith(util.schemeless(url.lower())): # redirected to a deeper path. use the original higher level URL. #652 final = url # If final has a path segment check if root has a matching rel=me. 
match = re.match(r'^(https?://[^/]+)/.+', final) if match and resolve: root = match.group(1) try: mf2 = util.fetch_mf2(root) me_urls = mf2['rels'].get('me', []) if final in me_urls: final = root except requests.RequestException: logger.warning(f"Couldn't fetch {root}, preserving path in {final}", exc_info=True) return final def canonicalize_url(self, url, activity=None, **kwargs): """Canonicalizes a post or object URL. Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`. """ return self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER else url def infer_profile_url(self, url): """Given an arbitrary URL representing a person, try to find their profile URL for *this* service. Queries Bridgy's registered accounts for users with a particular domain in their silo profile. Args: url: string, a person's URL Return: a string URL for their profile on this service (or None) """ domain = util.domain_from_link(url) if domain == self.gr_source.DOMAIN: return url user = self.__class__.query(self.__class__.domains == domain).get() if user: return self.gr_source.user_url(user.key_id()) def preprocess_for_publish(self, obj): """Preprocess an object before trying to publish it. By default this tries to massage person tags so that the tag's "url" points to the person's profile on this service (as opposed to a person's homepage). The object is modified in place. Args: obj: ActivityStreams activity or object dict """ for tag in obj.get('tags', []): if tag.get('objectType') == 'person': silo_url = None for url in microformats2.object_urls(tag): silo_url = url and self.infer_profile_url(url) if silo_url: break if silo_url: tag['url'] = silo_url # recurse on contained object(s) for obj in util.get_list(obj, 'object'): self.preprocess_for_publish(obj) def on_new_syndicated_post(self, syndpost): """Called when a new :class:`SyndicatedPost` is stored for this source. 
Args: syndpost: :class:`SyndicatedPost` """ pass def is_private(self): """Returns True if this source is private aka protected. ...ie their posts are not public. """ return False def is_activity_public(self, activity): """Returns True if the given activity is public, False otherwise. Just wraps :meth:`granary.source.Source.is_public`. Subclasses may override. """ return gr_source.Source.is_public(activity) def is_beta_user(self): """Returns True if this is a "beta" user opted into new features. Beta users come from beta_users.txt. """ return self.bridgy_path() in util.BETA_USER_PATHS def load_blocklist(self): """Fetches this user's blocklist, if supported, and stores it in the entity.""" if not self.HAS_BLOCKS: return try: ids = self.gr_source.get_blocklist_ids() except gr_source.RateLimited as e: ids = e.partial or [] self.blocked_ids = ids[:BLOCKLIST_MAX_IDS] self.put() def is_blocked(self, obj): """Returns True if an object's author is being blocked. ...ie they're in this user's block list. Note that this method is tested in test_twitter.py, not test_models.py, for historical reasons. """ if not self.blocked_ids: return False for o in [obj] + util.get_list(obj, 'object'): for field in 'author', 'actor': if o.get(field, {}).get('numeric_id') in self.blocked_ids: return True
class Twitter(models.Source):
  """A Twitter account.

  The key name is the username.
  """
  GR_CLASS = gr_twitter.Twitter
  SHORT_NAME = 'twitter'
  TYPE_LABELS = {
    'post': 'tweet',
    'comment': '@-reply',
    'repost': 'retweet',
    'like': 'favorite',
  }
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    approve=r'https://twitter\.com/[^/?]+/status/[^/?]+',
    reject=r'https://twitter\.com/.+\?protected_redirect=true',
    headers=util.USER_AGENT_HEADER)

  # Twitter's rate limiting window is currently 15m. A normal poll with nothing
  # new hits /statuses/user_timeline and /search/tweets once each. Both
  # allow 180 calls per window before they're rate limited.
  # https://dev.twitter.com/docs/rate-limiting/1.1/limits

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a Twitter entity.

    Args:
      handler: the current RequestHandler
      auth_entity: oauth-dropins.twitter.TwitterAuth
      kwargs: property values
    """
    user = json.loads(auth_entity.user_json)
    gr_source = gr_twitter.Twitter(*auth_entity.access_token())
    actor = gr_source.user_to_actor(user)
    return Twitter(id=user['screen_name'],
                   auth_entity=auth_entity.key,
                   url=actor.get('url'),
                   name=actor.get('displayName'),
                   picture=actor.get('image', {}).get('url'),
                   **kwargs)

  def silo_url(self):
    """Returns the Twitter account URL, e.g. https://twitter.com/foo."""
    return self.gr_source.user_url(self.key.id())

  def label_name(self):
    """Returns the username."""
    return self.key.id()

  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    Twitter search supports OR:
    https://dev.twitter.com/rest/public/search

    ...but it only returns complete(ish) results if we strip scheme from URLs,
    ie search for example.com instead of http://example.com/, and that also
    returns false positivies, so we check that the returned tweets actually
    have matching links. https://github.com/snarfed/bridgy/issues/565

    Returns:
      sequence of ActivityStreams activity dicts
    """
    # fixed: in_webmention_blocklist, not in_webmention_blacklist, to match
    # the helper Source.verify() uses elsewhere in this file
    urls = set(
      util.fragmentless(url) for url in self.domain_urls
      if not util.in_webmention_blocklist(util.domain_from_link(url)))
    if not urls:
      return []

    query = ' OR '.join('"%s"' % util.schemeless(url, slashes=False)
                        for url in urls)
    candidates = self.get_activities(
      search_query=query, group_id=gr_source.SEARCH,
      etag=self.last_activities_etag, fetch_replies=False, fetch_likes=False,
      fetch_shares=False, count=50)

    # filter out retweets and search false positives that don't actually link
    # to us
    results = []
    for candidate in candidates:
      if candidate.get('verb') == 'share':
        continue
      obj = candidate['object']
      tags = obj.get('tags', [])
      atts = obj.get('attachments', [])
      for url in urls:
        if (url in obj.get('content', '') or
            any(t.get('url', '').startswith(url) for t in tags + atts)):
          results.append(candidate)
          break

    return results

  def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
    """Returns an ActivityStreams 'like' activity object for a favorite.

    We get Twitter favorites by scraping HTML, and we only get the first
    page, which only has 25. So, use a Response in the datastore first, if we
    have one, and only re-scrape HTML as a fallback.

    Args:
      activity_user_id: string id of the user who posted the original activity
      activity_id: string activity id
      like_user_id: string id of the user who liked the activity
      kwargs: passed to granary.Source.get_comment
    """
    id = self.gr_source.tag_uri('%s_favorited_by_%s' % (activity_id, like_user_id))
    resp = models.Response.get_by_id(id)
    if resp:
      return json.loads(resp.response_json)
    else:
      return super(Twitter, self).get_like(activity_user_id, activity_id,
                                           like_user_id, **kwargs)

  def is_private(self):
    """Returns True if this Twitter account is protected.

    https://dev.twitter.com/rest/reference/get/users/show#highlighter_25173
    https://support.twitter.com/articles/14016
    https://support.twitter.com/articles/20169886
    """
    return json.loads(self.auth_entity.get().user_json).get('protected')

  def canonicalize_url(self, url, activity=None, **kwargs):
    """Normalize /statuses/ to /status/.

    https://github.com/snarfed/bridgy/issues/618
    """
    url = url.replace('/statuses/', '/status/')
    return super(Twitter, self).canonicalize_url(url, **kwargs)
class GitHub(Source):
  """A GitHub user.

  The key name is the GitHub username.

  WARNING: technically we should override URL_CANONICALIZER here and pass it
  fragment=True, since comment permalinks have meaningful fragments, eg
  #issuecomment=123. Right now, when we see a comment syndication URL, we
  strip its fragment and store just the issue URL as the synd URL, which is
  obviously wrong. ...HOWEVER, that has the nice side effect of enabling
  backfeed to comments as well as issues, since we think comment OPs are the
  issue itself. This is obviously not ideal. The fix is to extend
  original_post_discovery.discover() to allow silo-specific synd URL
  comparisons, so that a comment on an issue can match along with the issue
  itself. I'm lazy, though, so I'm leaving this as is for now.
  """
  GR_CLASS = gr_github.GitHub
  OAUTH_START_HANDLER = oauth_github.StartHandler
  SHORT_NAME = 'github'
  TYPE_LABELS = {
    'post': 'issue',
    'like': 'star',
  }
  BACKFEED_REQUIRES_SYNDICATION_LINK = True
  DISABLE_HTTP_CODES = Source.DISABLE_HTTP_CODES + ('403',)
  CAN_PUBLISH = True
  # WARNING: see the class docstring about fragments
  URL_CANONICALIZER = util.UrlCanonicalizer(domain=GR_CLASS.DOMAIN,
                                            headers=util.REQUEST_HEADERS)

  @staticmethod
  def new(handler, auth_entity=None, **kwargs):
    """Creates and returns a :class:`GitHub` for the logged in user.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      auth_entity: :class:`oauth_dropins.github.GitHubAuth`
      kwargs: property values
    """
    user = json_loads(auth_entity.user_json)
    actor = gr_github.GitHub(
      access_token=auth_entity.access_token()).user_to_actor(user)
    image = actor.get('image', {})
    return GitHub(id=auth_entity.key.id(),
                  auth_entity=auth_entity.key,
                  name=actor.get('displayName'),
                  picture=image.get('url'),
                  url=actor.get('url'),
                  **kwargs)

  def silo_url(self):
    """Returns the GitHub account URL, e.g. https://github.com/foo."""
    username = self.key.id()
    return self.gr_source.user_url(username)

  def label_name(self):
    """Returns the username."""
    return self.key.id()

  def get_activities_response(self, *args, **kwargs):
    """Passes through to granary, dropping kwargs it doesn't currently
    support for GitHub.
    """
    kwargs['fetch_shares'] = None
    kwargs['fetch_mentions'] = None
    return self.gr_source.get_activities_response(*args, **kwargs)
class Facebook(browser.BrowserSource):
  """A Facebook account.

  The key name is the Facebook global user id.
  """
  GR_CLASS = gr_facebook.Facebook
  SHORT_NAME = 'facebook'
  OAUTH_START_HANDLER = oauth_facebook.StartHandler
  URL_CANONICALIZER = util.UrlCanonicalizer(
    domain=GR_CLASS.DOMAIN,
    subdomain='www',
    query=True,
    approve=r'https://www\.facebook\.com/[^/?]+/posts/[^/?]+$',
    headers=util.REQUEST_HEADERS)
  # no reject regexp; non-private FB post URLs just 404

  # blank granary Facebook object, shared across all instances
  gr_source = gr_facebook.Facebook()

  # unique name used in FB URLs, e.g. facebook.com/[username]
  username = ndb.StringProperty()

  @classmethod
  def new(cls, handler, auth_entity=None, actor=None, **kwargs):
    """Creates and returns an entity based on an AS1 actor."""
    src = super().new(handler, auth_entity=None, actor=actor, **kwargs)
    src.username = actor.get('username')
    return src

  @classmethod
  def key_id_from_actor(cls, actor):
    """Returns the actor's numeric_id field to use as this entity's key id.

    numeric_id is the Facebook global user id.
    """
    return actor['numeric_id']

  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id or username."""
    return ndb.Key(cls, id).get() or cls.query(cls.username == id).get()

  def silo_url(self):
    """Returns the Facebook profile URL, e.g. https://facebook.com/foo.

    Facebook profile URLS with app-scoped user ids (eg www.facebook.com/ID)
    no longer work as of April 2018, so if that's all we have, return None
    instead.
    https://developers.facebook.com/blog/post/2018/04/19/facebook-login-changes-address-abuse/
    """
    if self.username:
      return self.gr_source.user_url(self.username)

    user_id = self.key.id()
    # bug fix: this previously tested util.is_int(id) — the id() builtin,
    # which is never an int — so the fallback below could never fire
    if util.is_int(user_id) and int(user_id) < MIN_APP_SCOPED_ID:
      return self.gr_source.user_url(user_id)

  @classmethod
  def button_html(cls, feature, **kwargs):
    """Returns the login button HTML, forcing a GET form."""
    # removed an unreachable second return (Instagram-specific leftover) that
    # followed this statement
    return super(cls, cls).button_html(feature, form_method='get', **kwargs)

  def canonicalize_url(self, url, **kwargs):
    """Facebook-specific standardization of syndicated urls.

    Canonical form is https://www.facebook.com/USERID/posts/POSTID

    Args:
      url: a string, the url of the syndicated content
      kwargs: unused

    Return:
      a string, the canonical form of the syndication url
    """
    if util.domain_from_link(url) != self.gr_source.DOMAIN:
      return None

    def post_url(id):
      return 'https://www.facebook.com/%s/posts/%s' % (self.key.id(), id)

    parsed = urllib.parse.urlparse(url)
    params = urllib.parse.parse_qs(parsed.query)

    # prefer explicit post ids from query params, fall back to the path
    url_id = self.gr_source.post_id(url)
    ids = params.get('story_fbid') or params.get('fbid')

    post_id = ids[0] if ids else url_id
    if post_id:
      url = post_url(post_id)

    # only rewrite the username form of the URL when we actually know the
    # username; '%s' % None would otherwise look for a literal 'None' path
    if self.username:
      url = url.replace('facebook.com/%s/' % self.username,
                        'facebook.com/%s/' % self.key.id())

    return super(Facebook, self).canonicalize_url(url)