class Plan(ndb.Model):
  name = ndb.StringProperty()
  data = ndb.TextProperty()

class SomeKind(ndb.Model):
  foo = ndb.StringProperty()

class Dog(ndb.Model):
  name = ndb.StringProperty()

class AModel(ndb.Model):
  s_foo = ndb.StringProperty()

class CModel(ndb.Model):
  s_foobar = ndb.StringProperty()
  key_b = ndb.KeyProperty(kind="BModel", indexed=True)
  key_a = ndb.ComputedProperty(  # Issue here
      lambda self: self.key_b.get().key_a if self.key_b else None,
  )

class Cat(Feline):
  three = ndb.StringProperty()

class SubKind(Base):
  foo = ndb.StringProperty()

class Bug(ndb.Model):
  """Bug entity."""
  OSV_ID_PREFIX = 'OSV-'
  # Very large fake version to use when there is no fix available.
  _NOT_FIXED_SEMVER = '999999.999999.999999'

  # Display ID as used by the source database. The fully qualified database
  # ID that OSV tracks this as may be different.
  db_id = ndb.StringProperty()
  # Other IDs this bug is known as.
  aliases = ndb.StringProperty(repeated=True)
  # Related IDs.
  related = ndb.StringProperty(repeated=True)
  # Status of the bug.
  status = ndb.IntegerProperty()
  # Timestamp when Bug was allocated.
  timestamp = ndb.DateTimeProperty()
  # When the entry was last edited.
  last_modified = ndb.DateTimeProperty()
  # When the entry was withdrawn.
  withdrawn = ndb.DateTimeProperty()
  # The source identifier.
  # For OSS-Fuzz, this is oss-fuzz:<ClusterFuzz testcase ID>.
  # For others this is <source>:<path/to/source>.
  source_id = ndb.StringProperty()
  # The main fixed commit (from bisection).
  fixed = ndb.StringProperty(default='')
  # The main regressing commit (from bisection).
  regressed = ndb.StringProperty(default='')
  # All affected ranges.
  affected_ranges = ndb.StructuredProperty(AffectedRange, repeated=True)
  # List of affected versions.
  affected = ndb.TextProperty(repeated=True)
  # List of normalized versions indexed for fuzzy matching.
  affected_fuzzy = ndb.StringProperty(repeated=True)
  # OSS-Fuzz issue ID.
  issue_id = ndb.StringProperty()
  # Package URL for this package.
  purl = ndb.StringProperty()
  # Project/package name for the bug.
  project = ndb.StringProperty()
  # Package ecosystem for the project.
  ecosystem = ndb.StringProperty()
  # Summary for the bug.
  summary = ndb.TextProperty()
  # Vulnerability details.
  details = ndb.TextProperty()
  # Severity of the bug.
  severity = ndb.StringProperty(validator=_check_valid_severity)
  # Whether or not the bug is public (OSS-Fuzz only).
  public = ndb.BooleanProperty()
  # Reference URL types (dict of url -> type).
  reference_url_types = ndb.JsonProperty()
  # Search indices (auto-populated).
  search_indices = ndb.StringProperty(repeated=True)
  # Whether or not the bug has any affected versions (auto-populated).
  has_affected = ndb.BooleanProperty()
  # Source of truth for this Bug.
  source_of_truth = ndb.IntegerProperty(default=SourceOfTruth.INTERNAL)
  # Whether the bug is fixed (indexed for querying).
  is_fixed = ndb.BooleanProperty()
  # Database specific.
  database_specific = ndb.JsonProperty()
  # Ecosystem specific.
  ecosystem_specific = ndb.JsonProperty()
  # Normalized SEMVER fixed indexes for querying.
  semver_fixed_indexes = ndb.StringProperty(repeated=True)
  # The source of this Bug.
  source = ndb.StringProperty()

  def id(self):
    """Get the bug ID."""
    if self.db_id:
      return self.db_id

    # TODO(ochang): Remove once all existing bugs have IDs migrated.
    if re.match(r'^\d+', self.key.id()):
      return self.OSV_ID_PREFIX + self.key.id()

    return self.key.id()

  @property
  def repo_url(self):
    """Repo URL."""
    for affected_range in self.affected_ranges:
      if affected_range.repo_url:
        return affected_range.repo_url

    return None

  @classmethod
  def get_by_id(cls, vuln_id, *args, **kwargs):
    """Overridden get_by_id to handle OSV allocated IDs."""
    result = cls.query(cls.db_id == vuln_id).get()
    if result:
      return result

    # TODO(ochang): Remove once all existing bugs have IDs migrated.
    if vuln_id.startswith(cls.OSV_ID_PREFIX):
      vuln_id = vuln_id[len(cls.OSV_ID_PREFIX):]

    return super().get_by_id(vuln_id, *args, **kwargs)

  def _tokenize(self, value):
    """Tokenize value for indexing."""
    if not value:
      return []

    value_lower = value.lower()
    return re.split(r'\W+', value_lower) + [value_lower]

  def _pre_put_hook(self):
    """Pre-put hook for populating search indices."""
    search_indices = set()
    search_indices.update(self._tokenize(self.id()))

    if self.project:
      search_indices.update(self._tokenize(self.project))

    if self.ecosystem:
      search_indices.update(self._tokenize(self.ecosystem))

    self.search_indices = sorted(list(search_indices))
    self.has_affected = bool(self.affected) or any(
        r.type in ('SEMVER', 'ECOSYSTEM') for r in self.affected_ranges)
    self.affected_fuzzy = bug.normalize_tags(self.affected)

    if not self.last_modified:
      self.last_modified = utcnow()

    self.is_fixed = any(
        affected_range.fixed for affected_range in self.affected_ranges)

    self.semver_fixed_indexes = []
    for affected_range in self.affected_ranges:
      if affected_range.type == 'SEMVER':
        fixed = affected_range.fixed or self._NOT_FIXED_SEMVER
        self.semver_fixed_indexes.append(semver_index.normalize(fixed))

    if self.source_id:
      self.source, _ = sources.parse_source_id(self.source_id)

    if not self.source:
      raise ValueError('Source not specified for Bug.')

    if not self.db_id:
      raise ValueError('DB ID not specified for Bug.')

    if not self.key:
      source_repo = get_source_repository(self.source)
      if not source_repo:
        raise ValueError(f'Invalid source {self.source}')

      if source_repo.db_prefix and self.db_id.startswith(
          source_repo.db_prefix):
        key_id = self.db_id
      else:
        key_id = f'{self.source}:{self.db_id}'

      self.key = ndb.Key(Bug, key_id)

  def update_from_vulnerability(self, vulnerability):
    """Set fields from vulnerability. Does not set the ID."""
    self.summary = vulnerability.summary
    self.details = vulnerability.details
    self.reference_url_types = {
        ref.url: vulnerability_pb2.Reference.Type.Name(ref.type)
        for ref in vulnerability.references
    }

    if vulnerability.HasField('modified'):
      self.last_modified = vulnerability.modified.ToDatetime()
    if vulnerability.HasField('published'):
      self.timestamp = vulnerability.published.ToDatetime()
    if vulnerability.HasField('withdrawn'):
      self.withdrawn = vulnerability.withdrawn.ToDatetime()

    self.project = vulnerability.package.name
    self.ecosystem = vulnerability.package.ecosystem
    if vulnerability.package.purl:
      self.purl = vulnerability.package.purl

    self.affected = list(vulnerability.affects.versions)
    self.aliases = list(vulnerability.aliases)
    self.related = list(vulnerability.related)

    vuln_dict = sources.vulnerability_to_dict(vulnerability)
    if vulnerability.database_specific:
      self.database_specific = vuln_dict['database_specific']
    if vulnerability.ecosystem_specific:
      self.ecosystem_specific = vuln_dict['ecosystem_specific']

    self.affected_ranges = []
    for affected_range in vulnerability.affects.ranges:
      self.affected_ranges.append(
          AffectedRange(
              type=vulnerability_pb2.AffectedRange.Type.Name(
                  affected_range.type),
              repo_url=affected_range.repo,
              introduced=affected_range.introduced or '',
              fixed=affected_range.fixed or ''))

  def to_vulnerability(self, include_source=False):
    """Convert to Vulnerability proto."""
    package = vulnerability_pb2.Package(
        name=self.project, ecosystem=self.ecosystem, purl=self.purl)

    affects = vulnerability_pb2.Affects(versions=self.affected)
    for affected_range in self.affected_ranges:
      affects.ranges.add(
          type=vulnerability_pb2.AffectedRange.Type.Value(
              affected_range.type),
          repo=affected_range.repo_url,
          introduced=affected_range.introduced,
          fixed=affected_range.fixed)

    details = self.details
    if self.status == bug.BugStatus.INVALID:
      affects = None
      details = 'INVALID'

    if self.last_modified:
      modified = timestamp_pb2.Timestamp()
      modified.FromDatetime(self.last_modified)
    else:
      modified = None

    if self.withdrawn:
      withdrawn = timestamp_pb2.Timestamp()
      withdrawn.FromDatetime(self.withdrawn)
    else:
      withdrawn = None

    published = timestamp_pb2.Timestamp()
    published.FromDatetime(self.timestamp)

    references = []
    if self.reference_url_types:
      for url, url_type in self.reference_url_types.items():
        references.append(
            vulnerability_pb2.Reference(
                url=url,
                type=vulnerability_pb2.Reference.Type.Value(url_type)))

    result = vulnerability_pb2.Vulnerability(
        id=self.id(),
        published=published,
        modified=modified,
        aliases=self.aliases,
        related=self.related,
        withdrawn=withdrawn,
        summary=self.summary,
        details=details,
        package=package,
        affects=affects,
        references=references)

    if self.ecosystem_specific:
      result.ecosystem_specific.update(self.ecosystem_specific)
    if self.database_specific:
      result.database_specific.update(self.database_specific)

    if self.source and include_source:
      source_repo = get_source_repository(self.source)
      if not source_repo or not source_repo.link:
        return result

      result.database_specific.update({
          'source': source_repo.link + sources.source_path(source_repo, self),
      })

    return result

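# Usage sketch (not part of the original module): how a Bug round-trips
# through the hooks above. The proto values and IDs are hypothetical; assumes
# an active ndb client context and a registered 'oss-fuzz' source repository.
def _example_bug_roundtrip(vulnerability):
  entry = Bug(db_id='OSV-2020-1234', source_id='oss-fuzz:12345')
  entry.update_from_vulnerability(vulnerability)
  # put() triggers _pre_put_hook, which derives the key from the source's
  # db_prefix and populates search_indices, affected_fuzzy and is_fixed.
  entry.put()
  return entry.to_vulnerability(include_source=True)
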
class SourceRepository(ndb.Model):
  """Source repository."""
  # The type of the repository.
  type = ndb.IntegerProperty()
  # The name of the source.
  name = ndb.StringProperty()
  # The repo URL for the source.
  repo_url = ndb.StringProperty()
  # The username to use for SSH auth.
  repo_username = ndb.StringProperty()
  # Optional branch for repo.
  repo_branch = ndb.StringProperty()
  # Bucket name.
  bucket = ndb.StringProperty()
  # The directory in the repo where Vulnerability data is stored.
  directory_path = ndb.StringProperty()
  # Last synced hash.
  last_synced_hash = ndb.StringProperty()
  # Last date recurring updates were requested.
  last_update_date = ndb.DateProperty()
  # Patterns of files to exclude (regex).
  ignore_patterns = ndb.StringProperty(repeated=True)
  # Whether this repository is editable.
  editable = ndb.BooleanProperty(default=False)
  # Default extension.
  extension = ndb.StringProperty(default='.yaml')
  # Key path within each file to store the vulnerability.
  key_path = ndb.StringProperty()
  # If true, don't analyze any git ranges.
  ignore_git = ndb.BooleanProperty(default=False)
  # Whether to detect cherrypicks or not (slow for large repos).
  detect_cherrypicks = ndb.BooleanProperty(default=True)
  # Whether to populate "versions" from git ranges.
  versions_from_repo = ndb.BooleanProperty(default=True)
  # HTTP link prefix.
  link = ndb.StringProperty()
  # DB prefix, if the database allocates its own.
  db_prefix = ndb.StringProperty()

  def ignore_file(self, file_path):
    """Return whether or not we should be ignoring a file."""
    if not self.ignore_patterns:
      return False

    file_name = os.path.basename(file_path)
    for pattern in self.ignore_patterns:
      if re.match(pattern, file_name):
        return True

    return False

  def _pre_put_hook(self):
    """Pre-put hook for validation."""
    if self.type == SourceRepositoryType.BUCKET and self.editable:
      raise ValueError('BUCKET SourceRepository cannot be editable.')

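# Minimal sketch (hypothetical patterns): ignore_file() applies each regex
# with re.match() against the basename only, so directory components never
# participate in the match.
def _example_ignore_patterns():
  repo = SourceRepository(ignore_patterns=[r'^TEMP-'])
  assert repo.ignore_file('advisories/TEMP-0001.yaml')
  assert not repo.ignore_file('advisories/CVE-2021-0001.yaml')
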
class Webmentions(StringIdModel):
  """A bundle of links to send webmentions for.

  Use the :class:`Response` and :class:`BlogPost` concrete subclasses below.
  """
  STATUSES = ('new', 'processing', 'complete', 'error')

  # Turn off instance and memcache caching. See Source for details.
  _use_cache = False
  _use_memcache = False

  source = ndb.KeyProperty()
  status = ndb.StringProperty(choices=STATUSES, default='new')
  leased_until = ndb.DateTimeProperty()
  created = ndb.DateTimeProperty(auto_now_add=True)
  updated = ndb.DateTimeProperty(auto_now=True)

  # Original post links, ie webmention targets
  sent = ndb.StringProperty(repeated=True)
  unsent = ndb.StringProperty(repeated=True)
  error = ndb.StringProperty(repeated=True)
  failed = ndb.StringProperty(repeated=True)
  skipped = ndb.StringProperty(repeated=True)

  def label(self):
    """Returns a human-readable string description for use in log messages.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  def add_task(self):
    """Adds a propagate task for this entity.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  @ndb.transactional()
  def get_or_save(self):
    existing = self.key.get()
    if existing:
      return existing

    if self.unsent or self.error:
      logging.debug('New webmentions to propagate! %s', self.label())
      self.add_task()
    else:
      self.status = 'complete'

    self.put()
    return self

  def restart(self):
    """Moves status and targets to 'new' and adds a propagate task."""
    self.status = 'new'
    self.unsent = util.dedupe_urls(self.unsent + self.sent + self.error +
                                   self.failed + self.skipped)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    with util.webmention_endpoint_cache_lock:
      for url in self.unsent:
        util.webmention_endpoint_cache.pop(
            util.webmention_endpoint_cache_key(url), None)

    # this datastore put and task add should be transactional, but Cloud Tasks
    # doesn't support that :(
    # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
    self.put()
    self.add_task()

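# Lifecycle sketch (hypothetical concrete subclass instance; assumes an ndb
# context): get_or_save() only enqueues a propagate task when there is
# something to send, and restart() folds every previously-attempted target
# back into `unsent` before re-enqueueing.
def _example_webmention_lifecycle(entity):
  saved = entity.get_or_save()  # marked 'complete' if nothing unsent
  if saved.status == 'error':
    saved.restart()  # retries sent/error/failed/skipped targets
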
class Response(Webmentions):
  """A comment, like, or repost to be propagated.

  The key name is the comment object id as a tag URI.
  """
  # ActivityStreams JSON activity and comment, like, or repost
  type = ndb.StringProperty(choices=VERB_TYPES, default='comment')
  # These are TextProperty, and not JsonProperty, so that their plain text is
  # visible in the App Engine admin console. (JsonProperty uses a blob. :/)
  activities_json = ndb.TextProperty(repeated=True)
  response_json = ndb.TextProperty()
  # Old values for response_json. Populated when the silo reports that the
  # response has changed, e.g. the user edited a comment or changed their RSVP
  # to an event.
  old_response_jsons = ndb.TextProperty(repeated=True)
  # JSON dict mapping original post url to activity index in activities_json.
  # only set when there's more than one activity.
  urls_to_activity = ndb.TextProperty()
  # Original post links found by original post discovery
  original_posts = ndb.StringProperty(repeated=True)

  def label(self):
    return ' '.join((self.key.kind(), self.type, self.key.id(),
                     json_loads(self.response_json).get('url', '[no url]')))

  def add_task(self):
    util.add_propagate_task(self)

  @staticmethod
  def get_type(obj):
    type = get_type(obj)
    return type if type in VERB_TYPES else 'comment'

  def get_or_save(self, source, restart=False):
    resp = super(Response, self).get_or_save()

    if (self.type != resp.type or
        source.gr_source.activity_changed(json_loads(resp.response_json),
                                          json_loads(self.response_json),
                                          log=True)):
      logging.info('Response changed! Re-propagating. Original: %s' % resp)
      resp.old_response_jsons = resp.old_response_jsons[:10] + [
        resp.response_json]
      resp.response_json = self.response_json
      resp.restart(source)
    elif restart and resp is not self:  # ie it already existed
      resp.restart(source)

    return resp

  def restart(self, source=None):
    """Moves status and targets to 'new' and adds a propagate task."""
    # add original posts with syndication URLs
    # TODO: unify with Poll.repropagate_old_responses()
    if not source:
      source = self.source.get()

    synd_urls = set()
    for activity_json in self.activities_json:
      activity = json_loads(activity_json)
      url = activity.get('url') or activity.get('object', {}).get('url')
      if url:
        url = source.canonicalize_url(url, activity=activity)
        if url:
          synd_urls.add(url)

    if synd_urls:
      self.unsent += [
        synd.original for synd in SyndicatedPost.query(
          SyndicatedPost.syndication.IN(synd_urls))
        if synd.original]

    return super(Response, self).restart()

class Source(StringIdModel, metaclass=SourceMeta):
  """A silo account, e.g. a Facebook or Google+ account.

  Each concrete silo class should subclass this class.
  """
  # Turn off NDB instance and memcache caching.
  # https://developers.google.com/appengine/docs/python/ndb/cache
  # https://github.com/snarfed/bridgy/issues/558
  # https://github.com/snarfed/bridgy/issues/68
  _use_cache = False

  STATUSES = ('enabled', 'disabled', 'error')  # 'error' is deprecated
  POLL_STATUSES = ('ok', 'error', 'polling')
  FEATURES = ('listen', 'publish', 'webmention', 'email')

  # short name for this site type. used in URLs, etc.
  SHORT_NAME = None
  # the corresponding granary class
  GR_CLASS = None
  # oauth-dropins StartHandler class
  OAUTH_START_HANDLER = None
  # whether Bridgy supports listen for this silo. almost all do, so default
  # to True.
  CAN_LISTEN = True
  # whether Bridgy supports publish for this silo
  CAN_PUBLISH = None
  # how often to poll for responses
  FAST_POLL = datetime.timedelta(minutes=30)
  # how often to poll sources that have never sent a webmention
  SLOW_POLL = datetime.timedelta(days=1)
  # how often to poll sources that are currently rate limited by their silo
  RATE_LIMITED_POLL = SLOW_POLL
  # how long to wait after signup for a successful webmention before dropping
  # to the lower frequency poll
  FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7)
  # how often to refetch the author url to look for updated syndication links
  FAST_REFETCH = datetime.timedelta(hours=6)
  # refetch less often (this often) if it's been >2w since the last synd link
  SLOW_REFETCH = datetime.timedelta(days=2)

  # rate limiting HTTP status codes returned by this silo. e.g. twitter
  # returns 429, instagram 503, google+ 403.
  RATE_LIMIT_HTTP_CODES = ('429',)
  DISABLE_HTTP_CODES = ('401',)
  TRANSIENT_ERROR_HTTP_CODES = ()

  # whether granary supports fetching block lists
  HAS_BLOCKS = False
  # whether to require a u-syndication link for backfeed
  BACKFEED_REQUIRES_SYNDICATION_LINK = False

  # Maps Publish.type (e.g. 'like') to source-specific human readable type
  # label (e.g. 'favorite'). Subclasses should override this.
  TYPE_LABELS = {}

  # subclasses should override this
  URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS)

  # Regexps for URL paths that don't accept incoming webmentions. Currently
  # used by Blogger.
  PATH_BLACKLIST = ()

  created = ndb.DateTimeProperty(auto_now_add=True, required=True)
  url = ndb.StringProperty()
  status = ndb.StringProperty(choices=STATUSES, default='enabled')
  poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok')
  rate_limited = ndb.BooleanProperty(default=False)
  name = ndb.StringProperty()  # full human-readable name
  picture = ndb.StringProperty()
  domains = ndb.StringProperty(repeated=True)
  domain_urls = ndb.StringProperty(repeated=True)
  features = ndb.StringProperty(repeated=True, choices=FEATURES)
  superfeedr_secret = ndb.StringProperty()
  webmention_endpoint = ndb.StringProperty()

  # points to an oauth-dropins auth entity. The model class should be a
  # subclass of oauth_dropins.BaseAuth. the token should be generated with
  # the offline_access scope so that it doesn't expire.
  auth_entity = ndb.KeyProperty()

  #
  # listen-only properties
  #
  last_polled = ndb.DateTimeProperty(default=util.EPOCH)
  last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH)
  last_webmention_sent = ndb.DateTimeProperty()
  last_public_post = ndb.DateTimeProperty()
  recent_private_posts = ndb.IntegerProperty(default=0)

  # the last time we re-fetched the author's url looking for updated
  # syndication links
  last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH)

  # the last time we've seen a rel=syndication link for this Source.
  # we won't spend the time to re-fetch and look for updates if there's
  # never been one
  last_syndication_url = ndb.DateTimeProperty()
  # the last time we saw a syndication link in an h-feed, as opposed to just
  # on permalinks. background: https://github.com/snarfed/bridgy/issues/624
  last_feed_syndication_url = ndb.DateTimeProperty()

  last_activity_id = ndb.StringProperty()
  last_activities_etag = ndb.StringProperty()
  last_activities_cache_json = ndb.TextProperty()
  seen_responses_cache_json = ndb.TextProperty(compressed=True)

  # populated in Poll.poll(), used by handlers
  blocked_ids = ndb.JsonProperty(compressed=True)

  # maps updated property names to values that put_updates() writes back to
  # the datastore transactionally. set this to {} before beginning.
  updates = None

  # gr_source is *not* set to None by default here, since it needs to be unset
  # for __getattr__ to run when it's accessed.

  def __init__(self, *args, id=None, **kwargs):
    """Constructor. Escapes the key string id if it starts with `__`."""
    if id and id.startswith('__'):
      id = '\\' + id
    super().__init__(*args, id=id, **kwargs)

  def key_id(self):
    """Returns the key's unescaped string id."""
    id = self.key.id()
    return id[1:] if id[0] == '\\' else id

  @classmethod
  def new(cls, handler, **kwargs):
    """Factory method. Creates and returns a new instance for the current user.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  def __getattr__(self, name):
    """Lazily load the auth entity and instantiate :attr:`self.gr_source`.

    Once :attr:`self.gr_source` is set, this method will *not* be called;
    :attr:`gr_source` will be returned normally.
    """
    if name == 'gr_source' and self.auth_entity:
      auth_entity = self.auth_entity.get()
      args = auth_entity.access_token()
      if not isinstance(args, tuple):
        args = (args,)

      kwargs = {}
      if self.key.kind() == 'FacebookPage' and auth_entity.type == 'user':
        kwargs = {'user_id': self.key_id()}
      elif self.key.kind() == 'Instagram':
        kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE}
      elif self.key.kind() == 'Mastodon':
        args = (auth_entity.instance(),) + args
        kwargs = {'user_id': json_loads(auth_entity.user_json).get('id')}
      elif self.key.kind() == 'Twitter':
        kwargs = {'username': self.key_id()}

      self.gr_source = self.GR_CLASS(*args, **kwargs)
      return self.gr_source

    return getattr(super(Source, self), name)

  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id.

    By default, interprets id as just the key id. Subclasses may extend this
    to support usernames, etc.
    """
    if id and id.startswith('__'):
      id = '\\' + id
    return ndb.Key(cls, id).get()

  def user_tag_id(self):
    """Returns the tag URI for this source, e.g. 'tag:plus.google.com:123456'."""
    return self.gr_source.tag_uri(self.key_id())

  def bridgy_path(self):
    """Returns the Bridgy page URL path for this source."""
    return '/%s/%s' % (self.SHORT_NAME, self.key_id())

  def bridgy_url(self, handler):
    """Returns the Bridgy page URL for this source."""
    return util.host_url(handler) + self.bridgy_path()

  def silo_url(self, handler):
    """Returns the silo account URL, e.g. https://twitter.com/foo."""
    raise NotImplementedError()

  def label(self):
    """Human-readable label for this source."""
    return '%s (%s)' % (self.label_name(), self.GR_CLASS.NAME)

  def label_name(self):
    """Human-readable name or username for this source, whichever is
    preferred."""
    return self.name or self.key_id()

  @classmethod
  @ndb.transactional()
  def put_updates(cls, source):
    """Writes source.updates to the datastore transactionally.

    Returns:
      the updated :class:`Source`
    """
    if not source.updates:
      return source

    logging.info('Updating %s %s : %r', source.label(), source.bridgy_path(),
                 {k: v for k, v in source.updates.items()
                  if not k.endswith('_json')})

    updates = source.updates
    source = source.key.get()
    source.updates = updates
    for name, val in updates.items():
      setattr(source, name, val)

    if source.status == 'error':  # deprecated
      logging.warning('Resetting status from error to enabled')
      source.status = 'enabled'

    source.put()
    return source

  def poll_period(self):
    """Returns the poll frequency for this source, as a
    :class:`datetime.timedelta`.

    Defaults to ~30m, depending on silo. If we've never sent a webmention for
    this source, or the last one we sent was over a month ago, we drop them
    down to ~1d after a week long grace period.
    """
    now = datetime.datetime.now()
    if self.rate_limited:
      return self.RATE_LIMITED_POLL
    elif now < self.created + self.FAST_POLL_GRACE_PERIOD:
      return self.FAST_POLL
    elif not self.last_webmention_sent:
      return self.SLOW_POLL
    elif self.last_webmention_sent > now - datetime.timedelta(days=7):
      return self.FAST_POLL
    elif self.last_webmention_sent > now - datetime.timedelta(days=30):
      return self.FAST_POLL * 10
    else:
      return self.SLOW_POLL

  def should_refetch(self):
    """Returns True if we should run OPD refetch on this source now."""
    now = datetime.datetime.now()
    if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
      return True
    elif not self.last_syndication_url:
      return False

    period = (self.FAST_REFETCH
              if self.last_syndication_url > now - datetime.timedelta(days=14)
              else self.SLOW_REFETCH)
    return self.last_poll_attempt >= self.last_hfeed_refetch + period

  @classmethod
  def bridgy_webmention_endpoint(cls, domain='brid.gy'):
    """Returns the Bridgy webmention endpoint for this source type."""
    return 'https://%s/webmention/%s' % (domain, cls.SHORT_NAME)

  def has_bridgy_webmention_endpoint(self):
    """Returns True if this source uses Bridgy's webmention endpoint."""
    return self.webmention_endpoint in (
      self.bridgy_webmention_endpoint(),
      self.bridgy_webmention_endpoint(domain='www.brid.gy'))

  def get_author_urls(self):
    """Determine the author urls for a particular source.

    In debug mode, replace test domains with localhost.

    Returns:
      a list of string URLs, possibly empty
    """
    return [util.replace_test_domains_with_localhost(u)
            for u in self.domain_urls]

  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    https://github.com/snarfed/bridgy/issues/456
    https://github.com/snarfed/bridgy/issues/565

    Returns:
      sequence of ActivityStreams activity dicts
    """
    return []

  def get_activities_response(self, **kwargs):
    """Returns recent posts and embedded comments for this source.

    May be overridden by subclasses.
    """
    kwargs.setdefault('group_id', gr_source.SELF)
    resp = self.gr_source.get_activities_response(**kwargs)
    for activity in resp['items']:
      self._inject_user_urls(activity)
    return resp

  def get_activities(self, **kwargs):
    return self.get_activities_response(**kwargs)['items']

  def get_comment(self, comment_id, **kwargs):
    """Returns a comment from this source.

    Passes through to granary by default. May be overridden by subclasses.

    Args:
      comment_id: string, site-specific comment id
      kwargs: passed to :meth:`granary.source.Source.get_comment`

    Returns:
      dict, decoded ActivityStreams comment object, or None
    """
    comment = self.gr_source.get_comment(comment_id, **kwargs)
    if comment:
      self._inject_user_urls(comment)
    return comment

  def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
    """Returns an ActivityStreams 'like' activity object.

    Passes through to granary by default. May be overridden by subclasses.

    Args:
      activity_user_id: string id of the user who posted the original activity
      activity_id: string activity id
      like_user_id: string id of the user who liked the activity
      kwargs: passed to granary.Source.get_comment
    """
    return self.gr_source.get_like(activity_user_id, activity_id,
                                   like_user_id, **kwargs)

  def _inject_user_urls(self, activity):
    """Adds this user's web site URLs to their user mentions (in tags),
    in place."""
    obj = activity.get('object') or activity
    user_tag_id = self.user_tag_id()
    for tag in obj.get('tags', []):
      if tag.get('id') == user_tag_id:
        tag.setdefault('urls', []).extend(
            [{'value': u} for u in self.domain_urls])

  def create_comment(self, post_url, author_name, author_url, content):
    """Creates a new comment in the source silo.

    Must be implemented by subclasses.

    Args:
      post_url: string
      author_name: string
      author_url: string
      content: string

    Returns:
      response dict with at least 'id' field
    """
    raise NotImplementedError()

  def feed_url(self):
    """Returns the RSS or Atom (or similar) feed URL for this source.

    Must be implemented by subclasses. Currently only implemented by
    :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.

    Returns:
      string URL
    """
    raise NotImplementedError()

  def edit_template_url(self):
    """Returns the URL for editing this blog's template HTML.

    Must be implemented by subclasses. Currently only implemented by
    :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.

    Returns:
      string URL
    """
    raise NotImplementedError()

  @classmethod
  def button_html(cls, feature, **kwargs):
    """Returns an HTML string with a login form and button for this site.

    Mostly just passes through to
    :meth:`oauth_dropins.handlers.StartHandler.button_html`.

    Returns:
      string, HTML
    """
    assert feature in cls.FEATURES
    form_extra = (kwargs.pop('form_extra', '') +
                  '<input name="feature" type="hidden" value="%s" />' %
                  feature)

    source = kwargs.pop('source', None)
    if source:
      form_extra += ('\n<input name="id" type="hidden" value="%s" />' %
                     source.key_id())

    return cls.OAUTH_START_HANDLER.button_html(
      '/%s/start' % cls.SHORT_NAME,
      form_extra=form_extra,
      image_prefix='/oauth_dropins/static/',
      **kwargs)

  @classmethod
  def create_new(cls, handler, user_url=None, **kwargs):
    """Creates and saves a new :class:`Source` and adds a poll task for it.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to :meth:`new()`

    Returns:
      newly created :class:`Source`
    """
    source = cls.new(handler, **kwargs)
    if source is None:
      return None

    if not source.domain_urls:  # defer to the source if it already set this
      auth_entity = kwargs.get('auth_entity')
      if auth_entity and hasattr(auth_entity, 'user_json'):
        source.domain_urls, source.domains = source._urls_and_domains(
          auth_entity, user_url)
    logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)

    # check if this source already exists
    existing = source.key.get()
    if existing:
      # merge some fields
      source.features = set(source.features + existing.features)
      source.populate(**existing.to_dict(include=(
        'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled',
        'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret',
        'webmention_endpoint')))
      verb = 'Updated'
    else:
      verb = 'Added'

    author_urls = source.get_author_urls()
    link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
            if author_urls else 'http://indiewebify.me/#send-webmentions')
    feature = source.features[0] if source.features else 'listen'
    blurb = '%s %s. %s' % (
      verb, source.label(),
      'Try previewing a post from your web site!' if feature == 'publish'
      else '<a href="%s">Try a webmention!</a>' % link
      if feature == 'webmention'
      else "Refresh in a minute to see what we've found!")
    logging.info('%s %s', blurb, source.bridgy_url(handler))
    # uncomment to send email notification for each new user
    # if not existing:
    #   util.email_me(subject=blurb, body=source.bridgy_url(handler))

    source.verify()
    if source.verified():
      handler.messages = {blurb}

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'webmention' in source.features:
      superfeedr.subscribe(source, handler)

    if 'listen' in source.features:
      util.add_poll_task(source, now=True)
      util.add_poll_task(source)

    return source

  def verified(self):
    """Returns True if this source is ready to be used, False otherwise.

    See :meth:`verify()` for details. May be overridden by subclasses, e.g.
    :class:`tumblr.Tumblr`.
    """
    if not self.domains or not self.domain_urls:
      return False
    if 'webmention' in self.features and not self.webmention_endpoint:
      return False
    if ('listen' in self.features and
        not (self.webmention_endpoint or self.last_webmention_sent)):
      return False
    return True

  def verify(self, force=False):
    """Checks that this source is ready to be used.

    For blog and listen sources, this fetches their front page HTML and
    discovers their webmention endpoint. For publish sources, this checks
    that they have a domain.

    May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

    Args:
      force: if True, fully verifies (e.g. re-fetches the blog's HTML and
        performs webmention discovery) even if we already think this source
        is verified.
    """
    author_urls = [u for u, d in zip(self.get_author_urls(), self.domains)
                   if not util.in_webmention_blocklist(d)]
    if ((self.verified() and not force) or self.status == 'disabled' or
        not self.features or not author_urls):
      return

    author_url = author_urls[0]
    logging.info('Attempting to discover webmention endpoint on %s',
                 author_url)
    mention = send.WebmentionSend('https://brid.gy/', author_url)
    mention.requests_kwargs = {'timeout': util.HTTP_TIMEOUT,
                               'headers': util.REQUEST_HEADERS}
    try:
      mention._discoverEndpoint()
    except BaseException:
      logging.info('Error discovering webmention endpoint', stack_info=True)
      mention.error = {'code': 'EXCEPTION'}

    self._fetched_html = getattr(mention, 'html', None)
    error = getattr(mention, 'error', None)
    endpoint = getattr(mention, 'receiver_endpoint', None)
    if error or not endpoint:
      logging.info("No webmention endpoint found: %s %r", error, endpoint)
      self.webmention_endpoint = None
    else:
      logging.info("Discovered webmention endpoint %s", endpoint)
      self.webmention_endpoint = endpoint

    self.put()

  def _urls_and_domains(self, auth_entity, user_url):
    """Returns this user's valid (not webmention-blocklisted) URLs and domains.

    Converts the auth entity's user_json to an ActivityStreams actor and uses
    its 'urls' and 'url' fields. May be overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])
    """
    user = json_loads(auth_entity.user_json)
    actor = (user.get('actor')  # for Instagram; its user_json is IndieAuth
             or self.gr_source.user_to_actor(user))
    logging.debug('Extracting URLs and domains from actor: %s',
                  json_dumps(actor, indent=2))

    candidates = util.trim_nulls(util.uniquify(
      [user_url] + microformats2.object_urls(actor)))

    if len(candidates) > MAX_AUTHOR_URLS:
      logging.info('Too many profile links! Only resolving the first %s: %s',
                   MAX_AUTHOR_URLS, candidates)

    urls = []
    for i, url in enumerate(candidates):
      resolved = self.resolve_profile_url(url, resolve=i < MAX_AUTHOR_URLS)
      if resolved:
        urls.append(resolved)

    final_urls = []
    domains = []
    for url in util.dedupe_urls(urls):  # normalizes domains to lower case
      # skip links on this source's domain itself. only currently needed for
      # Mastodon; the other silo domains are in the webmention blocklist.
      domain = util.domain_from_link(url)
      if domain != self.gr_source.DOMAIN:
        final_urls.append(url)
        domains.append(domain)

    return final_urls, domains

  @staticmethod
  def resolve_profile_url(url, resolve=True):
    """Resolves a profile URL to be added to a source.

    Args:
      url: string
      resolve: boolean, whether to make HTTP requests to follow redirects, etc.

    Returns:
      string, resolved URL, or None
    """
    final, _, ok = util.get_webmention_target(url, resolve=resolve)
    if not ok:
      return None

    final = final.lower()
    if util.schemeless(final).startswith(util.schemeless(url.lower())):
      # redirected to a deeper path. use the original higher level URL. #652
      final = url

    # If final has a path segment check if root has a matching rel=me.
    match = re.match(r'^(https?://[^/]+)/.+', final)
    if match and resolve:
      root = match.group(1)
      try:
        mf2 = util.fetch_mf2(root)
        me_urls = mf2['rels'].get('me', [])
        if final in me_urls:
          final = root
      except requests.RequestException:
        logging.warning("Couldn't fetch %s, preserving path in %s",
                        root, final, stack_info=True)

    return final

  def canonicalize_url(self, url, activity=None, **kwargs):
    """Canonicalizes a post or object URL.

    Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`.
    """
    return (self.URL_CANONICALIZER(url, **kwargs) if self.URL_CANONICALIZER
            else url)

  def infer_profile_url(self, url):
    """Given an arbitrary URL representing a person, try to find their
    profile URL for *this* service.

    Queries Bridgy's registered accounts for users with a particular domain
    in their silo profile.

    Args:
      url: string, a person's URL

    Returns:
      a string URL for their profile on this service (or None)
    """
    domain = util.domain_from_link(url)
    if domain == self.gr_source.DOMAIN:
      return url
    user = self.__class__.query(self.__class__.domains == domain).get()
    if user:
      return self.gr_source.user_url(user.key_id())

  def preprocess_for_publish(self, obj):
    """Preprocess an object before trying to publish it.

    By default this tries to massage person tags so that the tag's "url"
    points to the person's profile on this service (as opposed to a person's
    homepage).

    The object is modified in place.

    Args:
      obj: ActivityStreams activity or object dict
    """
    for tag in obj.get('tags', []):
      if tag.get('objectType') == 'person':
        silo_url = None
        for url in microformats2.object_urls(tag):
          silo_url = url and self.infer_profile_url(url)
          if silo_url:
            break
        if silo_url:
          tag['url'] = silo_url

    # recurse on contained object(s)
    for obj in util.get_list(obj, 'object'):
      self.preprocess_for_publish(obj)

  def on_new_syndicated_post(self, syndpost):
    """Called when a new :class:`SyndicatedPost` is stored for this source.

    Args:
      syndpost: :class:`SyndicatedPost`
    """
    pass

  def is_private(self):
    """Returns True if this source is private aka protected.

    ...ie their posts are not public.
    """
    return False

  def is_activity_public(self, activity):
    """Returns True if the given activity is public, False otherwise.

    Just wraps :meth:`granary.source.Source.is_public`. Subclasses may
    override.
    """
    return gr_source.Source.is_public(activity)

  def is_beta_user(self):
    """Returns True if this is a "beta" user opted into new features.

    Beta users come from beta_users.txt.
    """
    return self.bridgy_path() in util.BETA_USER_PATHS

  def load_blocklist(self):
    """Fetches this user's blocklist, if supported, and stores it in the
    entity."""
    if not self.HAS_BLOCKS:
      return

    try:
      ids = self.gr_source.get_blocklist_ids()
    except gr_source.RateLimited as e:
      ids = e.partial or []

    self.blocked_ids = ids[:BLOCKLIST_MAX_IDS]
    self.put()

  def is_blocked(self, obj):
    """Returns True if an object's author is being blocked.

    ...ie they're in this user's block list.

    Note that this method is tested in test_twitter.py, not test_models.py,
    for historical reasons.
    """
    if not self.blocked_ids:
      return False

    for o in [obj] + util.get_list(obj, 'object'):
      for field in 'author', 'actor':
        if o.get(field, {}).get('numeric_id') in self.blocked_ids:
          return True

class SyndicatedPost(ndb.Model):
  """Represents a syndicated post and its discovered original (or not if we
  found no original post).

  We discover the relationship by following rel=syndication links on the
  author's h-feed. See :mod:`original_post_discovery`.

  When a :class:`SyndicatedPost` entity is about to be stored,
  :meth:`source.Source.on_new_syndicated_post()` is called before it's stored.
  """
  # Turn off instance and memcache caching. See Response for details.
  _use_cache = False
  _use_memcache = False

  syndication = ndb.StringProperty()
  original = ndb.StringProperty()
  created = ndb.DateTimeProperty(auto_now_add=True)
  updated = ndb.DateTimeProperty(auto_now=True)

  @classmethod
  @ndb.transactional()
  def insert_original_blank(cls, source, original):
    """Insert a new original -> None relationship.

    Does a check-and-set to make sure no previous relationship exists for
    this original. If there is, nothing will be added.

    Args:
      source: :class:`Source` subclass
      original: string
    """
    if cls.query(cls.original == original, ancestor=source.key).get():
      return
    cls(parent=source.key, original=original, syndication=None).put()

  @classmethod
  @ndb.transactional()
  def insert_syndication_blank(cls, source, syndication):
    """Insert a new syndication -> None relationship.

    Does a check-and-set to make sure no previous relationship exists for
    this syndication. If there is, nothing will be added.

    Args:
      source: :class:`Source` subclass
      syndication: string
    """
    if cls.query(cls.syndication == syndication, ancestor=source.key).get():
      return
    cls(parent=source.key, original=None, syndication=syndication).put()

  @classmethod
  @ndb.transactional()
  def insert(cls, source, syndication, original):
    """Insert a new (non-blank) syndication -> original relationship.

    This method does a check-and-set within transaction to avoid including
    duplicate relationships.

    If blank entries exist for the syndication or original URL (i.e.
    syndication -> None or original -> None), they will first be removed.
    If non-blank relationships exist, they will be retained.

    Args:
      source: :class:`Source` subclass
      syndication: string (not None)
      original: string (not None)

    Returns:
      SyndicatedPost: newly created or preexisting entity
    """
    # check for an exact match
    duplicate = cls.query(cls.syndication == syndication,
                          cls.original == original,
                          ancestor=source.key).get()
    if duplicate:
      return duplicate

    # delete blanks (expect at most 1 of each)
    for filter in (ndb.AND(cls.syndication == syndication,
                           cls.original == None),
                   ndb.AND(cls.original == original,
                           cls.syndication == None)):
      for synd in cls.query(filter, ancestor=source.key).fetch(
          keys_only=True):
        synd.delete()

    r = cls(parent=source.key, original=original, syndication=syndication)
    r.put()
    return r

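# Usage sketch (hypothetical URLs; assumes an ndb context and a saved source
# entity): insert() replaces the blank placeholder rows left by the two
# insert_*_blank() methods with the real relationship.
def _example_syndicated_post(source):
  SyndicatedPost.insert_original_blank(source, 'https://example.com/post')
  relationship = SyndicatedPost.insert(
      source, syndication='https://silo.example/123',
      original='https://example.com/post')
  return relationship  # the blank original -> None row has been deleted
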
class OtherKind(ndb.Model):
  foo = ndb.StringProperty()
  bar = ndb.BooleanProperty(default=True)

class Animal(ndb.PolyModel):
  one = ndb.StringProperty()

class Visit(ndb.Model):
  """Visit entity registers visitor IP address & timestamp."""
  visitor = ndb.StringProperty()
  timestamp = ndb.DateTimeProperty(auto_now_add=True)

class Feline(Animal):
  two = ndb.StringProperty()

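# PolyModel sketch: Cat (defined earlier) extends Feline extends Animal, so
# all three share the root Animal kind, and a query on the root class returns
# entities of every subclass. Assumes an active ndb client context.
def _example_polymodel_query():
  Cat(one='a', two='b', three='c').put()
  # The fetched Cat carries its class hierarchy in the class_ property,
  # e.g. ['Animal', 'Feline', 'Cat'].
  return Animal.query().fetch()
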
class Query(ndb.Model):
  text = ndb.StringProperty()
  date = ndb.DateTimeProperty(auto_now_add=True)
  user_id = ndb.StringProperty()

class SomeKind(ndb.Model):
  foo = ndb.StringProperty()
  created_at = ndb.DateTimeProperty(indexed=True, auto_now_add=True)
  updated_at = ndb.DateTimeProperty(indexed=True, auto_now=True)

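# Timestamp sketch for the SomeKind above: auto_now_add fills created_at once
# on the first put(); auto_now refreshes updated_at on every put().
def _example_timestamps(entity):
  entity.put()  # sets both created_at and updated_at
  entity.foo = 'changed'
  entity.put()  # updated_at advances; created_at does not
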
class Topic(ndb.Model):
  action_text = ndb.StringProperty()

  @classmethod
  def query_topic(cls, ancestor_key):
    return cls.query(ancestor=ancestor_key)

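# Ancestor-query sketch (hypothetical parent kind; assumes an ndb context):
# entities created with parent=ancestor_key live in that entity group, so
# query_topic() sees them with strong consistency.
def _example_topic_query():
  board = ndb.Key('Board', 'general')  # hypothetical parent key
  Topic(parent=board, action_text='hello').put()
  return Topic.query_topic(board).fetch()
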
class OtherKind(ndb.Model):
  one = ndb.StringProperty()
  two = ndb.StringProperty()

class Synonym(ndb.Model):
  synonym = ndb.StringProperty()

  @classmethod
  def query_synonym(cls, ancestor_key):
    return cls.query(ancestor=ancestor_key)

class BModel(ndb.Model):
  s_bar = ndb.StringProperty()
  key_a = ndb.KeyProperty(kind="AModel", indexed=True)

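# Relationship sketch tying the AModel/BModel/CModel definitions together
# (assumes an ndb context): CModel.key_a is computed by dereferencing key_b,
# so reading it issues a datastore get() every time.
def _example_key_chain():
  a_key = AModel(s_foo='a').put()
  b_key = BModel(s_bar='b', key_a=a_key).put()
  c = CModel(s_foobar='c', key_b=b_key)
  return c.key_a  # == a_key, via self.key_b.get().key_a
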
class AnyKind(ndb.Model):
  foo = ndb.IntegerProperty()
  bar = ndb.StringProperty()
  baz = ndb.IntegerProperty()
  qux = ndb.StringProperty()

class SomeKind(ndb.Model):
  foo = ndb.StringProperty(repeated=True)

class MyKind(ndb.Model):
  bar = ndb.StringProperty()

class SomeKind(ndb.Model):
  foo = ndb.StringProperty()
  bar = ndb.StringProperty(name="notbar")

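# Property-name sketch for the SomeKind above: `bar` is the Python attribute,
# but the value is stored and indexed in Datastore under "notbar". ndb code
# keeps using `bar`; only raw datastore access (e.g. GQL) sees "notbar".
def _example_renamed_property():
  entity = SomeKind(foo='x', bar='y')
  entity.put()
  return SomeKind.query(SomeKind.bar == 'y').get()  # filters on "notbar"
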
class FacebookAuth(models.BaseAuth):
  """An authenticated Facebook user or page.

  Provides methods that return information about this user (or page) and make
  OAuth-signed requests to Facebook's HTTP-based APIs. Stores OAuth
  credentials in the datastore. See models.BaseAuth for usage details.

  Facebook-specific details: implements urlopen() but not api(). The key name
  is the user's or page's Facebook ID.
  """
  type = ndb.StringProperty(choices=('user', 'page'))
  auth_code = ndb.StringProperty()
  access_token_str = ndb.StringProperty(required=True)
  # https://developers.facebook.com/docs/graph-api/reference/user#fields
  user_json = ndb.TextProperty(required=True)
  # https://developers.facebook.com/docs/graph-api/reference/user/accounts#fields
  pages_json = ndb.TextProperty()

  def site_name(self):
    return 'Facebook'

  def user_display_name(self):
    """Returns the user's or page's name."""
    return json_loads(self.user_json)['name']

  def access_token(self):
    """Returns the OAuth access token string."""
    return self.access_token_str

  def urlopen(self, url, **kwargs):
    """Wraps urlopen() and adds OAuth credentials to the request."""
    return models.BaseAuth.urlopen_access_token(url, self.access_token_str,
                                                **kwargs)

  def for_page(self, page_id):
    """Returns a new, unsaved FacebookAuth entity for a page in pages_json.

    The returned entity's properties will be populated with the page's data.
    access_token will be the page access token, user_json will be the page
    object, and pages_json will be a single-element list with the page.

    If page_id is not in pages_json, returns None.

    Args:
      page_id: string, Facebook page id
    """
    for page in json_loads(self.pages_json):
      id = page.get('id')
      if id == page_id:
        entity = FacebookAuth(id=id, type='page',
                              pages_json=json_dumps([page]),
                              access_token_str=page.get('access_token'))
        entity.user_json = entity.urlopen(API_PAGE_URL).read()
        logging.debug('Page object: %s', entity.user_json)
        return entity

    return None

  def is_authority_for(self, key):
    """Additionally check if the key represents a Page that this user has
    authority over.

    Args:
      key: ndb.Key

    Returns:
      boolean: true if key represents this user or one of the user's pages.
    """
    return super(FacebookAuth, self).is_authority_for(key) or any(
      key == self.for_page(page.get('id')).key
      for page in json_loads(self.pages_json))

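# Usage sketch (hypothetical page id): for_page() mints an unsaved page-level
# auth entity whose access_token_str is the page token from pages_json, and
# whose user_json is the fetched page object.
def _example_page_auth(user_auth):
  page_auth = user_auth.for_page('1234567890')
  if page_auth:  # None if the id isn't one of this user's pages
    return page_auth.user_display_name()  # name from the page object
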
class SomeKind(ndb.Model):
  foo = ndb.IntegerProperty()
  bar = ndb.StringProperty()

class Job(Model):
  """Definition of a job type used by the bots."""
  VALID_NAME_REGEX = NAME_CHECK_REGEX

  # Job type name.
  name = ndb.StringProperty()

  # Job environment string.
  environment_string = ndb.TextProperty()

  # The platform that this job can run on.
  platform = ndb.StringProperty()

  # Blobstore key of the custom binary for this job.
  custom_binary_key = ndb.StringProperty()

  # Filename for the custom binary.
  custom_binary_filename = ndb.StringProperty()

  # Revision of the custom binary.
  custom_binary_revision = ndb.IntegerProperty()

  # Description of the job.
  description = ndb.TextProperty()

  # Templates to use, if any.
  templates = ndb.StringProperty(repeated=True)

  # Project name.
  project = ndb.StringProperty()

  # Keywords used for searching.
  keywords = ndb.StringProperty(repeated=True)

  # If this is set, this Job is for an external reproduction infrastructure.
  # The value here is the topic used for reproduction requests.
  external_reproduction_topic = ndb.StringProperty()

  # If this is set, this Job is for an external reproduction infrastructure.
  # The value here is the subscription used for receiving reproduction
  # updates.
  external_updates_subscription = ndb.StringProperty()

  def is_external(self):
    """Whether this job is external."""
    return (bool(self.external_reproduction_topic) or
            bool(self.external_updates_subscription))

  def get_environment(self):
    """Get the environment as a dict for this job, including any environment
    variables in its templates."""
    if not self.templates:
      return environment.parse_environment_definition(self.environment_string)

    job_environment = {}
    for template_name in self.templates:
      template = JobTemplate.query(JobTemplate.name == template_name).get()
      if not template:
        continue

      template_environment = environment.parse_environment_definition(
          template.environment_string)
      job_environment.update(template_environment)

    environment_overrides = environment.parse_environment_definition(
        self.environment_string)
    job_environment.update(environment_overrides)
    return job_environment

  def get_environment_string(self):
    """Get the environment string for this job, including any environment
    variables in its templates. Avoid using this if possible."""
    environment_string = ''
    job_environment = self.get_environment()
    for key, value in six.iteritems(job_environment):
      environment_string += f'{key} = {value}\n'

    return environment_string

  def populate_indices(self):
    """Populate keywords for fast job searching."""
    self.keywords = list(
        search_tokenizer.tokenize(self.name)
        | search_tokenizer.tokenize(self.project))

  def _pre_put_hook(self):
    """Pre-put hook."""
    self.project = self.get_environment().get('PROJECT_NAME',
                                              utils.default_project_name())
    self.populate_indices()

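# Environment-layering sketch (hypothetical job and template names; assumes
# an ndb context): template environments are applied in listed order, then
# the job's own environment_string overrides them. Missing templates are
# silently skipped by get_environment().
def _example_job_environment():
  job = Job(
      name='libfuzzer_asan_example',
      templates=['engine_asan', 'libfuzzer'],  # hypothetical JobTemplates
      environment_string='PROJECT_NAME = example\n')
  return job.get_environment()  # job-level PROJECT_NAME wins over templates
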