Example #1
0
class Plan(ndb.Model):
    """NDB model with a string ``name`` and a text ``data`` property."""
    # Declared as a StringProperty.
    name = ndb.StringProperty()
    # Declared as a TextProperty.
    data = ndb.TextProperty()
Example #2
0
 class SomeKind(ndb.Model):
     """NDB model with a single string property, ``foo``."""
     foo = ndb.StringProperty()
Example #3
0
 class Dog(ndb.Model):
     """NDB model with a single string property, ``name``."""
     name = ndb.StringProperty()
Example #4
0
 class AModel(ndb.Model):
     """NDB model with a single string property, ``s_foo``."""
     s_foo = ndb.StringProperty()
Example #5
0
 class CModel(ndb.Model):
     """NDB model holding a key to a ``BModel`` plus a value computed from it."""
     s_foobar = ndb.StringProperty()
     # Key of kind "BModel"; explicitly indexed.
     key_b = ndb.KeyProperty(kind="BModel", indexed=True)
     # Computed by fetching the entity that key_b points to and reading its
     # key_a attribute; None when key_b is unset.
     # NOTE(review): the fetch result is dereferenced without a None check, so
     # this presumably fails if the referenced entity is missing - confirm.
     key_a = ndb.ComputedProperty(  # Issue here
         lambda self: self.key_b.get().key_a if self.key_b else None, )
Example #6
0
 class Cat(Feline):
     """``Feline`` subclass that adds a string property, ``three``."""
     three = ndb.StringProperty()
Example #7
0
 class SubKind(Base):
     """``Base`` subclass that adds a string property, ``foo``."""
     foo = ndb.StringProperty()
Example #8
0
File: models.py Project: google/osv
class Bug(ndb.Model):
    """Bug entity."""
    OSV_ID_PREFIX = 'OSV-'
    # Very large fake version to use when there is no fix available.
    _NOT_FIXED_SEMVER = '999999.999999.999999'

    # Display ID as used by the source database. The full qualified database that
    # OSV tracks this as may be different.
    db_id = ndb.StringProperty()
    # Other IDs this bug is known as.
    aliases = ndb.StringProperty(repeated=True)
    # Related IDs.
    related = ndb.StringProperty(repeated=True)
    # Status of the bug.
    status = ndb.IntegerProperty()
    # Timestamp when Bug was allocated.
    timestamp = ndb.DateTimeProperty()
    # When the entry was last edited.
    last_modified = ndb.DateTimeProperty()
    # When the entry was withdrawn.
    withdrawn = ndb.DateTimeProperty()
    # The source identifier.
    # For OSS-Fuzz, this oss-fuzz:<ClusterFuzz testcase ID>.
    # For others this is <source>:<path/to/source>.
    source_id = ndb.StringProperty()
    # The main fixed commit (from bisection).
    fixed = ndb.StringProperty(default='')
    # The main regressing commit (from bisection).
    regressed = ndb.StringProperty(default='')
    # All affected ranges.
    affected_ranges = ndb.StructuredProperty(AffectedRange, repeated=True)
    # List of affected versions.
    affected = ndb.TextProperty(repeated=True)
    # List of normalized versions indexed for fuzzy matching.
    affected_fuzzy = ndb.StringProperty(repeated=True)
    # OSS-Fuzz issue ID.
    issue_id = ndb.StringProperty()
    # Package URL for this package.
    purl = ndb.StringProperty()
    # Project/package name for the bug.
    project = ndb.StringProperty()
    # Package ecosystem for the project.
    ecosystem = ndb.StringProperty()
    # Summary for the bug.
    summary = ndb.TextProperty()
    # Vulnerability details.
    details = ndb.TextProperty()
    # Severity of the bug.
    severity = ndb.StringProperty(validator=_check_valid_severity)
    # Whether or not the bug is public (OSS-Fuzz only).
    public = ndb.BooleanProperty()
    # Reference URL types (dict of url -> type).
    reference_url_types = ndb.JsonProperty()
    # Search indices (auto-populated)
    search_indices = ndb.StringProperty(repeated=True)
    # Whether or not the bug has any affected versions (auto-populated).
    has_affected = ndb.BooleanProperty()
    # Source of truth for this Bug.
    source_of_truth = ndb.IntegerProperty(default=SourceOfTruth.INTERNAL)
    # Whether the bug is fixed (indexed for querying).
    is_fixed = ndb.BooleanProperty()
    # Database specific.
    database_specific = ndb.JsonProperty()
    # Ecosystem specific.
    ecosystem_specific = ndb.JsonProperty()
    # Normalized SEMVER fixed indexes for querying.
    semver_fixed_indexes = ndb.StringProperty(repeated=True)
    # The source of this Bug.
    source = ndb.StringProperty()

    def id(self):
        """Get the bug ID."""
        if self.db_id:
            return self.db_id

        # TODO(ochang): Remove once all existing bugs have IDs migrated.
        if re.match(r'^\d+', self.key.id()):
            return self.OSV_ID_PREFIX + self.key.id()

        return self.key.id()

    @property
    def repo_url(self):
        """Repo URL."""
        for affected_range in self.affected_ranges:
            if affected_range.repo_url:
                return affected_range.repo_url

        return None

    @classmethod
    def get_by_id(cls, vuln_id, *args, **kwargs):
        """Look up a bug by ``db_id`` first, then by datastore key ID.

        When no entity has ``db_id == vuln_id``, the lookup falls back to the
        parent ``get_by_id`` with ``OSV_ID_PREFIX`` stripped from the front of
        ``vuln_id`` if present. Extra arguments are forwarded to the parent.
        """
        by_db_id = cls.query(cls.db_id == vuln_id).get()
        if by_db_id:
            return by_db_id

        # TODO(ochang): Remove once all existing bugs have IDs migrated.
        prefix = cls.OSV_ID_PREFIX
        key_id = vuln_id[len(prefix):] if vuln_id.startswith(prefix) else vuln_id
        return super().get_by_id(key_id, *args, **kwargs)

    def _tokenize(self, value):
        """Tokenize value for indexing."""
        if not value:
            return []

        value_lower = value.lower()
        return re.split(r'\W+', value_lower) + [value_lower]

    def _pre_put_hook(self):
        """Populate derived fields and validate the bug before it is stored.

        Fills in the search indices, ``has_affected``, ``affected_fuzzy``,
        ``last_modified`` (when unset), ``is_fixed``, ``semver_fixed_indexes``
        and ``source``, then assigns a key if the entity does not have one.

        Raises:
            ValueError: if the source or the DB ID is missing, or if no source
                repository is found for the source.
        """
        tokens = set(self._tokenize(self.id()))
        for field_value in (self.project, self.ecosystem):
            if field_value:
                tokens.update(self._tokenize(field_value))
        self.search_indices = sorted(tokens)

        self.has_affected = bool(self.affected) or any(
            entry.type in ('SEMVER', 'ECOSYSTEM')
            for entry in self.affected_ranges)
        self.affected_fuzzy = bug.normalize_tags(self.affected)

        if not self.last_modified:
            self.last_modified = utcnow()

        self.is_fixed = any(entry.fixed for entry in self.affected_ranges)

        # SEMVER ranges without a fix are indexed under a very large version.
        self.semver_fixed_indexes = [
            semver_index.normalize(entry.fixed or self._NOT_FIXED_SEMVER)
            for entry in self.affected_ranges
            if entry.type == 'SEMVER'
        ]

        if self.source_id:
            self.source, _ = sources.parse_source_id(self.source_id)

        if not self.source:
            raise ValueError('Source not specified for Bug.')

        if not self.db_id:
            raise ValueError('DB ID not specified for Bug.')

        if self.key:
            return

        source_repo = get_source_repository(self.source)
        if not source_repo:
            raise ValueError(f'Invalid source {self.source}')

        # IDs that already carry the repository's prefix are used verbatim;
        # anything else is namespaced by the source name.
        prefix = source_repo.db_prefix
        if prefix and self.db_id.startswith(prefix):
            key_id = self.db_id
        else:
            key_id = f'{self.source}:{self.db_id}'

        self.key = ndb.Key(Bug, key_id)

    def update_from_vulnerability(self, vulnerability):
        """Copy fields from a Vulnerability proto onto this bug.

        The ID is not touched. Timestamps, the purl and the database/ecosystem
        specific blobs are only overwritten when the proto carries them.
        """
        self.summary = vulnerability.summary
        self.details = vulnerability.details

        url_types = {}
        for ref in vulnerability.references:
            url_types[ref.url] = vulnerability_pb2.Reference.Type.Name(ref.type)
        self.reference_url_types = url_types

        # (proto field, entity attribute) pairs for the optional timestamps.
        timestamp_fields = (
            ('modified', 'last_modified'),
            ('published', 'timestamp'),
            ('withdrawn', 'withdrawn'),
        )
        for proto_field, attr_name in timestamp_fields:
            if vulnerability.HasField(proto_field):
                setattr(self, attr_name,
                        getattr(vulnerability, proto_field).ToDatetime())

        package = vulnerability.package
        self.project = package.name
        self.ecosystem = package.ecosystem
        if package.purl:
            self.purl = package.purl

        self.affected = list(vulnerability.affects.versions)
        self.aliases = list(vulnerability.aliases)
        self.related = list(vulnerability.related)

        vuln_dict = sources.vulnerability_to_dict(vulnerability)
        if vulnerability.database_specific:
            self.database_specific = vuln_dict['database_specific']

        if vulnerability.ecosystem_specific:
            self.ecosystem_specific = vuln_dict['ecosystem_specific']

        range_type_name = vulnerability_pb2.AffectedRange.Type.Name
        self.affected_ranges = [
            AffectedRange(type=range_type_name(entry.type),
                          repo_url=entry.repo,
                          introduced=entry.introduced or '',
                          fixed=entry.fixed or '')
            for entry in vulnerability.affects.ranges
        ]

    def to_vulnerability(self, include_source=False):
        """Convert this bug to a Vulnerability proto.

        Args:
            include_source: when true and the bug's source repository has a
                link, a ``source`` entry built from that link and the bug's
                source path is added to ``database_specific``.

        Returns:
            A ``vulnerability_pb2.Vulnerability``. Bugs whose status is
            ``INVALID`` are emitted without ``affects`` and with ``details``
            set to ``'INVALID'``. Timestamps that are unset on the entity are
            left unset on the proto.
        """

        def _timestamp_or_none(value):
            """Return a Timestamp proto for ``value``, or None if it is unset."""
            if not value:
                return None
            stamp = timestamp_pb2.Timestamp()
            stamp.FromDatetime(value)
            return stamp

        package = vulnerability_pb2.Package(name=self.project,
                                            ecosystem=self.ecosystem,
                                            purl=self.purl)

        affects = vulnerability_pb2.Affects(versions=self.affected)
        for affected_range in self.affected_ranges:
            affects.ranges.add(type=vulnerability_pb2.AffectedRange.Type.Value(
                affected_range.type),
                               repo=affected_range.repo_url,
                               introduced=affected_range.introduced,
                               fixed=affected_range.fixed)

        details = self.details
        if self.status == bug.BugStatus.INVALID:
            affects = None
            details = 'INVALID'

        modified = _timestamp_or_none(self.last_modified)
        withdrawn = _timestamp_or_none(self.withdrawn)
        # Guarded like the other timestamps: converting an unset (None)
        # timestamp would raise instead of leaving the field empty.
        published = _timestamp_or_none(self.timestamp)

        references = []
        if self.reference_url_types:
            for url, url_type in self.reference_url_types.items():
                references.append(
                    vulnerability_pb2.Reference(
                        url=url,
                        type=vulnerability_pb2.Reference.Type.Value(url_type)))

        result = vulnerability_pb2.Vulnerability(id=self.id(),
                                                 published=published,
                                                 modified=modified,
                                                 aliases=self.aliases,
                                                 related=self.related,
                                                 withdrawn=withdrawn,
                                                 summary=self.summary,
                                                 details=details,
                                                 package=package,
                                                 affects=affects,
                                                 references=references)

        if self.ecosystem_specific:
            result.ecosystem_specific.update(self.ecosystem_specific)
        if self.database_specific:
            result.database_specific.update(self.database_specific)

        if self.source and include_source:
            source_repo = get_source_repository(self.source)
            if not source_repo or not source_repo.link:
                return result

            result.database_specific.update({
                'source':
                source_repo.link + sources.source_path(source_repo, self),
            })

        return result
Example #9
0
File: models.py Project: google/osv
class SourceRepository(ndb.Model):
    """Source repository."""
    # The type of the repository.
    type = ndb.IntegerProperty()
    # The name of the source.
    name = ndb.StringProperty()
    # The repo URL for the source.
    repo_url = ndb.StringProperty()
    # The username to use for SSH auth.
    repo_username = ndb.StringProperty()
    # Optional branch for repo.
    repo_branch = ndb.StringProperty()
    # Bucket name.
    bucket = ndb.StringProperty()
    # The directory in the repo where Vulnerability data is stored.
    directory_path = ndb.StringProperty()
    # Last synced hash.
    last_synced_hash = ndb.StringProperty()
    # Last date recurring updates were requested.
    last_update_date = ndb.DateProperty()
    # Patterns of files to exclude (regex).
    ignore_patterns = ndb.StringProperty(repeated=True)
    # Whether this repository is editable.
    editable = ndb.BooleanProperty(default=False)
    # Default extension.
    extension = ndb.StringProperty(default='.yaml')
    # Key path within each file to store the vulnerability.
    key_path = ndb.StringProperty()
    # If true, don't analyze any git ranges.
    ignore_git = ndb.BooleanProperty(default=False)
    # Whether to detect cherrypicks or not (slow for large repos).
    detect_cherrypicks = ndb.BooleanProperty(default=True)
    # Whether to populate "versions" from git ranges.
    versions_from_repo = ndb.BooleanProperty(default=True)
    # HTTP link prefix.
    link = ndb.StringProperty()
    # DB prefix, if the database allocates its own.
    db_prefix = ndb.StringProperty()

    def ignore_file(self, file_path):
        """Tell whether ``file_path`` should be ignored.

        A file is ignored when its base name matches (from the start, via
        ``re.match``) any regex in ``ignore_patterns``. With no patterns
        configured nothing is ignored.
        """
        patterns = self.ignore_patterns
        if not patterns:
            return False

        base_name = os.path.basename(file_path)
        return any(re.match(regex, base_name) for regex in patterns)

    def _pre_put_hook(self):
        """Validate the repository before it is stored.

        Raises:
            ValueError: if a BUCKET-type repository is marked editable.
        """
        is_bucket = self.type == SourceRepositoryType.BUCKET
        if is_bucket and self.editable:
            raise ValueError('BUCKET SourceRepository cannot be editable.')
Example #10
0
class Webmentions(StringIdModel):
  """A bundle of links to send webmentions for.

  Use the :class:`Response` and :class:`BlogPost` concrete subclasses below.
  """
  STATUSES = ('new', 'processing', 'complete', 'error')

  # Turn off instance and memcache caching. See Source for details.
  _use_cache = False
  _use_memcache = False

  source = ndb.KeyProperty()
  status = ndb.StringProperty(choices=STATUSES, default='new')
  leased_until = ndb.DateTimeProperty()
  created = ndb.DateTimeProperty(auto_now_add=True)
  updated = ndb.DateTimeProperty(auto_now=True)

  # Original post links, ie webmention targets
  sent = ndb.StringProperty(repeated=True)
  unsent = ndb.StringProperty(repeated=True)
  error = ndb.StringProperty(repeated=True)
  failed = ndb.StringProperty(repeated=True)
  skipped = ndb.StringProperty(repeated=True)

  def label(self):
    """Returns a human-readable description of this entity for log messages.

    Abstract here; subclasses provide the implementation.
    """
    raise NotImplementedError

  def add_task(self):
    """Adds a propagate task for this entity.

    Abstract here; subclasses provide the implementation.
    """
    raise NotImplementedError

  @ndb.transactional()
  def get_or_save(self):
    """Returns the stored entity for this key, storing this one if none exists.

    When this entity is new, it is either queued for propagation (if it has
    unsent or error targets) or marked 'complete', and then written to the
    datastore.
    """
    stored = self.key.get()
    if stored:
      return stored

    if not (self.unsent or self.error):
      self.status = 'complete'
    else:
      logging.debug('New webmentions to propagate! %s', self.label())
      self.add_task()

    self.put()
    return self

  def restart(self):
    """Resets status to 'new', requeues every target, and adds a propagate task.

    All sent, error, failed and skipped targets are folded back into unsent
    (deduplicated), and cached webmention endpoints for them are dropped.
    """
    self.status = 'new'
    all_targets = (self.unsent + self.sent + self.error +
                   self.failed + self.skipped)
    self.unsent = util.dedupe_urls(all_targets)
    self.sent = self.error = self.failed = self.skipped = []

    # clear any cached webmention endpoints
    with util.webmention_endpoint_cache_lock:
      for target in self.unsent:
        cache_key = util.webmention_endpoint_cache_key(target)
        util.webmention_endpoint_cache.pop(cache_key, None)

    # this datastore put and task add should be transactional, but Cloud Tasks
    # doesn't support that :(
    # https://cloud.google.com/appengine/docs/standard/python/taskqueue/push/migrating-push-queues#features-not-available
    self.put()
    self.add_task()
Example #11
0
class Response(Webmentions):
  """A comment, like, or repost to be propagated.

  The key name is the comment object id as a tag URI.
  """
  # ActivityStreams JSON activity and comment, like, or repost
  type = ndb.StringProperty(choices=VERB_TYPES, default='comment')
  # These are TextProperty, and not JsonProperty, so that their plain text is
  # visible in the App Engine admin console. (JsonProperty uses a blob. :/)
  activities_json = ndb.TextProperty(repeated=True)
  response_json = ndb.TextProperty()
  # Old values for response_json. Populated when the silo reports that the
  # response has changed, e.g. the user edited a comment or changed their RSVP
  # to an event.
  old_response_jsons = ndb.TextProperty(repeated=True)
  # JSON dict mapping original post url to activity index in activities_json.
  # only set when there's more than one activity.
  urls_to_activity = ndb.TextProperty()
  # Original post links found by original post discovery
  original_posts = ndb.StringProperty(repeated=True)

  def label(self):
    """Returns 'kind type id url' for log messages.

    The URL comes from the decoded response JSON, or '[no url]' if it has none.
    """
    url = json_loads(self.response_json).get('url', '[no url]')
    parts = (self.key.kind(), self.type, self.key.id(), url)
    return ' '.join(parts)

  def add_task(self):
    """Adds a propagate task for this response via util.add_propagate_task."""
    util.add_propagate_task(self)

  @staticmethod
  def get_type(obj):
    """Returns the type of obj if it is in VERB_TYPES, otherwise 'comment'.

    The type itself comes from the module-level get_type() function.
    """
    verb = get_type(obj)
    if verb in VERB_TYPES:
      return verb
    return 'comment'

  def get_or_save(self, source, restart=False):
    """Gets or stores this response, re-propagating it when it has changed.

    Args:
      source: the source whose gr_source is used to compare the stored and
        new response JSON
      restart: if True, restart the stored response even when it is unchanged,
        provided it already existed

    Returns:
      the stored (or newly saved) response entity
    """
    saved = super().get_or_save()

    changed = self.type != saved.type or source.gr_source.activity_changed(
      json_loads(saved.response_json), json_loads(self.response_json), log=True)

    if changed:
      logging.info('Response changed! Re-propagating. Original: %s' % saved)
      history = saved.old_response_jsons[:10]
      history.append(saved.response_json)
      saved.old_response_jsons = history
      saved.response_json = self.response_json
      saved.restart(source)
    elif restart and saved is not self:  # ie it already existed
      saved.restart(source)

    return saved

  def restart(self, source=None):
    """Requeues this response, first adding originals of syndicated activities.

    Each activity's URL (or its object's URL) is canonicalized with the source.
    The original post links of SyndicatedPosts matching those URLs are
    appended to unsent before the base class restart runs.

    Args:
      source: optional; loaded from self.source when not provided
    """
    # TODO: unify with Poll.repropagate_old_responses()
    if not source:
      source = self.source.get()

    synd_urls = set()
    for raw_activity in self.activities_json:
      activity = json_loads(raw_activity)
      url = activity.get('url') or activity.get('object', {}).get('url')
      if not url:
        continue
      canonical = source.canonicalize_url(url, activity=activity)
      if canonical:
        synd_urls.add(canonical)

    if synd_urls:
      matches = SyndicatedPost.query(SyndicatedPost.syndication.IN(synd_urls))
      self.unsent += [post.original for post in matches if post.original]

    return super().restart()
Example #12
0
class Source(StringIdModel, metaclass=SourceMeta):
  """A silo account, e.g. a Facebook or Google+ account.

  Each concrete silo class should subclass this class.
  """

  # Turn off NDB instance and memcache caching.
  # https://developers.google.com/appengine/docs/python/ndb/cache
  # https://github.com/snarfed/bridgy/issues/558
  # https://github.com/snarfed/bridgy/issues/68
  _use_cache = False

  STATUSES = ('enabled', 'disabled', 'error')  # 'error' is deprecated
  POLL_STATUSES = ('ok', 'error', 'polling')
  FEATURES = ('listen', 'publish', 'webmention', 'email')

  # short name for this site type. used in URLs, etc.
  SHORT_NAME = None
  # the corresponding granary class
  GR_CLASS = None
  # oauth-dropins StartHandler class
  OAUTH_START_HANDLER = None
  # whether Bridgy supports listen for this silo - this is usually the case, so we default to True
  CAN_LISTEN = True
  # whether Bridgy supports publish for this silo
  CAN_PUBLISH = None
  # how often to poll for responses
  FAST_POLL = datetime.timedelta(minutes=30)
  # how often to poll sources that have never sent a webmention
  SLOW_POLL = datetime.timedelta(days=1)
  # how often to poll sources that are currently rate limited by their silo
  RATE_LIMITED_POLL = SLOW_POLL
  # how long to wait after signup for a successful webmention before dropping to
  # the lower frequency poll
  FAST_POLL_GRACE_PERIOD = datetime.timedelta(days=7)
  # how often refetch author url to look for updated syndication links
  FAST_REFETCH = datetime.timedelta(hours=6)
  # refetch less often (this often) if it's been >2w since the last synd link
  SLOW_REFETCH = datetime.timedelta(days=2)
  # rate limiting HTTP status codes returned by this silo. e.g. twitter returns
  # 429, instagram 503, google+ 403.
  RATE_LIMIT_HTTP_CODES = ('429',)
  DISABLE_HTTP_CODES = ('401',)
  TRANSIENT_ERROR_HTTP_CODES = ()
  # whether granary supports fetching block lists
  HAS_BLOCKS = False
  # whether to require a u-syndication link for backfeed
  BACKFEED_REQUIRES_SYNDICATION_LINK = False

  # Maps Publish.type (e.g. 'like') to source-specific human readable type label
  # (e.g. 'favorite'). Subclasses should override this.
  TYPE_LABELS = {}

  # subclasses should override this
  URL_CANONICALIZER = util.UrlCanonicalizer(headers=util.REQUEST_HEADERS)

  # Regexps for URL paths that don't accept incoming webmentions. Currently used
  # by Blogger.
  PATH_BLACKLIST = ()

  created = ndb.DateTimeProperty(auto_now_add=True, required=True)
  url = ndb.StringProperty()
  status = ndb.StringProperty(choices=STATUSES, default='enabled')
  poll_status = ndb.StringProperty(choices=POLL_STATUSES, default='ok')
  rate_limited = ndb.BooleanProperty(default=False)
  name = ndb.StringProperty()  # full human-readable name
  picture = ndb.StringProperty()
  domains = ndb.StringProperty(repeated=True)
  domain_urls = ndb.StringProperty(repeated=True)
  features = ndb.StringProperty(repeated=True, choices=FEATURES)
  superfeedr_secret = ndb.StringProperty()
  webmention_endpoint = ndb.StringProperty()

  # points to an oauth-dropins auth entity. The model class should be a subclass
  # of oauth_dropins.BaseAuth. the token should be generated with the
  # offline_access scope so that it doesn't expire.
  auth_entity = ndb.KeyProperty()

  #
  # listen-only properties
  #
  last_polled = ndb.DateTimeProperty(default=util.EPOCH)
  last_poll_attempt = ndb.DateTimeProperty(default=util.EPOCH)
  last_webmention_sent = ndb.DateTimeProperty()
  last_public_post = ndb.DateTimeProperty()
  recent_private_posts = ndb.IntegerProperty(default=0)

  # the last time we re-fetched the author's url looking for updated
  # syndication links
  last_hfeed_refetch = ndb.DateTimeProperty(default=util.EPOCH)

  # the last time we've seen a rel=syndication link for this Source.
  # we won't spend the time to re-fetch and look for updates if there's
  # never been one
  last_syndication_url = ndb.DateTimeProperty()
  # the last time we saw a syndication link in an h-feed, as opposed to just on
  # permalinks. background: https://github.com/snarfed/bridgy/issues/624
  last_feed_syndication_url = ndb.DateTimeProperty()

  last_activity_id = ndb.StringProperty()
  last_activities_etag = ndb.StringProperty()
  last_activities_cache_json = ndb.TextProperty()
  seen_responses_cache_json = ndb.TextProperty(compressed=True)

  # populated in Poll.poll(), used by handlers
  blocked_ids = ndb.JsonProperty(compressed=True)

  # maps updated property names to values that put_updates() writes back to the
  # datastore transactionally. set this to {} before beginning.
  updates = None

  # gr_source is *not* set to None by default here, since it needs to be unset
  # for __getattr__ to run when it's accessed.

  def __init__(self, *args, id=None, **kwargs):
    """Constructor. Prefixes the string id with a backslash if it starts with `__`."""
    needs_escape = id and id.startswith('__')
    escaped = '\\' + id if needs_escape else id
    super().__init__(*args, id=escaped, **kwargs)

  def key_id(self):
    """Returns the key's string id with a leading escape backslash removed."""
    raw = self.key.id()
    if raw[0] == '\\':
      return raw[1:]
    return raw

  @classmethod
  def new(cls, handler, **kwargs):
    """Factory method. Creates and returns a new instance for the current user.

    To be implemented by subclasses.
    """
    raise NotImplementedError()

  def __getattr__(self, name):
    """Lazily loads the auth entity and builds :attr:`self.gr_source`.

    Only handles `gr_source`, and only when an auth entity is set; every other
    lookup is deferred to the superclass. Once :attr:`gr_source` has been
    assigned on the instance, normal attribute lookup finds it and this method
    is no longer called for it.
    """
    if name != 'gr_source' or not self.auth_entity:
      return getattr(super(Source, self), name)

    auth_entity = self.auth_entity.get()
    token = auth_entity.access_token()
    args = token if isinstance(token, tuple) else (token,)

    # per-silo extra constructor arguments
    kind = self.key.kind()
    kwargs = {}
    if kind == 'FacebookPage' and auth_entity.type == 'user':
      kwargs = {'user_id': self.key_id()}
    elif kind == 'Instagram':
      kwargs = {'scrape': True, 'cookie': INSTAGRAM_SESSIONID_COOKIE}
    elif kind == 'Mastodon':
      args = (auth_entity.instance(),) + args
      kwargs = {'user_id': json_loads(auth_entity.user_json).get('id')}
    elif kind == 'Twitter':
      kwargs = {'username': self.key_id()}

    self.gr_source = self.GR_CLASS(*args, **kwargs)
    return self.gr_source

  @classmethod
  def lookup(cls, id):
    """Returns the entity with the given id.

    The id is treated as the key id, escaped with a leading backslash when it
    starts with `__`. Subclasses may extend this to support usernames, etc.
    """
    key_name = '\\' + id if id and id.startswith('__') else id
    return ndb.Key(cls, key_name).get()

  def user_tag_id(self):
    """Returns this source's tag URI, e.g. 'tag:plus.google.com:123456'."""
    silo = self.gr_source
    return silo.tag_uri(self.key_id())

  def bridgy_path(self):
    """Returns the Bridgy page URL path for this source: /SHORT_NAME/key id."""
    parts = (self.SHORT_NAME, self.key_id())
    return '/%s/%s' % parts

  def bridgy_url(self, handler):
    """Returns the full Bridgy page URL: the handler's host URL plus the path."""
    host = util.host_url(handler)
    return host + self.bridgy_path()

  def silo_url(self, handler):
    """Returns the silo account URL, e.g. https://twitter.com/foo.

    Not implemented in this class.
    """
    raise NotImplementedError

  def label(self):
    """Human-readable label: the label name followed by the granary class NAME."""
    display_name = self.label_name()
    return '%s (%s)' % (display_name, self.GR_CLASS.NAME)

  def label_name(self):
    """Returns this source's name if it has one, otherwise its key id."""
    if self.name:
      return self.name
    return self.key_id()

  @classmethod
  @ndb.transactional()
  def put_updates(cls, source):
    """Writes source.updates to the datastore transactionally.

    The entity is re-read from the datastore, the updates are applied to that
    fresh copy, and a deprecated 'error' status is reset to 'enabled' before
    the copy is stored.

    Args:
      source: :class:`Source`

    Returns:
      the updated :class:`Source`, or the given one unchanged if it has no
      updates
    """
    pending = source.updates
    if not pending:
      return source

    loggable = {k: v for k, v in pending.items() if not k.endswith('_json')}
    logging.info('Updating %s %s : %r', source.label(), source.bridgy_path(),
                 loggable)

    fresh = source.key.get()
    fresh.updates = pending
    for attr, value in pending.items():
      setattr(fresh, attr, value)

    if fresh.status == 'error':  # deprecated
      logging.warning('Resetting status from error to enabled')
      fresh.status = 'enabled'

    fresh.put()
    return fresh

  def poll_period(self):
    """Returns the poll frequency for this source, as a :class:`datetime.timedelta`.

    Rate limited sources use RATE_LIMITED_POLL. Sources still inside the grace
    period after creation, or with a webmention sent in the last 7 days, use
    FAST_POLL. A webmention sent in the last 30 days gives ten times FAST_POLL.
    Everything else, including sources that never sent one, uses SLOW_POLL.
    """
    now = datetime.datetime.now()
    if self.rate_limited:
      return self.RATE_LIMITED_POLL

    if now < self.created + self.FAST_POLL_GRACE_PERIOD:
      return self.FAST_POLL

    last_sent = self.last_webmention_sent
    if not last_sent:
      return self.SLOW_POLL

    since_last = now - last_sent
    if since_last < datetime.timedelta(days=7):
      return self.FAST_POLL
    if since_last < datetime.timedelta(days=30):
      return self.FAST_POLL * 10
    return self.SLOW_POLL

  def should_refetch(self):
    """Returns True if we should run OPD refetch on this source now.

    Always True when last_hfeed_refetch equals REFETCH_HFEED_TRIGGER, and
    always False when no syndication URL has ever been seen. Otherwise True
    once the last poll attempt is at least one refetch period after the last
    refetch. The period is FAST_REFETCH if a syndication URL was seen in the
    last 14 days, else SLOW_REFETCH.
    """
    now = datetime.datetime.now()
    if self.last_hfeed_refetch == REFETCH_HFEED_TRIGGER:
      return True
    if not self.last_syndication_url:
      return False

    seen_recently = (self.last_syndication_url >
                     now - datetime.timedelta(days=14))
    period = self.FAST_REFETCH if seen_recently else self.SLOW_REFETCH
    return self.last_poll_attempt >= self.last_hfeed_refetch + period

  @classmethod
  def bridgy_webmention_endpoint(cls, domain='brid.gy'):
    """Returns the Bridgy webmention endpoint for this source type."""
    return 'https://%s/webmention/%s' % (domain, cls.SHORT_NAME)

  def has_bridgy_webmention_endpoint(self):
    """Returns True if this source's webmention endpoint is one of Bridgy's.

    Both the default domain and 'www.brid.gy' endpoints count.
    """
    bridgy_endpoints = (
      self.bridgy_webmention_endpoint(),
      self.bridgy_webmention_endpoint(domain='www.brid.gy'),
    )
    return self.webmention_endpoint in bridgy_endpoints

  def get_author_urls(self):
    """Returns this source's author URLs.

    Each entry of domain_urls is passed through
    util.replace_test_domains_with_localhost.

    Return:
      a list of string URLs, possibly empty
    """
    urls = []
    for domain_url in self.domain_urls:
      urls.append(util.replace_test_domains_with_localhost(domain_url))
    return urls

  def search_for_links(self):
    """Searches for activities with links to any of this source's web sites.

    This implementation finds nothing and always returns an empty list.

    https://github.com/snarfed/bridgy/issues/456
    https://github.com/snarfed/bridgy/issues/565

    Returns:
      sequence of ActivityStreams activity dicts
    """
    return []

  def get_activities_response(self, **kwargs):
    """Returns the gr_source activities response for this source.

    group_id defaults to gr_source.SELF. This user's web site URLs are
    injected into every returned item. May be overridden by subclasses.
    """
    kwargs.setdefault('group_id', gr_source.SELF)
    response = self.gr_source.get_activities_response(**kwargs)
    for item in response['items']:
      self._inject_user_urls(item)
    return response

  def get_activities(self, **kwargs):
    """Returns just the 'items' of :meth:`get_activities_response`."""
    response = self.get_activities_response(**kwargs)
    return response['items']

  def get_comment(self, comment_id, **kwargs):
    """Returns a comment from this source.

    Fetched through gr_source. This user's web site URLs are injected into a
    non-empty result. May be overridden by subclasses.

    Args:
      comment_id: string, site-specific comment id
      kwargs: passed through to gr_source.get_comment

    Returns:
      dict, decoded ActivityStreams comment object, or None
    """
    found = self.gr_source.get_comment(comment_id, **kwargs)
    if not found:
      return found

    self._inject_user_urls(found)
    return found

  def get_like(self, activity_user_id, activity_id, like_user_id, **kwargs):
    """Returns an ActivityStreams 'like' activity object.

    Delegates to gr_source.get_like. May be overridden by subclasses.

    Args:
      activity_user_id: string id of the user who posted the original activity
      activity_id: string activity id
      like_user_id: string id of the user who liked the activity
      kwargs: passed through to gr_source.get_like
    """
    silo = self.gr_source
    return silo.get_like(activity_user_id, activity_id, like_user_id, **kwargs)

  def _inject_user_urls(self, activity):
    """Adds this user's web site URLs to their user mentions (in tags), in place."""
    obj = activity.get('object') or activity
    user_tag_id = self.user_tag_id()
    for tag in obj.get('tags', []):
      if tag.get('id') == user_tag_id:
        tag.setdefault('urls', []).extend([{'value': u} for u in self.domain_urls])

  def create_comment(self, post_url, author_name, author_url, content):
    """Creates a new comment in the source silo.

    Not implemented in this class; subclasses must provide it.

    Args:
      post_url: string
      author_name: string
      author_url: string
      content: string

    Returns:
      response dict with at least 'id' field
    """
    raise NotImplementedError

  def feed_url(self):
    """Returns the RSS or Atom (or similar) feed URL for this source.

    Not implemented in this class; subclasses must provide it. Currently only
    implemented by :mod:`blogger`, :mod:`medium`, :mod:`tumblr`, and
    :mod:`wordpress_rest`.

    Returns:
      string URL
    """
    raise NotImplementedError

  def edit_template_url(self):
    """Gives the URL for editing this blog's template HTML.

    Abstract hook: this base implementation always raises. Per the original
    author's note, it is currently only implemented by :mod:`blogger`,
    :mod:`medium`, :mod:`tumblr`, and :mod:`wordpress_rest`.

    Returns:
      string URL

    Raises:
      NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError

  @classmethod
  def button_html(cls, feature, **kwargs):
    """Renders the HTML login form and button for this site.

    Builds the hidden form inputs and then hands off to
    ``cls.OAUTH_START_HANDLER.button_html``.

    Args:
      feature: string, must be one of ``cls.FEATURES``; sent as the hidden
        ``feature`` input
      kwargs: ``form_extra`` (string, prepended to the hidden inputs) and
        ``source`` (if truthy, its ``key_id()`` is sent as the hidden ``id``
        input) are consumed here; everything else is forwarded to the handler's
        ``button_html``

    Returns: string, HTML
    """
    assert feature in cls.FEATURES

    extra = kwargs.pop('form_extra', '')
    extra = extra + '<input name="feature" type="hidden" value="%s" />' % feature

    source = kwargs.pop('source', None)
    if source:
      id_input = ('\n<input name="id" type="hidden" value="%s" />' %
                  source.key_id())
      extra = extra + id_input

    start_path = '/%s/start' % cls.SHORT_NAME
    return cls.OAUTH_START_HANDLER.button_html(
      start_path,
      form_extra=extra,
      image_prefix='/oauth_dropins/static/',
      **kwargs)

  @classmethod
  def create_new(cls, handler, user_url=None, **kwargs):
    """Creates and saves a new :class:`Source` and adds a poll task for it.

    Builds the entity with :meth:`new()`, fills in its URLs and domains if the
    entity did not set them itself, merges selected fields from an
    already-stored entity with the same key, runs :meth:`verify()`, stores the
    entity, and finally kicks off the feature-specific follow-up work: a
    superfeedr subscription for 'webmention' and poll tasks for 'listen'.

    Args:
      handler: the current :class:`webapp2.RequestHandler`
      user_url: a string, optional. if provided, supersedes other urls when
        determining the author_url
      **kwargs: passed to :meth:`new()`

    Returns: newly created :class:`Source`, or None if :meth:`new()` returned
      None
    """
    source = cls.new(handler, **kwargs)
    if source is None:
      return None

    if not source.domain_urls:  # defer to the source if it already set this
      auth_entity = kwargs.get('auth_entity')
      # only auth entities that carry user_json can be mined for profile URLs
      if auth_entity and hasattr(auth_entity, 'user_json'):
        source.domain_urls, source.domains = source._urls_and_domains(
          auth_entity, user_url)
    logging.debug('URLs/domains: %s %s', source.domain_urls, source.domains)

    # check if this source already exists
    existing = source.key.get()
    if existing:
      # merge some fields
      # NOTE(review): features is assigned a set here but indexed with [0]
      # below; presumably the property coerces it back to a list - confirm.
      source.features = set(source.features + existing.features)
      # carry over bookkeeping fields from the stored entity
      source.populate(**existing.to_dict(include=(
            'created', 'last_hfeed_refetch', 'last_poll_attempt', 'last_polled',
            'last_syndication_url', 'last_webmention_sent', 'superfeedr_secret',
            'webmention_endpoint')))
      verb = 'Updated'
    else:
      verb = 'Added'

    # build the user-facing message; its last sentence depends on the first
    # feature, defaulting to 'listen' when the source has none
    author_urls = source.get_author_urls()
    link = ('http://indiewebify.me/send-webmentions/?url=' + author_urls[0]
            if author_urls else 'http://indiewebify.me/#send-webmentions')
    feature = source.features[0] if source.features else 'listen'
    blurb = '%s %s. %s' % (
      verb, source.label(),
      'Try previewing a post from your web site!' if feature == 'publish'
      else '<a href="%s">Try a webmention!</a>' % link if feature == 'webmention'
      else "Refresh in a minute to see what we've found!")
    logging.info('%s %s', blurb, source.bridgy_url(handler))
    # uncomment to send email notification for each new user
    # if not existing:
    #   util.email_me(subject=blurb, body=source.bridgy_url(handler))

    # the message is only attached to the handler once the source verifies
    source.verify()
    if source.verified():
      handler.messages = {blurb}

    # TODO: ugh, *all* of this should be transactional
    source.put()

    if 'webmention' in source.features:
      superfeedr.subscribe(source, handler)

    if 'listen' in source.features:
      # two poll tasks are added: one with now=True and one with the default
      util.add_poll_task(source, now=True)
      util.add_poll_task(source)

    return source

  def verified(self):
    """Reports whether this source is ready to be used.

    A source counts as verified when all of these hold:

    * it has both ``domains`` and ``domain_urls``
    * if it has the 'webmention' feature, it has a ``webmention_endpoint``
    * if it has the 'listen' feature, it has a ``webmention_endpoint`` or a
      ``last_webmention_sent``

    See :meth:`verify()` for details. May be overridden by subclasses, e.g.
    :class:`tumblr.Tumblr`.

    Returns:
      boolean
    """
    if not (self.domains and self.domain_urls):
      return False

    features = self.features
    if 'webmention' in features and not self.webmention_endpoint:
      return False

    listen_unconfirmed = ('listen' in features and
                          not self.webmention_endpoint and
                          not self.last_webmention_sent)
    return not listen_unconfirmed

  def verify(self, force=False):
    """Checks that this source is ready to be used.

    For blog and listen sources, this fetches their front page HTML and
    discovers their webmention endpoint. For publish sources, this checks that
    they have a domain.

    Does nothing if the source is already verified (unless ``force`` is set),
    is disabled, has no features, or has no author URL whose domain is outside
    the webmention blocklist. Otherwise stores the discovered endpoint (or
    None) in ``webmention_endpoint``, keeps the fetched HTML in
    ``_fetched_html``, and saves the entity.

    May be overridden by subclasses, e.g. :class:`tumblr.Tumblr`.

    Args:
      force: if True, fully verifies (e.g. re-fetches the blog's HTML and
        performs webmention discovery) even if we already think this source is
        verified.
    """
    # author URLs are paired positionally with self.domains for the blocklist
    # check
    author_urls = [u for u, d in zip(self.get_author_urls(), self.domains)
                   if not util.in_webmention_blocklist(d)]
    if ((self.verified() and not force) or self.status == 'disabled' or
        not self.features or not author_urls):
      return

    author_url = author_urls[0]
    logging.info('Attempting to discover webmention endpoint on %s', author_url)
    mention = send.WebmentionSend('https://brid.gy/', author_url)
    mention.requests_kwargs = {'timeout': util.HTTP_TIMEOUT,
                               'headers': util.REQUEST_HEADERS}
    try:
      mention._discoverEndpoint()
    except BaseException:
      # NOTE(review): deliberately broad, best-effort catch kept from the
      # original; confirm whether Exception would be enough here.
      # exc_info (not stack_info) so the log records the exception that was
      # actually raised rather than only the current call stack.
      logging.info('Error discovering webmention endpoint', exc_info=True)
      mention.error = {'code': 'EXCEPTION'}

    # these attributes may not exist if discovery failed part way through
    self._fetched_html = getattr(mention, 'html', None)
    error = getattr(mention, 'error', None)
    endpoint = getattr(mention, 'receiver_endpoint', None)
    if error or not endpoint:
      logging.info("No webmention endpoint found: %s %r", error, endpoint)
      self.webmention_endpoint = None
    else:
      logging.info("Discovered webmention endpoint %s", endpoint)
      self.webmention_endpoint = endpoint

    self.put()

  def _urls_and_domains(self, auth_entity, user_url):
    """Collects this user's usable profile URLs and their domains.

    Decodes the auth entity's ``user_json``, takes its ``actor`` field if set
    or else converts it with ``self.gr_source.user_to_actor``, and gathers
    candidate URLs from ``user_url`` plus the actor's object URLs. Each
    candidate goes through :meth:`resolve_profile_url`; only the first
    ``MAX_AUTHOR_URLS`` are resolved with ``resolve=True``. URLs on this
    source's own domain (``self.gr_source.DOMAIN``) are dropped. May be
    overridden by subclasses.

    Args:
      auth_entity: :class:`oauth_dropins.models.BaseAuth`
      user_url: string, optional URL passed in when authorizing

    Returns:
      ([string url, ...], [string domain, ...])
    """
    user = json_loads(auth_entity.user_json)
    actor = user.get('actor')  # for Instagram; its user_json is IndieAuth
    if not actor:
      actor = self.gr_source.user_to_actor(user)
    logging.debug('Extracting URLs and domains from actor: %s',
                  json_dumps(actor, indent=2))

    profile_links = [user_url] + microformats2.object_urls(actor)
    candidates = util.trim_nulls(util.uniquify(profile_links))

    if len(candidates) > MAX_AUTHOR_URLS:
      logging.info('Too many profile links! Only resolving the first %s: %s',
                   MAX_AUTHOR_URLS, candidates)

    resolved_urls = []
    for index, candidate in enumerate(candidates):
      may_fetch = index < MAX_AUTHOR_URLS
      resolved = self.resolve_profile_url(candidate, resolve=may_fetch)
      if resolved:
        resolved_urls.append(resolved)

    # skip links on this source's domain itself. only currently needed for
    # Mastodon; the other silo domains are in the webmention blocklist.
    kept = []
    for link in util.dedupe_urls(resolved_urls):  # normalizes domains to lower case
      link_domain = util.domain_from_link(link)
      if link_domain != self.gr_source.DOMAIN:
        kept.append((link, link_domain))

    return [link for link, _ in kept], [link_domain for _, link_domain in kept]

  @staticmethod
  def resolve_profile_url(url, resolve=True):
    """Turns a candidate profile URL into the URL to store on a source.

    Steps:

    1. Runs the URL through ``util.get_webmention_target``; gives up if that
       reports it as not ok.
    2. Lower-cases the result. If it merely extends the original URL with a
       deeper path, the original URL is used instead. #652
    3. If the result has a path and ``resolve`` is true, fetches the site root's
       microformats2 and, if the root lists the result as a rel=me link,
       uses the root instead.

    Args:
      url: string
      resolve: boolean, whether to make HTTP requests to follow redirects, etc.

    Returns: string, resolved URL, or None
    """
    target, _, ok = util.get_webmention_target(url, resolve=resolve)
    if not ok:
      return None

    target = target.lower()
    redirected_deeper = util.schemeless(target).startswith(
      util.schemeless(url.lower()))
    if redirected_deeper:
      # redirected to a deeper path. use the original higher level URL. #652
      target = url

    # If target has a path segment check if root has a matching rel=me.
    with_path = re.match(r'^(https?://[^/]+)/.+', target)
    if not (with_path and resolve):
      return target

    root = with_path.group(1)
    try:
      rel_me = util.fetch_mf2(root)['rels'].get('me', [])
      if target in rel_me:
        target = root
    except requests.RequestException:
      logging.warning("Couldn't fetch %s, preserving path in %s",
                      root, target, stack_info=True)

    return target

  def canonicalize_url(self, url, activity=None, **kwargs):
    """Canonicalizes a post or object URL.

    Wraps :class:`oauth_dropins.webutil.util.UrlCanonicalizer`: calls
    ``self.URL_CANONICALIZER`` if it is set, otherwise hands the URL back
    untouched.

    Args:
      url: string
      activity: unused by this base implementation
      kwargs: forwarded to ``self.URL_CANONICALIZER``

    Returns:
      the canonicalizer's result, or ``url`` itself if there is no canonicalizer
    """
    if not self.URL_CANONICALIZER:
      return url
    return self.URL_CANONICALIZER(url, **kwargs)

  def infer_profile_url(self, url):
    """Maps an arbitrary URL for a person to their profile URL on this service.

    A URL already on this service's domain is handed back as is. Otherwise
    this queries Bridgy's registered accounts of this class for one whose
    ``domains`` include the URL's domain.

    Args:
      url: string, a person's URL

    Returns:
      a string URL for their profile on this service, or None if no registered
      account matches
    """
    domain = util.domain_from_link(url)
    if domain == self.gr_source.DOMAIN:
      return url

    model = self.__class__
    registered = model.query(model.domains == domain).get()
    if not registered:
      return None
    return self.gr_source.user_url(registered.key_id())

  def preprocess_for_publish(self, obj):
    """Massages an object before it is published.

    For every tag with ``objectType`` 'person', tries each of the tag's URLs
    with :meth:`infer_profile_url` and, on the first hit, points the tag's
    "url" at that profile on this service (as opposed to the person's
    homepage). Then does the same for each contained ``object``, recursively.

    The object is modified in place.

    Args:
      obj: ActivityStreams activity or object dict
    """
    for tag in obj.get('tags', []):
      if tag.get('objectType') != 'person':
        continue
      inferred = (link and self.infer_profile_url(link)
                  for link in microformats2.object_urls(tag))
      # lazily stops at the first URL that maps to a profile on this service
      silo_url = next((profile for profile in inferred if profile), None)
      if silo_url:
        tag['url'] = silo_url

    # recurse on contained object(s)
    for inner in util.get_list(obj, 'object'):
      self.preprocess_for_publish(inner)

  def on_new_syndicated_post(self, syndpost):
    """Hook invoked when a new :class:`SyndicatedPost` is stored for this source.

    The base implementation does nothing.

    Args:
      syndpost: :class:`SyndicatedPost`
    """
    return None

  def is_private(self):
    """Reports whether this source is private, aka protected.

    ...ie whether their posts are not public. The base implementation always
    answers False.

    Returns:
      boolean
    """
    return False

  def is_activity_public(self, activity):
    """Reports whether the given activity is public.

    Just wraps :meth:`granary.source.Source.is_public`. Subclasses may override.

    Args:
      activity: the activity to check, passed through unchanged

    Returns:
      whatever :meth:`granary.source.Source.is_public` returns
    """
    check_public = gr_source.Source.is_public
    return check_public(activity)

  def is_beta_user(self):
    """Reports whether this is a "beta" user opted into new features.

    A user is beta when their :meth:`bridgy_path()` appears in
    ``util.BETA_USER_PATHS``. Beta users come from beta_users.txt.

    Returns:
      boolean
    """
    path = self.bridgy_path()
    return path in util.BETA_USER_PATHS

  def load_blocklist(self):
    """Fetches this user's blocklist, if supported, and stores it in the entity.

    Does nothing unless ``HAS_BLOCKS`` is set. If the fetch is rate limited,
    whatever partial list came back with the exception (possibly nothing) is
    used. At most ``BLOCKLIST_MAX_IDS`` ids are kept in ``blocked_ids`` before
    the entity is saved.
    """
    if self.HAS_BLOCKS:
      try:
        fetched = self.gr_source.get_blocklist_ids()
      except gr_source.RateLimited as limited:
        fetched = limited.partial or []

      self.blocked_ids = fetched[:BLOCKLIST_MAX_IDS]
      self.put()

  def is_blocked(self, obj):
    """Returns True if an object's author is being blocked.

    ...ie they're in this user's block list.

    Checks the ``author`` and ``actor`` fields of ``obj`` itself and of each of
    its contained ``object`` values. Only dict-valued fields with a non-None
    ``numeric_id`` are considered, so string authors/objects and objects
    without an author are never treated as blocked.

    Note that this method is tested in test_twitter.py, not test_models.py, for
    historical reasons.

    Args:
      obj: ActivityStreams activity or object dict

    Returns:
      boolean
    """
    if not self.blocked_ids:
      return False

    for o in [obj] + util.get_list(obj, 'object'):
      if not isinstance(o, dict):
        continue
      for field in 'author', 'actor':
        person = o.get(field)
        if not isinstance(person, dict):
          continue
        numeric_id = person.get('numeric_id')
        if numeric_id is not None and numeric_id in self.blocked_ids:
          return True

    # always answer with a real boolean instead of falling off the end
    return False
Example #13
0
class SyndicatedPost(ndb.Model):
  """A syndicated post paired with its discovered original, if any.

  Either side may be blank (None): an original with no known syndication, or a
  syndication for which no original post was found. We discover the
  relationship by following rel=syndication links on the author's h-feed.

  Entities are stored as children of their :class:`Source` entity, and every
  lookup here is an ancestor query on the source's key.

  See :mod:`original_post_discovery`.

  When a :class:`SyndicatedPost` entity is about to be stored,
  :meth:`source.Source.on_new_syndicated_post()` is called before it's stored.
  """

  # Turn off instance and memcache caching. See Response for details.
  _use_cache = False
  _use_memcache = False

  syndication = ndb.StringProperty()
  original = ndb.StringProperty()
  created = ndb.DateTimeProperty(auto_now_add=True)
  updated = ndb.DateTimeProperty(auto_now=True)

  @classmethod
  @ndb.transactional()
  def insert_original_blank(cls, source, original):
    """Stores a new original -> None relationship, unless one already exists.

    Does a check-and-set: if any relationship already exists for this original
    under this source, nothing is added.

    Args:
      source: :class:`Source` subclass
      original: string
    """
    known = cls.query(cls.original == original, ancestor=source.key).get()
    if not known:
      cls(parent=source.key, original=original, syndication=None).put()

  @classmethod
  @ndb.transactional()
  def insert_syndication_blank(cls, source, syndication):
    """Stores a new syndication -> None relationship, unless one already exists.

    Does a check-and-set: if any relationship already exists for this
    syndication under this source, nothing is added.

    Args:
      source: :class:`Source` subclass
      syndication: string
    """
    known = cls.query(cls.syndication == syndication,
                      ancestor=source.key).get()
    if not known:
      cls(parent=source.key, original=None, syndication=syndication).put()

  @classmethod
  @ndb.transactional()
  def insert(cls, source, syndication, original):
    """Stores a new (non-blank) syndication -> original relationship.

    This method does a check-and-set within a transaction to avoid storing
    duplicate relationships.

    If blank entries exist for the syndication or original URL
    (i.e. syndication -> None or original -> None), they are removed first.
    Non-blank relationships are retained.

    Args:
      source: :class:`Source` subclass
      syndication: string (not None)
      original: string (not None)

    Returns:
      SyndicatedPost: newly created or preexisting entity
    """
    # check for an exact match
    exact = cls.query(cls.syndication == syndication,
                      cls.original == original,
                      ancestor=source.key).get()
    if exact:
      return exact

    # delete blanks (expect at most 1 of each). the == None comparisons are
    # query filter expressions, so they can't be written as "is None".
    blank_filters = (
      ndb.AND(cls.syndication == syndication, cls.original == None),
      ndb.AND(cls.original == original, cls.syndication == None),
    )
    for blank_filter in blank_filters:
      blank_keys = cls.query(blank_filter, ancestor=source.key).fetch(
        keys_only=True)
      for blank_key in blank_keys:
        blank_key.delete()

    relationship = cls(parent=source.key, original=original,
                       syndication=syndication)
    relationship.put()
    return relationship
Example #14
0
 class OtherKind(ndb.Model):
     """ndb model with a string property ``foo`` and a boolean property ``bar``
     that defaults to True."""
     foo = ndb.StringProperty()
     bar = ndb.BooleanProperty(default=True)
Example #15
0
 class Animal(ndb.PolyModel):
     """ndb PolyModel with a single string property ``one``."""
     one = ndb.StringProperty()
Example #16
0
class Visit(ndb.Model):
    """Visit entity: registers a visitor's IP address and a timestamp."""
    visitor = ndb.StringProperty()
    # auto_now_add is set, so the timestamp is not supplied by the caller
    timestamp = ndb.DateTimeProperty(auto_now_add=True)
Example #17
0
 class Feline(Animal):
     """Subclass of ``Animal`` that adds a string property ``two``."""
     two = ndb.StringProperty()
Example #18
0
class Query(ndb.Model):
    """ndb model with a text string, a date and a user id."""
    text = ndb.StringProperty()
    # auto_now_add is set, so the date is not supplied by the caller
    date = ndb.DateTimeProperty(auto_now_add=True)
    user_id = ndb.StringProperty()
Example #19
0
 class SomeKind(ndb.Model):
     """ndb model with a string property ``foo`` and two indexed, automatically
     maintained timestamps."""
     foo = ndb.StringProperty()
     # declared with auto_now_add=True
     created_at = ndb.DateTimeProperty(indexed=True, auto_now_add=True)
     # declared with auto_now=True
     updated_at = ndb.DateTimeProperty(indexed=True, auto_now=True)
class Topic(ndb.Model):
    """ndb model with a single string property ``action_text``."""
    action_text = ndb.StringProperty()

    @classmethod
    def query_topic(cls, ancestor_key):
        """Returns a query for entities of this class under ``ancestor_key``.

        Args:
            ancestor_key: passed as the ``ancestor`` argument of ``cls.query``
        """
        return cls.query(ancestor=ancestor_key)
Example #21
0
 class OtherKind(ndb.Model):
     """ndb model with two string properties, ``one`` and ``two``."""
     one = ndb.StringProperty()
     two = ndb.StringProperty()
class Synonym(ndb.Model):
    """ndb model with a single string property ``synonym``."""
    synonym = ndb.StringProperty()

    @classmethod
    def query_synonym(cls, ancestor_key):
        """Returns a query for entities of this class under ``ancestor_key``.

        Args:
            ancestor_key: passed as the ``ancestor`` argument of ``cls.query``
        """
        return cls.query(ancestor=ancestor_key)
Example #23
0
 class BModel(ndb.Model):
     """ndb model with a string property ``s_bar`` and an indexed key property
     ``key_a`` restricted to kind "AModel"."""
     s_bar = ndb.StringProperty()
     key_a = ndb.KeyProperty(kind="AModel", indexed=True)
Example #24
0
 class AnyKind(ndb.Model):
     """ndb model with two integer properties (``foo``, ``baz``) and two string
     properties (``bar``, ``qux``)."""
     foo = ndb.IntegerProperty()
     bar = ndb.StringProperty()
     baz = ndb.IntegerProperty()
     qux = ndb.StringProperty()
Example #25
0
 class SomeKind(ndb.Model):
     """ndb model with a single repeated string property ``foo``."""
     foo = ndb.StringProperty(repeated=True)
Example #26
0
 class MyKind(ndb.Model):
     """ndb model with a single string property ``bar``."""
     bar = ndb.StringProperty()
Example #27
0
 class SomeKind(ndb.Model):
     """ndb model with two string properties, ``foo`` and ``bar``."""
     foo = ndb.StringProperty()
     # the Python attribute is ``bar`` but the property is declared with
     # name="notbar"
     bar = ndb.StringProperty(name="notbar")
Example #28
0
class FacebookAuth(models.BaseAuth):
  """An authenticated Facebook user or page.

  Provides methods that return information about this user (or page) and make
  OAuth-signed requests to Facebook's HTTP-based APIs. Stores OAuth credentials
  in the datastore. See models.BaseAuth for usage details.

  Facebook-specific details: implements urlopen() but not api(). The
  key name is the user's or page's Facebook ID.
  """
  type = ndb.StringProperty(choices=('user', 'page'))
  auth_code = ndb.StringProperty()
  access_token_str = ndb.StringProperty(required=True)
  # https://developers.facebook.com/docs/graph-api/reference/user#fields
  user_json = ndb.TextProperty(required=True)
  # https://developers.facebook.com/docs/graph-api/reference/user/accounts#fields
  pages_json = ndb.TextProperty()

  def site_name(self):
    """Returns the display name of this site, 'Facebook'."""
    return 'Facebook'

  def user_display_name(self):
    """Returns the user's or page's name, read from user_json."""
    return json_loads(self.user_json)['name']

  def access_token(self):
    """Returns the OAuth access token string."""
    return self.access_token_str

  def urlopen(self, url, **kwargs):
    """Wraps urlopen() and adds OAuth credentials to the request.

    Args:
      url: string
      kwargs: passed through to models.BaseAuth.urlopen_access_token
    """
    return models.BaseAuth.urlopen_access_token(url, self.access_token_str,
                                                **kwargs)

  def for_page(self, page_id):
    """Returns a new, unsaved FacebookAuth entity for a page in pages_json.

    The returned entity's properties will be populated with the page's data.
    access_token will be the page access token, user_json will be the page
    object (fetched from API_PAGE_URL with the page's token), and pages_json
    will be a single-element list with the page.

    If page_id is not in pages_json, or pages_json is unset, returns None.

    Args:
      page_id: string, Facebook page id
    """
    # pages_json is optional, so it may be unset
    pages = json_loads(self.pages_json) if self.pages_json else []
    for page in pages:
      found_id = page.get('id')
      if found_id == page_id:
        entity = FacebookAuth(id=found_id, type='page',
                              pages_json=json_dumps([page]),
                              access_token_str=page.get('access_token'))
        entity.user_json = entity.urlopen(API_PAGE_URL).read()
        logging.debug('Page object: %s', entity.user_json)
        return entity

    return None

  def is_authority_for(self, key):
    """Additionally check if the key represents a Page that this user has
    authority over.

    The page keys are built directly from the page ids in pages_json, ie the
    same key that :meth:`for_page` would give its entity, so no HTTP requests
    are made just to compare keys.

    Args:
      key: ndb.Key

    Returns:
      boolean: true if key represents this user or one of the user's pages.
    """
    if super(FacebookAuth, self).is_authority_for(key):
      return True

    # pages_json is optional, so it may be unset
    pages = json_loads(self.pages_json) if self.pages_json else []
    return any(key == ndb.Key(FacebookAuth, page['id'])
               for page in pages if page.get('id'))
Example #29
0
 class SomeKind(ndb.Model):
     """ndb model with an integer property ``foo`` and a string property
     ``bar``."""
     foo = ndb.IntegerProperty()
     bar = ndb.StringProperty()
Example #30
0
class Job(Model):
    """Definition of a job type used by the bots."""

    VALID_NAME_REGEX = NAME_CHECK_REGEX

    # Job type name.
    name = ndb.StringProperty()

    # Job environment string.
    environment_string = ndb.TextProperty()

    # The platform that this job can run on.
    platform = ndb.StringProperty()

    # Blobstore key of the custom binary for this job.
    custom_binary_key = ndb.StringProperty()

    # Filename for the custom binary.
    custom_binary_filename = ndb.StringProperty()

    # Revision of the custom binary.
    custom_binary_revision = ndb.IntegerProperty()

    # Description of the job.
    description = ndb.TextProperty()

    # Template to use, if any.
    templates = ndb.StringProperty(repeated=True)

    # Project name.
    project = ndb.StringProperty()

    # Keywords is used for searching.
    keywords = ndb.StringProperty(repeated=True)

    # If this is set, this Job is for an external reproduction infrastructure. The
    # value here is the topic used for reproduction requests.
    external_reproduction_topic = ndb.StringProperty()

    # If this is set, this Job is for an external reproduction infrastructure. The
    # value here is the subscription used for receiving reproduction updates.
    external_updates_subscription = ndb.StringProperty()

    def is_external(self):
        """Return whether this job is external.

        A job is external when either the external reproduction topic or the
        external updates subscription is set.
        """
        return bool(self.external_reproduction_topic
                    or self.external_updates_subscription)

    def get_environment(self):
        """Return this job's environment as a dict, templates included.

        Without templates this is just the parsed environment string.
        Otherwise the environments of the templates that can be found are
        merged in listed order, and the job's own environment string is
        applied last so that it overrides the templates.
        """
        if self.templates:
            merged = {}
            for template_name in self.templates:
                template = JobTemplate.query(
                    JobTemplate.name == template_name).get()
                if template:
                    merged.update(
                        environment.parse_environment_definition(
                            template.environment_string))

            # The job's own definition wins over anything from a template.
            merged.update(
                environment.parse_environment_definition(
                    self.environment_string))
            return merged

        return environment.parse_environment_definition(
            self.environment_string)

    def get_environment_string(self):
        """Return the environment as 'KEY = VALUE' lines, templates included.

        Avoid using this if possible.
        """
        return ''.join(f'{key} = {value}\n'
                       for key, value in self.get_environment().items())

    def populate_indices(self):
        """Populate keywords for fast job searching.

        The keywords are the union of the tokens of the job name and of the
        project name.
        """
        name_tokens = search_tokenizer.tokenize(self.name)
        project_tokens = search_tokenizer.tokenize(self.project)
        self.keywords = list(name_tokens | project_tokens)

    def _pre_put_hook(self):
        """Pre-put hook: refresh the project name and the search keywords.

        The project comes from PROJECT_NAME in the job environment, falling
        back to the default project name.
        """
        job_environment = self.get_environment()
        fallback_project = utils.default_project_name()
        self.project = job_environment.get('PROJECT_NAME', fallback_project)
        self.populate_indices()