Beispiel #1
0
    def get(self, owner, repo, ver=None):
        """Serve the stored 'analysis' content of a library version as JSON.

        Args:
            owner: Library owner; lower-cased before the lookup.
            repo: Repository name; lower-cased before the lookup.
            ver: Version id. When None, the library's latest version is
                resolved asynchronously.

        Responds 404 when no version or no 'analysis' entity is found. When
        the analysis is not in the ready state the JSON body is still
        written, but with status 400.
        """
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        owner = owner.lower()
        repo = repo.lower()
        library_key = ndb.Key(Library, Library.id(owner, repo))
        if ver is None:
            ver = yield Library.latest_version_for_key_async(library_key)
        if ver is None:
            self.response.set_status(404)
            return
        version_key = ndb.Key(Library, library_key.id(), Version, ver)
        # This handler is already a generator-based tasklet, so yield the
        # async lookup instead of blocking on the synchronous one.
        analysis = yield Content.get_by_id_async(
            'analysis',
            parent=version_key,
            read_policy=ndb.EVENTUAL_CONSISTENCY)

        if analysis is None:
            self.response.set_status(404)
            return

        result = {'status': analysis.status}
        if analysis.status == Status.ready:
            result['content'] = json.loads(analysis.content)
        elif analysis.status == Status.error:
            result['error'] = analysis.error

        if analysis.status != Status.ready:
            self.response.set_status(400)

        self.response.headers['Content-Type'] = 'application/json'
        self.response.write(json.dumps(result))
Beispiel #2
0
  def get(self, owner, repo, version=None):
    """Write brief metadata for each resolvable bower dependency as JSON.

    Args:
      owner: Library owner.
      repo: Library repository name.
      version: Version id. When None, the library's default version is used.

    Responds 404 when no default version exists or the version has no 'bower'
    content. Otherwise writes {'results': [...], 'count': N}. Dependencies
    that cannot be parsed, that have no matching version, or whose brief
    metadata is None are left out.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'

    library_key = ndb.Key(Library, Library.id(owner, repo))

    if version is None:
      version = yield Library.default_version_for_key_async(library_key)
      if version is None:
        self.response.set_status(404)
        return

    version_key = ndb.Key(Library, library_key.id(), Version, version)

    bower = yield Content.get_by_id_async('bower', parent=version_key)
    if bower is None:
      self.response.set_status(404)
      return

    bower_json = bower.get_json()
    bower_dependencies = bower_json.get('dependencies', {})

    def matches(candidate, spec):
      try:
        return versiontag.match(candidate, spec)
      except ValueError:
        # FIXME: What other cases do we need to support here?
        return False

    # Kick off every version lookup before waiting on any of them. The same
    # lower-cased key is used for both the version lookup and the metadata
    # lookup so the two can never disagree.
    pending = []
    for spec in bower_dependencies.values():
      dependency = Dependency.from_string(spec)
      if dependency is None:
        continue
      dependency_library_key = ndb.Key(
          Library, Library.id(dependency.owner.lower(), dependency.repo.lower()))
      pending.append((dependency, dependency_library_key,
                      Library.versions_for_key_async(dependency_library_key)))

    dependency_futures = []
    for dependency, dependency_library_key, versions_future in pending:
      versions = yield versions_future
      # Pick the last matching entry by scanning from the end, without
      # mutating the list handed back by the future.
      selected = next(
          (candidate for candidate in reversed(versions) if matches(candidate, dependency.version)),
          None)
      if selected is not None:
        dependency_futures.append(LibraryMetadata.brief_async(dependency_library_key, selected))

    results = []
    for future in dependency_futures:
      dependency_result = yield future
      if dependency_result is not None:
        results.append(dependency_result)

    result = {
        'results': results,
        'count': len(results),
    }

    self.response.write(json.dumps(result))
Beispiel #3
0
  def update_metadata(self):
    """Refresh repo metadata, contributors and participation stats from GitHub.

    Each of the three requests sends the stored ETag. A 200 response is
    validated as JSON, stored on self.library together with the new ETag and
    timestamp, and marks the library dirty. A 304 leaves stored data as is.

    Returns:
      The result of self.error(...) when a payload is not valid JSON, or of
      self.retry(...) on an unexpected status code; otherwise None.

    Raises:
      RequestAborted: when the repo was renamed (the library is deleted and
        an ensure task is queued for the new name) or no longer exists (the
        library is deleted).
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
      try:
        metadata = json.loads(response.content)
      except ValueError:
        return self.error("could not parse metadata")

      repo = metadata.get('name', '').lower()
      owner = metadata.get('owner', {}).get('login', '').lower()
      if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
        logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        task_url = util.ensure_library_task(owner, repo)
        util.new_task(task_url, target='manage')
        # Exceptions do not interpolate extra args the way logging does, so
        # format the message before raising.
        raise RequestAborted('repo has been renamed to %s' % Library.id(owner, repo))

      self.library.metadata = response.content
      self.library.metadata_etag = response.headers.get('ETag', None)
      self.library.metadata_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 404:
      logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
      delete_library(self.library.key)
      raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
      return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
      try:
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse contributors")
      self.library.contributors = response.content
      self.library.contributors_etag = response.headers.get('ETag', None)
      self.library.contributors_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code != 304:
      return self.retry('could not update contributors (%d)' % response.status_code)

    # The path segment must not carry trailing whitespace.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
      try:
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse stats/participation")
      self.library.participation = response.content
      self.library.participation_etag = response.headers.get('ETag', None)
      self.library.participation_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 202:
      # GitHub is "computing" the data. We'll try again next update cycle.
      # TODO: Alternatively we could retry this task
      pass
    elif response.status_code != 304:
      return self.retry('could not update stats/participation (%d)' % response.status_code)
Beispiel #4
0
 def init_library(self, owner, repo, create=True):
   """Load (or create) the Library entity for owner/repo onto self.

   Args:
     owner: Library owner; stored lower-cased as self.owner.
     repo: Repository name; stored lower-cased as self.repo.
     create: When True the entity is fetched with get_or_insert and
       self.is_new is set. When False it is only looked up, so self.library
       may be None.

   Raises:
     RequestAborted: if the loaded library has status Status.suppressed.
   """
   self.owner = owner.lower()
   self.repo = repo.lower()
   # Build the id from the normalized names so it agrees with every later
   # Library.id(self.owner, self.repo) call.
   library_id = Library.id(self.owner, self.repo)
   if create:
     self.library = Library.get_or_insert(library_id)
     self.is_new = self.library.metadata is None and self.library.error is None
   else:
     self.library = Library.get_by_id(library_id)
   # get_by_id returns None for a missing entity; leave that for the caller
   # to handle rather than failing on the attribute access.
   if self.library is not None and self.library.status == Status.suppressed:
     raise RequestAborted('library is suppressed')
Beispiel #5
0
 def init_library(self, owner, repo, create=True):
     """Load (or create) the Library entity for owner/repo onto self.

     Args:
         owner: Library owner; stored lower-cased as self.owner.
         repo: Repository name; stored lower-cased as self.repo.
         create: When True the entity is fetched with get_or_insert and
             self.is_new is set. When False it is only looked up, so
             self.library may be None.

     Raises:
         RequestAborted: if the loaded library has status Status.suppressed.
     """
     self.owner = owner.lower()
     self.repo = repo.lower()
     # Build the id from the normalized names so it agrees with every later
     # Library.id(self.owner, self.repo) call.
     library_id = Library.id(self.owner, self.repo)
     if create:
         self.library = Library.get_or_insert(library_id)
         self.is_new = self.library.metadata is None and self.library.error is None
     else:
         self.library = Library.get_by_id(library_id)
     # get_by_id returns None for a missing entity; leave that for the
     # caller to handle rather than failing on the attribute access.
     if self.library is not None and self.library.status == Status.suppressed:
         raise RequestAborted('library is suppressed')
Beispiel #6
0
  def init_library(self, scope, package, create=True):
    """Load (or create) the Library entity for scope/package onto self.

    Args:
      scope: Package scope; stored lower-cased as self.scope.
      package: Package name; stored lower-cased as self.package.
      create: When True the entity is fetched with get_or_insert and
        self.is_new is set. When False it is only looked up, so self.library
        may be None.

    Raises:
      RequestAborted: if the loaded library has status Status.suppressed.
    """
    self.scope = scope.lower()
    self.package = package.lower()
    library_id = Library.id(self.scope, self.package)

    if create:
      self.library = Library.get_or_insert(library_id)
      self.is_new = self.library.metadata is None and self.library.error is None
    else:
      self.library = Library.get_by_id(library_id)
    # get_by_id returns None for a missing entity; leave that for the caller
    # to handle rather than failing on the attribute access.
    if self.library is not None and self.library.status == Status.suppressed:
      raise RequestAborted('library is suppressed')
Beispiel #7
0
  def init_library(self, scope, package, create=True):
    """Load (or create) the Library entity for scope/package onto self.

    Args:
      scope: Package scope; stored lower-cased as self.scope.
      package: Package name; stored lower-cased as self.package.
      create: When True the entity is fetched with get_or_insert and
        self.is_new is set. When False it is only looked up, so self.library
        may be None.

    Raises:
      RequestAborted: if the loaded library has status Status.suppressed.
    """
    self.scope = scope.lower()
    self.package = package.lower()
    library_id = Library.id(self.scope, self.package)

    if create:
      self.library = Library.get_or_insert(library_id)
      self.is_new = self.library.metadata is None and self.library.error is None
    else:
      self.library = Library.get_by_id(library_id)
    # get_by_id returns None for a missing entity; leave that for the caller
    # to handle rather than failing on the attribute access.
    if self.library is not None and self.library.status == Status.suppressed:
      raise RequestAborted('library is suppressed')
Beispiel #8
0
    def handle_get(self, owner, repo, latest=False):
        """Trigger analysis for a library's versions.

        With latest=True only the library's default version is analyzed (if
        one exists and its Version entity can be loaded). Otherwise every
        Version with status ready under the library is analyzed. Responds 404
        when self.library is None after init_library.
        """
        self.init_library(owner, repo)
        if self.library is None:
            self.response.set_status(404)
            self.response.write(
                'could not find library: %s' % Library.id(owner, repo))
            return

        if not latest:
            ready_versions = Version.query(
                Version.status == Status.ready,
                ancestor=self.library.key).fetch()
            for ready in ready_versions:
                self.trigger_analysis(
                    ready.key.id(), ready.sha, transactional=False)
            return

        default_future = Library.default_version_for_key_async(
            self.library.key)
        version_id = default_future.get_result()
        if not version_id:
            return

        version = Version.get_by_id(version_id, parent=self.library.key)
        if version is None:
            return
        self.trigger_analysis(version_id, version.sha, transactional=False)
Beispiel #9
0
  def update_readme(self, is_npm_package):
    """Fetch the version's readme and store it as markdown and rendered HTML.

    Args:
      is_npm_package: When true the readme is fetched through util.unpkg_get,
        using the 'readmeFilename' from the library's registry metadata
        (default 'README.md'). Otherwise it is fetched from the GitHub
        'readme' endpoint at self.sha and base64-decoded.

    Returns:
      The result of self.retry(...) when a fetch or the markdown conversion
      returns an unexpected status, the result of self.error(...) when the
      readme cannot be stored, otherwise None. A 404 on the fetch means there
      is no readme and nothing is stored.
    """
    if is_npm_package:
      # Load registry metadata to fetch readme path.
      library = Library.get_by_id(Library.id(self.owner, self.repo))
      registry_json = library.registry_metadata if library is not None else None
      # Fall back to an empty dict so a library without registry metadata
      # still resolves to the default readme path instead of crashing.
      registry_metadata = json.loads(registry_json) if registry_json else {}
      readme_path = registry_metadata.get('readmeFilename', 'README.md')
      response = util.unpkg_get(self.owner, self.repo, self.version, readme_path)

      # Only a successful response body is a readme; never store an error page.
      if response.status_code == 200:
        readme = response.content
      elif response.status_code == 404:
        readme = None
      else:
        return self.retry('error fetching readme (%d)' % response.status_code)
    else:
      # Load readme from GitHub endpoint.
      response = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})

      if response.status_code == 200:
        readme = base64.b64decode(json.loads(response.content)['content'])
      elif response.status_code == 404:
        readme = None
      else:
        return self.retry('error fetching readme (%d)' % response.status_code)

    if readme is not None:
      # Store the raw readme markdown.
      try:
        Content(parent=self.version_key, id='readme', content=readme,
                status=Status.ready, etag=response.headers.get('ETag', None)).put()
      except db.BadValueError:
        return self.error("Could not store README.md as a utf-8 string", ErrorCodes.Version_utf)

      # Convert markdown to HTML and store the result.
      response = util.github_markdown(readme)
      if response.status_code == 200:
        Content(parent=self.version_key, id='readme.html', content=response.content,
                status=Status.ready, etag=response.headers.get('ETag', None)).put()
      else:
        return self.retry('error converting readme to markdown (%d)' % response.status_code)
Beispiel #10
0
  def update_readme(self, is_npm_package):
    """Fetch the version's readme and store it as markdown and rendered HTML.

    Args:
      is_npm_package: When true the readme is fetched through util.unpkg_get,
        using the 'readmeFilename' from the library's registry metadata
        (default 'README.md'). Otherwise it is fetched from the GitHub
        'readme' endpoint at self.sha and base64-decoded.

    Returns:
      The result of self.retry(...) when a fetch or the markdown conversion
      returns an unexpected status, the result of self.error(...) when the
      readme cannot be stored, otherwise None. A 404 on the fetch means there
      is no readme and nothing is stored.
    """
    if is_npm_package:
      # Load registry metadata to fetch readme path.
      library = Library.get_by_id(Library.id(self.owner, self.repo))
      registry_json = library.registry_metadata if library is not None else None
      # Fall back to an empty dict so a library without registry metadata
      # still resolves to the default readme path instead of crashing.
      registry_metadata = json.loads(registry_json) if registry_json else {}
      readme_path = registry_metadata.get('readmeFilename', 'README.md')
      response = util.unpkg_get(self.owner, self.repo, self.version, readme_path)

      # Only a successful response body is a readme; never store an error page.
      if response.status_code == 200:
        readme = response.content
      elif response.status_code == 404:
        readme = None
      else:
        return self.retry('error fetching readme (%d)' % response.status_code)
    else:
      # Load readme from GitHub endpoint.
      response = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})

      if response.status_code == 200:
        readme = base64.b64decode(json.loads(response.content)['content'])
      elif response.status_code == 404:
        readme = None
      else:
        return self.retry('error fetching readme (%d)' % response.status_code)

    if readme is not None:
      # Store the raw readme markdown.
      try:
        Content(parent=self.version_key, id='readme', content=readme,
                status=Status.ready, etag=response.headers.get('ETag', None)).put()
      except db.BadValueError:
        return self.error("Could not store README.md as a utf-8 string", ErrorCodes.Version_utf)

      # Convert markdown to HTML and store the result.
      response = util.github_markdown(readme)
      if response.status_code == 200:
        Content(parent=self.version_key, id='readme.html', content=response.content,
                status=Status.ready, etag=response.headers.get('ETag', None)).put()
      else:
        return self.retry('error converting readme to markdown (%d)' % response.status_code)
Beispiel #11
0
  def get(self, terms):
    """Run a search against the 'repo' index and write the outcome as JSON.

    Query parameters read from the request:
      noscore: if present, disables match scoring.
      noresults: if present, omits results (and forces a count, no scoring).
      count: if present, includes the (adjusted) number of matches.
      cursor: web-safe cursor string from a previous response.
      limit: page size, capped at 20.

    Responds 400 when limit is not a positive integer, when the cursor is
    malformed, or when the query string is rejected by the search service.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    scoring = self.request.get('noscore', None) is None
    include_results = self.request.get('noresults', None) is None
    include_count = self.request.get('count', None) is not None
    request_cursor = self.request.get('cursor', None)

    if not include_results:
      scoring = False
      include_count = True
    try:
      limit = min(20, int(self.request.get('limit', 20)))
    except ValueError:
      self.response.set_status(400)
      return
    if limit < 1:
      # QueryOptions rejects non-positive limits; report a client error
      # instead of letting that surface as a server error.
      self.response.set_status(400)
      return
    index = search.Index('repo')
    try:
      cursor = search.Cursor(web_safe_string=request_cursor)
    except ValueError:
      # Malformed client-supplied cursor string.
      self.response.set_status(400)
      return
    try:
      # Accuracy refers to accurate till n results.
      accuracy = 2000 if include_count else None
      sort_options = search.SortOptions(match_scorer=search.MatchScorer()) if scoring else None
      query_options = search.QueryOptions(limit=limit, number_found_accuracy=accuracy, sort_options=sort_options, cursor=cursor)
      search_results = index.search(search.Query(query_string=terms, options=query_options))
      cursor = search_results.cursor
    except search.QueryError:
      self.response.set_status(400)
      self.response.write('bad query')
      return

    count = search_results.number_found
    if include_results:
      result_futures = []
      for hit in search_results.results:
        (owner, repo) = hit.doc_id.split('/')
        version = None
        for field in hit.fields:
          if field.name == 'version':
            version = field.value
            break
        library_key = ndb.Key(Library, Library.id(owner, repo))
        result_futures.append(LibraryMetadata.brief_async(library_key, version, assume_latest=True))
      results = []
      for future in result_futures:
        brief = yield future
        if brief is None:
          # Fixup count when we skip over incomplete entries.
          count = count - 1
        else:
          results.append(brief)

    result = {
        'cursor': cursor.web_safe_string if cursor and include_results else None,
    }
    if include_count:
      result['count'] = count
    if include_results:
      result['results'] = results

    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(result))
Beispiel #12
0
  def get(self, owner, repo, version=None):
    """Write brief metadata for every collection referencing this version.

    When version is None the library's default version is used; a 404 is
    returned if there is none. The JSON body is {'results': [...],
    'count': N}, with None briefs skipped.
    """
    headers = self.response.headers
    headers['Access-Control-Allow-Origin'] = '*'
    headers['Content-Type'] = 'application/json'

    library_key = ndb.Key(Library, Library.id(owner, repo))

    if version is None:
      version = yield Library.default_version_for_key_async(library_key)
      if version is None:
        self.response.set_status(404)
        return

    version_key = ndb.Key(Library, library_key.id(), Version, version)
    referencing_versions = yield Version.collections_for_key_async(version_key)

    # Start every brief lookup first, then collect them in the same order.
    pending_briefs = [
        LibraryMetadata.brief_async(referencing.key.parent(), referencing.key.id())
        for referencing in referencing_versions
    ]

    collections = []
    for pending in pending_briefs:
      brief = yield pending
      if brief is None:
        continue
      collections.append(brief)

    payload = {'results': collections, 'count': len(collections)}
    self.response.write(json.dumps(payload))
Beispiel #13
0
    def handle_post(self):
        """Store an analysis result delivered in the request body.

        The body is URL-unquoted, stripped of trailing '=' and parsed as
        JSON. Its 'message' carries base64 'data' plus 'attributes' (owner,
        repo, version and an optional error). Nothing happens when the
        version has no 'analysis' Content entity. When the version is the
        library's default version, an update-indexes task is queued.
        """
        raw_body = urllib.unquote(self.request.body).rstrip('=')
        message = json.loads(raw_body)['message']
        payload = base64.b64decode(str(message['data']))

        attributes = message['attributes']
        owner = attributes['owner']
        repo = attributes['repo']
        version = attributes['version']
        error = attributes.get('error', None)

        version_key = ndb.Key(
            Library, Library.id(owner, repo), Version, version)

        content = Content.get_by_id('analysis', parent=version_key)
        if content is None:
            return

        # An empty payload clears the stored JSON.
        content.set_json(None if payload == '' else json.loads(payload))

        # error is None exactly in the success case, so it can be stored as is.
        content.status = Status.ready if error is None else Status.error
        content.error = error
        content.put()

        default_version = Library.default_version_for_key_async(
            version_key.parent()).get_result()
        if version_key.id() == default_version:
            util.new_task(util.update_indexes_task(owner, repo),
                          target='manage')
Beispiel #14
0
  def update_search_index(self, owner, repo, version_key, library, bower):
    """Build the search document for a library and put it in the 'repo' index.

    Fields come from the GitHub metadata, the bower dict, the npm registry
    metadata (when present) and, if a ready 'analysis' Content exists for
    version_key, from its element and behavior names. The document rank is
    the number of seconds between 2016-01-01 and library.updated.
    """
    metadata = json.loads(library.metadata)
    if library.registry_metadata:
      registry_metadata = json.loads(library.registry_metadata)
    else:
      registry_metadata = None

    if registry_metadata:
      npm_description = registry_metadata.get('description', '')
      npm_keywords = registry_metadata.get('keywords', [])
    else:
      npm_description = ''
      npm_keywords = []

    prefix_words = (util.safe_split_strip(metadata.get('description')) +
                    util.safe_split_strip(bower.get('description')) +
                    util.safe_split_strip(repo))

    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='npm_description', value=npm_description),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='npm_keywords', value=' '.join(npm_keywords)),
        search.TextField(name='prefix_matches',
                         value=' '.join(util.generate_prefixes_from_list(prefix_words))),
    ]

    # Generate weighting field
    weights = [(repo, 10)]

    def add_names(field_name, names):
      # Index a non-empty list of names and give it weight 5.
      if names != []:
        joined = ' '.join(names)
        fields.append(search.TextField(name=field_name, value=joined))
        weights.append((joined, 5))

    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
      data = analysis.get_json()
      if data.get('analyzerData', None) is not None:
        # Use analyzer data for search index
        analyzer_data = data.get('analyzerData', {})
        add_names('element', [
            element.get('tagname', '') or element.get('classname', '')
            for element in analyzer_data.get('elements', [])
        ])
        behavior_objects = analyzer_data.get('metadata', {}).get('polymer', {}).get('behaviors', [])
        add_names('behavior', [behavior.get('name', '') for behavior in behavior_objects])
      else:
        # Use hydrolysis data for search index
        add_names('element', data.get('elementsByTagName', {}).keys())
        add_names('behavior', data.get('behaviorsByName', {}).keys())

    # Repeat each value `weight` times so it counts that often in the text.
    weighted = []
    for value, weight in weights:
      weighted.extend([value] * weight)
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))

    epoch = datetime.datetime(2016, 1, 1)
    rank = int((library.updated - epoch).total_seconds())
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=rank)
    search.Index('repo').put(document)
Beispiel #15
0
  def handle_get(self, owner, repo, version):
    """Ingest one version: refresh its readme and bower data, then mark it ready.

    Returns the result of self.error(...) when the Version entity does not
    exist; otherwise None.
    """
    self.owner, self.repo, self.version = owner, repo, version

    parent_key = ndb.Key(Library, Library.id(owner, repo))
    version_object = Version.get_by_id(version, parent=parent_key)
    self.version_object = version_object
    if version_object is None:
      missing = '%s/%s' % (Library.id(owner, repo), version)
      return self.error('Version entity does not exist: ' + missing)

    self.version_key = version_object.key
    self.sha = version_object.sha

    self.update_readme()
    self.update_bower()
    self.set_ready()
Beispiel #16
0
 def handle_get(self, owner, repo, version):
   """Delete a version and everything stored under it.

   If VersionCache.update reports a truthy result for the parent library,
   an update-indexes task is queued.
   """
   # FIXME: Make deletion transactional with check on library that tag is excluded.
   library_id = Library.id(owner, repo)
   version_key = ndb.Key(Library, library_id, Version, version)
   descendant_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
   ndb.delete_multi(descendant_keys)

   cache_changed = VersionCache.update(version_key.parent())
   if not cache_changed:
     return
   util.new_task(util.update_indexes_task(owner, repo), target='manage')
Beispiel #17
0
 def handle_get(self, scope, package, version):
   """Delete a version and everything stored under it.

   If VersionCache.update reports a truthy result for the parent library,
   an update-indexes task is queued.
   """
   # FIXME: Make deletion transactional with check on library that tag is excluded.
   library_id = Library.id(scope, package)
   version_key = ndb.Key(Library, library_id, Version, version)
   descendant_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
   ndb.delete_multi(descendant_keys)

   cache_changed = VersionCache.update(version_key.parent())
   if not cache_changed:
     return
   util.new_task(util.update_indexes_task(scope, package), target='manage')
Beispiel #18
0
  def handle_get(self, owner, repo, scope, package):
    """Record that library owner/repo now lives at npm package scope/package.

    Does nothing when the owner/repo library does not exist. Otherwise stores
    the npm package name on it, removes it from the 'repo' search index and,
    if the npm library exists, flags that one as migrated from bower.
    """
    source_id = Library.id(owner, repo)
    library = Library.get_by_id(source_id)
    if library is None:
      return

    library.npm_package = '/'.join((scope, package))
    library.put()

    # Remove from search indexes.
    search.Index('repo').delete(source_id)

    npm_library = Library.get_by_id(Library.id(scope, package))
    if npm_library is None:
      return
    npm_library.migrated_from_bower = True
    npm_library.put()
Beispiel #19
0
  def test_ensure_when_present(self):
    """Ensuring a library that already exists succeeds and queues no tasks."""
    Library(id=Library.id('owner', 'repo')).put()

    task_url = util.ensure_library_task('owner', 'repo')
    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get(task_url, headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([], queued_urls)
Beispiel #20
0
  def test_ensure_when_present(self):
    """Ensuring a library that already exists succeeds and queues no tasks."""
    Library(id=Library.id('owner', 'repo')).put()

    task_url = util.ensure_library_task('owner', 'repo')
    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get(task_url, headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([], queued_urls)
Beispiel #21
0
  def handle_get(self, owner, repo, scope, package):
    """Record that library owner/repo now lives at npm package scope/package.

    Does nothing when the owner/repo library does not exist. Otherwise stores
    the npm package name on it, removes it from the 'repo' search index and,
    if the npm library exists, flags that one as migrated from bower.
    """
    source_id = Library.id(owner, repo)
    library = Library.get_by_id(source_id)
    if library is None:
      return

    library.npm_package = '/'.join((scope, package))
    library.put()

    # Remove from search indexes.
    search.Index('repo').delete(source_id)

    npm_library = Library.get_by_id(Library.id(scope, package))
    if npm_library is None:
      return
    npm_library.migrated_from_bower = True
    npm_library.put()
Beispiel #22
0
    def handle_get(self, owner, repo, version):
        """Ingest one version: refresh readme and bower data, mark it ready.

        Returns the result of self.error(...) when the Version entity does
        not exist; otherwise None.
        """
        self.owner, self.repo, self.version = owner, repo, version

        parent_key = ndb.Key(Library, Library.id(owner, repo))
        version_object = Version.get_by_id(version, parent=parent_key)
        self.version_object = version_object
        if version_object is None:
            missing = '%s/%s' % (Library.id(owner, repo), version)
            return self.error('Version entity does not exist: ' + missing)

        self.version_key = version_object.key
        self.sha = version_object.sha

        self.update_readme()
        self.update_bower()
        self.set_ready()
Beispiel #23
0
    def get(self, owner, repo, ver=None):
        """Serve the stored 'analysis' content of a library version as JSON.

        Args:
            owner: Library owner; lower-cased before the lookup.
            repo: Repository name; lower-cased before the lookup.
            ver: Version id. When None, the library's default version is
                resolved asynchronously.

        If the request carries a 'use_analyzer_data' parameter and the
        stored content has non-None 'analyzerData', that sub-document is
        returned under 'analysis'. Otherwise the content is returned under
        'content' with 'analyzerData' removed.

        Responds 404 when no version or no 'analysis' entity is found. When
        the analysis is not in the ready state the JSON body is still
        written, but with status 400.
        """
        use_analyzer_data = self.request.get('use_analyzer_data',
                                             None) is not None
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        owner = owner.lower()
        repo = repo.lower()
        library_key = ndb.Key(Library, Library.id(owner, repo))
        if ver is None:
            ver = yield Library.default_version_for_key_async(library_key)
        if ver is None:
            self.response.set_status(404)
            return
        version_key = ndb.Key(Library, library_key.id(), Version, ver)
        # This handler is already a generator-based tasklet, so yield the
        # async lookup instead of blocking on the synchronous one.
        analysis = yield Content.get_by_id_async(
            'analysis',
            parent=version_key,
            read_policy=ndb.EVENTUAL_CONSISTENCY)

        if analysis is None:
            self.response.set_status(404)
            return

        result = {'status': analysis.status}
        if analysis.status == Status.ready:
            content = analysis.get_json()

            has_analyzer_data = content.get('analyzerData', None) is not None

            if use_analyzer_data and has_analyzer_data:
                # Use the analyzer data fields
                result['analysis'] = content['analyzerData']
            else:
                # Use the hydrolysis fields and delete the analyzer ones
                if has_analyzer_data:
                    del content['analyzerData']
                result['content'] = content
        elif analysis.status == Status.error:
            result['error'] = analysis.error

        if analysis.status != Status.ready:
            self.response.set_status(400)

        self.response.headers['Content-Type'] = 'application/json'
        self.response.write(json.dumps(result))
Beispiel #24
0
  def handle_get(self, owner, repo):
    """Trigger analysis for every ready version of the library.

    Responds 404 when self.library is None after init_library.
    """
    self.init_library(owner, repo)
    if self.library is None:
      self.response.set_status(404)
      not_found = 'could not find library: %s' % Library.id(owner, repo)
      self.response.write(not_found)
      return

    ready_query = Version.query(Version.status == Status.ready, ancestor=self.library.key)
    for ready in ready_query.fetch():
      self.trigger_analysis(ready.key.id(), ready.sha, transactional=False)
Beispiel #25
0
    def handle_get(self, owner, repo):
        """Dump the library's search document: one line per field, then rank.

        Responds 404 when the 'repo' index has no document for the library.
        """
        document = search.Index('repo').get(Library.id(owner, repo))
        if document is None:
            self.response.set_status(404)
            return

        write = self.response.write
        for field in document.fields:
            write('%s: %s<br>' % (field.name, field.value))
        write('rank: %s<br>' % (document.rank))
Beispiel #26
0
  def handle_get(self, owner, repo):
    """Dump the library's search document: one line per field, then rank.

    Responds 404 when the 'repo' index has no document for the library.
    """
    document = search.Index('repo').get(Library.id(owner, repo))
    if document is None:
      self.response.set_status(404)
      return

    write = self.response.write
    for field in document.fields:
      write('%s: %s<br>' % (field.name, field.value))
    write('rank: %s<br>' % (document.rank))
Beispiel #27
0
  def update_collection_dependencies(self, collection_version_key, bower):
    """Register each parseable bower dependency as a member of the collection.

    For every dependency that Dependency.from_string can parse, a
    CollectionReference is ensured and an ensure-library task is queued.
    """
    for spec in bower.get('dependencies', {}).values():
      dep = Dependency.from_string(spec)
      if dep is not None:
        member_key = ndb.Key(Library, Library.id(dep.owner, dep.repo))
        CollectionReference.ensure(member_key, collection_version_key, semver=dep.version)

        ensure_url = util.ensure_library_task(dep.owner.lower(), dep.repo.lower())
        util.new_task(ensure_url, target='manage')
Beispiel #28
0
 def handle_get(self, owner, repo):
   """Update an existing library's metadata and versions, then mark it ready.

   Logs a warning and stops when the library is not found; stops silently
   when it has no spdx_identifier.
   """
   self.init_library(owner, repo, create=False)
   if self.library is None:
     logging.warning('Library not found: %s', Library.id(owner, repo))
     return

   # Can't update a library if it's not licensed correctly.
   licensed = self.library.spdx_identifier is not None
   if licensed:
     self.update_metadata()
     self.update_versions()
     self.set_ready()
Beispiel #29
0
    def update_search_index(self, owner, repo, version_key, library, bower):
        """Build the library's search document and put it in the 'repo' index.

        Fields come from the GitHub metadata, the bower dict and, if a ready
        'analysis' Content exists for version_key, from its element and
        behavior names. The document rank is the number of seconds between
        2016-01-01 and library.updated.
        """
        metadata = json.loads(library.metadata)

        prefix_words = (util.safe_split_strip(metadata.get('description')) +
                        util.safe_split_strip(bower.get('description')) +
                        util.safe_split_strip(repo))

        fields = [
            search.AtomField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.AtomField(name='kind', value=library.kind),
            search.AtomField(name='version', value=version_key.id()),
            search.TextField(name='github_description',
                             value=metadata.get('description', '')),
            search.TextField(name='bower_description',
                             value=bower.get('description', '')),
            search.TextField(name='bower_keywords',
                             value=' '.join(bower.get('keywords', []))),
            search.TextField(
                name='prefix_matches',
                value=' '.join(
                    util.generate_prefixes_from_list(prefix_words))),
        ]

        # Generate weighting field
        weights = [(repo, 10)]

        def add_names(field_name, names):
            # Index a non-empty list of names and give it weight 5.
            if names != []:
                joined = ' '.join(names)
                fields.append(search.TextField(name=field_name, value=joined))
                weights.append((joined, 5))

        stored = Content.get_by_id('analysis', parent=version_key)
        if stored is not None and stored.status == Status.ready:
            parsed = json.loads(stored.content)
            add_names('element', parsed.get('elementsByTagName', {}).keys())
            add_names('behavior', parsed.get('behaviorsByName', {}).keys())

        # Repeat each value `weight` times so it counts that often.
        weighted = []
        for value, weight in weights:
            weighted.extend([value] * weight)
        fields.append(
            search.TextField(name='weighted_fields', value=' '.join(weighted)))

        epoch = datetime.datetime(2016, 1, 1)
        rank = int((library.updated - epoch).total_seconds())
        document = search.Document(doc_id=Library.id(owner, repo),
                                   fields=fields,
                                   rank=rank)
        search.Index('repo').put(document)
Beispiel #30
0
 def handle_get(self, owner, repo):
     """Update an existing library's metadata and versions, then mark it ready.

     Logs a warning and stops when the library is not found; stops silently
     when it has no spdx_identifier.
     """
     self.init_library(owner, repo, create=False)
     if self.library is None:
         logging.warning('Library not found: %s', Library.id(owner, repo))
         return

     # Can't update a library if it's not licensed correctly.
     licensed = self.library.spdx_identifier is not None
     if licensed:
         self.update_metadata()
         self.update_versions()
         self.set_ready()
Beispiel #31
0
  def handle_get(self, owner, repo):
    """Rebuild the search index entry for the library's default version.

    Returns the result of self.error(...) when the library has no default
    version, and of self.retry(...) when the default version changed while
    the indexes were being updated; otherwise None.
    """
    library_id = Library.id(owner, repo)
    library_key = ndb.Key(Library, library_id)
    version = Library.default_version_for_key_async(library_key).get_result()
    if version is None:
      return self.error('no versions for %s' % library_id)

    version_key = ndb.Key(Version, version, parent=library_key)
    bower_object = ndb.Key(Content, 'bower', parent=version_key).get()
    if bower_object is None:
      bower = {}
    else:
      bower = bower_object.get_json()
    library = library_key.get()

    self.update_search_index(owner, repo, version_key, library, bower)

    if library.kind == 'collection':
      self.update_collection_dependencies(version_key, bower)

    # Re-read the default version to detect a change made during indexing.
    current_default = Library.default_version_for_key_async(library_key).get_result()
    version_changed = current_default is not None and current_default != version
    if version_changed:
      return self.retry('default version changed while updating indexes')
Beispiel #32
0
  def handle_get(self, owner, repo):
    """Update the search index for the library's default version.

    The bower document is decoded from the stored Content entity's
    ``content`` with ``json.loads``. Collection libraries also get their
    dependency references updated.

    Returns:
      The result of ``self.error(...)`` when the library has no default
      version, the result of ``self.retry(...)`` when the default version
      changed while the indexes were being updated, otherwise None.
    """
    library_id = Library.id(owner, repo)
    library_key = ndb.Key(Library, library_id)
    version = Library.default_version_for_key_async(library_key).get_result()
    if version is None:
      return self.error('no versions for %s' % library_id)

    bower_key = ndb.Key(Library, library_id, Version, version, Content, 'bower')
    bower_object = bower_key.get()
    if bower_object is None:
      # No bower content stored: index with an empty bower document.
      bower = {}
    else:
      bower = json.loads(bower_object.content)
    version_key = bower_key.parent()
    library = version_key.parent().get()

    self.update_search_index(owner, repo, version_key, library, bower)
    if library.kind == 'collection':
      self.update_collection_dependencies(version_key, bower)

    # Re-read the default version; if it moved in the meantime, ask for a retry.
    current_default = Library.default_version_for_key_async(library_key).get_result()
    version_changed = current_default is not None and current_default != version
    if version_changed:
      return self.retry('default version changed while updating indexes')
Beispiel #33
0
  def update_collection_dependencies(self, collection_version_key, bower):
    """Register a collection's bower dependencies as collection references.

    For every entry of ``bower['dependencies']`` that
    ``Dependency.from_string`` can parse, a CollectionReference is ensured
    between the dependency's library key and ``collection_version_key``,
    and an ensure-library task is created for the dependency.
    Entries that parse to None are skipped.
    """
    for spec in bower.get('dependencies', {}).values():
      dep = Dependency.from_string(spec)
      if dep is not None:
        dep_library_key = ndb.Key(Library, Library.id(dep.owner, dep.repo))
        CollectionReference.ensure(dep_library_key, collection_version_key, semver=dep.version)

        ensure_url = util.ensure_library_task(dep.owner.lower(), dep.repo.lower())
        util.new_task(ensure_url, target='manage')
Beispiel #34
0
  def test_update_all(self):
    """/manage/update-all creates one update task per library and per author."""
    library_key = Library(id='owner/repo').put()
    author_key = Author(id='owner').put()

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get('/manage/update-all', headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    expected_urls = [
        util.update_library_task(library_key.id()),
        util.update_author_task(author_key.id()),
    ]
    self.assertEqual(expected_urls, queued_urls)
Beispiel #35
0
  def test_update_all(self):
    """/manage/update-all creates one update task per library and per author."""
    library_key = Library(id='owner/repo').put()
    author_key = Author(id='owner').put()

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get('/manage/update-all', headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    expected_urls = [
        util.update_library_task(library_key.id()),
        util.update_author_task(author_key.id()),
    ]
    self.assertEqual(expected_urls, queued_urls)
Beispiel #36
0
    def handle_get(self, owner, repo):
        """Trigger analysis for every ready version of a library.

        Responds with 404 and a short message when the library cannot be
        initialised.
        """
        self.init_library(owner, repo)
        if self.library is None:
            not_found = 'could not find library: %s' % Library.id(owner, repo)
            self.response.set_status(404)
            self.response.write(not_found)
            return

        ready_versions = Version.query(Version.status == Status.ready,
                                       ancestor=self.library.key)
        for ready in ready_versions.fetch():
            self.trigger_analysis(ready.key.id(), ready.sha,
                                  transactional=False)
Beispiel #37
0
  def get(self, owner, repo, ver=None):
    """Write the library's full metadata as JSON.

    Owner and repo are lower-cased before the lookup. Responds 404 when
    ``LibraryMetadata.full_async`` yields None, and 400 (still with the
    JSON body) when the result's status is not ``Status.ready``.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'

    library_id = Library.id(owner.lower(), repo.lower())
    result = yield LibraryMetadata.full_async(ndb.Key(Library, library_id), ver)
    if result is None:
      self.response.set_status(404)
      return

    self.response.headers['Content-Type'] = 'application/json'
    if result['status'] != Status.ready:
      self.response.set_status(400)
    self.response.write(json.dumps(result))
Beispiel #38
0
  def handle_post(self):
    """Store an analysis result delivered in the request body.

    The body is URL-unquoted, stripped of trailing '=' and parsed as JSON;
    ``message.data`` is base64-decoded and ``message.attributes`` supplies
    owner, repo, version and an optional error. Nothing happens when no
    'analysis' Content entity exists for that version. When the version is
    the library's default version, an update-indexes task is created.
    """
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    library_id = Library.id(owner, repo)
    version_key = ndb.Key(Library, library_id, Version, version)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
      return

    size = len(data)
    if data == '':
      content.content = None
    elif size > 500000:
      # Max entity size is only 1MB.
      logging.error('content was too large: %d %s %s', size, library_id, version)
      error = 'content was too large: %d' % size
    else:
      content.content = data

    # An error (reported by the sender or set above) marks the content as failed.
    content.status = Status.ready if error is None else Status.error
    content.error = error

    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
      util.new_task(util.update_indexes_task(owner, repo), target='manage')
Beispiel #39
0
  def post(self):
    """Handle a GitHub webhook for opened or synchronized pull requests.

    Other events and actions are answered with 202 and ignored. For a
    registered origin repository, a 'success' status pointing at the
    preview URL is posted to GitHub for the head commit, and an
    ingest-preview task is created for the repository the pull request
    came from. Responds 400 when the origin repository has no Library
    entity and 500 when GitHub does not answer the status post with 201.
    """
    if self.request.headers.get('X-Github-Event') != 'pull_request':
      self.response.set_status(202) # Accepted
      self.response.write('Payload was not for a pull_request, aborting.')
      return

    payload = json.loads(self.request.body)
    if payload['action'] not in ('opened', 'synchronize'):
      self.response.set_status(202) # Accepted
      self.response.write('Payload was not opened or synchronize, aborting.')
      return

    # Original repo
    repository = payload['repository']
    origin_owner = repository['owner']['login']
    origin_repo = repository['name']
    origin_full_name = repository['full_name']
    # Repo where the pull request came from.
    head_repo = payload['pull_request']['head']['repo']
    pull_owner = head_repo['owner']['login']
    pull_repo = head_repo['name']

    library_key = ndb.Key(Library, Library.id(origin_owner, origin_repo))
    library = library_key.get(read_policy=ndb.EVENTUAL_CONSISTENCY)
    if library is None:
      logging.error('No library object found for %s', origin_full_name)
      self.response.set_status(400) # Bad request
      self.response.write('It does not seem like this repository was registered')
      return

    sha = payload['pull_request']['head']['sha']
    parsed_url = urlparse(self.request.url)
    preview_url = '%s://%s/preview/%s/%s/%s' % (parsed_url.scheme, parsed_url.netloc, pull_owner, pull_repo, sha)
    params = {
        'state': 'success',
        'target_url': preview_url,
        'description': 'Preview is ready!', # TODO: Don't lie
        'context': 'webcomponents/preview'
    }

    response = util.github_post('repos', origin_owner, origin_repo, 'statuses/%s' % sha, params, library.github_access_token)
    if response.status_code != 201:
      logging.error('Failed to set status on Github PR. Github returned %s:%s', response.status_code, response.content)
      self.response.set_status(500)
      self.response.write('Failed to set status on PR.')
      return

    task_params = {'commit': sha, 'url': payload['pull_request']['url']}
    util.new_task(util.ingest_preview_task(pull_owner, pull_repo), params=task_params, target='manage')
Beispiel #40
0
  def handle_get(self, scope, package, latest=False):
    """Trigger analysis for a library's versions.

    With ``latest`` set, only the library's default version is analysed
    (if it has one and its Version entity exists). Otherwise every version
    with ``Status.ready`` is analysed. Responds with 404 and a short
    message when the library cannot be initialised.
    """
    self.init_library(scope, package)
    if self.library is None:
      self.response.set_status(404)
      self.response.write('could not find library: %s' % Library.id(scope, package))
      return

    if not latest:
      ready_versions = Version.query(Version.status == Status.ready, ancestor=self.library.key).fetch()
      for ready in ready_versions:
        self.trigger_analysis(ready.key.id(), ready.sha, transactional=False)
      return

    version_id = Library.default_version_for_key_async(self.library.key).get_result()
    if not version_id:
      return
    version = Version.get_by_id(version_id, parent=self.library.key)
    if version is not None:
      self.trigger_analysis(version_id, version.sha, transactional=False)
Beispiel #41
0
  def get(self, owner, repo, ver, path):
    """Serve the stored ``page-<path>`` content of a library version.

    Responds with 404 and a short message when no such Content entity
    exists under the version key.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'

    # ndb.Key(...) only builds a key object and never returns None, so an
    # unknown repo/version can only be detected through the missing
    # Content entity below.
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)

    page = Content.get_by_id('page-' + path, parent=version_key, read_policy=ndb.EVENTUAL_CONSISTENCY)

    if page is None:
      self.response.set_status(404)
      self.response.write('Cannot find page %s' % path)
      return

    self.response.write(page.content)
Beispiel #42
0
  def update_search_index(self, owner, repo, version_key, library, bower):
    """Build the library's search document and put it into the 'repo' index.

    The document carries owner, repo, kind, version, the GitHub and bower
    descriptions, bower keywords and prefix matches. When a ready
    'analysis' Content entity exists for the version, its element and
    behavior names are indexed too. A 'weighted_fields' text repeats the
    repo name 10 times and each names string 5 times. The document rank is
    the number of seconds between 2016-01-01 and ``library.updated``.
    """
    metadata = json.loads(library.metadata)
    fields = [
        search.AtomField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
    ]
    prefix_words = (
        util.safe_split_strip(metadata.get('description')) +
        util.safe_split_strip(bower.get('description')) +
        util.safe_split_strip(repo))
    prefixes = util.generate_prefixes_from_list(prefix_words)
    fields.append(search.TextField(name='prefix_matches', value=' '.join(prefixes)))

    # Generate weighting field
    weights = [(repo, 10)]

    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
      analysis_data = json.loads(analysis.content)
      name_sources = (('element', 'elementsByTagName'), ('behavior', 'behaviorsByName'))
      for field_name, data_key in name_sources:
        names = analysis_data.get(data_key, {}).keys()
        if names:
          joined = ' '.join(names)
          fields.append(search.TextField(name=field_name, value=joined))
          weights.append((joined, 5))

    # Repeat each value `weight` times so heavier values dominate the text.
    weighted = [value for value, weight in weights for _ in range(0, weight)]
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))

    age = library.updated - datetime.datetime(2016, 1, 1)
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=int(age.total_seconds()))
    search.Index('repo').put(document)
Beispiel #43
0
    def update_search_index(self, owner, repo, version_key, library, bower):
        """Build the library's search document and put it into the 'repo' index.

        The document carries owner, repo, kind, version, the GitHub and
        bower descriptions, bower keywords and prefix matches. When a
        ready 'analysis' Content entity exists for the version, its
        element and behavior names are indexed too.
        """
        metadata = json.loads(library.metadata)
        fields = [
            search.AtomField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.AtomField(name='kind', value=library.kind),
            search.AtomField(name='version', value=version_key.id()),
            search.TextField(name='github_description',
                             value=metadata.get('description', '')),
            search.TextField(name='bower_description',
                             value=bower.get('description', '')),
            search.TextField(name='bower_keywords',
                             value=' '.join(bower.get('keywords', []))),
        ]
        # Prefixes come from the repo name (whole, and split on '_' / '-')
        # and from both descriptions.
        prefix_words = (
            [repo] + util.safesplit(metadata.get('description')) +
            util.safesplit(bower.get('description')) +
            repo.replace("_", " ").replace("-", " ").split())
        prefixes = util.generate_prefixes_from_list(prefix_words)
        fields.append(
            search.TextField(name='prefix_matches', value=' '.join(prefixes)))

        analysis = Content.get_by_id('analysis', parent=version_key)
        if analysis is not None and analysis.status == Status.ready:
            analysis_data = json.loads(analysis.content)
            name_sources = (('element', 'elementsByTagName'),
                            ('behavior', 'behaviorsByName'))
            for field_name, data_key in name_sources:
                names = analysis_data.get(data_key, {}).keys()
                if names:
                    fields.append(
                        search.TextField(name=field_name,
                                         value=' '.join(names)))

        document = search.Document(doc_id=Library.id(owner, repo),
                                   fields=fields)
        search.Index('repo').put(document)
Beispiel #44
0
  def handle_post(self):
    """Store an analysis result delivered in the request body.

    Oversized bodies and messages without attributes are dropped. The
    decoded ``message.data`` is stored through ``content.set_json`` on the
    version's 'analysis' Content entity, together with a ready/error
    status. When the version is the library's default version, an
    update-indexes task is created.
    """
    # Ignore payloads larger than 5 MB.
    # NOTE(review): 1048487 is not 1 MiB (1048576) - confirm the intended limit.
    max_body_size = 1048487 * 5
    if len(self.request.body) > max_body_size:
      return
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    if len(attributes) == 0:
      logging.error(message)
      return
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
      return
    # Empty data clears the stored JSON; anything else must parse as JSON.
    content.set_json(None if data == '' else json.loads(data))

    content.status = Status.ready if error is None else Status.error
    content.error = error

    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
      util.new_task(util.update_indexes_task(owner, repo), target='manage')
Beispiel #45
0
 def handle_get(self, owner, repo):
   """Delete the library keyed by the lower-cased id, logging to the plain-text response."""
   self.response.headers['Content-Type'] = 'text/plain'
   library_key = ndb.Key(Library, Library.id(owner, repo).lower())
   delete_library(library_key, response_for_logging=self.response)
Beispiel #46
0
 def handle_get(self, owner, repo):
   """Create an ingest task unless the library already exists with a non-shallow ingestion."""
   library = Library.get_by_id(Library.id(owner, repo))
   if library is not None and not library.shallow_ingestion:
     return
   util.new_task(util.ingest_library_task(owner, repo), target='manage')
Beispiel #47
0
 def handle_get(self, owner, repo):
     """Delete the library keyed by the lower-cased id, logging to the plain-text response."""
     self.response.headers['Content-Type'] = 'text/plain'
     library_key = ndb.Key(Library, Library.id(owner, repo).lower())
     delete_library(library_key, response_for_logging=self.response)
Beispiel #48
0
  def update_search_index(self, owner, repo, version_key, library, bower):
    """Build the library's search document and put it into the 'repo' index.

    The document carries owner, GitHub owner, repo, kind, version, the
    GitHub/bower/NPM descriptions and keywords, and prefix matches. When a
    ready 'analysis' Content entity exists for the version, element and
    behavior names are indexed too. They are taken from 'analyzerData'
    when present, otherwise from the hydrolysis keys 'elementsByTagName' /
    'behaviorsByName'. A 'weighted_fields' text repeats the repo name 10
    times and each names string 5 times. The document rank is the number
    of seconds between 2016-01-01 and ``library.updated``.
    """
    metadata = json.loads(library.metadata)
    if library.registry_metadata:
      registry_metadata = json.loads(library.registry_metadata)
    else:
      registry_metadata = None
    if registry_metadata:
      npm_description = registry_metadata.get('description', '')
      npm_keywords = registry_metadata.get('keywords', [])
    else:
      npm_description = ''
      npm_keywords = []

    fields = [
        search.AtomField(name='owner', value=owner),
        search.AtomField(name='github_owner', value=library.github_owner),
        search.TextField(name='repo', value=repo),
        search.AtomField(name='kind', value=library.kind),
        search.AtomField(name='version', value=version_key.id()),
        search.TextField(name='github_description', value=metadata.get('description', '')),
        search.TextField(name='bower_description', value=bower.get('description', '')),
        search.TextField(name='npm_description', value=npm_description),
        search.TextField(name='bower_keywords', value=' '.join(bower.get('keywords', []))),
        search.TextField(name='npm_keywords', value=' '.join(npm_keywords)),
    ]
    prefix_words = (
        util.safe_split_strip(metadata.get('description')) +
        util.safe_split_strip(bower.get('description')) +
        util.safe_split_strip(repo))
    prefixes = util.generate_prefixes_from_list(prefix_words)
    fields.append(search.TextField(name='prefix_matches', value=' '.join(prefixes)))

    # Generate weighting field
    weights = [(repo, 10)]

    def index_names(field_name, names):
      # Index a non-empty list of names and give the joined text weight 5.
      if names:
        joined = ' '.join(names)
        fields.append(search.TextField(name=field_name, value=joined))
        weights.append((joined, 5))

    analysis = Content.get_by_id('analysis', parent=version_key)
    if analysis is not None and analysis.status == Status.ready:
      data = analysis.get_json()
      analyzer_data = data.get('analyzerData', None)
      if analyzer_data is not None:
        # Use analyzer data for search index
        element_objects = analyzer_data.get('elements', [])
        index_names('element', [
            element.get('tagname', '') or element.get('classname', '')
            for element in element_objects])

        behavior_objects = analyzer_data.get('metadata', {}).get('polymer', {}).get('behaviors', [])
        index_names('behavior', [behavior.get('name', '') for behavior in behavior_objects])
      else:
        # Use hydrolysis data for search index
        index_names('element', data.get('elementsByTagName', {}).keys())
        index_names('behavior', data.get('behaviorsByName', {}).keys())

    # Repeat each value `weight` times so heavier values dominate the text.
    weighted = [value for value, weight in weights for _ in range(0, weight)]
    fields.append(search.TextField(name='weighted_fields', value=' '.join(weighted)))

    age = library.updated - datetime.datetime(2016, 1, 1)
    document = search.Document(doc_id=Library.id(owner, repo), fields=fields, rank=int(age.total_seconds()))
    search.Index('repo').put(document)
Beispiel #49
0
  def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors and participation stats.

    For scopes starting with '@' (NPM packages) ``update_registry_info`` is
    called first; otherwise scope/package are used as the GitHub owner/repo.
    Each of the three GitHub resources is requested with its stored ETag: a
    200 response replaces the stored content, ETag and timestamp and marks
    the library dirty, a 304 leaves the stored values untouched.

    Returns:
      The result of ``self.error(...)`` when a 200 response body is not
      valid JSON, the result of ``self.retry(...)`` on an unexpected status
      code, otherwise None.

    Raises:
      RequestAborted: when a GitHub repo library turns out to be renamed, or
        when GitHub answers 404 for the repo. The library is deleted first
        in both cases.
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
      self.update_registry_info()
    else:
      self.owner = self.scope
      self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
      try:
        metadata = json.loads(response.content)
      except ValueError:
        return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

      # From here on owner/repo are the names GitHub reports (lower-cased).
      self.owner = metadata.get('owner', {}).get('login', '').lower()
      self.repo = metadata.get('name', '').lower()

      # Deleting is only necessary if Library entity is a GitHub repo
      if (not is_npm_package) and self.repo != '' and self.owner != '' and (self.repo != self.package or self.owner != self.scope):
        # Log the name the library was requested under (scope/package);
        # self.owner/self.repo already hold the new name.
        logging.info('deleting renamed repo %s', Library.id(self.scope, self.package))
        delete_library(self.library.key)
        task_url = util.ensure_library_task(self.owner, self.repo)
        util.new_task(task_url, target='manage')
        # Interpolate here: exceptions do not format their arguments.
        raise RequestAborted('repo has been renamed to %s' % Library.id(self.owner, self.repo))

      # If adding a NPM package that a Bower repo already points to, remove the bower one.
      # NOTE(review): this only tests the id value, not whether a Library
      # entity with that id exists - confirm that Library.id can return None.
      bower_library_id = Library.id(self.owner, self.repo)
      if is_npm_package and bower_library_id is not None:
        logging.info('removing bower repo %s', bower_library_id)
        task_url = util.suppress_library_task(self.owner, self.repo)
        util.new_task(task_url, target='manage')

      self.library.github_owner = self.owner
      self.library.github_repo = self.repo

      self.library.metadata = response.content
      self.library.metadata_etag = response.headers.get('ETag', None)
      self.library.metadata_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 404:
      logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
      delete_library(self.library.key)
      raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
      return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw response body is what gets stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
      self.library.contributors = response.content
      self.library.contributors_etag = response.headers.get('ETag', None)
      self.library.contributors_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code != 304:
      return self.retry('could not update contributors (%d)' % response.status_code)

    # The endpoint name must not carry a trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw response body is what gets stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
      self.library.participation = response.content
      self.library.participation_etag = response.headers.get('ETag', None)
      self.library.participation_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 202:
      # GitHub is "computing" the data. We'll try again next update cycle.
      # TODO: Alternatively we could retry this task
      pass
    elif response.status_code != 304:
      return self.retry('could not update stats/participation (%d)' % response.status_code)
Beispiel #50
0
    def update_metadata(self):
        """Refresh the library's GitHub metadata, contributors and participation stats.

        Each of the three GitHub resources is requested with its stored
        ETag: a 200 response replaces the stored content, ETag and
        timestamp and marks the library dirty, a 304 leaves the stored
        values untouched.

        Returns:
            The result of ``self.error(...)`` when a 200 response body is
            not valid JSON, the result of ``self.retry(...)`` on an
            unexpected status code, otherwise None.

        Raises:
            RequestAborted: when GitHub reports a different owner/name for
                the repo (renamed), or answers 404 for it. The library is
                deleted first in both cases.
        """
        headers = {'Accept': 'application/vnd.github.drax-preview+json'}
        response = util.github_get('repos',
                                   self.owner,
                                   self.repo,
                                   etag=self.library.metadata_etag,
                                   headers=headers)
        if response.status_code == 200:
            try:
                metadata = json.loads(response.content)
            except ValueError:
                return self.error("could not parse metadata",
                                  ErrorCodes.Library_parse_metadata)

            # Names as GitHub reports them, lower-cased for comparison.
            repo = metadata.get('name', '').lower()
            owner = metadata.get('owner', {}).get('login', '').lower()
            if repo != '' and owner != '' and (repo != self.repo
                                               or owner != self.owner):
                logging.info('deleting renamed repo %s',
                             Library.id(self.owner, self.repo))
                delete_library(self.library.key)
                task_url = util.ensure_library_task(owner, repo)
                util.new_task(task_url, target='manage')
                # Interpolate here: exceptions do not format their arguments.
                raise RequestAborted('repo has been renamed to %s' %
                                     Library.id(owner, repo))

            self.library.metadata = response.content
            self.library.metadata_etag = response.headers.get('ETag', None)
            self.library.metadata_updated = datetime.datetime.now()
            self.library_dirty = True
        elif response.status_code == 404:
            logging.info('deleting non-existing repo %s',
                         Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            raise RequestAborted('repo no longer exists')
        elif response.status_code != 304:
            return self.retry('could not update repo metadata (%d)' %
                              response.status_code)

        response = util.github_get('repos',
                                   self.owner,
                                   self.repo,
                                   'contributors',
                                   etag=self.library.contributors_etag)
        if response.status_code == 200:
            try:
                # Parsed only to validate; the raw body is what gets stored.
                json.loads(response.content)
            except ValueError:
                return self.error("could not parse contributors",
                                  ErrorCodes.Library_parse_contributors)
            self.library.contributors = response.content
            self.library.contributors_etag = response.headers.get('ETag', None)
            self.library.contributors_updated = datetime.datetime.now()
            self.library_dirty = True
        elif response.status_code != 304:
            return self.retry('could not update contributors (%d)' %
                              response.status_code)

        # The endpoint name must not carry a trailing space.
        response = util.github_get('repos',
                                   self.owner,
                                   self.repo,
                                   'stats/participation',
                                   etag=self.library.participation_etag)
        if response.status_code == 200:
            try:
                # Parsed only to validate; the raw body is what gets stored.
                json.loads(response.content)
            except ValueError:
                return self.error("could not parse stats/participation",
                                  ErrorCodes.Library_parse_stats)
            self.library.participation = response.content
            self.library.participation_etag = response.headers.get(
                'ETag', None)
            self.library.participation_updated = datetime.datetime.now()
            self.library_dirty = True
        elif response.status_code == 202:
            # GitHub is "computing" the data. We'll try again next update cycle.
            # TODO: Alternatively we could retry this task
            pass
        elif response.status_code != 304:
            return self.retry('could not update stats/participation (%d)' %
                              response.status_code)
Beispiel #51
0
 def handle_get(self, owner, repo):
     """Create an ingest task unless the library already exists with a non-shallow ingestion."""
     library = Library.get_by_id(Library.id(owner, repo))
     if library is not None and not library.shallow_ingestion:
         return
     util.new_task(util.ingest_library_task(owner, repo), target='manage')
Beispiel #52
0
  def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors and participation stats.

    For scopes starting with '@' (NPM packages) ``update_registry_info`` is
    called first; otherwise scope/package are used as the GitHub owner/repo.
    Each of the three GitHub resources is requested with its stored ETag: a
    200 response replaces the stored content, ETag and timestamp and marks
    the library dirty, a 304 leaves the stored values untouched.

    Returns:
      The result of ``self.error(...)`` when a 200 response body is not
      valid JSON, the result of ``self.retry(...)`` on an unexpected status
      code, otherwise None.

    Raises:
      RequestAborted: when a GitHub repo library turns out to be renamed, or
        when GitHub answers 404 for the repo. The library is deleted first
        in both cases.
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
      self.update_registry_info()
    else:
      self.owner = self.scope
      self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
      try:
        metadata = json.loads(response.content)
      except ValueError:
        return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

      # From here on owner/repo are the names GitHub reports (lower-cased).
      self.owner = metadata.get('owner', {}).get('login', '').lower()
      self.repo = metadata.get('name', '').lower()

      # Deleting is only necessary if Library entity is a GitHub repo
      if (not is_npm_package) and self.repo != '' and self.owner != '' and (self.repo != self.package or self.owner != self.scope):
        # Log the name the library was requested under (scope/package);
        # self.owner/self.repo already hold the new name.
        logging.info('deleting renamed repo %s', Library.id(self.scope, self.package))
        delete_library(self.library.key)
        task_url = util.ensure_library_task(self.owner, self.repo)
        util.new_task(task_url, target='manage')
        # Interpolate here: exceptions do not format their arguments.
        raise RequestAborted('repo has been renamed to %s' % Library.id(self.owner, self.repo))

      # If adding a NPM package that a Bower repo already points to, remove the bower one.
      # NOTE(review): this only tests the id value, not whether a Library
      # entity with that id exists - confirm that Library.id can return None.
      bower_library_id = Library.id(self.owner, self.repo)
      if is_npm_package and bower_library_id is not None:
        task_url = util.migrate_library_task(self.owner, self.repo, self.scope, self.package)
        util.new_task(task_url, target='manage')

      self.library.github_owner = self.owner
      self.library.github_repo = self.repo

      self.library.metadata = response.content
      self.library.metadata_etag = response.headers.get('ETag', None)
      self.library.metadata_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 404:
      logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
      delete_library(self.library.key)
      raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
      return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw response body is what gets stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
      self.library.contributors = response.content
      self.library.contributors_etag = response.headers.get('ETag', None)
      self.library.contributors_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code != 304:
      return self.retry('could not update contributors (%d)' % response.status_code)

    # The endpoint name must not carry a trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw response body is what gets stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
      self.library.participation = response.content
      self.library.participation_etag = response.headers.get('ETag', None)
      self.library.participation_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 202:
      # GitHub is "computing" the data. We'll try again next update cycle.
      # TODO: Alternatively we could retry this task
      pass
    elif response.status_code != 304:
      return self.retry('could not update stats/participation (%d)' % response.status_code)
Beispiel #53
0
 def handle_get(self, scope, package):
   """Delete the library keyed by the lower-cased id, logging to the plain-text response."""
   self.response.headers['Content-Type'] = 'text/plain'
   library_key = ndb.Key(Library, Library.id(scope, package).lower())
   delete_library(library_key, response_for_logging=self.response)