Ejemplo n.º 1
0
    def handle_get(self):
        """Queue one update task per Library key and one per Author key.

        If the 'update' task queue reports any outstanding tasks, writes
        'update already in progress' to the response and queues nothing.
        """
        if taskqueue.Queue('update').fetch_statistics().tasks > 0:
            self.response.write('update already in progress')
            return

        # Page through Library keys, 50 at a time, queueing one task per key.
        libraries = Library.query()
        queued = 0
        page_cursor, has_more = None, True
        while has_more:
            page, page_cursor, has_more = libraries.fetch_page(
                50, keys_only=True, start_cursor=page_cursor)
            for library_key in page:
                queued += 1
                util.new_task(util.update_library_task(library_key.id()),
                              target='manage', queue_name='update')

        logging.info('triggered %d library updates', queued)

        # Same walk over Author keys.
        authors = Author.query()
        queued = 0
        page_cursor, has_more = None, True
        while has_more:
            page, page_cursor, has_more = authors.fetch_page(
                50, keys_only=True, start_cursor=page_cursor)
            for author_key in page:
                queued += 1
                util.new_task(util.update_author_task(author_key.id()),
                              target='manage', queue_name='update')

        logging.info('triggered %d author updates', queued)
Ejemplo n.º 2
0
 def handle_get(self, scope, package, version):
   """Delete everything an ancestor query on the version key returns.

   After the delete, VersionCache.update is called for the parent library
   key; when it returns a truthy value an index-update task is queued.
   """
   # FIXME: Make deletion transactional with check on library that tag is excluded.
   version_key = ndb.Key(Library, Library.id(scope, package), Version, version)
   doomed_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
   ndb.delete_multi(doomed_keys)
   if not VersionCache.update(version_key.parent()):
     return
   util.new_task(util.update_indexes_task(scope, package), target='manage')
Ejemplo n.º 3
0
  def handle_get(self):
    """Queue one update task per Library key and one per Author key.

    If the 'update' task queue reports any outstanding tasks, writes
    'update already in progress' to the response and queues nothing.
    """
    if taskqueue.Queue('update').fetch_statistics().tasks > 0:
      self.response.write('update already in progress')
      return

    # Page through Library keys, 50 at a time, queueing one task per key.
    libraries = Library.query()
    queued = 0
    page_cursor, has_more = None, True
    while has_more:
      page, page_cursor, has_more = libraries.fetch_page(50, keys_only=True, start_cursor=page_cursor)
      for library_key in page:
        queued += 1
        util.new_task(util.update_library_task(library_key.id()), target='manage', queue_name='update')

    logging.info('triggered %d library updates', queued)

    # Same walk over Author keys.
    authors = Author.query()
    queued = 0
    page_cursor, has_more = None, True
    while has_more:
      page, page_cursor, has_more = authors.fetch_page(50, keys_only=True, start_cursor=page_cursor)
      for author_key in page:
        queued += 1
        util.new_task(util.update_author_task(author_key.id()), target='manage', queue_name='update')

    logging.info('triggered %d author updates', queued)
Ejemplo n.º 4
0
  def post(self):
    """Install a pull_request webhook on the repo named by the 'repo' parameter.

    The 'repo' request parameter must be of the form 'owner/name'. The caller's
    access token must grant admin permission on that repo. If an active
    webhook with the same config already exists nothing is created.

    Responses written:
      400 - 'repo' is not of the form owner/name.
      401 - the repo cannot be read with the token, or the token lacks admin.
      500 - GitHub did not answer 201 to the webhook creation.
      default status - 'Webhook is already configured' or 'Created webhook'.
    """
    full_name = self.request.get('repo').lower()
    split = full_name.split('/')
    if len(split) != 2:
      self.response.set_status(400)
      self.response.write('Bad request, not repo')
      return
    owner, repo = split

    access_token = exchange_token(self)
    if access_token is None:
      # NOTE(review): presumably exchange_token already wrote an error response - confirm.
      return

    # Validate access token against repo
    repos_response = util.github_get('repos/%s' % full_name, access_token=access_token)
    if repos_response.status_code != 200:
      self.response.set_status(401)
      self.response.write('Cannot access repo')
      return

    info = json.loads(repos_response.content)
    # Treat a missing 'permissions' block the same as "no admin access"
    # instead of failing with a KeyError.
    has_access = info.get('permissions', {}).get('admin', False)

    if not has_access:
      self.response.set_status(401)
      self.response.write('Do not have access to the repo')
      return

    parsed_url = urlparse(self.request.url)
    params = {'name': 'web', 'events': ['pull_request']}
    params['config'] = {
        'url': '%s://%s/api/preview-event' % (parsed_url.scheme, parsed_url.netloc),
        'content_type': 'json',
    }

    # Check if the webhook exists. Listing hooks is a GET; the previous POST
    # without a payload could never produce the 200 this check expects.
    list_webhooks_response = util.github_get('repos', owner, repo, 'hooks', access_token=access_token)
    if list_webhooks_response.status_code != 200:
      logging.error('Unable to query existing webhooks, continuing anyway. Github %s: %s',
                    list_webhooks_response.status_code, list_webhooks_response.content)
    else:
      webhooks = json.loads(list_webhooks_response.content)
      for webhook in webhooks:
        if webhook['active'] and webhook['config'] == params['config']:
          self.response.write('Webhook is already configured')
          return

    # Create the webhook
    create_webhook_response = util.github_post('repos', owner, repo, 'hooks', params, access_token)
    if create_webhook_response.status_code != 201:
      self.response.set_status(500)
      self.response.write('Failed to create webhook.')
      logging.error('Failed to create webhook. Github %s: %s',
                    create_webhook_response.status_code, create_webhook_response.content)
      return

    # Trigger shallow ingestion of the library so we can store the access token.
    util.new_task(util.ingest_webhook_task(owner, repo), params={'access_token': access_token}, target='manage')
    self.response.write('Created webhook')
Ejemplo n.º 5
0
    def handle_post(self):
        """Store an analysis result delivered in the request body.

        The body is URL-unquoted, stripped of trailing '=' and parsed as
        JSON; its 'message' carries base64 'data' plus 'attributes' naming
        owner, repo, version and an optional error. Does nothing when no
        'analysis' Content exists under the version. Queues an index update
        when the version is the library's default version.
        """
        envelope = json.loads(
            urllib.unquote(self.request.body).rstrip('='))
        message = envelope['message']
        data = base64.b64decode(str(message['data']))
        attributes = message['attributes']
        owner = attributes['owner']
        repo = attributes['repo']
        version = attributes['version']
        error = attributes.get('error', None)

        version_key = ndb.Key(Library, Library.id(owner, repo), Version,
                              version)

        content = Content.get_by_id('analysis', parent=version_key)
        if content is None:
            return

        # An empty payload clears the stored JSON.
        content.set_json(None if data == '' else json.loads(data))

        if error is not None:
            content.status = Status.error
            content.error = error
        else:
            content.status = Status.ready
            content.error = None

        content.put()

        default_version = Library.default_version_for_key_async(
            version_key.parent()).get_result()
        if version_key.id() == default_version:
            util.new_task(util.update_indexes_task(owner, repo),
                          target='manage')
Ejemplo n.º 6
0
 def handle_get(self, owner, repo, version):
   """Delete everything an ancestor query on the version key returns.

   After the delete, VersionCache.update is called for the parent library
   key; when it returns a truthy value an index-update task is queued.
   """
   # FIXME: Make deletion transactional with check on library that tag is excluded.
   version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
   doomed_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
   ndb.delete_multi(doomed_keys)
   if not VersionCache.update(version_key.parent()):
     return
   util.new_task(util.update_indexes_task(owner, repo), target='manage')
Ejemplo n.º 7
0
  def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors and participation.

    Each resource is requested with its stored ETag. A 200 replaces the
    stored content, ETag and timestamp and marks the library dirty; a 304
    leaves the stored copy alone.

    Returns:
      The result of self.error(...) when a 200 body is not valid JSON, the
      result of self.retry(...) on an unexpected status code, otherwise None.

    Raises:
      RequestAborted: when the metadata names a different owner/repo (the
        library is deleted and a task for the new name is queued first), or
        when GitHub answers 404 for the repo (the library is deleted first).
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
      try:
        metadata = json.loads(response.content)
      except ValueError:
        return self.error("could not parse metadata")

      repo = metadata.get('name', '').lower()
      owner = metadata.get('owner', {}).get('login', '').lower()
      if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
        logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        task_url = util.ensure_library_task(owner, repo)
        util.new_task(task_url, target='manage')
        # Interpolate the message here: exceptions do not apply %-formatting
        # to their arguments the way logging calls do.
        raise RequestAborted('repo has been renamed to %s' % Library.id(owner, repo))

      self.library.metadata = response.content
      self.library.metadata_etag = response.headers.get('ETag', None)
      self.library.metadata_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 404:
      logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
      delete_library(self.library.key)
      raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
      return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw body is what gets stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse contributors")
      self.library.contributors = response.content
      self.library.contributors_etag = response.headers.get('ETag', None)
      self.library.contributors_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code != 304:
      return self.retry('could not update contributors (%d)' % response.status_code)

    # The path previously carried a trailing space ('stats/participation ').
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
      try:
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse stats/participation")
      self.library.participation = response.content
      self.library.participation_etag = response.headers.get('ETag', None)
      self.library.participation_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 202:
      # GitHub is "computing" the data. We'll try again next update cycle.
      # TODO: Alternatively we could retry this task
      pass
    elif response.status_code != 304:
      return self.retry('could not update stats/participation (%d)' % response.status_code)
Ejemplo n.º 8
0
 def trigger_analysis(self, tag, sha, transactional=False):
     """Mark the analysis for `tag` as pending and queue an analysis task.

     The sha is forwarded to the task only when the library kind is
     'collection'; otherwise None is passed.
     """
     analysis_sha = sha if self.library.kind == 'collection' else None
     version_key = ndb.Key(Library, self.library.key.id(), Version, tag)
     pending = Content(id='analysis', parent=version_key, status=Status.pending)
     pending.put()
     util.new_task(
         util.ingest_analysis_task(self.owner, self.repo, tag, analysis_sha),
         target='analysis', transactional=transactional)
Ejemplo n.º 9
0
  def update_collection_dependencies(self, collection_version_key, bower):
    """Link each parseable bower dependency to the collection and queue it.

    Dependencies for which Dependency.from_string returns None are skipped.
    """
    for spec in bower.get('dependencies', {}).values():
      dep = Dependency.from_string(spec)
      if dep is not None:
        library_key = ndb.Key(Library, Library.id(dep.owner, dep.repo))
        CollectionReference.ensure(library_key, collection_version_key, semver=dep.version)

        util.new_task(util.ensure_library_task(dep.owner.lower(), dep.repo.lower()), target='manage')
Ejemplo n.º 10
0
  def update_collection_dependencies(self, collection_version_key, bower):
    """Link each parseable bower dependency to the collection and queue it.

    Dependencies for which Dependency.from_string returns None are skipped.
    """
    for spec in bower.get('dependencies', {}).values():
      dep = Dependency.from_string(spec)
      if dep is not None:
        library_key = ndb.Key(Library, Library.id(dep.owner, dep.repo))
        CollectionReference.ensure(library_key, collection_version_key, semver=dep.version)

        util.new_task(util.ensure_library_task(dep.owner.lower(), dep.repo.lower()), target='manage')
Ejemplo n.º 11
0
    def post(self):
        """Queue a preview ingestion for the GitHub URL in the 'url' parameter.

        If the URL already names a commit, that SHA is used directly.
        Otherwise the URL tail is mapped to a git ref (pull request head,
        branch, or heads/master when empty) and resolved through the GitHub
        API. On success the response body is 'owner/repo/sha'.

        Writes a 400 response and stops when the URL cannot be understood or
        the ref lookup answers 404. Returns without queueing anything when
        validate_captcha(self) is falsy.
        """
        if not validate_captcha(self):
            return

        url = self.request.get('url')
        match = re.match(r'https://github.com/(.*?)/([^/]*)(.*)', url)
        if match is None:
            self.response.set_status(400)
            self.response.write('Unable to understand url (%s)' % url)
            # Stop here: match.group() below would fail on None.
            return

        owner = match.group(1)
        repo = match.group(2)
        tail = match.group(3)

        # SHA already defined
        match = re.match(r'.*commits?/(.*)', tail)
        if match:
            self.response.headers['Access-Control-Allow-Origin'] = '*'
            self.response.headers['Content-Type'] = 'application/json'
            self.response.write('%s/%s/%s' % (owner, repo, match.group(1)))
            util.new_task(util.ingest_preview_task(owner, repo),
                          params={
                              'commit': match.group(1),
                              'url': url
                          },
                          target='manage')
            return

        # Resolve SHA using these patterns and Github API
        tail = re.sub(r'/pull/(.*)', r'pull/\1/head', tail)
        tail = re.sub(r'/tree/(.*)', r'heads/\1', tail)
        tail = re.sub(r'^$', r'heads/master', tail)

        if not tail:
            self.response.set_status(400)
            self.response.write('Unable to understand url (%s)' % url)
            return

        response = util.github_get('repos', owner, repo, 'git/refs/' + tail)

        if response.status_code == 404:
            self.response.set_status(400)
            self.response.write('Error resolving url (%s)' % url)
            # Stop here: the 404 body has no ref object to read a SHA from.
            return

        sha = json.loads(response.content)['object']['sha']
        util.new_task(util.ingest_preview_task(owner, repo),
                      params={
                          'commit': sha,
                          'url': url
                      },
                      target='manage')
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write('%s/%s/%s' % (owner, repo, sha))
Ejemplo n.º 12
0
  def trigger_version_ingestion(self, tag, sha, url=None, preview=False):
    """Store a Version for `tag` and queue its ingestion and analysis.

    Returns:
      False, without writing or queueing anything, when a Version with this
      id already exists and its status is ready or pending; True otherwise.
    """
    existing = Version.get_by_id(tag, parent=self.library.key)
    # Version object is already up to date or pending
    if existing is not None and existing.status in (Status.ready, Status.pending):
      return False

    new_version = Version(id=tag, parent=self.library.key, sha=sha, url=url, preview=preview)
    new_version.put()

    util.new_task(util.ingest_version_task(self.owner, self.repo, tag), target='manage', transactional=True)
    self.trigger_analysis(tag, sha, transactional=True)
    return True
Ejemplo n.º 13
0
  def trigger_analysis(self, tag, sha, transactional=False):
    """Queue an analysis task for `tag` on the 'analysis' queue.

    A pending 'analysis' Content is written first when none exists or the
    existing one is in the error state. The sha is forwarded to the task
    only when the library kind is 'collection'; otherwise None is passed.
    """
    analysis_sha = sha if self.library.kind == 'collection' else None
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)

    existing = Content.get_by_id('analysis', parent=version_key)
    needs_pending_marker = existing is None or existing.status == Status.error
    if needs_pending_marker:
      Content(id='analysis', parent=version_key, status=Status.pending).put()

    util.new_task(
        util.ingest_analysis_task(self.owner, self.repo, tag, analysis_sha),
        target='analysis', transactional=transactional, queue_name='analysis')
Ejemplo n.º 14
0
  def trigger_analysis(self, tag, sha, transactional=False):
    """Queue an analysis task for `tag` on the 'analysis' queue.

    A pending 'analysis' Content is written first when none exists or the
    existing one is in the error state. The sha is forwarded to the task
    only when the library kind is 'collection'; otherwise None is passed.
    """
    analysis_sha = sha if self.library.kind == 'collection' else None
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)

    existing = Content.get_by_id('analysis', parent=version_key)
    needs_pending_marker = existing is None or existing.status == Status.error
    if needs_pending_marker:
      Content(id='analysis', parent=version_key, status=Status.pending).put()

    util.new_task(
        util.ingest_analysis_task(self.scope, self.package, tag, analysis_sha),
        target='analysis', transactional=transactional, queue_name='analysis')
Ejemplo n.º 15
0
  def trigger_version_ingestion(self, tag, sha, url=None, preview=False):
    """Store a Version for `tag` and queue its ingestion and analysis.

    Returns:
      False, without writing or queueing anything, when a Version with this
      id already exists and its status is ready or pending; True otherwise.
    """
    existing = Version.get_by_id(tag, parent=self.library.key)
    # Version object is already up to date or pending
    if existing is not None and existing.status in (Status.ready, Status.pending):
      return False

    new_version = Version(id=tag, parent=self.library.key, sha=sha, url=url, preview=preview)
    new_version.put()

    util.new_task(util.ingest_version_task(self.scope, self.package, tag), target='manage', transactional=True)
    self.trigger_analysis(tag, sha, transactional=True)
    return True
Ejemplo n.º 16
0
  def handle_get(self):
    """Queue an index-update task for every Library key.

    Keys are read in pages of 50; each key id is split on the first '/'
    into owner and repo.
    """
    libraries = Library.query()
    queued = 0
    page_cursor, has_more = None, True
    while has_more:
      page, page_cursor, has_more = libraries.fetch_page(50, keys_only=True, start_cursor=page_cursor)
      for library_key in page:
        queued += 1
        owner, repo = library_key.id().split('/', 1)
        util.new_task(util.update_indexes_task(owner, repo), target='manage')

    logging.info('triggered %d index updates', queued)
Ejemplo n.º 17
0
  def handle_get(self):
    """Queue an index-update task for every Library key.

    Keys are read in pages of 50; each key id is split on the first '/'
    into owner and repo.
    """
    libraries = Library.query()
    queued = 0
    page_cursor, has_more = None, True
    while has_more:
      page, page_cursor, has_more = libraries.fetch_page(50, keys_only=True, start_cursor=page_cursor)
      for library_key in page:
        queued += 1
        owner, repo = library_key.id().split('/', 1)
        util.new_task(util.update_indexes_task(owner, repo), target='manage')

    logging.info('triggered %d index updates', queued)
Ejemplo n.º 18
0
    def error(error_string):
      """Record an ingestion error on the version and answer with status 200.

      When generate_search is set and the failed version is not the first
      entry of the library's tag list, ingestion of the preceding entry is
      queued with latestVersion set.
      """
      logging.info('ingestion error "%s" for %s/%s/%s', error_string, owner, repo, version)
      failed = key.get()
      failed.error = error_string
      failed.put()
      if generate_search:
        tags = json.loads(key.parent().get().tags)
        position = tags.index(version)
        if position > 0:
          fallback = tags[position - 1]
          logging.info('ingestion for %s/%s falling back to version %s', owner, repo, fallback)
          util.new_task(util.ingest_version_task(owner, repo, fallback), {'latestVersion':'True'})

      self.response.set_status(200)
Ejemplo n.º 19
0
  def handle_get(self):
    """Queue an analysis task for every Library key.

    `latest` is True whenever the 'latest' request parameter lookup yields
    something other than None. Keys are read in pages of 50.
    """
    latest = self.request.get('latest', None) is not None
    libraries = Library.query()
    queued = 0
    page_cursor, has_more = None, True
    while has_more:
      page, page_cursor, has_more = libraries.fetch_page(50, keys_only=True, start_cursor=page_cursor)
      for library_key in page:
        queued += 1
        owner, repo = library_key.id().split('/', 1)
        util.new_task(util.analyze_library_task(owner, repo, latest), target='manage')

    logging.info('triggered %d analyses', queued)
Ejemplo n.º 20
0
  def handle_get(self):
    """Queue an analysis task for every Library key.

    `latest` is True whenever the 'latest' request parameter lookup yields
    something other than None. Keys are read in pages of 50.
    """
    latest = self.request.get('latest', None) is not None
    libraries = Library.query()
    queued = 0
    page_cursor, has_more = None, True
    while has_more:
      page, page_cursor, has_more = libraries.fetch_page(50, keys_only=True, start_cursor=page_cursor)
      for library_key in page:
        queued += 1
        owner, repo = library_key.id().split('/', 1)
        util.new_task(util.analyze_library_task(owner, repo, latest), target='manage')

    logging.info('triggered %d analyses', queued)
Ejemplo n.º 21
0
  def get(self, owner, repo, kind):
    """Store a Version keyed by the requested commit and queue its ingestion.

    Reads the 'commit' and 'url' request parameters and asserts both are
    present. A library with neither metadata nor an error recorded is
    treated as new: version ingestion is switched off for it and its
    metadata is fetched.
    """
    commit = self.request.get('commit', None)
    url = self.request.get('url', None)
    assert commit is not None and url is not None
    self.init_library(owner, repo, kind)
    if self.library.metadata is None and self.library.error is None:
      self.library.ingest_versions = False
      self.library_dirty = True
      self.update_metadata()

    Version(parent=self.library.key, id=commit, sha=commit, url=url).put()
    util.new_task(util.ingest_version_task(owner, repo, commit))
    self.commit()
Ejemplo n.º 22
0
  def post(self):
    """Handle a GitHub pull_request event for a registered repository.

    Answers 202 for other event types and for actions other than 'opened'
    or 'synchronize', and 400 when no Library exists for the origin repo.
    Otherwise posts a 'success' status on the PR head commit (500 if GitHub
    does not answer 201) and queues a preview ingestion of the head repo.
    """
    if self.request.headers.get('X-Github-Event') != 'pull_request':
      self.response.set_status(202) # Accepted
      self.response.write('Payload was not for a pull_request, aborting.')
      return

    payload = json.loads(self.request.body)
    if payload['action'] not in ('opened', 'synchronize'):
      self.response.set_status(202) # Accepted
      self.response.write('Payload was not opened or synchronize, aborting.')
      return

    # Original repo
    repository = payload['repository']
    origin_owner = repository['owner']['login']
    origin_repo = repository['name']
    origin_full_name = repository['full_name']
    # Repo where the pull request came from.
    head_repo = payload['pull_request']['head']['repo']
    pull_owner = head_repo['owner']['login']
    pull_repo = head_repo['name']

    library = ndb.Key(Library, Library.id(origin_owner, origin_repo)).get(read_policy=ndb.EVENTUAL_CONSISTENCY)

    if library is None:
      logging.error('No library object found for %s', origin_full_name)
      self.response.set_status(400) # Bad request
      self.response.write('It does not seem like this repository was registered')
      return

    sha = payload['pull_request']['head']['sha']
    parsed_url = urlparse(self.request.url)
    preview_url = '%s://%s/preview/%s/%s/%s' % (parsed_url.scheme, parsed_url.netloc, pull_owner, pull_repo, sha)
    params = {
        'state': 'success',
        'target_url': preview_url,
        'description': 'Preview is ready!', # TODO: Don't lie
        'context': 'webcomponents/preview'
    }

    response = util.github_post('repos', origin_owner, origin_repo, 'statuses/%s' % sha, params, library.github_access_token)
    if response.status_code != 201:
      logging.error('Failed to set status on Github PR. Github returned %s:%s', response.status_code, response.content)
      self.response.set_status(500)
      self.response.write('Failed to set status on PR.')
      return

    task_params = {'commit': sha, 'url': payload['pull_request']['url']}
    util.new_task(util.ingest_preview_task(pull_owner, pull_repo), params=task_params, target='manage')
Ejemplo n.º 23
0
  def post(self, library):
    """Queue ingestion of the library named by `library`.

    Args:
      library: either 'scope/package', or a bare package name, which is
        placed in the '@@npm' scope.

    Writes a 400 'Invalid name' response when `library` contains more than
    one '/'. Returns without queueing anything when validate_captcha(self)
    is falsy.
    """
    if not validate_captcha(self):
      return

    # Compare lengths by value; `is` tests object identity and only appears
    # to work for small ints because of an interpreter implementation detail.
    split = library.split('/')
    if len(split) == 2:
      scope, package = split
    elif len(split) == 1:
      scope = '@@npm'
      package = library
    else:
      self.response.set_status(400)
      self.response.write('Invalid name')
      return

    # TODO: validate valid repo and return result
    task_url = util.ingest_library_task(scope, package)
    util.new_task(task_url, target='manage')
Ejemplo n.º 24
0
  def get(self, owner, repo, version):
    """Record a version's bower dependencies and queue each one for ingestion.

    Reads the 'bower' Content stored under the version, appends every
    dependency string to the version entity, adds a CollectionReference to
    each dependency's Library, queues 'ingest/library' for it, and saves the
    libraries together with the version.
    """
    logging.info('ingesting version %s/%s/%s', owner, repo, version)
    bower_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, version, Content, 'bower')
    bower = json.loads(bower_key.get().content)
    version_key = bower_key.parent()
    ver = version_key.get()

    parsed_deps = []
    library_keys = []
    for spec in bower.get('dependencies', {}).values():
      ver.dependencies.append(spec)
      dep = Dependency.from_string(spec)
      parsed_deps.append(dep)
      library_id = '%s/%s' % (dep.owner.lower(), dep.repo.lower())
      library_keys.append(ndb.Key(Library, library_id))

    libraries = Library.get_or_create_list(library_keys)
    # Libraries are matched to their dependency by position.
    for position, library in enumerate(libraries):
      dep = parsed_deps[position]
      reference = CollectionReference(version=version_key, semver=dep.version)
      library.collections.append(reference)
      util.new_task('ingest/library', dep.owner.lower(), dep.repo.lower())
    libraries.append(ver)
    ndb.put_multi(libraries)
Ejemplo n.º 25
0
  def ingest_versions(self):
    """Fetch the repo's tag refs and queue ingestion for every valid version tag.

    Does nothing when version ingestion is disabled for the library or when
    GitHub answers 304 for the stored tags ETag. Otherwise stores the sorted
    list of valid tag names on the library, writes a Version per tag and
    queues its ingestion and analysis, newest first; the newest tag's task
    carries latestVersion.

    Returns:
      The result of self.error(...) when the status is neither 200 nor 304,
      or when no ref passes versiontag.is_valid; otherwise None.
    """
    if not self.library.ingest_versions:
      return

    response = self.github.github_resource('repos', self.owner, self.repo, 'git/refs/tags', etag=self.library.tags_etag)
    if response.status_code == 304:
      return
    if response.status_code != 200:
      return self.error('repo tags not found (%d)' % response.status_code)

    self.library.tags = response.content
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library_dirty = True

    data = json.loads(response.content)
    # Only a JSON array can hold ref entries. The previous check against
    # `object` was always true and so never guarded anything.
    if not isinstance(data, list):
      data = []
    # d['ref'][10:] drops the first 10 characters of the ref name
    # (presumably the 'refs/tags/' prefix - TODO confirm).
    data = [d for d in data if versiontag.is_valid(d['ref'][10:])]
    if not data:
      return self.error('repo contains no valid version tags')
    data.sort(lambda a, b: versiontag.compare(a['ref'][10:], b['ref'][10:]))
    self.library.tags = json.dumps([d['ref'][10:] for d in data])

    # Newest first; only the first task is flagged as the latest version.
    data.reverse()
    for position, version in enumerate(data):
      tag = version['ref'][10:]
      sha = version['object']['sha']
      params = {"latestVersion": "True"} if position == 0 else {}
      Version(parent=self.library.key, id=tag, sha=sha).put()
      task_url = util.ingest_version_task(self.owner, self.repo, tag)
      util.new_task(task_url, params)
      util.publish_analysis_request(self.owner, self.repo, tag)
Ejemplo n.º 26
0
  def handle_post(self):
    """Store an analysis result delivered in the request body.

    Oversized bodies and messages with empty attributes are dropped (the
    latter is logged). Does nothing when no 'analysis' Content exists under
    the version. Queues an index update when the version is the library's
    default version.
    """
    # Ignore payloads larger than 5 MB.
    # NOTE(review): 1048487 is not 1 MiB (1048576) - confirm the intended limit.
    body = self.request.body
    if len(body) > 1048487 * 5:
      return
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    if len(attributes) == 0:
      logging.error(message)
      return
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
      return

    # An empty payload clears the stored JSON.
    content.set_json(None if data == '' else json.loads(data))

    if error is not None:
      content.status = Status.error
      content.error = error
    else:
      content.status = Status.ready
      content.error = None

    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
      util.new_task(util.update_indexes_task(owner, repo), target='manage')
Ejemplo n.º 27
0
  def handle_post(self):
    """Store an analysis result delivered in the request body.

    Payloads over 500000 bytes are not stored; they are recorded as an
    error instead. Does nothing when no 'analysis' Content exists under the
    version. Queues an index update when the version is the library's
    default version.
    """
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)

    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
      return

    size = len(data)
    if data == '':
      content.content = None
    elif size > 500000:
      # Max entity size is only 1MB.
      logging.error('content was too large: %d %s %s', size, Library.id(owner, repo), version)
      error = 'content was too large: %d' % size
    else:
      content.content = data

    if error is not None:
      content.status = Status.error
      content.error = error
    else:
      content.status = Status.ready
      content.error = None

    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
      util.new_task(util.update_indexes_task(owner, repo), target='manage')
Ejemplo n.º 28
0
 def trigger_author_ingestion(self):
   """Queue a transactional ensure-author task for the library owner.

   Skipped when the library is flagged as a shallow ingestion.
   """
   if not self.library.shallow_ingestion:
     util.new_task(util.ensure_author_task(self.owner), target='manage', transactional=True)
Ejemplo n.º 29
0
 def post(self, owner, repo):
     """Queue ingestion of owner/repo once validate_captcha(self) is truthy."""
     if validate_captcha(self):
         # TODO: validate valid repo and return result
         util.new_task(util.ingest_library_task(owner, repo), target='manage')
Ejemplo n.º 30
0
 def trigger_version_deletion(self, tag):
   """Queue a transactional delete-version task for `tag`."""
   delete_url = util.delete_version_task(self.scope, self.package, tag)
   util.new_task(delete_url, transactional=True, target='manage')
Ejemplo n.º 31
0
  def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors and participation.

    For scopes starting with '@' the registry info is updated first;
    otherwise scope/package are used directly as the GitHub owner/repo.
    Each GitHub resource is requested with its stored ETag. A 200 replaces
    the stored content, ETag and timestamp and marks the library dirty; a
    304 leaves the stored copy alone.

    Returns:
      The result of self.error(...) when a 200 body is not valid JSON, the
      result of self.retry(...) on an unexpected status code, otherwise None.

    Raises:
      RequestAborted: when a non-NPM library's metadata names a different
        owner/repo (the library is deleted and a task for the new name is
        queued first), or when GitHub answers 404 for the repo (the library
        is deleted first).
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
      self.update_registry_info()
    else:
      self.owner = self.scope
      self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
      try:
        metadata = json.loads(response.content)
      except ValueError:
        return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

      self.owner = metadata.get('owner', {}).get('login', '').lower()
      self.repo = metadata.get('name', '').lower()

      # Deleting is only necessary if Library entity is a GitHub repo
      if (not is_npm_package) and self.repo != '' and self.owner != '' and (self.repo != self.package or self.owner != self.scope):
        # self.owner/self.repo already hold the new name at this point, so
        # log the entry being deleted by its scope/package.
        logging.info('deleting renamed repo %s', Library.id(self.scope, self.package))
        delete_library(self.library.key)
        task_url = util.ensure_library_task(self.owner, self.repo)
        util.new_task(task_url, target='manage')
        # Interpolate the message here: exceptions do not apply %-formatting
        # to their arguments the way logging calls do.
        raise RequestAborted('repo has been renamed to %s' % Library.id(self.owner, self.repo))

      # If adding a NPM package that a Bower repo already points to, remove the bower one.
      # NOTE(review): Library.id(...) looks like it always yields an id, so the
      # `is not None` test may never be false - confirm whether an existence
      # lookup was intended.
      bower_library_id = Library.id(self.owner, self.repo)
      if is_npm_package and bower_library_id is not None:
        task_url = util.migrate_library_task(self.owner, self.repo, self.scope, self.package)
        util.new_task(task_url, target='manage')

      self.library.github_owner = self.owner
      self.library.github_repo = self.repo

      self.library.metadata = response.content
      self.library.metadata_etag = response.headers.get('ETag', None)
      self.library.metadata_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 404:
      logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
      delete_library(self.library.key)
      raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
      return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw body is what gets stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
      self.library.contributors = response.content
      self.library.contributors_etag = response.headers.get('ETag', None)
      self.library.contributors_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code != 304:
      return self.retry('could not update contributors (%d)' % response.status_code)

    # The path previously carried a trailing space ('stats/participation ').
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
      try:
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
      self.library.participation = response.content
      self.library.participation_etag = response.headers.get('ETag', None)
      self.library.participation_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 202:
      # GitHub is "computing" the data. We'll try again next update cycle.
      # TODO: Alternatively we could retry this task
      pass
    elif response.status_code != 304:
      return self.retry('could not update stats/participation (%d)' % response.status_code)
Ejemplo n.º 32
0
Archivo: manage.py Proyecto: shans/v2
 def get(self, owner, repo, kind):
   """Queue a library ingestion task and answer 'OK'."""
   util.new_task(util.ingest_library_task(owner, repo, kind))
   self.response.write('OK')
Ejemplo n.º 33
0
 def trigger_version_deletion(self, tag):
   """Enqueue, transactionally on 'manage', the task built by util.delete_task for `tag`."""
   deletion_url = util.delete_task(self.owner, self.repo, tag)
   util.new_task(deletion_url, transactional=True, target='manage')
Ejemplo n.º 34
0
 def trigger_version_deletion(self, tag):
   """Enqueue, transactionally on 'manage', the delete-version task for scope/package/`tag`."""
   util.new_task(
       util.delete_version_task(self.scope, self.package, tag),
       target='manage',
       transactional=True)
Ejemplo n.º 35
0
 def handle_get(self, owner, repo):
   """Queue a library ingestion task on 'manage' and reply 'OK'."""
   util.new_task(util.ingest_library_task(owner, repo), target='manage')
   self.response.write('OK')
Ejemplo n.º 36
0
 def handle_get(self, owner, repo):
     """Queue ingestion unless the library already exists and is not shallow."""
     existing = Library.get_by_id(Library.id(owner, repo))
     if existing is not None and not existing.shallow_ingestion:
         # Already ingested (and not shallowly): nothing to do.
         return
     util.new_task(util.ingest_library_task(owner, repo), target='manage')
Ejemplo n.º 37
0
 def handle_get(self, name):
   """Queue an author ingestion task when no Author is stored under the lowercased name."""
   if Author.get_by_id(name.lower()) is not None:
     return
   util.new_task(util.ingest_author_task(name), target='manage')
Ejemplo n.º 38
0
  def get(self, owner, repo, kind):
    """Ingest a library: fetch its GitHub metadata, contributors and tags.

    Responds 400 when `kind` is neither 'element' nor 'collection', and 500
    when the GitHub quota reservation fails. A GitHub fetch that does not
    return 200 is recorded in `library.error`, the reservation is released
    and ingestion stops. For every tag accepted by `versiontag.is_valid`, a
    `Version` entity is stored and follow-up tasks are queued.
    """
    if kind not in ('element', 'collection'):
      self.response.set_status(400)
      return
    owner = owner.lower()
    repo = repo.lower()
    library = Library.maybe_create_with_kind(owner, repo, kind)

    logging.info('created library')

    github = quota.GitHub()
    if not github.reserve(3):
      self.response.set_status(500)
      return

    response = github.github_resource('repos', owner, repo)
    if response.status_code != 200:
      library.error = 'repo metadata not found'
      github.release()
      library.put()
      return

    library.metadata = response.content

    response = github.github_resource('repos', owner, repo, 'contributors')
    if response.status_code != 200:
      library.error = 'repo contributors not found'
      github.release()
      library.put()
      return

    library.contributors = response.content
    library.contributor_count = len(json.loads(response.content))

    response = github.github_resource('repos', owner, repo, 'git/refs/tags')
    if response.status_code != 200:
      library.error = 'repo tags not found'
      github.release()
      library.put()
      return

    data = json.loads(response.content)
    # The loop below indexes each entry as a dict, so anything other than a
    # JSON array of refs is rejected. The previous check,
    # isinstance(data, object), was always true and never rejected anything.
    if not isinstance(data, list):
      library.error = 'repo contains no valid version tags'
      github.release()
      library.put()
      return

    library.put()

    for version in data:
      # Refs are expected to look like 'refs/tags/<tag>'; drop the
      # 10-character prefix to get the bare tag name.
      tag = version['ref'][10:]
      if not versiontag.is_valid(tag):
        continue
      sha = version['object']['sha']
      version_object = Version(parent=library.key, id=tag, sha=sha)
      version_object.put()
      util.new_task('ingest/version', owner, repo, detail=tag)
      util.publish_hydrolyze_pending(
          '/task/ingest/hydrolyzer/%s/%s/%s' % (owner, repo, tag),
          owner,
          repo,
          tag)

    # Every error path above releases the reservation; do the same on success.
    github.release()
Ejemplo n.º 39
0
 def update_versions_and_index(self):
   """Queue an index update when VersionCache.update reports a truthy result."""
   library_key = self.version_key.parent()
   if not VersionCache.update(library_key):
     return
   util.new_task(
       util.update_indexes_task(self.owner, self.repo),
       target='manage',
       transactional=True)
Ejemplo n.º 40
0
 def handle_get(self, name):
     """Queue author ingestion on 'manage' if the lowercased name has no Author entity."""
     known_author = Author.get_by_id(name.lower())
     if known_author is not None:
         return
     ingest_url = util.ingest_author_task(name)
     util.new_task(ingest_url, target='manage')
Ejemplo n.º 41
0
 def handle_get(self, owner, repo):
     """Enqueue the library ingestion task on 'manage', then write 'OK'."""
     ingest_url = util.ingest_library_task(owner, repo)
     util.new_task(target='manage', *[ingest_url])
     self.response.write('OK')
Ejemplo n.º 42
0
 def get(self, kind, owner, repo):
   """Queue an 'ingest/library' task carrying `kind` as detail and reply 'OK'."""
   task_name = 'ingest/library'
   util.new_task(task_name, owner, repo, detail=kind)
   self.response.write('OK')
Ejemplo n.º 43
0
 def handle_get(self, owner, repo):
   """Ensure the library is fully ingested: queue ingestion if it is missing or shallow."""
   stored = Library.get_by_id(Library.id(owner, repo))
   needs_ingestion = stored is None or stored.shallow_ingestion
   if needs_ingestion:
     util.new_task(util.ingest_library_task(owner, repo), target='manage')
Ejemplo n.º 44
0
    def update_metadata(self):
        """Refresh the library's GitHub metadata, contributors and participation.

        Each resource is requested with its stored ETag, so an unchanged
        resource (304) is left alone. A 200 response is checked to be valid
        JSON, stored with its new ETag and update time, and marks the
        library dirty.

        Returns:
            The result of ``self.error(...)`` when a payload is not valid
            JSON, the result of ``self.retry(...)`` on an unexpected status
            code, otherwise ``None``.

        Raises:
            RequestAborted: if the repo was renamed (the library is deleted
                and a task for the new name is queued) or no longer exists
                (the library is deleted).
        """
        headers = {'Accept': 'application/vnd.github.drax-preview+json'}
        response = util.github_get('repos',
                                   self.owner,
                                   self.repo,
                                   etag=self.library.metadata_etag,
                                   headers=headers)
        if response.status_code == 200:
            try:
                metadata = json.loads(response.content)
            except ValueError:
                return self.error("could not parse metadata",
                                  ErrorCodes.Library_parse_metadata)

            repo = metadata.get('name', '').lower()
            owner = metadata.get('owner', {}).get('login', '').lower()
            # GitHub reports the canonical owner/name; a mismatch with what
            # we requested means the repo was renamed or transferred.
            if repo != '' and owner != '' and (repo != self.repo
                                               or owner != self.owner):
                logging.info('deleting renamed repo %s',
                             Library.id(self.owner, self.repo))
                delete_library(self.library.key)
                task_url = util.ensure_library_task(owner, repo)
                util.new_task(task_url, target='manage')
                # Format the message here: passing the id as a second
                # exception argument left the '%s' placeholder unformatted.
                raise RequestAborted('repo has been renamed to %s' %
                                     Library.id(owner, repo))

            self.library.metadata = response.content
            self.library.metadata_etag = response.headers.get('ETag', None)
            self.library.metadata_updated = datetime.datetime.now()
            self.library_dirty = True
        elif response.status_code == 404:
            logging.info('deleting non-existing repo %s',
                         Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            raise RequestAborted('repo no longer exists')
        elif response.status_code != 304:
            return self.retry('could not update repo metadata (%d)' %
                              response.status_code)

        response = util.github_get('repos',
                                   self.owner,
                                   self.repo,
                                   'contributors',
                                   etag=self.library.contributors_etag)
        if response.status_code == 200:
            try:
                # Parsed only to validate; the raw payload is what is stored.
                json.loads(response.content)
            except ValueError:
                return self.error("could not parse contributors",
                                  ErrorCodes.Library_parse_contributors)
            self.library.contributors = response.content
            self.library.contributors_etag = response.headers.get('ETag', None)
            self.library.contributors_updated = datetime.datetime.now()
            self.library_dirty = True
        elif response.status_code != 304:
            return self.retry('could not update contributors (%d)' %
                              response.status_code)

        # The path segment previously carried a stray trailing space.
        response = util.github_get('repos',
                                   self.owner,
                                   self.repo,
                                   'stats/participation',
                                   etag=self.library.participation_etag)
        if response.status_code == 200:
            try:
                # Parsed only to validate; the raw payload is what is stored.
                json.loads(response.content)
            except ValueError:
                return self.error("could not parse stats/participation",
                                  ErrorCodes.Library_parse_stats)
            self.library.participation = response.content
            self.library.participation_etag = response.headers.get(
                'ETag', None)
            self.library.participation_updated = datetime.datetime.now()
            self.library_dirty = True
        elif response.status_code == 202:
            # GitHub is "computing" the data. We'll try again next update cycle.
            # TODO: Alternatively we could retry this task
            pass
        elif response.status_code != 304:
            return self.retry('could not update stats/participation (%d)' %
                              response.status_code)
Ejemplo n.º 45
0
 def trigger_version_deletion(self, tag):
     """Queue, as part of the current transaction, the delete task for owner/repo/`tag` on 'manage'."""
     util.new_task(
         util.delete_task(self.owner, self.repo, tag),
         target='manage',
         transactional=True)
Ejemplo n.º 46
0
 def trigger_author_ingestion(self):
     """Queue an ensure-author task for the owner, unless the library is shallowly ingested."""
     if not self.library.shallow_ingestion:
         author_task = util.ensure_author_task(self.owner)
         util.new_task(author_task, transactional=True, target='manage')
Ejemplo n.º 47
0
Archivo: manage.py Proyecto: shans/v2
  def get(self, owner, repo, version):
    """Ingest one version: store its README, rendered README and bower.json.

    Responds 500 when the GitHub quota reservation fails. If the README
    cannot be stored or bower.json is not valid JSON, the problem is written
    to the Version entity's `error` field and the request still answers 200.
    When `version` is the last entry returned by `Library.versions_for_key`,
    the library's search document is rebuilt (and, for collections, a
    dependency ingestion task is queued).
    """
    logging.info('ingesting version %s/%s/%s', owner, repo, version)

    github = quota.GitHub()
    if not github.reserve(1):
      self.response.set_status(500)
      return

    version_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, version)

    def record_error(message):
      # Persist the failure on the Version entity; the request itself succeeds.
      failed_version = version_key.get()
      failed_version.error = message
      failed_version.put()
      self.response.set_status(200)

    readme_response = urlfetch.fetch(util.content_url(owner, repo, version, 'README.md'))
    readme = readme_response.content

    try:
      readme_entity = Content(parent=version_key, id='readme', content=readme)
      readme_entity.etag = readme_response.headers.get('ETag', None)
      readme_entity.put()
    except db.BadValueError:
      record_error("Could not store README.md as a utf-8 string")
      return

    rendered = github.markdown(readme)
    Content(parent=version_key, id='readme.html', content=rendered.content).put()

    bower_response = urlfetch.fetch(util.content_url(owner, repo, version, 'bower.json'))
    try:
      bower = json.loads(bower_response.content)
    except ValueError:
      record_error("This version has a missing or broken bower.json")
      return

    bower_entity = Content(parent=version_key, id='bower', content=bower_response.content)
    bower_entity.etag = bower_response.headers.get('ETag', None)
    bower_entity.put()

    known_versions = Library.versions_for_key(version_key.parent())
    if known_versions[-1] != version:
      # Only the last listed version drives the search index.
      self.response.set_status(200)
      return

    library = version_key.parent().get()
    if library.kind == "collection":
      util.new_task(util.ingest_dependencies_task(owner, repo, version))
    metadata = json.loads(library.metadata)
    logging.info('adding search index for %s', version)
    # Prefer the bower.json description, falling back to the repo metadata.
    description = bower.get("description", metadata.get("description", ""))
    updated_at = datetime.datetime.strptime(metadata.get('updated_at'), TIME_FORMAT)
    fields = [
        search.AtomField(name='full_name', value=metadata['full_name']),
        search.TextField(name='owner', value=owner),
        search.TextField(name='repo', value=repo),
        search.TextField(name='version', value=version),
        search.TextField(name='repoparts', value=' '.join(repo.split('-'))),
        search.TextField(name='description', value=description),
        search.TextField(name='keywords', value=' '.join(bower.get('keywords', []))),
        search.NumberField(name='stars', value=metadata.get('stargazers_count')),
        search.NumberField(name='subscribers', value=metadata.get('subscribers_count')),
        search.NumberField(name='forks', value=metadata.get('forks')),
        search.NumberField(name='contributors', value=library.contributor_count),
        search.DateField(name='updated_at', value=updated_at),
    ]
    document = search.Document(doc_id='%s/%s' % (owner, repo), fields=fields)
    search.Index('repo').put(document)
    self.response.set_status(200)
Ejemplo n.º 48
0
 def update_versions_and_index(self):
     """Refresh the version cache for the parent key; queue an index update if it returns truthy."""
     cache_changed = VersionCache.update(self.version_key.parent())
     if not cache_changed:
         return
     index_task = util.update_indexes_task(self.owner, self.repo)
     util.new_task(index_task, transactional=True, target='manage')
Ejemplo n.º 49
0
Archivo: manage.py Proyecto: shans/v2
  def get(self, owner, repo, kind):
    """Ingest or refresh a library from GitHub using ETag-conditional fetches.

    Responds 400 when `kind` is neither 'element' nor 'collection', and 500
    when the GitHub quota reservation fails. Metadata, contributors and tags
    are each requested with their stored ETag: 304 leaves the stored value
    alone, 200 stores the new payload and ETag, and any other status is
    recorded in `library.error` and stops the ingestion. For every tag
    accepted by `versiontag.is_valid`, a `Version` entity is stored, an
    ingestion task is queued and analysis is requested.
    """
    if kind not in ('element', 'collection'):
      self.response.set_status(400)
      return
    owner = owner.lower()
    repo = repo.lower()
    library = Library.maybe_create_with_kind(owner, repo, kind)
    library_dirty = False
    if library.error is not None:
      # Clear any error left over from a previous attempt.
      library_dirty = True
      library.error = None

    logging.info('created library')

    github = quota.GitHub()
    if not github.reserve(3):
      self.response.set_status(500)
      return

    response = github.github_resource('repos', owner, repo, etag=library.metadata_etag)
    if response.status_code == 200:
      library.metadata = response.content
      library.metadata_etag = response.headers.get('ETag', None)
      library_dirty = True
    elif response.status_code != 304:
      library.error = 'repo metadata not found (%d)' % response.status_code
      github.release()
      library.put()
      return

    response = github.github_resource('repos', owner, repo, 'contributors', etag=library.contributors_etag)
    if response.status_code == 200:
      library.contributors = response.content
      library.contributors_etag = response.headers.get('ETag', None)
      library.contributor_count = len(json.loads(response.content))
      library_dirty = True
    elif response.status_code != 304:
      library.error = 'repo contributors not found (%d)' % response.status_code
      github.release()
      library.put()
      return

    response = github.github_resource('repos', owner, repo, 'git/refs/tags', etag=library.tags_etag)
    if response.status_code == 200:
      library.tags = response.content
      library.tags_etag = response.headers.get('ETag', None)
      library_dirty = True

      data = json.loads(response.content)
      # The loop below indexes each entry as a dict, so anything other than
      # a JSON array of refs is rejected. The previous check,
      # isinstance(data, object), was always true and never rejected anything.
      if not isinstance(data, list):
        library.error = 'repo contains no valid version tags'
        github.release()
        library.put()
        return
      for version in data:
        # Refs are expected to look like 'refs/tags/<tag>'; drop the
        # 10-character prefix to get the bare tag name.
        tag = version['ref'][10:]
        if not versiontag.is_valid(tag):
          continue
        sha = version['object']['sha']
        version_object = Version(parent=library.key, id=tag, sha=sha)
        version_object.put()
        task_url = util.ingest_version_task(owner, repo, tag)
        util.new_task(task_url)
        util.publish_analysis_request(owner, repo, tag)
    elif response.status_code != 304:
      library.error = 'repo tags not found (%d)' % response.status_code
      github.release()
      library.put()
      return

    if library_dirty:
      library.put()
    github.release()
Ejemplo n.º 50
0
  def update_metadata(self):
    """Refresh GitHub metadata, contributors and participation for the library.

    Scopes starting with '@' are treated as NPM packages and first go
    through `self.update_registry_info()`; otherwise scope/package are used
    directly as the GitHub owner/repo. Each GitHub resource is requested
    with its stored ETag, so an unchanged resource (304) is left alone. A
    200 response is checked to be valid JSON, stored with its new ETag and
    update time, and marks the library dirty.

    Returns:
      The result of `self.error(...)` when a payload is not valid JSON, the
      result of `self.retry(...)` on an unexpected status code, otherwise
      None.

    Raises:
      RequestAborted: if a GitHub-backed library was renamed (it is deleted
        and a task for the new name is queued) or the repo no longer exists
        (the library is deleted).
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
      # NOTE(review): presumably this sets self.owner / self.repo from the
      # registry data, since both are read below - confirm.
      self.update_registry_info()
    else:
      self.owner = self.scope
      self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
      try:
        metadata = json.loads(response.content)
      except ValueError:
        return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

      # From here on owner/repo hold the canonical names reported by GitHub.
      self.owner = metadata.get('owner', {}).get('login', '').lower()
      self.repo = metadata.get('name', '').lower()

      # Deleting is only necessary if Library entity is a GitHub repo
      renamed = self.repo != self.package or self.owner != self.scope
      if (not is_npm_package) and self.repo != '' and self.owner != '' and renamed:
        # NOTE(review): owner/repo were just overwritten, so this logs the
        # new id rather than the one being deleted - confirm intent.
        logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        task_url = util.ensure_library_task(self.owner, self.repo)
        util.new_task(task_url, target='manage')
        # Format the message here: passing the id as a second exception
        # argument left the '%s' placeholder unformatted.
        raise RequestAborted('repo has been renamed to %s' % Library.id(self.owner, self.repo))

      # If adding a NPM package that a Bower repo already points to, remove the bower one.
      # NOTE(review): Library.id(...) looks like it only builds an id, so the
      # `is not None` check may always pass; an existence lookup may have
      # been intended - confirm before changing.
      bower_library_id = Library.id(self.owner, self.repo)
      if is_npm_package and bower_library_id is not None:
        logging.info('removing bower repo %s', Library.id(self.owner, self.repo))
        task_url = util.suppress_library_task(self.owner, self.repo)
        util.new_task(task_url, target='manage')

      self.library.github_owner = self.owner
      self.library.github_repo = self.repo

      self.library.metadata = response.content
      self.library.metadata_etag = response.headers.get('ETag', None)
      self.library.metadata_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 404:
      logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
      delete_library(self.library.key)
      raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
      return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw payload is what is stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
      self.library.contributors = response.content
      self.library.contributors_etag = response.headers.get('ETag', None)
      self.library.contributors_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code != 304:
      return self.retry('could not update contributors (%d)' % response.status_code)

    # The path segment previously carried a stray trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation', etag=self.library.participation_etag)
    if response.status_code == 200:
      try:
        # Parsed only to validate; the raw payload is what is stored.
        json.loads(response.content)
      except ValueError:
        return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
      self.library.participation = response.content
      self.library.participation_etag = response.headers.get('ETag', None)
      self.library.participation_updated = datetime.datetime.now()
      self.library_dirty = True
    elif response.status_code == 202:
      # GitHub is "computing" the data. We'll try again next update cycle.
      # TODO: Alternatively we could retry this task
      pass
    elif response.status_code != 304:
      return self.retry('could not update stats/participation (%d)' % response.status_code)