def handle_get(self):
    """Kick off update tasks for every Library and Author.

    Refuses to start when the 'update' queue still holds tasks from a
    previous run, so only one mass update is in flight at a time.
    """
    queue = taskqueue.Queue('update')
    if queue.fetch_statistics().tasks > 0:
        self.response.write('update already in progress')
        return

    library_query = Library.query()
    cursor = None
    more = True
    triggered = 0
    while more:
        keys, cursor, more = library_query.fetch_page(50, keys_only=True, start_cursor=cursor)
        for key in keys:
            triggered += 1
            util.new_task(util.update_library_task(key.id()), target='manage', queue_name='update')
    logging.info('triggered %d library updates', triggered)

    author_query = Author.query()
    cursor = None
    more = True
    triggered = 0
    while more:
        keys, cursor, more = author_query.fetch_page(50, keys_only=True, start_cursor=cursor)
        for key in keys:
            triggered += 1
            util.new_task(util.update_author_task(key.id()), target='manage', queue_name='update')
    logging.info('triggered %d author updates', triggered)
def handle_get(self, scope, package, version):
    """Delete every entity stored under a version and refresh indexes if needed."""
    # FIXME: Make deletion transactional with check on library that tag is excluded.
    version_key = ndb.Key(Library, Library.id(scope, package), Version, version)
    descendants = ndb.Query(ancestor=version_key).iter(keys_only=True)
    ndb.delete_multi(descendants)
    # Only re-index when the version cache actually changed.
    if VersionCache.update(version_key.parent()):
        util.new_task(util.update_indexes_task(scope, package), target='manage')
def post(self):
    """Register a 'pull_request' webhook on a GitHub repo.

    Verifies the caller's OAuth token grants admin access to the repo, skips
    creation when an identical active webhook already exists, then enqueues a
    shallow ingestion task so the access token can be stored.
    """
    full_name = self.request.get('repo').lower()
    split = full_name.split('/')
    if len(split) != 2:
        self.response.set_status(400)
        self.response.write('Bad request, not repo')
        return
    owner = split[0]
    repo = split[1]
    access_token = exchange_token(self)
    if access_token is None:
        return
    # Validate access token against repo
    repos_response = util.github_get('repos/%s' % full_name, access_token=access_token)
    if repos_response.status_code != 200:
        self.response.set_status(401)
        self.response.write('Cannot access repo')
        return
    info = json.loads(repos_response.content)
    has_access = info['permissions']['admin']
    if not has_access:
        self.response.set_status(401)
        self.response.write('Do not have access to the repo')
        return
    parsed_url = urlparse(self.request.url)
    params = {'name': 'web', 'events': ['pull_request']}
    params['config'] = {
        'url': '%s://%s/api/preview-event' % (parsed_url.scheme, parsed_url.netloc),
        'content_type': 'json',
    }
    # Check if the webhook exists
    # NOTE(review): this calls github_post to *list* hooks — presumably the
    # helper issues a GET when no payload is supplied; confirm against util,
    # otherwise this should be util.github_get.
    list_webhooks_response = util.github_post('repos', owner, repo, 'hooks', access_token=access_token)
    if list_webhooks_response.status_code != 200:
        # Listing failed; fall through and attempt creation anyway.
        logging.error('Unable to query existing webhooks, continuing anyway. Github %s: %s',
                      list_webhooks_response.status_code, list_webhooks_response.content)
    else:
        webhooks = json.loads(list_webhooks_response.content)
        for webhook in webhooks:
            if webhook['active'] and webhook['config'] == params['config']:
                self.response.write('Webhook is already configured')
                return
    # Create the webhook
    create_webhook_response = util.github_post('repos', owner, repo, 'hooks', params, access_token)
    if create_webhook_response.status_code != 201:
        self.response.set_status(500)
        self.response.write('Failed to create webhook.')
        logging.error('Failed to create webhook. Github %s: %s',
                      create_webhook_response.status_code, create_webhook_response.content)
        return
    # Trigger shallow ingestion of the library so we can store the access token.
    util.new_task(util.ingest_webhook_task(owner, repo), params={'access_token': access_token}, target='manage')
    self.response.write('Created webhook')
def handle_post(self):
    """Persist an analysis result delivered via pub/sub push.

    Decodes the message payload into the version's 'analysis' Content entity
    and, when this version is the library's default, enqueues an index update.
    """
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    payload = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return

    content.set_json(None if payload == '' else json.loads(payload))
    if error is None:
        content.status = Status.ready
        content.error = None
    else:
        content.status = Status.error
        content.error = error
    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
        util.new_task(util.update_indexes_task(owner, repo), target='manage')
def handle_get(self, owner, repo, version):
    """Delete every entity stored under a version and refresh indexes if needed."""
    # FIXME: Make deletion transactional with check on library that tag is excluded.
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    descendants = ndb.Query(ancestor=version_key).iter(keys_only=True)
    ndb.delete_multi(descendants)
    # Only re-index when the version cache actually changed.
    if VersionCache.update(version_key.parent()):
        util.new_task(util.update_indexes_task(owner, repo), target='manage')
def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors, and participation.

    Each resource is fetched with its stored ETag, so an unchanged resource
    (304) is a no-op. A renamed repo is deleted and re-ingested under its new
    id; a 404 deletes the library outright.
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata")
        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        # GitHub followed a redirect: the repo now lives under a new owner/name.
        if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(owner, repo))
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)
    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)  # validate only; the raw JSON is stored
        except ValueError:
            return self.error("could not parse contributors")
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)
    # NOTE(review): trailing space in 'stats/participation ' looks accidental
    # but is consistent throughout this file — confirm before changing.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation ', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)  # validate only; the raw JSON is stored
        except ValueError:
            return self.error("could not parse stats/participation")
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def trigger_analysis(self, tag, sha, transactional=False):
    """Mark a version's analysis as pending and enqueue the analysis task."""
    # Collections are analyzed at a specific commit; other kinds use the tag alone.
    analysis_sha = sha if self.library.kind == 'collection' else None
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)
    Content(id='analysis', parent=version_key, status=Status.pending).put()
    task_url = util.ingest_analysis_task(self.owner, self.repo, tag, analysis_sha)
    util.new_task(task_url, target='analysis', transactional=transactional)
def update_collection_dependencies(self, collection_version_key, bower):
    """Record each bower dependency of a collection and ensure it is ingested."""
    dependencies = bower.get('dependencies', {})
    for spec in dependencies.values():
        dep = Dependency.from_string(spec)
        if dep is None:
            # Unparseable dependency specifier; skip it.
            continue
        library_key = ndb.Key(Library, Library.id(dep.owner, dep.repo))
        CollectionReference.ensure(library_key, collection_version_key, semver=dep.version)
        task_url = util.ensure_library_task(dep.owner.lower(), dep.repo.lower())
        util.new_task(task_url, target='manage')
def post(self):
    """Resolve a GitHub URL to owner/repo/SHA and enqueue preview ingestion.

    Accepts commit, pull-request, and branch URLs; unresolvable URLs get a
    400 response.
    """
    if not validate_captcha(self):
        return
    url = self.request.get('url')
    match = re.match(r'https://github.com/(.*?)/([^/]*)(.*)', url)
    if match is None:
        self.response.set_status(400)
        self.response.write('Unable to understand url (%s)' % url)
        # BUG FIX: missing return — previously fell through and crashed on
        # match.group(1) with match == None.
        return
    owner = match.group(1)
    repo = match.group(2)
    tail = match.group(3)
    # SHA already defined
    match = re.match(r'.*commits?/(.*)', tail)
    if match:
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write('%s/%s/%s' % (owner, repo, match.group(1)))
        util.new_task(util.ingest_preview_task(owner, repo),
                      params={'commit': match.group(1), 'url': url}, target='manage')
        return
    # Resolve SHA using these patterns and Github API
    tail = re.sub(r'/pull/(.*)', r'pull/\1/head', tail)
    tail = re.sub(r'/tree/(.*)', r'heads/\1', tail)
    tail = re.sub(r'^$', r'heads/master', tail)
    if not tail:
        self.response.set_status(400)
        self.response.write('Unable to understand url (%s)' % url)
        # BUG FIX: missing return after the error response.
        return
    response = util.github_get('repos', owner, repo, 'git/refs/' + tail)
    if response.status_code == 404:
        self.response.set_status(400)
        self.response.write('Error resolving url (%s)' % url)
        # BUG FIX: missing return — previously continued and tried to parse
        # the 404 error payload as a ref object.
        return
    sha = json.loads(response.content)['object']['sha']
    util.new_task(util.ingest_preview_task(owner, repo),
                  params={'commit': sha, 'url': url}, target='manage')
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write('%s/%s/%s' % (owner, repo, sha))
def trigger_version_ingestion(self, tag, sha, url=None, preview=False):
    """Create/refresh a Version entity and enqueue ingestion plus analysis.

    Returns False when the version is already ready or pending; True after
    enqueuing the tasks.
    """
    existing = Version.get_by_id(tag, parent=self.library.key)
    already_tracked = existing is not None and (
        existing.status == Status.ready or existing.status == Status.pending)
    if already_tracked:
        # Version object is already up to date or pending
        return False
    Version(id=tag, parent=self.library.key, sha=sha, url=url, preview=preview).put()
    util.new_task(util.ingest_version_task(self.owner, self.repo, tag),
                  target='manage', transactional=True)
    self.trigger_analysis(tag, sha, transactional=True)
    return True
def trigger_analysis(self, tag, sha, transactional=False):
    """Enqueue analysis for a version unless one is already pending or ready."""
    # Collections are analyzed at a specific commit; other kinds use the tag alone.
    analysis_sha = sha if self.library.kind == 'collection' else None
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)
    existing = Content.get_by_id('analysis', parent=version_key)
    if existing is not None and existing.status != Status.error:
        # Analysis already exists and did not fail; nothing to do.
        return
    Content(id='analysis', parent=version_key, status=Status.pending).put()
    task_url = util.ingest_analysis_task(self.owner, self.repo, tag, analysis_sha)
    util.new_task(task_url, target='analysis', transactional=transactional, queue_name='analysis')
def trigger_analysis(self, tag, sha, transactional=False):
    """Enqueue analysis for a version unless one is already pending or ready."""
    # Collections are analyzed at a specific commit; other kinds use the tag alone.
    analysis_sha = sha if self.library.kind == 'collection' else None
    version_key = ndb.Key(Library, self.library.key.id(), Version, tag)
    existing = Content.get_by_id('analysis', parent=version_key)
    if existing is not None and existing.status != Status.error:
        # Analysis already exists and did not fail; nothing to do.
        return
    Content(id='analysis', parent=version_key, status=Status.pending).put()
    task_url = util.ingest_analysis_task(self.scope, self.package, tag, analysis_sha)
    util.new_task(task_url, target='analysis', transactional=transactional, queue_name='analysis')
def trigger_version_ingestion(self, tag, sha, url=None, preview=False):
    """Create/refresh a Version entity and enqueue ingestion plus analysis.

    Returns False when the version is already ready or pending; True after
    enqueuing the tasks.
    """
    existing = Version.get_by_id(tag, parent=self.library.key)
    already_tracked = existing is not None and (
        existing.status == Status.ready or existing.status == Status.pending)
    if already_tracked:
        # Version object is already up to date or pending
        return False
    Version(id=tag, parent=self.library.key, sha=sha, url=url, preview=preview).put()
    util.new_task(util.ingest_version_task(self.scope, self.package, tag),
                  target='manage', transactional=True)
    self.trigger_analysis(tag, sha, transactional=True)
    return True
def handle_get(self):
    """Enqueue an index-update task for every library."""
    query = Library.query()
    cursor = None
    more = True
    triggered = 0
    while more:
        keys, cursor, more = query.fetch_page(50, keys_only=True, start_cursor=cursor)
        for key in keys:
            triggered += 1
            owner, repo = key.id().split('/', 1)
            util.new_task(util.update_indexes_task(owner, repo), target='manage')
    logging.info('triggered %d index updates', triggered)
def error(error_string):
    # Record an ingestion failure on the version entity and, when this run was
    # generating the search index, fall back to re-ingesting the previous tag.
    # NOTE(review): closure — relies on owner, repo, version, key,
    # generate_search, and self from the enclosing scope.
    logging.info('ingestion error "%s" for %s/%s/%s', error_string, owner, repo, version)
    ver = key.get()
    ver.error = error_string
    ver.put()
    if generate_search:
        library = key.parent().get()
        versions = json.loads(library.tags)
        idx = versions.index(version)
        if idx > 0:
            # Re-run ingestion for the next-most-recent tag as the latest.
            logging.info('ingestion for %s/%s falling back to version %s', owner, repo, versions[idx - 1])
            task_url = util.ingest_version_task(owner, repo, versions[idx - 1])
            util.new_task(task_url, {'latestVersion': 'True'})
    self.response.set_status(200)
def handle_get(self):
    """Enqueue an analysis task for every library.

    The 'latest' query parameter, when present, restricts analysis to the
    latest version.
    """
    latest = self.request.get('latest', None) is not None
    query = Library.query()
    cursor = None
    more = True
    triggered = 0
    while more:
        keys, cursor, more = query.fetch_page(50, keys_only=True, start_cursor=cursor)
        for key in keys:
            triggered += 1
            owner, repo = key.id().split('/', 1)
            util.new_task(util.analyze_library_task(owner, repo, latest), target='manage')
    logging.info('triggered %d analyses', triggered)
def get(self, owner, repo, kind):
    """Shallow-ingest a preview commit of a repo and enqueue its version ingestion."""
    commit = self.request.get('commit', None)
    url = self.request.get('url', None)
    assert commit is not None and url is not None
    self.init_library(owner, repo, kind)
    # A brand-new library (no metadata, no prior error) must not ingest tags.
    is_new = self.library.metadata is None and self.library.error is None
    if is_new:
        self.library.ingest_versions = False
        self.library_dirty = True
    self.update_metadata()
    Version(parent=self.library.key, id=commit, sha=commit, url=url).put()
    util.new_task(util.ingest_version_task(owner, repo, commit))
    self.commit()
def post(self):
    """Handle a GitHub pull_request webhook delivery.

    For 'opened'/'synchronize' actions on a registered library, posts a
    preview status back to the PR and enqueues preview ingestion of the PR's
    head commit.
    """
    if self.request.headers.get('X-Github-Event') != 'pull_request':
        self.response.set_status(202)  # Accepted
        self.response.write('Payload was not for a pull_request, aborting.')
        return
    payload = json.loads(self.request.body)
    if payload['action'] != 'opened' and payload['action'] != 'synchronize':
        self.response.set_status(202)  # Accepted
        self.response.write('Payload was not opened or synchronize, aborting.')
        return
    # Original repo
    origin_owner = payload['repository']['owner']['login']
    origin_repo = payload['repository']['name']
    origin_full_name = payload['repository']['full_name']
    # Repo where the pull request came from.
    pull_owner = payload['pull_request']['head']['repo']['owner']['login']
    pull_repo = payload['pull_request']['head']['repo']['name']
    key = ndb.Key(Library, Library.id(origin_owner, origin_repo))
    library = key.get(read_policy=ndb.EVENTUAL_CONSISTENCY)
    if library is None:
        logging.error('No library object found for %s', origin_full_name)
        self.response.set_status(400)  # Bad request
        self.response.write('It does not seem like this repository was registered')
        return
    sha = payload['pull_request']['head']['sha']
    parsed_url = urlparse(self.request.url)
    params = {
        'state': 'success',
        'target_url': '%s://%s/preview/%s/%s/%s' % (parsed_url.scheme, parsed_url.netloc, pull_owner, pull_repo, sha),
        'description': 'Preview is ready!',  # TODO: Don't lie
        'context': 'webcomponents/preview'
    }
    # Post the status using the access token stored at registration time.
    response = util.github_post('repos', origin_owner, origin_repo, 'statuses/%s' % sha, params, library.github_access_token)
    if response.status_code != 201:
        logging.error('Failed to set status on Github PR. Github returned %s:%s', response.status_code, response.content)
        self.response.set_status(500)
        self.response.write('Failed to set status on PR.')
        return
    pull_request_url = payload['pull_request']['url']
    util.new_task(util.ingest_preview_task(pull_owner, pull_repo), params={'commit': sha, 'url': pull_request_url}, target='manage')
def post(self, library):
    """Validate a captcha-gated add-library request and enqueue its ingestion.

    Accepts either 'scope/package' or a bare package name, which implies the
    synthetic '@@npm' scope.
    """
    if not validate_captcha(self):
        return
    split = library.split('/')
    # BUG FIX: 'len(split) is 2' compared int identity (works only by the
    # CPython small-int cache accident); use equality.
    if len(split) == 2:
        scope = split[0]
        package = split[1]
    elif len(split) == 1:
        scope = '@@npm'
        package = library
    else:
        self.response.set_status(400)
        self.response.write('Invalid name')
        return
    # TODO: validate valid repo and return result
    task_url = util.ingest_library_task(scope, package)
    util.new_task(task_url, target='manage')
def get(self, owner, repo, version):
    """Ingest a collection's bower dependencies.

    Records each dependency string on the Version entity and appends a
    CollectionReference to every dependency library, creating those libraries
    as needed.

    NOTE(review): legacy handler — uses the positional
    util.new_task('ingest/library', owner, repo) form rather than a task-URL
    helper; confirm this route is still wired up.
    """
    logging.info('ingesting version %s/%s/%s', owner, repo, version)
    key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, version, Content, 'bower')
    bower = json.loads(key.get().content)
    ver = key.parent().get()
    dependencies = bower.get('dependencies', {})
    library_keys = []
    dep_list = []
    for name in dependencies.keys():
        ver.dependencies.append(dependencies[name])
        dep = Dependency.from_string(dependencies[name])
        dep_list.append(dep)
        library_keys.append(ndb.Key(Library, '%s/%s' % (dep.owner.lower(), dep.repo.lower())))
    libraries = Library.get_or_create_list(library_keys)
    for i, library in enumerate(libraries):
        dep = dep_list[i]
        library.collections.append(CollectionReference(version=key.parent(), semver=dep.version))
        util.new_task('ingest/library', dep.owner.lower(), dep.repo.lower())
    # The version entity is saved in the same batch as the dependency libraries.
    libraries.append(ver)
    ndb.put_multi(libraries)
def ingest_versions(self):
    """Fetch the repo's tags and enqueue ingestion for each valid version.

    No-op when version ingestion is disabled or the tag list is unchanged
    (304). Reports an error when the repo has no valid semver tags.
    """
    if not self.library.ingest_versions:
        return
    response = self.github.github_resource('repos', self.owner, self.repo, 'git/refs/tags',
                                           etag=self.library.tags_etag)
    if response.status_code == 304:
        # Tag list unchanged since last run.
        return
    if response.status_code != 200:
        return self.error('repo tags not found (%d)' % response.status_code)
    self.library.tags = response.content
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library_dirty = True
    data = json.loads(response.content)
    # BUG FIX: 'isinstance(data, object)' was always True, so the guard never
    # fired; the intent is to reject a non-list payload (e.g. an API error
    # object) before indexing into it.
    if not isinstance(data, list):
        data = []
    # Refs look like 'refs/tags/<tag>'; the 10-char prefix is stripped.
    data = [d for d in data if versiontag.is_valid(d['ref'][10:])]
    # BUG FIX: 'len(data) is 0' relied on int identity; use equality.
    if len(data) == 0:
        return self.error('repo contains no valid version tags')
    data.sort(lambda a, b: versiontag.compare(a['ref'][10:], b['ref'][10:]))
    data_refs = [d['ref'][10:] for d in data]
    self.library.tags = json.dumps(data_refs)
    self.library.tags_etag = response.headers.get('ETag', None)
    data.reverse()
    is_newest = True
    for version in data:
        tag = version['ref'][10:]
        if not versiontag.is_valid(tag):
            continue
        sha = version['object']['sha']
        params = {}
        if is_newest:
            # Only the newest tag triggers search-index generation downstream.
            params["latestVersion"] = "True"
            is_newest = False
        version_object = Version(parent=self.library.key, id=tag, sha=sha)
        version_object.put()
        task_url = util.ingest_version_task(self.owner, self.repo, tag)
        util.new_task(task_url, params)
        util.publish_analysis_request(self.owner, self.repo, tag)
def handle_post(self):
    """Persist an analysis pub/sub payload as JSON on the Content entity.

    Ignores oversized payloads and messages that carry no attributes; enqueues
    an index update when the version is the library's default.
    """
    # Ignore payloads larger than 5 MB.
    if len(self.request.body) > 1048487 * 5:
        return
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    payload = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    if len(attributes) == 0:
        logging.error(message)
        return
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return

    content.set_json(None if payload == '' else json.loads(payload))
    if error is None:
        content.status = Status.ready
        content.error = None
    else:
        content.status = Status.error
        content.error = error
    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
        util.new_task(util.update_indexes_task(owner, repo), target='manage')
def handle_post(self):
    """Persist a raw analysis pub/sub payload on the Content entity.

    Rejects payloads above the datastore-safe size by recording an error;
    enqueues an index update when the version is the library's default.
    """
    envelope = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = envelope['message']
    payload = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return

    if payload == '':
        content.content = None
    elif len(payload) > 500000:
        # Max entity size is only 1MB.
        logging.error('content was too large: %d %s %s', len(payload), Library.id(owner, repo), version)
        error = 'content was too large: %d' % len(payload)
    else:
        content.content = payload
    if error is None:
        content.status = Status.ready
        content.error = None
    else:
        content.status = Status.error
        content.error = error
    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
        util.new_task(util.update_indexes_task(owner, repo), target='manage')
def trigger_author_ingestion(self):
    """Enqueue a transactional task ensuring the library's author entity exists.

    Skipped for shallowly-ingested libraries.
    """
    if self.library.shallow_ingestion:
        return
    util.new_task(util.ensure_author_task(self.owner), target='manage', transactional=True)
def post(self, owner, repo):
    """Handle a captcha-gated add-library request by enqueuing ingestion."""
    if not validate_captcha(self):
        return
    # TODO: validate valid repo and return result
    util.new_task(util.ingest_library_task(owner, repo), target='manage')
def trigger_version_deletion(self, tag):
    """Enqueue a transactional task deleting the given version tag."""
    util.new_task(util.delete_version_task(self.scope, self.package, tag),
                  target='manage', transactional=True)
def update_metadata(self):
    """Refresh metadata for an NPM-scoped package or GitHub-backed library.

    NPM packages first query the registry to resolve the backing GitHub
    owner/repo. GitHub metadata, contributors, and participation stats are
    then fetched with stored ETags so unchanged resources (304) are no-ops.
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
        self.update_registry_info()
    else:
        self.owner = self.scope
        self.repo = self.package
    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)
        self.owner = metadata.get('owner', {}).get('login', '').lower()
        self.repo = metadata.get('name', '').lower()
        # Deleting is only necessary if Library entity is a GitHub repo
        if (not is_npm_package) and self.repo != '' and self.owner != '' and (self.repo != self.package or self.owner != self.scope):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(self.owner, self.repo))
        # If adding a NPM package that a Bower repo already points to, remove the bower one.
        # NOTE(review): Library.id presumably never returns None, making the
        # 'is not None' guard below vacuous — confirm the intended condition.
        bower_library_id = Library.id(self.owner, self.repo)
        if is_npm_package and bower_library_id is not None:
            task_url = util.migrate_library_task(self.owner, self.repo, self.scope, self.package)
            util.new_task(task_url, target='manage')
        self.library.github_owner = self.owner
        self.library.github_repo = self.repo
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)
    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)  # validate only; the raw JSON is stored
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)
    # NOTE(review): trailing space in 'stats/participation ' looks accidental
    # but is consistent throughout this file — confirm before changing.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation ', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)  # validate only; the raw JSON is stored
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def get(self, owner, repo, kind):
    """Enqueue ingestion of a library of the given kind and acknowledge."""
    util.new_task(util.ingest_library_task(owner, repo, kind))
    self.response.write('OK')
def trigger_version_deletion(self, tag):
    """Enqueue a transactional task deleting the given version tag."""
    util.new_task(util.delete_task(self.owner, self.repo, tag),
                  target='manage', transactional=True)
def handle_get(self, owner, repo):
    """Enqueue ingestion of a library and acknowledge."""
    util.new_task(util.ingest_library_task(owner, repo), target='manage')
    self.response.write('OK')
def handle_get(self, owner, repo):
    """Ensure a library exists: enqueue ingestion when it is missing or only
    shallowly ingested."""
    library = Library.get_by_id(Library.id(owner, repo))
    needs_ingestion = library is None or library.shallow_ingestion
    if needs_ingestion:
        util.new_task(util.ingest_library_task(owner, repo), target='manage')
def handle_get(self, name):
    """Ensure an author exists: enqueue ingestion when no entity is stored."""
    if Author.get_by_id(name.lower()) is None:
        util.new_task(util.ingest_author_task(name), target='manage')
def get(self, owner, repo, kind):
    """Create a library of the given kind and ingest metadata, contributors,
    and tags, enqueuing per-version ingestion and hydrolyzer analysis.

    Reserves GitHub quota up front; on any fetch failure records the error on
    the library, releases remaining quota, and stops.
    """
    if not (kind == 'element' or kind == 'collection'):
        self.response.set_status(400)
        return
    owner = owner.lower()
    repo = repo.lower()
    library = Library.maybe_create_with_kind(owner, repo, kind)
    logging.info('created library')
    github = quota.GitHub()
    if not github.reserve(3):
        self.response.set_status(500)
        return
    response = github.github_resource('repos', owner, repo)
    if not response.status_code == 200:
        library.error = 'repo metadata not found'
        github.release()
        library.put()
        return
    library.metadata = response.content
    response = github.github_resource('repos', owner, repo, 'contributors')
    if not response.status_code == 200:
        library.error = 'repo contributors not found'
        github.release()
        library.put()
        return
    library.contributors = response.content
    library.contributor_count = len(json.loads(response.content))
    response = github.github_resource('repos', owner, repo, 'git/refs/tags')
    if not response.status_code == 200:
        library.error = 'repo tags not found'
        github.release()
        library.put()
        return
    data = json.loads(response.content)
    # BUG FIX: 'isinstance(data, object)' was always True, so a non-list
    # payload (e.g. an API error object) slipped through to the loop below.
    if not isinstance(data, list):
        # BUG FIX: corrected 'contians' typo in the user-visible error.
        library.error = 'repo contains no valid version tags'
        github.release()
        library.put()
        return
    library.put()
    for version in data:
        # Refs look like 'refs/tags/<tag>'; strip the 10-char prefix.
        tag = version['ref'][10:]
        if not versiontag.is_valid(tag):
            continue
        sha = version['object']['sha']
        version_object = Version(parent=library.key, id=tag, sha=sha)
        version_object.put()
        util.new_task('ingest/version', owner, repo, detail=tag)
        util.publish_hydrolyze_pending(
            '/task/ingest/hydrolyzer/%s/%s/%s' % (owner, repo, tag),
            owner, repo, tag)
def update_versions_and_index(self):
    """Refresh the version cache and, when it changed, enqueue an index update."""
    cache_changed = VersionCache.update(self.version_key.parent())
    if cache_changed:
        util.new_task(util.update_indexes_task(self.owner, self.repo),
                      target='manage', transactional=True)
def get(self, kind, owner, repo):
    """Enqueue library ingestion via the legacy positional task form and acknowledge."""
    util.new_task('ingest/library', owner, repo, detail=kind)
    self.response.write('OK')
def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors, and participation.

    Each resource is fetched with its stored ETag, so an unchanged resource
    (304) is a no-op. A renamed repo is deleted and re-ingested under its new
    id; a 404 deletes the library outright. Parse failures are reported with
    an ErrorCodes value.
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo, etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)
        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        # GitHub followed a redirect: the repo now lives under a new owner/name.
        if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(owner, repo))
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)
    response = util.github_get('repos', self.owner, self.repo, 'contributors', etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)  # validate only; the raw JSON is stored
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)
    # NOTE(review): trailing space in 'stats/participation ' looks accidental
    # but is consistent throughout this file — confirm before changing.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation ', etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)  # validate only; the raw JSON is stored
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def get(self, owner, repo, version):
    """Ingest one tagged version: README, bower.json, and (if latest) search index.

    Stores rendered content under the Version entity; records an error on the
    Version and returns 200 when the README or bower.json is unusable.
    """
    logging.info('ingesting version %s/%s/%s', owner, repo, version)

    gh = quota.GitHub()
    if not gh.reserve(1):
        self.response.set_status(500)
        return

    version_key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, version)

    # Fetch and store the raw README.
    readme_response = urlfetch.fetch(util.content_url(owner, repo, version, 'README.md'))
    readme = readme_response.content
    try:
        readme_content = Content(parent=version_key, id='readme', content=readme)
        readme_content.etag = readme_response.headers.get('ETag', None)
        readme_content.put()
    except db.BadValueError:
        # Datastore rejected the bytes; record the failure on the version.
        broken_version = version_key.get()
        broken_version.error = "Could not store README.md as a utf-8 string"
        broken_version.put()
        self.response.set_status(200)
        return

    # Render the README to HTML via the GitHub markdown API.
    rendered = gh.markdown(readme)
    Content(parent=version_key, id='readme.html', content=rendered.content).put()

    # Fetch and validate bower.json before storing it.
    bower_response = urlfetch.fetch(util.content_url(owner, repo, version, 'bower.json'))
    try:
        json.loads(bower_response.content)
    except ValueError:
        broken_version = version_key.get()
        broken_version.error = "This version has a missing or broken bower.json"
        broken_version.put()
        self.response.set_status(200)
        return
    bower_content = Content(parent=version_key, id='bower', content=bower_response.content)
    bower_content.etag = bower_response.headers.get('ETag', None)
    bower_content.put()

    # Only the newest tag feeds the search index (and collection expansion).
    versions = Library.versions_for_key(version_key.parent())
    if versions[-1] == version:
        library = version_key.parent().get()
        if library.kind == "collection":
            util.new_task(util.ingest_dependencies_task(owner, repo, version))

        bower = json.loads(bower_response.content)
        metadata = json.loads(library.metadata)
        logging.info('adding search index for %s', version)
        # Prefer the bower description; fall back to the repo description.
        description = bower.get("description", metadata.get("description", ""))
        document = search.Document(doc_id='%s/%s' % (owner, repo), fields=[
            search.AtomField(name='full_name', value=metadata['full_name']),
            search.TextField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.TextField(name='version', value=version),
            search.TextField(name='repoparts', value=' '.join(repo.split('-'))),
            search.TextField(name='description', value=description),
            search.TextField(name='keywords', value=' '.join(bower.get('keywords', []))),
            search.NumberField(name='stars', value=metadata.get('stargazers_count')),
            search.NumberField(name='subscribers', value=metadata.get('subscribers_count')),
            search.NumberField(name='forks', value=metadata.get('forks')),
            search.NumberField(name='contributors', value=library.contributor_count),
            search.DateField(name='updated_at',
                             value=datetime.datetime.strptime(metadata.get('updated_at'), TIME_FORMAT)),
        ])
        search.Index('repo').put(document)

    self.response.set_status(200)
def get(self, owner, repo, kind):
    """Shallow-ingest a library: metadata, contributors, and version tags.

    kind must be 'element' or 'collection'. Creates Version entities for each
    valid semver tag and spawns ingest/analysis tasks per tag. Reserves 3
    GitHub quota units and releases them on every exit path.
    """
    if kind not in ('element', 'collection'):
        self.response.set_status(400)
        return

    owner = owner.lower()
    repo = repo.lower()
    library = Library.maybe_create_with_kind(owner, repo, kind)
    library_dirty = False
    if library.error is not None:
        # Clear any stale error from a previous failed ingestion attempt.
        library_dirty = True
        library.error = None
    logging.info('created library')

    github = quota.GitHub()
    if not github.reserve(3):
        self.response.set_status(500)
        return

    response = github.github_resource('repos', owner, repo, etag=library.metadata_etag)
    if response.status_code != 304:
        if response.status_code == 200:
            library.metadata = response.content
            library.metadata_etag = response.headers.get('ETag', None)
            library_dirty = True
        else:
            library.error = 'repo metadata not found (%d)' % response.status_code
            github.release()
            library.put()
            return

    response = github.github_resource('repos', owner, repo, 'contributors',
                                      etag=library.contributors_etag)
    if response.status_code != 304:
        if response.status_code == 200:
            library.contributors = response.content
            library.contributors_etag = response.headers.get('ETag', None)
            library.contributor_count = len(json.loads(response.content))
            library_dirty = True
        else:
            library.error = 'repo contributors not found (%d)' % response.status_code
            github.release()
            library.put()
            return

    response = github.github_resource('repos', owner, repo, 'git/refs/tags',
                                      etag=library.tags_etag)
    if response.status_code != 304:
        if response.status_code == 200:
            library.tags = response.content
            library.tags_etag = response.headers.get('ETag', None)
            library_dirty = True

            data = json.loads(response.content)
            # FIX: was `isinstance(data, object)`, which is true for every
            # Python value, so the guard could never fire and a non-list
            # payload (e.g. a GitHub error object) would crash below on
            # version['ref']. The tags endpoint returns a JSON list when the
            # repo has refs; anything else means no usable tags.
            if not isinstance(data, list):
                library.error = 'repo contains no valid version tags'
                github.release()
                library.put()
                return

            for version in data:
                tag = version['ref'][10:]  # strip the 'refs/tags/' prefix
                if not versiontag.is_valid(tag):
                    continue
                sha = version['object']['sha']
                version_object = Version(parent=library.key, id=tag, sha=sha)
                version_object.put()
                task_url = util.ingest_version_task(owner, repo, tag)
                util.new_task(task_url)
                util.publish_analysis_request(owner, repo, tag)
        else:
            library.error = 'repo tags not found (%d)' % response.status_code
            github.release()
            library.put()
            return

    if library_dirty:
        library.put()
    github.release()
def update_metadata(self):
    """Refresh metadata for an NPM package or a plain GitHub-repo library.

    NPM-scoped ids (scope starts with '@') are resolved via the registry by
    update_registry_info(); otherwise scope/package map directly to
    owner/repo. All GitHub requests are ETag-conditional. Raises
    RequestAborted when the repo was renamed or deleted; calls self.retry()
    on other GitHub failures.
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
        self.update_registry_info()
    else:
        self.owner = self.scope
        self.repo = self.package

    # Fetch GitHub metadata (drax preview includes license info).
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo,
                               etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

        self.owner = metadata.get('owner', {}).get('login', '').lower()
        self.repo = metadata.get('name', '').lower()

        # Deleting is only necessary if Library entity is a GitHub repo
        if (not is_npm_package) and self.repo != '' and self.owner != '' and (
                self.repo != self.package or self.owner != self.scope):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
            raise RequestAborted('repo has been renamed to %s', Library.id(self.owner, self.repo))

        # If adding a NPM package that a Bower repo already points to, remove the bower one.
        bower_library_id = Library.id(self.owner, self.repo)
        if is_npm_package and bower_library_id is not None:
            logging.info('removing bower repo %s', Library.id(self.owner, self.repo))
            task_url = util.suppress_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')

        self.library.github_owner = self.owner
        self.library.github_repo = self.repo
        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors',
                               etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            # Parse only to validate; the raw bytes are what we store.
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)
        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # FIX: the path previously contained a trailing space
    # ('stats/participation ') which can never match the real GitHub
    # endpoint, so this request always failed and kept triggering retries.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation',
                               etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)
        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)