def test_ingest_element(self):
    """Ingest an element library and check the stored entities and queued tasks.

    GitHub is stubbed with a bower.json, repo metadata, contributors, two
    tags and participation stats. The test expects the library to keep both
    tags, a ``v1.0.0`` Version with sha ``lol`` to exist, and analysis,
    author and version tasks to be queued in that order.
    """
    canned_responses = [
        ('https://raw.githubusercontent.com/org/repo/master/bower.json', '{"license": "MIT"}'),
        ('https://api.github.com/repos/org/repo', '{"owner":{"login":"******"},"name":"repo"}'),
        ('https://api.github.com/repos/org/repo/contributors', '["a"]'),
        ('https://api.github.com/repos/org/repo/tags', '''[{"name": "v0.5.0", "commit": {"sha": "old"}},{"name": "v1.0.0", "commit": {"sha": "lol"}}]'''),
        ('https://api.github.com/repos/org/repo/stats/participation', '{}'),
    ]
    for stub_url, stub_body in canned_responses:
        self.respond_to_github(stub_url, stub_body)

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    result = self.app.get(util.ingest_library_task('org', 'repo'), headers=queue_headers)
    self.assertEqual(result.status_int, 200)

    stored_library = Library.get_by_id('org/repo')
    self.assertIsNotNone(stored_library)
    self.assertIsNone(stored_library.error)
    self.assertEqual(stored_library.metadata, '{"owner":{"login":"******"},"name":"repo"}')
    self.assertEqual(stored_library.contributors, '["a"]')
    self.assertEqual(stored_library.tags, ['v0.5.0', 'v1.0.0'])

    version_key = ndb.Key(Library, 'org/repo', Version, 'v1.0.0')
    stored_version = version_key.get()
    self.assertIsNotNone(stored_version)
    self.assertIsNone(stored_version.error)
    self.assertEqual(stored_version.sha, 'lol')

    queued = self.tasks.get_filtered_tasks()
    expected_urls = [
        util.ingest_analysis_task('org', 'repo', 'v1.0.0'),
        util.ensure_author_task('org'),
        util.ingest_version_task('org', 'repo', 'v1.0.0'),
    ]
    self.assertEqual(expected_urls, [queued_task.url for queued_task in queued])
def test_ingest_collection(self):
    """Ingest a library whose bower.json carries the element-collection keyword.

    The master branch ref is stubbed with sha ``master-sha``. The test
    expects a pending ``v0.0.1`` Version with that sha, plus analysis,
    author and version tasks queued in that order.
    """
    canned_responses = [
        ('https://raw.githubusercontent.com/org/repo/master/bower.json', '{"keywords": ["element-collection"], "license": "MIT"}'),
        ('https://api.github.com/repos/org/repo', '{"owner":{"login":"******"},"name":"repo"}'),
        ('https://api.github.com/repos/org/repo/contributors', '["a"]'),
        ('https://api.github.com/repos/org/repo/git/refs/heads/master', '{"ref": "refs/heads/master", "object": {"sha": "master-sha"}}'),
        ('https://api.github.com/repos/org/repo/stats/participation', '{}'),
    ]
    for stub_url, stub_body in canned_responses:
        self.respond_to_github(stub_url, stub_body)

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    result = self.app.get(util.ingest_library_task('org', 'repo'), headers=queue_headers)
    self.assertEqual(result.status_int, 200)

    stored_library = Library.get_by_id('org/repo')
    self.assertIsNotNone(stored_library)
    self.assertIsNone(stored_library.error)
    self.assertEqual(stored_library.metadata, '{"owner":{"login":"******"},"name":"repo"}')
    self.assertEqual(stored_library.contributors, '["a"]')
    self.assertEqual(stored_library.tags, ['v0.0.1'])

    version_key = ndb.Key(Library, 'org/repo', Version, 'v0.0.1')
    stored_version = version_key.get()
    self.assertIsNone(stored_version.error)
    self.assertEqual(stored_version.status, Status.pending)
    self.assertEqual(stored_version.sha, 'master-sha')

    queued = self.tasks.get_filtered_tasks()
    expected_urls = [
        util.ingest_analysis_task('org', 'repo', 'v0.0.1', 'master-sha'),
        util.ensure_author_task('org'),
        util.ingest_version_task('org', 'repo', 'v0.0.1'),
    ]
    self.assertEqual(expected_urls, [queued_task.url for queued_task in queued])
def test_add_element(self):
    """Add an element through the manage endpoint, then run its ingestion task.

    The add request must answer ``OK`` and queue exactly one library
    ingestion task. Running that task against stubbed GitHub responses must
    store the library and its ``v1.0.0`` version and queue a second task
    flagged as the latest version.
    """
    add_result = self.app.get('/manage/add/element/org/repo')
    self.assertEqual(add_result.status_int, 200)
    self.assertEqual(add_result.normal_body, 'OK')

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 1)
    self.assertEqual(queued[0].url, util.ingest_library_task('org', 'repo', 'element'))

    canned_responses = [
        ('https://api.github.com/repos/org/repo', 'metadata bits'),
        ('https://api.github.com/repos/org/repo/contributors', '["a"]'),
        ('https://api.github.com/repos/org/repo/git/refs/tags', '[{"ref": "refs/tags/v1.0.0", "object": {"sha": "lol"}}]'),
    ]
    for stub_url, stub_body in canned_responses:
        self.respond_to_github(stub_url, stub_body)

    ingest_result = self.app.get(util.ingest_library_task('org', 'repo', 'element'))
    self.assertEqual(ingest_result.status_int, 200)

    stored_library = Library.get_by_id('org/repo')
    self.assertIsNotNone(stored_library)
    self.assertIsNone(stored_library.error)
    self.assertEqual(stored_library.kind, 'element')
    self.assertEqual(stored_library.metadata, 'metadata bits')
    self.assertEqual(stored_library.contributors, '["a"]')
    self.assertEqual(stored_library.contributor_count, 1)

    stored_version = ndb.Key(Library, 'org/repo', Version, 'v1.0.0').get()
    self.assertIsNone(stored_version.error)
    self.assertEqual(stored_version.sha, 'lol')

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 2)
    expected_version_url = util.ingest_version_task('org', 'repo', 'v1.0.0') + '?latestVersion=True'
    self.assertEqual(queued[1].url, expected_version_url)
def test_update_collection(self):
    """Update an existing collection whose master branch moved to a new sha.

    A ready ``v0.0.1`` version with sha ``old`` is seeded. Metadata and
    contributors answer 304, and the master ref reports ``new-master-sha``.
    The test expects the library to stay ready with no error, analysis and
    version tasks for ``v0.0.2`` to be queued, and a pending ``v0.0.2``
    Version carrying the new sha.
    """
    library_key = Library(id='org/repo', tags=['v0.0.1'], collection_sequence_number=1, kind='collection', spdx_identifier='MIT').put()
    Version(id='v0.0.1', parent=library_key, sha="old", status=Status.ready).put()

    self.respond_to_github('https://api.github.com/repos/org/repo', {'status': 304})
    self.respond_to_github('https://api.github.com/repos/org/repo/contributors', {'status': 304})
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')
    self.respond_to_github('https://api.github.com/repos/org/repo/git/refs/heads/master', """{ "ref": "refs/heads/master", "object": {"sha": "new-master-sha"} }""")

    response = self.app.get(util.update_library_task('org/repo'), headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = library_key.get()
    # assertIsNone checks identity and matches the sibling ingestion tests.
    self.assertIsNone(library.error)
    self.assertEqual(library.status, Status.ready)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual([
        util.ingest_analysis_task('org', 'repo', 'v0.0.2', 'new-master-sha'),
        util.ingest_version_task('org', 'repo', 'v0.0.2'),
    ], [task.url for task in tasks])

    version = Version.get_by_id('v0.0.2', parent=library_key)
    # Fail with a clear assertion instead of an AttributeError when missing.
    self.assertIsNotNone(version)
    self.assertEqual(version.sha, 'new-master-sha')
    self.assertEqual(version.status, Status.pending)
def test_ingest_version(self):
    """Ingest version ``v1.0.0`` and check the stored readme and bower content.

    The README comes base64-encoded from the GitHub readme API, bower.json
    comes from raw.githubusercontent.com, and the markdown API answers with
    rendered HTML. The version must end up ready, error-free and not a
    preview.
    """
    seeded_metadata = '{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}'
    library_key = Library(id='org/repo', metadata=seeded_metadata).put()
    Version(id='v1.0.0', parent=library_key, sha='sha').put()

    readme_payload = '{"content":"%s"}' % b64encode('README')
    self.respond_to_github(r'https://api.github.com/repos/org/repo/readme\?ref=sha', readme_payload)
    self.respond_to('https://raw.githubusercontent.com/org/repo/sha/bower.json', '{}')
    self.respond_to_github('https://api.github.com/markdown', '<html>README</html>')

    result = self.app.get(
        util.ingest_version_task('org', 'repo', 'v1.0.0'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(result.status_int, 200)

    stored_version = Version.get_by_id('v1.0.0', parent=library_key)
    self.assertIsNone(stored_version.error)
    self.assertEqual(stored_version.status, Status.ready)
    self.assertFalse(stored_version.preview)

    known_versions = Library.versions_for_key_async(library_key).get_result()
    self.assertEqual(['v1.0.0'], known_versions)

    readme_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'readme').get()
    self.assertEqual(readme_entity.content, 'README')

    readme_html_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'readme.html').get()
    self.assertEqual(readme_html_entity.content, '<html>README</html>')

    bower_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'bower').get()
    self.assertEqual(bower_entity.content, '{}')
def test_ingest_preview(self):
    """Run the preview ingestion task for a commit and check what it stores.

    The task receives ``commit`` and ``url`` parameters. The test expects a
    shallow-ingested library, a pending preview Version keyed by the commit
    sha, and analysis and version tasks queued in that order.
    """
    self.respond_to_github('https://api.github.com/repos/org/repo', '{"owner":{"login":"******"},"name":"repo"}')
    self.respond_to_github('https://api.github.com/repos/org/repo/contributors', '["a"]')
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')
    self.respond_to_github('https://raw.githubusercontent.com/org/repo/master/bower.json', '{"license": "MIT"}')

    response = self.app.get(
        util.ingest_preview_task('org', 'repo'),
        params={'commit': 'commit-sha', 'url': 'url'},
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertTrue(library.shallow_ingestion)

    version = Version.get_by_id('commit-sha', parent=library.key)
    # Fail with a clear assertion instead of an AttributeError when missing.
    self.assertIsNotNone(version)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(version.status, Status.pending)
    self.assertEqual(version.sha, 'commit-sha')
    self.assertEqual(version.url, 'url')
    self.assertTrue(version.preview)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual(len(tasks), 2)
    self.assertEqual([
        util.ingest_analysis_task('org', 'repo', 'commit-sha'),
        util.ingest_version_task('org', 'repo', 'commit-sha'),
    ], [task.url for task in tasks])
def test_ingest_preview(self):
    """Check that ingesting a preview commit creates a pending preview version.

    GitHub is stubbed for metadata, contributors, participation stats and
    bower.json. After the preview task runs with ``commit`` and ``url``
    parameters, the library must be flagged as shallow-ingested, and the
    Version keyed by the commit sha must be pending, marked as a preview and
    carry the given url. Analysis and version tasks must be queued in that
    order.
    """
    self.respond_to_github(
        'https://api.github.com/repos/org/repo',
        '{"owner":{"login":"******"},"name":"repo"}')
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/contributors', '["a"]')
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/stats/participation', '{}')
    self.respond_to_github(
        'https://raw.githubusercontent.com/org/repo/master/bower.json',
        '{"license": "MIT"}')

    response = self.app.get(
        util.ingest_preview_task('org', 'repo'),
        params={
            'commit': 'commit-sha',
            'url': 'url',
        },
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertTrue(library.shallow_ingestion)

    version = Version.get_by_id('commit-sha', parent=library.key)
    # Fail with a clear assertion instead of an AttributeError when missing.
    self.assertIsNotNone(version)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(version.status, Status.pending)
    self.assertEqual(version.sha, 'commit-sha')
    self.assertEqual(version.url, 'url')
    self.assertTrue(version.preview)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual(len(tasks), 2)
    self.assertEqual([
        util.ingest_analysis_task('org', 'repo', 'commit-sha'),
        util.ingest_version_task('org', 'repo', 'commit-sha'),
    ], [task.url for task in tasks])
def test_ingest_version(self):
    """Ingest version ``v1.0.0`` and check the stored readme and parsed bower data.

    Same scenario as the plain-content variant of this test, except that the
    bower entity is checked through ``get_json()`` and must equal an empty
    dict.
    """
    seeded_metadata = '{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}'
    library_key = Library(id='org/repo', metadata=seeded_metadata).put()
    Version(id='v1.0.0', parent=library_key, sha='sha').put()

    readme_payload = '{"content":"%s"}' % b64encode('README')
    self.respond_to_github(r'https://api.github.com/repos/org/repo/readme\?ref=sha', readme_payload)
    self.respond_to('https://raw.githubusercontent.com/org/repo/sha/bower.json', '{}')
    self.respond_to_github('https://api.github.com/markdown', '<html>README</html>')

    result = self.app.get(
        util.ingest_version_task('org', 'repo', 'v1.0.0'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(result.status_int, 200)

    stored_version = Version.get_by_id('v1.0.0', parent=library_key)
    self.assertIsNone(stored_version.error)
    self.assertEqual(stored_version.status, Status.ready)
    self.assertFalse(stored_version.preview)

    known_versions = Library.versions_for_key_async(library_key).get_result()
    self.assertEqual(['v1.0.0'], known_versions)

    readme_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'readme').get()
    self.assertEqual(readme_entity.content, 'README')

    readme_html_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'readme.html').get()
    self.assertEqual(readme_html_entity.content, '<html>README</html>')

    bower_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'bower').get()
    self.assertEqual(bower_entity.get_json(), {})
def trigger_version_ingestion(self, tag, sha, url=None, preview=False):
    """Store a Version for ``tag`` and enqueue its ingestion and analysis.

    Nothing happens when a Version with id ``tag`` already exists under the
    library and its status is ``Status.ready`` or ``Status.pending``.

    Args:
        tag: Version id to store and ingest.
        sha: Commit sha recorded on the Version and passed to analysis.
        url: Optional url stored on the Version.
        preview: Whether the Version is stored as a preview.

    Returns:
        True when a Version was written and tasks were enqueued, False when
        the existing Version was left untouched.
    """
    existing = Version.get_by_id(tag, parent=self.library.key)
    if existing is not None:
        # Already up to date or already queued: do not re-trigger.
        if existing.status == Status.ready or existing.status == Status.pending:
            return False

    fresh_version = Version(id=tag, parent=self.library.key, sha=sha, url=url, preview=preview)
    fresh_version.put()

    ingest_url = util.ingest_version_task(self.scope, self.package, tag)
    util.new_task(ingest_url, target='manage', transactional=True)
    self.trigger_analysis(tag, sha, transactional=True)
    return True
def trigger_version_ingestion(self, tag, sha, url=None, preview=False):
    """Create the Version entity for ``tag`` and queue the work to ingest it.

    A Version that already exists with status ``Status.ready`` or
    ``Status.pending`` is left alone and no task is queued.

    Args:
        tag: Version id to store and ingest.
        sha: Commit sha recorded on the Version and passed to analysis.
        url: Optional url stored on the Version.
        preview: Whether the Version is stored as a preview.

    Returns:
        False when the existing Version made ingestion unnecessary, True
        once a new Version has been stored and its tasks queued.
    """
    current = Version.get_by_id(tag, parent=self.library.key)
    if current is not None:
        is_ready = current.status == Status.ready
        # Short-circuit exactly like the original ``or`` expression.
        if is_ready or current.status == Status.pending:
            return False

    Version(
        id=tag,
        parent=self.library.key,
        sha=sha,
        url=url,
        preview=preview,
    ).put()

    version_task_url = util.ingest_version_task(self.owner, self.repo, tag)
    util.new_task(version_task_url, target='manage', transactional=True)
    self.trigger_analysis(tag, sha, transactional=True)
    return True
def test_ingest_version_falls_back(self):
    """A failing latest version must queue ingestion of the previous tag.

    The README for ``v1.0.1`` is a single non-UTF-8 byte, so ingestion is
    expected to record an error on that version and queue ``v1.0.0`` with
    the ``latestVersion`` flag.
    """
    seeded_metadata = '{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}'
    library = Library(id='org/repo', metadata=seeded_metadata, contributor_count=417)
    library.tags = json.dumps(["v1.0.0", "v1.0.1"])
    library.put()

    older_version = Version(parent=library.key, id='v1.0.0', sha='lol')
    older_version.put()
    newer_version = Version(parent=library.key, id='v1.0.1', sha='lol')
    newer_version.put()

    self.respond_to('https://raw.githubusercontent.com/org/repo/v1.0.1/README.md', chr(248))

    # Nothing may be queued before the ingestion task runs.
    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 0)

    self.app.get(util.ingest_version_task('org', 'repo', 'v1.0.1'), params={'latestVersion': 'True'})

    newer_version = newer_version.key.get()
    self.assertEqual(newer_version.error, "Could not store README.md as a utf-8 string")

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 1)
    fallback_url = util.ingest_version_task('org', 'repo', 'v1.0.0') + '?latestVersion=True'
    self.assertEqual(queued[0].url, fallback_url)
def error(error_string):
    """Record an ingestion failure on the version and fall back if possible.

    Stores ``error_string`` on the Version identified by the enclosing
    scope's ``key``. When ``generate_search`` is set, it looks the failed
    version up in the library's JSON tag list and queues ingestion of the
    entry just before it, flagged as the latest version. The response status
    is set to 200.

    Relies on ``owner``, ``repo``, ``version``, ``key``, ``generate_search``
    and ``self`` from the enclosing scope.
    """
    logging.info('ingestion error "%s" for %s/%s/%s', error_string, owner, repo, version)
    ver = key.get()
    ver.error = error_string
    ver.put()

    if generate_search:
        library = key.parent().get()
        # An unset tag list means there is nothing to fall back to; do not
        # let json.loads(None) raise inside the error handler.
        versions = json.loads(library.tags) if library.tags else []
        # list.index raises ValueError for an unknown version; treat that
        # case like "no earlier version".
        idx = versions.index(version) if version in versions else 0
        if idx > 0:
            logging.info('ingestion for %s/%s falling back to version %s', owner, repo, versions[idx - 1])
            task_url = util.ingest_version_task(owner, repo, versions[idx - 1])
            util.new_task(task_url, {'latestVersion':'True'})

    # NOTE(review): the original indentation was lost; this status call is
    # assumed to run unconditionally at function level. Confirm.
    self.response.set_status(200)
def test_ingest_version_pages(self):
    """A page listed under ``pages`` in bower.json is stored as rendered HTML.

    bower.json maps ``custom doc`` to ``doc.md``. The test expects a Content
    entity with id ``page-doc.md`` holding the HTML returned by the second
    stubbed markdown response.
    """
    seeded_metadata = '{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}'
    library_key = Library(id='org/repo', metadata=seeded_metadata).put()
    Version(id='v1.0.0', parent=library_key, sha='sha').put()

    readme_payload = '{"content":"%s"}' % b64encode('README')
    page_payload = '{"content":"%s", "type":"file"}' % b64encode('doc.md')

    # Registration order is kept: the markdown endpoint is stubbed twice.
    self.respond_to_github(r'https://api.github.com/repos/org/repo/readme\?ref=sha', readme_payload)
    self.respond_to('https://raw.githubusercontent.com/org/repo/sha/bower.json', '{"pages":{"custom doc":"doc.md"}}')
    self.respond_to_github('https://api.github.com/markdown', '<html>README</html>')
    self.respond_to_github(r'https://api.github.com/repos/org/repo/contents/doc.md\?ref=sha', page_payload)
    self.respond_to_github('https://api.github.com/markdown', '<html>doc.md</html>')

    result = self.app.get(
        util.ingest_version_task('org', 'repo', 'v1.0.0'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(result.status_int, 200)

    page_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'page-doc.md').get()
    self.assertEqual(page_entity.content, '<html>doc.md</html>')
def get(self, owner, repo, kind):
    """Ingest a single commit of ``owner/repo`` as a version.

    Reads the ``commit`` and ``url`` request parameters, stores a Version
    keyed by the commit sha, and queues a version ingestion task for it.
    When the library has neither metadata nor an error yet, version
    ingestion is switched off for it and the library is marked dirty.

    Responds with HTTP 400 when ``commit`` or ``url`` is missing.
    """
    commit = self.request.get('commit', None)
    url = self.request.get('url', None)
    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # would let a request without parameters through.
    if commit is None or url is None:
        self.response.set_status(400)
        return

    self.init_library(owner, repo, kind)
    is_new = self.library.metadata is None and self.library.error is None
    if is_new:
        self.library.ingest_versions = False
        self.library_dirty = True
        # NOTE(review): the original indentation was lost; the metadata
        # update is assumed to belong to the is_new branch. Confirm.
        self.update_metadata()

    version = Version(parent=self.library.key, id=commit, sha=commit, url=url)
    version.put()
    task_url = util.ingest_version_task(owner, repo, commit)
    util.new_task(task_url)
    self.commit()
def test_ingest_commit(self):
    """Ingesting a commit stores a commit-keyed version and queues one task.

    The library must exist without an error and with ``ingest_versions``
    false. The Version keyed by ``commit-sha`` must carry that sha and the
    given url.
    """
    canned_responses = [
        ('https://api.github.com/repos/org/repo', 'metadata bits'),
        ('https://api.github.com/repos/org/repo/contributors', '["a"]'),
    ]
    for stub_url, stub_body in canned_responses:
        self.respond_to_github(stub_url, stub_body)

    commit_params = {'commit': 'commit-sha', 'url': 'url'}
    self.app.get(util.ingest_commit_task('org', 'repo', 'element'), params=commit_params)

    stored_library = Library.get_by_id('org/repo')
    self.assertIsNotNone(stored_library)
    self.assertIsNone(stored_library.error)
    self.assertFalse(stored_library.ingest_versions)

    stored_version = Version.get_by_id(parent=stored_library.key, id='commit-sha')
    self.assertEqual(stored_version.sha, 'commit-sha')
    self.assertEqual(stored_version.url, 'url')

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 1)
    self.assertEqual(queued[0].url, util.ingest_version_task('org', 'repo', 'commit-sha'))
def test_subsequent_update_triggers_version_ingestion(self):
    """An update with unchanged GitHub data still ingests a not-yet-stored tag.

    The library's ``tag_map`` lists ``v3.0.0`` but only ``v0.1.0``,
    ``v1.0.0`` and ``v2.0.0`` exist as ready Versions. Metadata,
    contributors and tags all answer 304. The test expects analysis and
    version tasks for ``v3.0.0`` only.
    """
    library_key = Library(
        id='org/repo',
        spdx_identifier='MIT',
        tag_map='{"v1.0.0":"new","v2.0.0":"old","v3.0.0":"new"}',
    ).put()
    for existing_tag in ('v0.1.0', 'v1.0.0', 'v2.0.0'):
        Version(id=existing_tag, parent=library_key, sha="old", status=Status.ready).put()
    VersionCache.update(library_key)

    not_modified_urls = [
        'https://api.github.com/repos/org/repo',
        'https://api.github.com/repos/org/repo/contributors',
        'https://api.github.com/repos/org/repo/tags',
    ]
    for stub_url in not_modified_urls:
        self.respond_to_github(stub_url, {'status': 304})
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')

    result = self.app.get(
        util.update_library_task('org/repo'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(result.status_int, 200)

    queued = self.tasks.get_filtered_tasks()
    expected_urls = [
        util.ingest_analysis_task('org', 'repo', 'v3.0.0'),
        util.ingest_version_task('org', 'repo', 'v3.0.0'),
    ]
    self.assertEqual(expected_urls, [queued_task.url for queued_task in queued])
def test_ingest_version(self):
    """Ingest ``v1.0.0`` using README.md and bower.json fetched by tag.

    Both files are served from raw.githubusercontent.com and the markdown
    API is stubbed with rendered HTML. The version must end up without an
    error, and the readme, readme.html and bower Content entities must hold
    the stubbed bodies.
    """
    seeded_metadata = '{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}'
    library = Library(id='org/repo', metadata=seeded_metadata, contributor_count=417)
    seeded_version = Version(parent=library.key, id='v1.0.0', sha='lol')
    library.put()
    seeded_version.put()

    self.respond_to('https://raw.githubusercontent.com/org/repo/v1.0.0/README.md', 'README')
    self.respond_to('https://raw.githubusercontent.com/org/repo/v1.0.0/bower.json', '{}')
    self.respond_to_github('https://api.github.com/markdown', '<html>README</html>')

    result = self.app.get(util.ingest_version_task('org', 'repo', 'v1.0.0'))
    self.assertEqual(result.status_int, 200)

    seeded_version = seeded_version.key.get()
    self.assertIsNone(seeded_version.error)

    readme_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'readme').get()
    self.assertEqual(readme_entity.content, 'README')

    readme_html_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'readme.html').get()
    self.assertEqual(readme_html_entity.content, '<html>README</html>')

    bower_entity = ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, 'bower').get()
    self.assertEqual(bower_entity.content, '{}')
def ingest_versions(self):
    """Fetch the repository's git tag refs and queue ingestion of each valid one.

    Does nothing when ``self.library.ingest_versions`` is false or when
    GitHub answers 304 for the stored tags ETag. Otherwise the valid version
    tags are sorted with ``versiontag.compare`` and stored on the library as
    a JSON list. A Version is then written for every tag, newest first, with
    an ingestion task and an analysis request each. Only the newest tag's
    task carries ``latestVersion=True``.

    Returns:
        The result of ``self.error(...)`` when the tags cannot be fetched or
        none of them is a valid version tag, otherwise None.
    """
    # Local import so this block does not depend on a file-level import.
    import functools

    if not self.library.ingest_versions:
        return

    response = self.github.github_resource(
        'repos', self.owner, self.repo, 'git/refs/tags', etag=self.library.tags_etag)
    if response.status_code == 304:
        # Tags unchanged since the stored ETag: nothing to ingest.
        return
    if response.status_code != 200:
        return self.error('repo tags not found (%d)' % response.status_code)

    # Keep the raw payload until a sorted tag list replaces it below; the
    # "no valid tags" error path leaves it in place, as before.
    self.library.tags = response.content
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library_dirty = True

    def tag_of(ref_entry):
        # Refs look like 'refs/tags/<tag>'; drop the 10-character prefix.
        return ref_entry['ref'][10:]

    data = json.loads(response.content)
    # isinstance(data, object) was always true; only a list of refs is usable.
    if not isinstance(data, list):
        data = []
    data = [entry for entry in data if versiontag.is_valid(tag_of(entry))]
    if not data:
        return self.error('repo contains no valid version tags')

    # cmp_to_key adapts the comparison function for both Python 2.7 and 3.
    data.sort(key=functools.cmp_to_key(
        lambda a, b: versiontag.compare(tag_of(a), tag_of(b))))
    self.library.tags = json.dumps([tag_of(entry) for entry in data])

    # Newest first, so that the first task is the one flagged as latest.
    data.reverse()
    for position, entry in enumerate(data):
        tag = tag_of(entry)
        sha = entry['object']['sha']
        params = {"latestVersion": "True"} if position == 0 else {}
        Version(parent=self.library.key, id=tag, sha=sha).put()
        task_url = util.ingest_version_task(self.owner, self.repo, tag)
        util.new_task(task_url, params)
        util.publish_analysis_request(self.owner, self.repo, tag)
def get(self, owner, repo, kind):
    """Ingest a library's metadata, contributors and version tags from GitHub.

    Responds with HTTP 400 when ``kind`` is neither ``'element'`` nor
    ``'collection'`` and with HTTP 500 when the GitHub quota cannot be
    reserved. Each GitHub resource is requested with the stored ETag, and a
    304 leaves the stored value untouched. Any other non-200 status records
    an error on the library and stops the ingestion. For every valid tag a
    Version is stored, an ingestion task is queued and an analysis request
    is published.
    """
    if kind not in ('element', 'collection'):
        self.response.set_status(400)
        return

    owner = owner.lower()
    repo = repo.lower()
    library = Library.maybe_create_with_kind(owner, repo, kind)
    library_dirty = False
    if library.error is not None:
        # A previous error is cleared so that this run starts clean.
        library_dirty = True
        library.error = None

    # NOTE(review): the original indentation was lost; this log call is
    # assumed to run unconditionally. Confirm.
    logging.info('created library')

    github = quota.GitHub()
    if not github.reserve(3):
        self.response.set_status(500)
        return

    def fail(message):
        # Record the error, give the quota back, and persist the library.
        library.error = message
        github.release()
        library.put()

    response = github.github_resource('repos', owner, repo, etag=library.metadata_etag)
    if response.status_code != 304:
        if response.status_code != 200:
            fail('repo metadata not found (%d)' % response.status_code)
            return
        library.metadata = response.content
        library.metadata_etag = response.headers.get('ETag', None)
        library_dirty = True

    response = github.github_resource('repos', owner, repo, 'contributors', etag=library.contributors_etag)
    if response.status_code != 304:
        if response.status_code != 200:
            fail('repo contributors not found (%d)' % response.status_code)
            return
        library.contributors = response.content
        library.contributors_etag = response.headers.get('ETag', None)
        library.contributor_count = len(json.loads(response.content))
        library_dirty = True

    response = github.github_resource('repos', owner, repo, 'git/refs/tags', etag=library.tags_etag)
    if response.status_code != 304:
        if response.status_code != 200:
            fail('repo tags not found (%d)' % response.status_code)
            return
        library.tags = response.content
        library.tags_etag = response.headers.get('ETag', None)
        library_dirty = True

        data = json.loads(response.content)
        # isinstance(data, object) was always true, which made this branch
        # unreachable; the loop below needs a list of ref entries.
        if not isinstance(data, list):
            fail('repo contains no valid version tags')
            return

        for version in data:
            # Refs look like 'refs/tags/<tag>'; drop the 10-character prefix.
            tag = version['ref'][10:]
            if not versiontag.is_valid(tag):
                continue
            sha = version['object']['sha']
            Version(parent=library.key, id=tag, sha=sha).put()
            task_url = util.ingest_version_task(owner, repo, tag)
            util.new_task(task_url)
            util.publish_analysis_request(owner, repo, tag)

    if library_dirty:
        library.put()
    # NOTE(review): the original indentation was lost; the quota release is
    # assumed to run whether or not the library was dirty. Confirm.
    github.release()