def test_analyze_leaves_existing_content_when_reanalyzing(self):
    """Re-running analysis keeps ready analysis content and still queues ingestion.

    Seeds a 'ready' analysis Content entity, requests the analyze task URL,
    then checks that the stored data and status are unchanged and that the
    only queued task URL is the ingest-analysis task for that version.
    """
    library_key = Library(id='owner/repo').put()
    version_key = Version(
        id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    # Seed analysis content that is already in the ready state.
    seeded = Content(id='analysis', parent=version_key, status=Status.pending)
    seeded.content = 'existing data'
    seeded.status = Status.ready
    seeded.put()

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get('/task/analyze/owner/repo', headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    stored = Content.get_by_id('analysis', parent=version_key)
    self.assertEqual(stored.content, 'existing data')
    self.assertEqual(stored.status, Status.ready)

    queued = self.tasks.get_filtered_tasks()
    expected_urls = [util.ingest_analysis_task('owner', 'repo', 'v1.1.1')]
    self.assertEqual(expected_urls, [task.url for task in queued])
def test_analyzer_index_empty(self):
    """Indexing with empty analyzerData produces no element or behavior fields.

    The search document must still exist and carry at least one field.
    """
    metadata = """{ "full_name": "full-name" }"""
    library_key = Library(id='owner/repo', metadata=metadata).put()
    version_key = Version(
        id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    # Ready analysis content whose analyzer payload is an empty object.
    analysis = Content(id='analysis', parent=version_key, status=Status.pending)
    analysis.json = {"analyzerData": {}}
    analysis.status = Status.ready
    analysis.put()

    VersionCache.update(library_key)

    response = self.app.get(
        util.update_indexes_task('owner', 'repo'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    document = search.Index('repo').get('owner/repo')
    self.assertIsNotNone(document)
    self.assertTrue(len(document.fields) > 0)

    # Neither analyzer-derived field may appear in the document.
    for field_name in ('element', 'behavior'):
        matching = [field for field in document.fields if field.name == field_name]
        self.assertEqual(len(matching), 0)
def test_compressed(self):
    """The docs API returns the stored analyzerData under the 'analysis' key.

    The payload is written through Content.json and read back via the
    '/api/docs/...' endpoint with the use_analyzer_data query flag.
    """
    library_key = Library(id='owner/repo').put()
    version_key = Version(
        id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    analysis = Content(id='analysis', parent=version_key, status=Status.pending)
    analysis.json = {"analyzerData": "some data"}
    analysis.status = Status.ready
    analysis.put()

    response = self.app.get('/api/docs/owner/repo/v1.1.1?use_analyzer_data')
    self.assertEqual(response.status_int, 200)

    payload = json.loads(response.normal_body)
    self.assertEqual(payload.get('analysis'), "some data")
def test_analyze_resets_error_content_when_reanalyzing(self):
    """Re-running analysis resets errored analysis content to pending.

    Seeds an 'error' analysis Content entity, requests the analyze task URL,
    then checks that the status is back to pending and that the only queued
    task URL is the ingest-analysis task for that version.
    """
    library_key = Library(id='owner/repo').put()
    version_key = Version(
        id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    # Seed analysis content that previously failed.
    failed = Content(id='analysis', parent=version_key, status=Status.pending)
    failed.status = Status.error
    failed.put()

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get('/task/analyze/owner/repo', headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    stored = Content.get_by_id('analysis', parent=version_key)
    self.assertEqual(stored.status, Status.pending)

    queued = self.tasks.get_filtered_tasks()
    expected_urls = [util.ingest_analysis_task('owner', 'repo', 'v1.1.1')]
    self.assertEqual(expected_urls, [task.url for task in queued])
def test_compressed(self):
    """Analyzer data stored via Content.json is served by the docs API.

    Expects a 200 response whose JSON body maps 'analysis' to the stored
    analyzerData value.
    """
    library_key = Library(id='owner/repo').put()
    version_key = Version(
        id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    stored_analysis = Content(
        id='analysis', parent=version_key, status=Status.pending)
    stored_analysis.json = {"analyzerData": "some data"}
    stored_analysis.status = Status.ready
    stored_analysis.put()

    docs_url = '/api/docs/owner/repo/v1.1.1?use_analyzer_data'
    response = self.app.get(docs_url)
    self.assertEqual(response.status_int, 200)

    decoded_body = json.loads(response.normal_body)
    self.assertEqual(decoded_body.get('analysis'), "some data")
def post(self):
    """Store analysis data delivered in a pushed JSON message.

    The request body is URL-unquoted, stripped of trailing '=' characters and
    decoded as JSON. The message carries base64-encoded 'data' plus
    'attributes' naming the owner, repo and version (the shape looks like a
    Cloud Pub/Sub push message -- NOTE(review): confirm against the sender).

    The data is stored as the 'analysis' Content entity of the matching
    Version, but only if that Version still exists. The handler always answers
    200 once the message has been decoded, even when the datastore write
    fails; the failure is logged with its traceback instead.
    """
    message_json = json.loads(urllib.unquote(self.request.body).rstrip('='))
    message = message_json['message']
    data = base64.b64decode(str(message['data']))
    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']

    logging.info('Ingesting analysis data %s/%s/%s', owner, repo, version)
    parent = Version.get_by_id(version, parent=ndb.Key(Library, '%s/%s' % (owner, repo)))

    # Don't accept the analysis data unless the version still exists in the datastore
    if parent is not None:
        content = Content(parent=parent.key, id='analysis', content=data)
        try:
            content.put()
        # TODO: Which exception is this for?
        # Deliberately best-effort: a failed write is logged, not re-raised,
        # so the response below is still 200. Catching Exception (rather than
        # everything) lets interpreter-exit signals propagate.
        except Exception:  # pylint: disable=broad-except
            logging.exception(
                'Failed to store analysis data for %s/%s/%s', owner, repo, version)

    self.response.set_status(200)
def get(self, owner, repo, version):
    """Ingest README.md and bower.json for one version and update the search index.

    Steps, in order:
      1. Reserve GitHub quota; respond 500 and stop if the reservation fails.
      2. Fetch README.md and store it as the 'readme' Content (with its ETag).
         If storing raises db.BadValueError, record an error on the Version,
         respond 200 and stop.
      3. Render the README through github.markdown and store 'readme.html'.
      4. Fetch bower.json; if it is not valid JSON, record an error on the
         Version, respond 200 and stop. Otherwise store it as 'bower'.
      5. If this version is the last entry returned by
         Library.versions_for_key, queue dependency ingestion for collections
         and (re)write the library's document in the 'repo' search index.

    Args:
      owner: Repository owner, used in datastore keys and fetch URLs.
      repo: Repository name.
      version: Version identifier being ingested.
    """
    logging.info('ingesting version %s/%s/%s', owner, repo, version)

    github = quota.GitHub()
    if not github.reserve(1):
        self.response.set_status(500)
        return

    key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, version)

    response = urlfetch.fetch(util.content_url(owner, repo, version, 'README.md'))
    readme = response.content

    try:
        content = Content(parent=key, id='readme', content=readme)
        content.etag = response.headers.get('ETag', None)
        content.put()
    except db.BadValueError:
        ver = key.get()
        ver.error = "Could not store README.md as a utf-8 string"
        ver.put()
        self.response.set_status(200)
        return

    response = github.markdown(readme)
    content = Content(parent=key, id='readme.html', content=response.content)
    content.put()

    response = urlfetch.fetch(util.content_url(owner, repo, version, 'bower.json'))
    try:
        # Decoded once here; the parsed value is reused for indexing below.
        bower = json.loads(response.content)
    except ValueError:
        ver = key.get()
        ver.error = "This version has a missing or broken bower.json"
        ver.put()
        self.response.set_status(200)
        return

    content = Content(parent=key, id='bower', content=response.content)
    content.etag = response.headers.get('ETag', None)
    content.put()

    # Only the last version in the list (presumably the newest -- depends on
    # the ordering of Library.versions_for_key) drives the search index.
    versions = Library.versions_for_key(key.parent())
    if versions[-1] == version:
        library = key.parent().get()
        if library.kind == "collection":
            task_url = util.ingest_dependencies_task(owner, repo, version)
            util.new_task(task_url)

        metadata = json.loads(library.metadata)
        logging.info('adding search index for %s', version)
        # bower.json's description wins; library metadata is the fallback.
        description = bower.get("description", metadata.get("description", ""))
        document = search.Document(doc_id='%s/%s' % (owner, repo), fields=[
            search.AtomField(name='full_name', value=metadata['full_name']),
            search.TextField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.TextField(name='version', value=version),
            search.TextField(name='repoparts', value=' '.join(repo.split('-'))),
            search.TextField(name='description', value=description),
            search.TextField(name='keywords', value=' '.join(bower.get('keywords', []))),
            search.NumberField(name='stars', value=metadata.get('stargazers_count')),
            search.NumberField(name='subscribers', value=metadata.get('subscribers_count')),
            search.NumberField(name='forks', value=metadata.get('forks')),
            search.NumberField(name='contributors', value=library.contributor_count),
            search.DateField(
                name='updated_at',
                value=datetime.datetime.strptime(metadata.get('updated_at'), TIME_FORMAT))
        ])
        index = search.Index('repo')
        index.put(document)

    self.response.set_status(200)