def analyses(app):
    """Store three sample analyses (plus worker results) and return them.

    The first analysis belongs to npm/arrify/1.0.1 and has both a start and a
    finish time.  The second and third both belong to pypi/flexmock/0.10.1;
    the third additionally carries an ``audit`` document.  ``now`` and
    ``later`` are taken from the enclosing module.

    :param app: object exposing the database session as ``app.rdb.session``
    :return: tuple ``(model1, model2, model3)`` of the stored analyses
    """
    session = app.rdb.session

    def add_worker_results(analysis, results):
        # One WorkerResult row per (worker name, task result) pair.
        for worker_name, task_result in results.items():
            session.add(WorkerResult(analysis_id=analysis.id,
                                     worker=worker_name,
                                     task_result=task_result))

    # npm/arrify/1.0.1 -- finished analysis
    npm = Ecosystem(name='npm', backend=EcosystemBackend.npm)
    arrify = Package(ecosystem=npm, name='arrify')
    arrify_version = Version(package=arrify, identifier='1.0.1')
    finished_analysis = Analysis(version=arrify_version,
                                 started_at=now, finished_at=later)
    session.add(finished_analysis)

    # pypi/flexmock/0.10.1 -- analysis that was accessed once
    pypi = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    flexmock = Package(ecosystem=pypi, name='flexmock')
    flexmock_version = Version(package=flexmock, identifier='0.10.1')
    accessed_analysis = Analysis(version=flexmock_version,
                                 started_at=later, access_count=1)
    session.add(accessed_analysis)
    # The analysis id is read right after this commit.
    session.commit()

    add_worker_results(accessed_analysis, {
        'a': 'b',
        'c': 'd',
        'e': 'f',
        'g': 'h',
        'i': 'j',
        'digests': {
            'details': [
                {'artifact': True,
                 'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'},
            ],
        },
    })

    # Same version again, this time with an audit document attached.
    audited_analysis = Analysis(
        version=flexmock_version,
        started_at=later,
        access_count=1,
        audit={'audit': {'audit': 'audit', 'e': 'f', 'g': 'h'},
               'a': 'b',
               'c': 'd'},
    )
    session.add(audited_analysis)
    session.commit()

    add_worker_results(audited_analysis, {
        'digests': {
            'details': [
                {'artifact': True,
                 'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'},
            ],
        },
    })
    session.commit()

    return (finished_analysis, accessed_analysis, audited_analysis)
def setup_method(self, method):
    """Create a session with one maven E/P/V and two finished analyses.

    The names used for the ecosystem, package and version are kept on the
    instance (``en``, ``pn``, ``vi``) next to the created model objects
    (``e``, ``p``, ``v``, ``a``, ``a2``), the session (``s``) and a
    ``BayesianPostgres`` instance (``bp``).

    :param method: unused by this method
    """
    # Called for its side effects only.
    # NOTE(review): presumably (re)initialises the test database -- confirm.
    rdb()
    self.s = create_db_scoped_session()

    self.en, self.pn, self.vi = 'foo', 'bar', '1.1.1'

    self.e = Ecosystem(name=self.en, backend=EcosystemBackend.maven)
    self.p = Package(ecosystem=self.e, name=self.pn)
    self.v = Version(package=self.p, identifier=self.vi)

    # The second analysis finishes ten seconds after (roughly) the first.
    ten_seconds = datetime.timedelta(seconds=10)
    self.a = Analysis(version=self.v, finished_at=datetime.datetime.now())
    self.a2 = Analysis(version=self.v,
                       finished_at=datetime.datetime.now() + ten_seconds)

    for analysis in (self.a, self.a2):
        self.s.add(analysis)
    self.s.commit()

    connection_string = get_postgres_connection_string()
    self.bp = BayesianPostgres(connection_string=connection_string)
def get(self, analysis_id):
    """Return the analysis with the given id, projected and sanitized.

    :param analysis_id: id handed to ``Analysis.by_id``
    :return: the value of ``self._sanitize_result`` for the projected
        analysis, or ``({}, 404)`` when the lookup raises ``NoResultFound``
    """
    args = self._parse_args()
    projection = self._get_projection(args['fields'])

    try:
        analysis = Analysis.by_id(rdb.session, analysis_id)
    except NoResultFound:
        return {}, 404
    else:
        # Only analyses that were actually found count as accessed.
        self._inc_access_counter(analysis)
        projected = do_projection(projection, analysis)
        return self._sanitize_result(projected, debuginfo=args['debuginfo'])
def test_cucos_fetcher(self, rdb, npm):
    """Check CucosReleasesFetcher and the solver built on top of it.

    Stores a ``cucos`` package with several versions, each with a finished
    analysis, then verifies the fetched releases and the versions the
    solver picks for a handful of dependency specs.
    """
    expected_versions = {
        '0.5.0', '0.5.1',
        '0.6.0', '0.6.4',
        '0.7.0',
        '0.8.0',
        '0.9.0',
        '1.0.0', '1.0.5',
    }

    # Initial dataset: the package itself ...
    cucos = Package(ecosystem=npm, name='cucos')
    rdb.add(cucos)
    rdb.commit()

    # ... and one version plus one analysis per identifier.
    for identifier in expected_versions:
        release = Version(package=cucos, identifier=identifier)
        rdb.add(release)
        rdb.commit()

        # Fetcher only selects finished analyses, hence the finish time.
        finished = Analysis(version=release,
                            finished_at=datetime.datetime.now())
        rdb.add(finished)
        rdb.commit()

    fetcher = CucosReleasesFetcher(npm, rdb)
    fetched = fetcher.fetch_releases('cucos')[1]

    # Everything that was inserted must come back.
    assert set(fetched) == expected_versions

    # pop() takes the final element, which has to be the latest release.
    assert fetched.pop() == '1.0.5'

    # Different dependency specs and the version each must resolve to.
    solver = get_ecosystem_solver(npm, fetcher)
    spec_to_version = (
        ('cucos ^0.5.0', '0.5.1'),
        ('cucos 0.x.x', '0.9.0'),
        ('cucos >1.0.0', '1.0.5'),
        ('cucos ~>0.6.0', '0.6.4'),
    )
    for spec, resolved in spec_to_version:
        assert solver.solve([spec])['cucos'] == resolved

    # With `all_versions` every matching version is returned.
    matching = solver.solve(['cucos >=0.6.0'], all_versions=True)['cucos']
    assert set(matching) == (expected_versions - {'0.5.0', '0.5.1'})
def fill_analyses(app):
    """Fill the database with a sample set of records and return them.

    Creates three ecosystems, five packages, six versions, seven analyses,
    three worker results (all attached to the second analysis) and one
    GitHub usage record, adds them to ``app.rdb.session`` and commits.
    ``now``, ``later`` and ``even_later`` come from the enclosing module.

    :param app: object exposing the database session as ``app.rdb.session``
    :return: tuple ``(ecosystems, packages, versions, analyses,
        worker_results, package_gh_usage)``, each element being a list
    """
    pypi = Ecosystem(name='pypi', backend=EcosystemBackend.pypi,
                     url='https://pypi.python.org/',
                     fetch_url='https://pypi.python.org/pypi')
    npm = Ecosystem(name='npm', backend=EcosystemBackend.npm,
                    url='https://www.npmjs.com/',
                    fetch_url='https://registry.npmjs.org/')
    go = Ecosystem(name='go', backend=EcosystemBackend.scm)
    ecosystems = [pypi, npm, go]

    flexmock = Package(name='flexmock', ecosystem=pypi)
    requests_pkg = Package(name='requests', ecosystem=pypi)
    sequence = Package(name='sequence', ecosystem=npm)
    arrify = Package(name='arrify', ecosystem=npm)
    serve_static = Package(name='serve-static', ecosystem=npm)
    packages = [flexmock, requests_pkg, sequence, arrify, serve_static]

    flexmock_0_10_1 = Version(identifier='0.10.1', package=flexmock)
    flexmock_0_9_1 = Version(identifier='0.9.1', package=flexmock)
    requests_2_0_0 = Version(identifier='2.0.0', package=requests_pkg)
    sequence_2_2_1 = Version(identifier='2.2.1', package=sequence)
    arrify_1_0_1 = Version(identifier='1.0.1', package=arrify)
    serve_static_1_7_1 = Version(identifier='1.7.1', package=serve_static)
    versions = [
        flexmock_0_10_1,
        flexmock_0_9_1,
        requests_2_0_0,
        sequence_2_2_1,
        arrify_1_0_1,
        serve_static_1_7_1,
    ]

    analyses = [
        # pypi/flexmock/0.10.1
        Analysis(version=flexmock_0_10_1, started_at=now),
        # pypi/flexmock/0.10.1 (accessed once)
        Analysis(version=flexmock_0_10_1, started_at=later, access_count=1),
        # pypi/flexmock/0.9.1
        Analysis(version=flexmock_0_9_1, started_at=even_later),
        # pypi/requests/2.0.0
        Analysis(version=requests_2_0_0, started_at=now),
        # npm/sequence/2.2.1
        Analysis(version=sequence_2_2_1, started_at=later),
        # npm/arrify/1.0.1
        Analysis(version=arrify_1_0_1, started_at=now, finished_at=later),
        # npm/serve-static/1.7.1
        Analysis(version=serve_static_1_7_1, started_at=now,
                 finished_at=later, release='npm:serve-static:1.7.1'),
    ]

    # All worker results hang off the second flexmock/0.10.1 analysis.
    analysis_with_results = analyses[1]
    worker_results = [
        WorkerResult(
            worker='digests',
            analysis=analysis_with_results,
            task_result={
                'details': [
                    {'artifact': True,
                     'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'},
                ],
            },
        ),
        WorkerResult(
            worker='static_analysis',
            task_result={'details': []},
            analysis=analysis_with_results,
        ),
        WorkerResult(
            worker='source_licenses',
            task_result={'schema': {'name': 'source_licenses',
                                    'version': '1-0-0'}},
            analysis=analysis_with_results,
        ),
    ]

    package_gh_usage = [
        PackageGHUsage(name='arrify', count=100, ecosystem_backend='npm'),
    ]

    session = app.rdb.session
    record_groups = (ecosystems, packages, versions, analyses,
                     worker_results, package_gh_usage)
    # Same add order as the concatenation of all six lists.
    for group in record_groups:
        for record in group:
            session.add(record)
    session.commit()

    return record_groups
def execute(self, arguments):
    """Create a new analysis record and publish its id in ``arguments``.

    :param arguments: dict checked for ``url``, ``ecosystem`` and
        ``repo_name`` via ``self._strict_assert``
    :return: the same dict with ``document_id`` set to the id of the
        newly stored ``Analysis``
    """
    for required_key in ('url', 'ecosystem', 'repo_name'):
        self._strict_assert(arguments.get(required_key))

    session = self.storage.session
    analysis = Analysis(started_at=datetime.datetime.now())
    session.add(analysis)
    # The id is read only after the record has been committed.
    session.commit()

    arguments['document_id'] = analysis.id
    return arguments
def execute(self, arguments):
    """Register the E/P/V, cache its sources and start a new analysis.

    The ecosystem is looked up by name and the package and version are
    fetched or created.  Unless ``arguments['force']`` is truthy, an
    already analysed version short-circuits: ``name``, ``version`` and
    ``ecosystem`` are removed from ``arguments`` and the rest is returned.

    Otherwise the source tarball (and, for maven-backed ecosystems, the
    source jar and ``pom.xml``) is downloaded into a temporary directory
    and put into the object cache when not cached yet.  The temporary
    directory is always removed again.  Finally a new ``Analysis`` is
    stored and its id is returned as ``arguments['document_id']``.

    :param arguments: dict with ``name``, ``version`` and ``ecosystem``
        (checked via ``self._strict_assert``) and optional flags
    :return: the (mutated) ``arguments`` dict
    """
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))
    self._strict_assert(arguments.get('ecosystem'))

    db = self.storage.session
    ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
    package = Package.get_or_create(db, ecosystem_id=ecosystem.id,
                                    name=arguments['name'])
    version = Version.get_or_create(db, package_id=package.id,
                                    identifier=arguments['version'])

    if not arguments.get('force'):
        # TODO: this is OK for now, but if we will scale and there will be
        # 2+ workers running this task they can potentially schedule two
        # flows of a same type at the same time
        already_analysed = db.query(Analysis).filter(
            Analysis.version_id == version.id).count() > 0
        if already_analysed:
            # we need to propagate flags that were passed to flow, but not
            # E/P/V - this way we are sure that for example graph import is
            # scheduled (arguments['force_graph_sync'] == True)
            arguments.pop('name')
            arguments.pop('version')
            arguments.pop('ecosystem')
            return arguments

    # Build the cache handle *before* creating the temporary directory, so
    # that a failure here cannot leave an orphaned directory behind.
    epv_cache = ObjectCache.get_from_dict(arguments)
    cache_path = mkdtemp(dir=self.configuration.worker_data_dir)

    try:
        if not epv_cache.has_source_tarball():
            _, source_tarball_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=arguments['name'],
                version=arguments['version'],
                target_dir=cache_path)
            epv_cache.put_source_tarball(source_tarball_path)

        if ecosystem.is_backed_by(EcosystemBackend.maven):
            if not epv_cache.has_source_jar():
                # A missing source jar is tolerated: log it and carry on.
                try:
                    source_jar_path = self._download_source_jar(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_source_jar(source_jar_path)
                except Exception as exc:
                    self.log.info(
                        'Failed to fetch source jar for maven artifact "{e}/{p}/{v}": {err}'
                        .format(e=arguments.get('ecosystem'),
                                p=arguments.get('name'),
                                v=arguments.get('version'),
                                err=str(exc)))

            if not epv_cache.has_pom_xml():
                pom_xml_path = self._download_pom_xml(
                    cache_path, ecosystem, arguments)
                epv_cache.put_pom_xml(pom_xml_path)
    finally:
        # always clean up cache
        shutil.rmtree(cache_path)

    analysis = Analysis(version=version, access_count=1,
                        started_at=datetime.datetime.now())
    db.add(analysis)
    # The id is read only after the record has been committed.
    db.commit()

    arguments['document_id'] = analysis.id
    return arguments