def test_execute(self, tmpdir):
    """Run the mercator task on a fetched npm package and verify its details."""
    npm = Ecosystem(name='npm', backend=EcosystemBackend.npm)
    flexmock(self.m.storage).should_receive('get_ecosystem').with_args(
        'npm').and_return(npm)
    name = 'wrappy'
    version = '1.0.2'
    # metadata keys that must be present and non-empty in the result
    required = {'homepage', 'version', 'declared_license', 'code_repository',
                'bug_reporting', 'description', 'name', 'author'}

    IndianaJones.fetch_artifact(npm, artifact=name, version=version,
                                target_dir=str(tmpdir))
    args = {'ecosystem': npm.name, 'name': 'foo', 'version': 'bar'}
    # point the cache at the freshly fetched artifact directory
    flexmock(EPVCache).should_receive(
        'get_extracted_source_tarball').and_return(str(tmpdir))
    results = self.m.execute(arguments=args)

    assert results is not None
    assert isinstance(results, dict)
    details = results['details'][0]
    # check at least the required are there
    assert required <= set(details.keys())
    # assert required are not None
    assert all(details[key] for key in required)
    assert details['name'] == name
def test_fetch_npm_latest(tmpdir):
    """Fetch the latest version of an npm module and verify cache/download layout.

    Bug fix: the old ``try/except AssertionError: time.sleep(1); raise`` block
    claimed to "try again" but only slept and re-raised without retrying; the
    request is now actually re-issued once after a short pause.
    """
    cache_path = subprocess.check_output(["npm", "config", "get", "cache"],
                                         universal_newlines=True).strip()
    assert ".npm" in cache_path
    module_cache_path = osp.join(cache_path, NPM_MODULE_NAME)

    # this could go really really bad if npm returns "/"
    shutil.rmtree(module_cache_path, ignore_errors=True)  # we don't care if it doesn't exist

    npm_url = "https://registry.npmjs.org/{}".format(NPM_MODULE_NAME)
    response = requests.get(npm_url, json=True)
    if response.status_code != 200:
        # Let's try again, but give the remote service some time to catch a breath
        time.sleep(1)
        response = requests.get(npm_url, json=True)
    assert response.status_code == 200, response.text

    module_json = response.json()
    latest_version = sorted(module_json["versions"].keys()).pop()
    IndianaJones.fetch_artifact(npm, artifact=NPM_MODULE_NAME,
                                target_dir=str(tmpdir))

    assert len(glob.glob(osp.join(cache_path, NPM_MODULE_NAME, "*"))) == 1,\
        "there should be just one version of the artifact in the NPM cache"
    assert osp.exists(osp.join(module_cache_path, latest_version))
    assert osp.exists(osp.join(str(tmpdir), "package.tgz"))
def test_execute(self, tmpdir):
    """Run the LinguistTask on a fetched source tarball and verify its output."""
    IndianaJones.fetch_artifact(
        ecosystem=ECOSYSTEM, artifact=MODULE_NAME,
        version=MODULE_VERSION, target_dir=str(tmpdir))
    args = dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value')
    flexmock(EPVCache).should_receive('get_extracted_source_tarball').and_return(str(tmpdir))

    task = LinguistTask.create_test_instance(task_name='languages')
    results = task.execute(args)

    assert results is not None
    assert isinstance(results, dict)
    assert set(results.keys()) == {'details', 'status', 'summary'}
    details = results['details']
    assert len(details) > 3  # tarball, setup.py, LICENSE, README, etc.
    for entry in details:
        path = entry.get('path')
        if path and path.endswith('six.py'):
            # expected shape of the entry:
            # {'output': {'language': 'Python',
            #             'lines': '869',
            #             'mime': 'application/x-python',
            #             'sloc': '869',
            #             'type': 'Text'},
            #  'path': 'six-1.10.0/six.py',
            #  'type': ['Python script, ASCII text executable']},
            assert set(entry.keys()) == {'output', 'path', 'type'}
            assert set(entry['output'].keys()) == {'language', 'lines', 'mime',
                                                   'sloc', 'type'}
            assert entry['output']['language'] == 'Python'
            assert entry['type'].pop().startswith('Python')
    assert results['status'] == 'success'
def test_fetch_pypi_latest(tmpdir):
    """Fetch the latest PyPI release of a module and verify it was downloaded.

    Latest-version lookup adapted from:
    http://code.activestate.com/recipes/577708-check-for-package-updates-on-pypi-works-best-in-pi/
    """
    pypi_rpc = ServerProxy('https://pypi.python.org/pypi')
    latest_version = pypi_rpc.package_releases(PYPI_MODULE_NAME)[0]

    IndianaJones.fetch_artifact(pypi, artifact=PYPI_MODULE_NAME,
                                target_dir=str(tmpdir))

    assert len(os.listdir(str(tmpdir))) > 1
    matches = glob.glob(osp.join(str(tmpdir),
                                 "{}-{}*".format(PYPI_MODULE_NAME, latest_version)))
    assert osp.exists(matches.pop())
def test_fetch_rubygems_latest(tmpdir):
    """Fetch the latest rubygems release of a module and verify the .gem file.

    Bug fix: the old ``try/except AssertionError: time.sleep(1); raise`` block
    claimed to "try again" but only slept and re-raised without retrying; the
    request is now actually re-issued once after a short pause.
    """
    rubygems_url = "https://rubygems.org/api/v1/versions/{}/latest.json".format(RUBYGEMS_MODULE_NAME)
    response = requests.get(rubygems_url, json=True)
    if response.status_code != 200:
        # Let's try again, but give the remote service some time to catch a breath
        time.sleep(1)
        response = requests.get(rubygems_url, json=True)
    assert response.status_code == 200, response.text

    latest_version = response.json()["version"]
    IndianaJones.fetch_artifact(rubygems, artifact=RUBYGEMS_MODULE_NAME,
                                target_dir=str(tmpdir))

    assert osp.exists(osp.join(str(tmpdir),
                               "{}-{}.gem".format(RUBYGEMS_MODULE_NAME, latest_version)))
def test_execute(self, tmpdir):
    """Run the DigesterTask on a fetched PyPI artifact and cross-check digests."""
    artifact_digest, artifact_path = IndianaJones.fetch_artifact(
        Ecosystem(name='pypi', backend=EcosystemBackend.pypi),
        artifact=PYPI_MODULE_NAME, version=PYPI_MODULE_VERSION,
        target_dir=str(tmpdir))

    args = dict.fromkeys(('ecosystem', 'name', 'version'), 'some-value')
    # serve the fetched artifact through the (mocked) cache
    flexmock(EPVCache).should_receive(
        'get_extracted_source_tarball').and_return(str(tmpdir))
    flexmock(EPVCache).should_receive('get_source_tarball').and_return(
        artifact_path)
    task = DigesterTask.create_test_instance(task_name='digests')
    results = task.execute(arguments=args)

    assert results is not None
    assert isinstance(results, dict)
    assert set(results.keys()) == {'details', 'status', 'summary'}

    artifact_details = None
    for details in results['details']:
        assert {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}.issubset(set(details.keys()))
        if details.get('artifact'):
            artifact_details = details

    # there are artifact details
    assert artifact_details is not None
    # the artifact digest which Indy returns is the same as the one from DigesterTask
    assert artifact_digest == artifact_details['sha256'] == compute_digest(
        artifact_path)
    assert artifact_details['path'] == 'six-1.0.0.tar.gz'
def test_fetch_maven_specific(tmpdir):
    """Fetch a pinned maven artifact and verify its digest and jar file."""
    digest, path = IndianaJones.fetch_artifact(maven,
                                               artifact=MAVEN_MODULE_NAME,
                                               version=MAVEN_MODULE_VERSION,
                                               target_dir=str(tmpdir))
    # MAVEN_MODULE_NAME is "groupId:artifactId"; only the artifactId names the jar
    _, artifactId = MAVEN_MODULE_NAME.split(':', 1)

    assert digest == MAVEN_MODULE_DIGEST
    expected_jar = '{}-{}.jar'.format(artifactId, MAVEN_MODULE_VERSION)
    assert osp.exists(osp.join(str(tmpdir), expected_jar))
def test_fetch_rubygems_specific(tmpdir):
    """Fetch a pinned rubygems artifact and verify its digest and .gem file."""
    digest, path = IndianaJones.fetch_artifact(
        rubygems, artifact=RUBYGEMS_MODULE_NAME,
        version=RUBYGEMS_MODULE_VERSION, target_dir=str(tmpdir))

    assert digest == RUBYGEMS_MODULE_DIGEST
    expected_gem = "{}-{}.gem".format(RUBYGEMS_MODULE_NAME, RUBYGEMS_MODULE_VERSION)
    assert osp.exists(osp.join(str(tmpdir), expected_gem))
def test_fetch_pypi_specific(tmpdir):
    """Fetch a pinned PyPI artifact and verify its digest and downloaded files."""
    digest, path = IndianaJones.fetch_artifact(
        pypi, artifact=PYPI_MODULE_NAME,
        version=PYPI_MODULE_VERSION, target_dir=str(tmpdir))

    assert digest == PYPI_MODULE_DIGEST
    assert len(os.listdir(str(tmpdir))) > 1
    matches = glob.glob(osp.join(str(tmpdir),
                                 "{}-{}*".format(PYPI_MODULE_NAME, PYPI_MODULE_VERSION)))
    assert osp.exists(matches.pop())
def _download_pom_xml(target, ecosystem, arguments):
    """Fetch the pom for the maven coordinates in *arguments* and return its path."""
    coords = MavenCoordinates.from_str(arguments['name'])
    coords.packaging = 'pom'
    coords.classifier = ''  # pom.xml files have no classifiers

    IndianaJones.fetch_artifact(ecosystem=ecosystem,
                                artifact=coords.to_str(omit_version=True),
                                version=arguments['version'],
                                target_dir=target)

    # pom has to be named precisely pom.xml, otherwise mercator's Java handler
    # which uses maven as subprocess won't see it
    fetched_pom = os.path.join(
        target, '{}-{}.pom'.format(coords.artifactId, arguments['version']))
    pom_xml_path = os.path.join(target, 'pom.xml')
    os.rename(fetched_pom, pom_xml_path)
    return pom_xml_path
def test_fetch_maven_latest(tmpdir):
    """Fetch the latest maven artifact and verify the downloaded jar name.

    Bug fix: Maven Central no longer serves plain HTTP (``http://repo1.maven.org``
    returns 501), so the metadata is fetched over HTTPS. Since libxml2 (and thus
    lxml's ``etree.parse``) cannot fetch https URLs itself, the XML is downloaded
    with ``requests`` and parsed from the response body.
    """
    maven_central_url = 'https://repo1.maven.org/maven2'
    groupId, artifactId = MAVEN_MODULE_NAME.split(':', 1)
    groupId = groupId.replace('.', '/')

    # get maven-metadata.xml from the repository
    metadata_url = '{base}/{group}/{artifact}/maven-metadata.xml'.format(
        base=maven_central_url, group=groupId, artifact=artifactId)
    response = requests.get(metadata_url)
    assert response.status_code == 200, response.text
    meta = etree.fromstring(response.content)

    # get latest version
    version = meta.xpath('/metadata/versioning/latest')[0].text

    IndianaJones.fetch_artifact(maven, artifact=MAVEN_MODULE_NAME, version=None,
                                target_dir=str(tmpdir))

    assert osp.exists(osp.join(str(tmpdir),
                               '{}-{}.jar'.format(artifactId, version)))
def test_fetch_npm_specific(tmpdir, package, version, digest):
    """Fetch a pinned npm artifact and verify digest, cache and download layout.

    Consistency fix: ``tmpdir`` is a ``py.path.local`` object; every sibling test
    passes ``str(tmpdir)`` to ``fetch_artifact`` and ``osp.join``, so this test
    now does the same (older ``os.path.join`` versions reject path objects).
    """
    cache_path = subprocess.check_output(["npm", "config", "get", "cache"],
                                         universal_newlines=True).strip()
    assert ".npm" in cache_path

    package_digest, path = IndianaJones.fetch_artifact(
        npm, artifact=package, version=version, target_dir=str(tmpdir))

    assert len(glob.glob(osp.join(cache_path, package, "*"))) == 1,\
        "there should be just one version of the artifact in the NPM cache"
    assert package_digest == digest
    assert osp.exists(path)
    assert osp.exists(osp.join(cache_path, package, version))
    assert osp.exists(osp.join(str(tmpdir), "package.tgz"))
def _download_source_jar(target, ecosystem, arguments):
    """Fetch the sources jar for the maven coordinates in *arguments*.

    Tries the 'sources' classifier first, then 'src'; returns the path of the
    first variant that fetches successfully, and re-raises the last fetch error
    when every variant fails.

    Bug fix: previously, when the coordinates already carried a sources
    classifier, the guard skipped the whole loop and the function implicitly
    returned None (which the caller would then cache). Now that classifier is
    fetched directly.
    """
    artifact_coords = MavenCoordinates.from_str(arguments['name'])
    sources_classifiers = ['sources', 'src']

    if artifact_coords.classifier in sources_classifiers:
        # coordinates already point at a sources artifact - fetch just that one
        candidates = [artifact_coords.classifier]
    else:
        candidates = sources_classifiers

    for classifier in candidates:
        artifact_coords.classifier = classifier
        try:
            _, source_jar_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=artifact_coords.to_str(omit_version=True),
                version=arguments['version'],
                target_dir=target)
        except Exception:
            if classifier == candidates[-1]:
                # fetching of all variants failed
                raise
        else:
            return source_jar_path
def execute(self, arguments):
    """Initialize an analysis for the given ecosystem/package/version.

    Ensures the E/P/V records exist, pre-fetches the source artifact (and, for
    maven, the sources jar and pom.xml) into the EPV object cache, then creates
    a new Analysis row and returns *arguments* extended with its 'document_id'.
    If an analysis for this version already exists and 'force' is not set, the
    E/P/V keys are stripped from *arguments* and it is returned early.
    """
    # these three keys are mandatory
    self._strict_assert(arguments.get('name'))
    self._strict_assert(arguments.get('version'))
    self._strict_assert(arguments.get('ecosystem'))

    db = self.storage.session
    # get-or-create the ecosystem/package/version hierarchy
    e = Ecosystem.by_name(db, arguments['ecosystem'])
    p = Package.get_or_create(db, ecosystem_id=e.id, name=arguments['name'])
    v = Version.get_or_create(db, package_id=p.id, identifier=arguments['version'])

    if not arguments.get('force'):
        # TODO: this is OK for now, but if we will scale and there will be 2+ workers running this task
        # they can potentially schedule two flows of a same type at the same time
        if db.query(Analysis).filter(Analysis.version_id == v.id).count() > 0:
            # we need to propagate flags that were passed to flow, but not E/P/V - this way we are sure that for
            # example graph import is scheduled (arguments['force_graph_sync'] == True)
            arguments.pop('name')
            arguments.pop('version')
            arguments.pop('ecosystem')
            return arguments

    # scratch directory for downloads; removed in the finally block below
    cache_path = mkdtemp(dir=self.configuration.worker_data_dir)
    epv_cache = ObjectCache.get_from_dict(arguments)
    ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])

    try:
        if not epv_cache.has_source_tarball():
            _, source_tarball_path = IndianaJones.fetch_artifact(
                ecosystem=ecosystem,
                artifact=arguments['name'],
                version=arguments['version'],
                target_dir=cache_path)
            epv_cache.put_source_tarball(source_tarball_path)

        if ecosystem.is_backed_by(EcosystemBackend.maven):
            if not epv_cache.has_source_jar():
                try:
                    source_jar_path = self._download_source_jar(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_source_jar(source_jar_path)
                # a missing sources jar is non-fatal - log and continue
                # NOTE(review): `as e` shadows the Ecosystem variable `e` above
                # (harmless here since `e` is not used afterwards) - consider renaming
                except Exception as e:
                    self.log.info(
                        'Failed to fetch source jar for maven artifact "{e}/{p}/{v}": {err}'
                        .format(e=arguments.get('ecosystem'),
                                p=arguments.get('name'),
                                v=arguments.get('version'),
                                err=str(e)))

            if not epv_cache.has_pom_xml():
                pom_xml_path = self._download_pom_xml(
                    cache_path, ecosystem, arguments)
                epv_cache.put_pom_xml(pom_xml_path)
    finally:
        # always clean up cache
        shutil.rmtree(cache_path)

    # record the new analysis and hand its id to the rest of the flow
    a = Analysis(version=v, access_count=1, started_at=datetime.datetime.now())
    db.add(a)
    db.commit()
    arguments['document_id'] = a.id
    return arguments