def analyses(app):
    """Store three analyses plus their worker results and return the analyses.

    One finished analysis is created for npm/arrify/1.0.1 and two analyses
    are created for pypi/flexmock/0.10.1.  The two flexmock analyses get
    worker results attached; the second of them also carries an ``audit``
    document.  ``now`` and ``later`` are timestamps defined elsewhere in
    the module.

    :param app: object exposing the database session as ``app.rdb.session``
    :return: tuple ``(model1, model2, model3)`` of the created analyses
    """
    session = app.rdb.session

    def digests_result():
        # Build a new dict on every call so the analyses never share one payload object.
        return {'details':
                [{'artifact': True,
                  'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}

    npm = Ecosystem(name='npm', backend=EcosystemBackend.npm)
    arrify = Package(ecosystem=npm, name='arrify')
    arrify_version = Version(package=arrify, identifier='1.0.1')
    model1 = Analysis(version=arrify_version, started_at=now, finished_at=later)
    session.add(model1)

    pypi = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    flexmock = Package(ecosystem=pypi, name='flexmock')
    flexmock_version = Version(package=flexmock, identifier='0.10.1')
    model2 = Analysis(version=flexmock_version, started_at=later, access_count=1)
    session.add(model2)
    # model2.id is read right below - presumably the reason for committing first.
    session.commit()

    worker_results2 = {'a': 'b', 'c': 'd', 'e': 'f', 'g': 'h', 'i': 'j',
                       'digests': digests_result()}
    for worker_name, task_result in worker_results2.items():
        result_row = WorkerResult(analysis_id=model2.id, worker=worker_name,
                                  task_result=task_result)
        session.add(result_row)

    audit_document = {'audit': {'audit': 'audit', 'e': 'f', 'g': 'h'}, 'a': 'b', 'c': 'd'}
    model3 = Analysis(version=flexmock_version, started_at=later, access_count=1,
                      audit=audit_document)
    session.add(model3)
    # This commit also persists the worker results queued for model2 above.
    session.commit()

    worker_results3 = {'digests': digests_result()}
    for worker_name, task_result in worker_results3.items():
        result_row = WorkerResult(analysis_id=model3.id, worker=worker_name,
                                  task_result=task_result)
        session.add(result_row)
    session.commit()

    return (model1, model2, model3)
    def setup_method(self, method):
        """Create a maven E/P/V with two finished analyses and a BayesianPostgres.

        The second analysis finishes ten seconds after the first one.  All
        created objects are kept on ``self`` for use by the tests.

        :param method: the test method about to run (unused here)
        """
        # NOTE(review): rdb() is called only for its side effects; presumably it
        # prepares the test database - confirm against its definition.
        rdb()
        self.s = create_db_scoped_session()
        self.en, self.pn, self.vi = 'foo', 'bar', '1.1.1'

        self.e = Ecosystem(name=self.en, backend=EcosystemBackend.maven)
        self.p = Package(ecosystem=self.e, name=self.pn)
        self.v = Version(package=self.p, identifier=self.vi)

        self.a = Analysis(version=self.v, finished_at=datetime.datetime.now())
        second_finished_at = datetime.datetime.now() + datetime.timedelta(seconds=10)
        self.a2 = Analysis(version=self.v, finished_at=second_finished_at)

        for analysis in (self.a, self.a2):
            self.s.add(analysis)
        self.s.commit()

        connection_string = get_postgres_connection_string()
        self.bp = BayesianPostgres(connection_string=connection_string)
Exemple #3
0
    def get(self, analysis_id):
        """Return the sanitized, projected analysis with the given id.

        :param analysis_id: identifier passed to ``Analysis.by_id``
        :return: the sanitized result, or ``({}, 404)`` when ``Analysis.by_id``
                 raises ``NoResultFound``
        """
        args = self._parse_args()
        projection = self._get_projection(args['fields'])

        try:
            analysis = Analysis.by_id(rdb.session, analysis_id)
        except NoResultFound:
            return {}, 404
        else:
            # The access counter is bumped only for analyses that were actually found.
            self._inc_access_counter(analysis)
            projected = do_projection(projection, analysis)
            return self._sanitize_result(projected, debuginfo=args['debuginfo'])
Exemple #4
0
    def test_cucos_fetcher(self, rdb, npm):
        """Seed versions of 'cucos' and check the releases fetcher and the solver.

        :param rdb: database session used to store the initial dataset
        :param npm: ecosystem the 'cucos' package is created in
        """
        # create initial dataset
        cucos = Package(ecosystem=npm, name='cucos')
        rdb.add(cucos)
        rdb.commit()

        versions = {
            '0.5.0', '0.5.1', '0.6.0', '0.6.4', '0.7.0', '0.8.0', '0.9.0',
            '1.0.0', '1.0.5'
        }
        for identifier in versions:
            version = Version(package=cucos, identifier=identifier)
            rdb.add(version)
            rdb.commit()
            # Fetcher only selects finished analyses
            finished = Analysis(version=version, finished_at=datetime.datetime.now())
            rdb.add(finished)
            rdb.commit()

        fetcher = CucosReleasesFetcher(npm, rdb)
        releases = fetcher.fetch_releases('cucos')[1]

        # make sure we fetched the same stuff we inserted
        assert set(releases) == versions

        # pop() takes the last element (assuming a list), which should be the latest release
        assert releases.pop() == '1.0.5'

        # try different dependency specs
        solver = get_ecosystem_solver(npm, fetcher)
        expected_by_spec = (
            ('cucos ^0.5.0', '0.5.1'),
            ('cucos 0.x.x', '0.9.0'),
            ('cucos >1.0.0', '1.0.5'),
            ('cucos ~>0.6.0', '0.6.4'),
        )
        for spec, expected in expected_by_spec:
            assert solver.solve([spec])['cucos'] == expected

        # check that with `all_versions` we return all the relevant ones
        solved_all = solver.solve(['cucos >=0.6.0'], all_versions=True)
        assert set(solved_all['cucos']) == (versions - {'0.5.0', '0.5.1'})
def fill_analyses(app):
    """Store a fixed set of ecosystems, packages, versions and analyses.

    Worker results are attached to the second pypi/flexmock/0.10.1 analysis
    and one GitHub usage record is stored for 'arrify'.  ``now``, ``later``
    and ``even_later`` are timestamps defined elsewhere in the module.

    :param app: object exposing the database session as ``app.rdb.session``
    :return: tuple ``(ecosystems, packages, versions, analyses, worker_results,
             package_gh_usage)`` where every item is a list of created objects
    """
    pypi = Ecosystem(name='pypi', backend=EcosystemBackend.pypi, url='https://pypi.python.org/',
                     fetch_url='https://pypi.python.org/pypi')
    npm = Ecosystem(name='npm', backend=EcosystemBackend.npm, url='https://www.npmjs.com/',
                    fetch_url='https://registry.npmjs.org/')
    go = Ecosystem(name='go', backend=EcosystemBackend.scm)
    ecosystems = [pypi, npm, go]

    flexmock_pkg = Package(name='flexmock', ecosystem=pypi)
    requests_pkg = Package(name='requests', ecosystem=pypi)
    sequence_pkg = Package(name='sequence', ecosystem=npm)
    arrify_pkg = Package(name='arrify', ecosystem=npm)
    serve_static_pkg = Package(name='serve-static', ecosystem=npm)
    packages = [flexmock_pkg, requests_pkg, sequence_pkg, arrify_pkg, serve_static_pkg]

    flexmock_0_10_1 = Version(identifier='0.10.1', package=flexmock_pkg)
    flexmock_0_9_1 = Version(identifier='0.9.1', package=flexmock_pkg)
    requests_2_0_0 = Version(identifier='2.0.0', package=requests_pkg)
    sequence_2_2_1 = Version(identifier='2.2.1', package=sequence_pkg)
    arrify_1_0_1 = Version(identifier='1.0.1', package=arrify_pkg)
    serve_static_1_7_1 = Version(identifier='1.7.1', package=serve_static_pkg)
    versions = [flexmock_0_10_1, flexmock_0_9_1, requests_2_0_0,
                sequence_2_2_1, arrify_1_0_1, serve_static_1_7_1]

    # The analysis that all worker results below belong to.
    analysis_with_results = Analysis(version=flexmock_0_10_1, started_at=later, access_count=1)
    analyses = [
        Analysis(version=flexmock_0_10_1, started_at=now),
        analysis_with_results,
        Analysis(version=flexmock_0_9_1, started_at=even_later),
        Analysis(version=requests_2_0_0, started_at=now),
        Analysis(version=sequence_2_2_1, started_at=later),
        Analysis(version=arrify_1_0_1, started_at=now, finished_at=later),
        Analysis(version=serve_static_1_7_1, started_at=now, finished_at=later,
                 release='npm:serve-static:1.7.1'),
    ]

    digests_details = [{'artifact': True,
                        'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]
    worker_results = [
        WorkerResult(worker='digests', analysis=analysis_with_results,
                     task_result={'details': digests_details}),
        WorkerResult(worker='static_analysis', task_result={'details': []},
                     analysis=analysis_with_results),
        WorkerResult(worker='source_licenses',
                     task_result={'schema': {'name': 'source_licenses', 'version': '1-0-0'}},
                     analysis=analysis_with_results),
    ]

    package_gh_usage = [
        PackageGHUsage(name='arrify', count=100, ecosystem_backend='npm')
    ]

    session = app.rdb.session
    everything = ecosystems + packages + versions + analyses + worker_results + package_gh_usage
    for fixture in everything:
        # Each object is committed on its own, right after it is added.
        session.add(fixture)
        session.commit()

    return (ecosystems, packages, versions, analyses, worker_results, package_gh_usage)
    def execute(self, arguments):
        """Create a new analysis record and store its id in the arguments.

        :param arguments: dict that has to contain truthy 'url', 'ecosystem'
                          and 'repo_name' entries (checked via ``_strict_assert``)
        :return: the same ``arguments`` dict with 'document_id' set to the id
                 of the newly created analysis
        """
        for required_key in ('url', 'ecosystem', 'repo_name'):
            self._strict_assert(arguments.get(required_key))

        session = self.storage.session

        analysis = Analysis(started_at=datetime.datetime.now())
        session.add(analysis)
        session.commit()

        arguments['document_id'] = analysis.id
        return arguments
    def execute(self, arguments):
        """Cache the artifacts of an ecosystem/package/version and register a new analysis.

        Unless 'force' is set, nothing is fetched when an analysis of the given
        version already exists; in that case 'name', 'version' and 'ecosystem'
        are removed from ``arguments`` and the rest is passed on.

        :param arguments: dict that has to contain truthy 'name', 'version' and
                          'ecosystem' entries (checked via ``_strict_assert``);
                          'force' is optional
        :return: the same ``arguments`` dict, either stripped of the E/P/V keys
                 (analysis already exists) or with 'document_id' set to the id
                 of the newly created analysis
        """
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))
        self._strict_assert(arguments.get('ecosystem'))

        db = self.storage.session
        # Looked up once and reused for both the package lookup and the fetching below.
        ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        p = Package.get_or_create(db,
                                  ecosystem_id=ecosystem.id,
                                  name=arguments['name'])
        v = Version.get_or_create(db,
                                  package_id=p.id,
                                  identifier=arguments['version'])

        if not arguments.get('force'):
            # TODO: this is OK for now, but if we will scale and there will be 2+ workers running this task
            # they can potentially schedule two flows of a same type at the same time
            if db.query(Analysis).filter(
                    Analysis.version_id == v.id).count() > 0:
                # we need to propagate flags that were passed to flow, but not E/P/V - this way we are sure that for
                # example graph import is scheduled (arguments['force_graph_sync'] == True)
                arguments.pop('name')
                arguments.pop('version')
                arguments.pop('ecosystem')
                return arguments

        # Resolve the cache *before* creating the scratch directory: everything
        # between mkdtemp() and the try block below must not be able to raise,
        # otherwise the directory would never be removed.
        epv_cache = ObjectCache.get_from_dict(arguments)
        cache_path = mkdtemp(dir=self.configuration.worker_data_dir)

        try:
            if not epv_cache.has_source_tarball():
                _, source_tarball_path = IndianaJones.fetch_artifact(
                    ecosystem=ecosystem,
                    artifact=arguments['name'],
                    version=arguments['version'],
                    target_dir=cache_path)
                epv_cache.put_source_tarball(source_tarball_path)

            if ecosystem.is_backed_by(EcosystemBackend.maven):
                if not epv_cache.has_source_jar():
                    try:
                        source_jar_path = self._download_source_jar(
                            cache_path, ecosystem, arguments)
                        epv_cache.put_source_jar(source_jar_path)
                    except Exception as exc:
                        # A missing source jar is tolerated: log it and carry on.
                        self.log.info(
                            'Failed to fetch source jar for maven artifact "{e}/{p}/{v}": {err}'
                            .format(e=arguments.get('ecosystem'),
                                    p=arguments.get('name'),
                                    v=arguments.get('version'),
                                    err=str(exc)))

                if not epv_cache.has_pom_xml():
                    pom_xml_path = self._download_pom_xml(
                        cache_path, ecosystem, arguments)
                    epv_cache.put_pom_xml(pom_xml_path)
        finally:
            # always clean up cache
            shutil.rmtree(cache_path)

        a = Analysis(version=v,
                     access_count=1,
                     started_at=datetime.datetime.now())
        db.add(a)
        db.commit()

        arguments['document_id'] = a.id
        return arguments