コード例 #1
0
def analyses(app):
    """Prepare the known set of data used by tests."""
    npm_eco = Ecosystem(name='npm', backend=EcosystemBackend.npm)
    arrify_pkg = Package(ecosystem=npm_eco, name='arrify')
    arrify_ver = Version(package=arrify_pkg, identifier='1.0.1')
    model1 = Analysis(version=arrify_ver, started_at=now, finished_at=later)
    app.rdb.session.add(model1)

    pypi_eco = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    flexmock_pkg = Package(ecosystem=pypi_eco, name='flexmock')
    flexmock_ver = Version(package=flexmock_pkg, identifier='0.10.1')
    model2 = Analysis(version=flexmock_ver, started_at=later, access_count=1)
    app.rdb.session.add(model2)
    app.rdb.session.commit()

    # Result payload shared by the 'digests' worker in both analyses below.
    digests_result = {'details': [{'artifact': True,
                                   'sha1': '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}
    worker_results2 = {'a': 'b', 'c': 'd', 'e': 'f', 'g': 'h', 'i': 'j',
                       'digests': digests_result}
    for worker_name, task_result in worker_results2.items():
        app.rdb.session.add(WorkerResult(analysis_id=model2.id,
                                         worker=worker_name,
                                         task_result=task_result))

    model3 = Analysis(version=flexmock_ver, started_at=later, access_count=1,
                      audit={'audit': {'audit': 'audit', 'e': 'f', 'g': 'h'},
                             'a': 'b', 'c': 'd'})
    app.rdb.session.add(model3)
    app.rdb.session.commit()

    for worker_name, task_result in {'digests': digests_result}.items():
        app.rdb.session.add(WorkerResult(analysis_id=model3.id,
                                         worker=worker_name,
                                         task_result=task_result))
    app.rdb.session.commit()
    return (model1, model2, model3)
コード例 #2
0
def fill_analyses(app):
    """Prepare static data used by unit tests.

    Creates ecosystems, packages, versions, analyses and worker results
    and commits each record to the database session of *app*.

    :param app: application fixture exposing ``rdb.session``
    :return: tuple of the created record lists
    """
    # TODO can not find any usage of this function
    ecosystems = [
        Ecosystem(name='pypi', backend=EcosystemBackend.pypi, url='https://pypi.python.org/',
                  fetch_url='https://pypi.python.org/pypi'),
        Ecosystem(name='npm', backend=EcosystemBackend.npm, url='https://www.npmjs.com/',
                  fetch_url='https://registry.npmjs.org/'),
        Ecosystem(name='go', backend=EcosystemBackend.scm),
    ]

    packages = [
        Package(name='flexmock', ecosystem=ecosystems[0]),
        Package(name='requests', ecosystem=ecosystems[0]),
        Package(name='sequence', ecosystem=ecosystems[1]),
        Package(name='arrify', ecosystem=ecosystems[1]),
        Package(name='serve-static', ecosystem=ecosystems[1]),
    ]

    versions = [
        Version(identifier='0.10.1', package=packages[0]),
        Version(identifier='0.9.1', package=packages[0]),
        Version(identifier='2.0.0', package=packages[1]),
        Version(identifier='2.2.1', package=packages[2]),
        Version(identifier='1.0.1', package=packages[3]),
        Version(identifier='1.7.1', package=packages[4]),
    ]

    analyses = [
        Analysis(version=versions[0], started_at=now),                     # pypi/flexmock/0.10.1
        Analysis(version=versions[0], started_at=later, access_count=1),   # pypi/flexmock/0.10.1
        Analysis(version=versions[1], started_at=even_later),              # pypi/flexmock/0.9.1
        Analysis(version=versions[2], started_at=now),                     # pypi/requests/2.0.0
        Analysis(version=versions[3], started_at=later),                   # npm/sequence/2.2.1
        Analysis(version=versions[4], started_at=now, finished_at=later),  # npm/arrify/1.0.1
        Analysis(version=versions[5], started_at=now, finished_at=later,
                 release='npm:serve-static:1.7.1'),                      # npm/serve-static/1.7.1
    ]
    # worker results that correspond to analyses above
    worker_results = [
        WorkerResult(worker='digests', analysis=analyses[1],
                     task_result={'details': [{'artifact': True,
                                               'sha1':
                                               '6be7ae55bae2372c7be490321bbe5ead278bb51b'}]}),
        WorkerResult(worker='static_analysis', task_result={'details': []}, analysis=analyses[1]),
        WorkerResult(worker='source_licenses',
                     task_result={'schema': {'name': 'source_licenses', 'version': '1-0-0'}},
                     analysis=analyses[1])
    ]

    # TODO: just a placeholder, it won't work in real tests!!!
    # Was `None`, which made the list concatenation below raise TypeError
    # before anything was persisted; an empty list keeps the intended
    # "no records yet" placeholder semantics while staying iterable.
    package_gh_usage = []

    for record in ecosystems + packages + versions + analyses + worker_results + package_gh_usage:
        app.rdb.session.add(record)
        app.rdb.session.commit()

    return (ecosystems, packages, versions, analyses, worker_results, package_gh_usage)
コード例 #3
0
def db_results():
    """Mimic SQLAlchemy query result."""
    eco = Ecosystem()
    eco.name = 'maven'

    pkg = Package()
    pkg.name = 'net.iharder:base64'
    pkg.ecosystem = eco

    ups = Upstream()
    ups.package = pkg
    ups.url = 'https://github.com/omalley/base64'

    # A real query for upstreams would yield a list of Upstream rows.
    return [ups]
コード例 #4
0
    def execute(self, arguments):
        """Task code.

        Run scancode on the package sources; for Maven packages fall back
        to the extracted binary artifact when sources are unavailable.

        :param arguments: dictionary with task arguments ('ecosystem',
            'name' and 'version' are required)
        :return: results of ``run_scancode``
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))
        eco = arguments['ecosystem']
        pkg = arguments['name']
        ver = arguments['version']

        try:
            cache_path = ObjectCache.get_from_dict(arguments).get_sources()
        except Exception:
            db_session = StoragePool.get_connected_storage(
                'BayesianPostgres').session
            # Only Maven packages have a usable binary fallback; for any
            # other ecosystem the missing sources are fatal.
            if not Ecosystem.by_name(db_session, eco).is_backed_by(
                    EcosystemBackend.maven):
                self.log.error(
                    'Could not get sources for package {e}/{p}/{v}'.format(
                        e=eco, p=pkg, v=ver))
                raise
            # Fixed missing space: the two string fragments previously
            # concatenated into "...{v},will try...".
            self.log.info('Could not get sources for maven package {p}/{v}, '
                          'will try to run on binary jar'.format(p=pkg, v=ver))
            cache_path = ObjectCache.get_from_dict(
                arguments).get_extracted_source_tarball()

        result_data = self.run_scancode(cache_path)
        return result_data
コード例 #5
0
    def test_execute_with_mock_anitya(self, ecosystem, project, md5sum,
                                      dist_git):
        """Check AnityaTask.execute with all external interactions mocked."""
        rdb()
        session = create_db_scoped_session()
        dummy_homepage = "http://project-homepage.com"

        dummy_response = Response()
        dummy_response.status_code = 200

        session.add(Ecosystem(name='npm', backend=EcosystemBackend.npm))
        session.commit()

        # fill in key-value mapping in cache
        DownstreamMapCache()[md5sum] = dist_git

        task = AnityaTask.create_test_instance(task_name='anitya')

        # Each helper is expected to be called exactly once.
        mocked_calls = (("_get_project_homepage", dummy_homepage),
                        ("_get_artifact_hash", md5sum),
                        ("_create_anitya_project", dummy_response),
                        ("_add_downstream_mapping", dummy_response))
        for method_name, return_value in mocked_calls:
            flexmock(task).should_receive(method_name).once().and_return(return_value)

        results = task.execute(arguments={'ecosystem': ecosystem, 'name': project})
        assert results is None
コード例 #6
0
    def execute(self, arguments):
        """Task to mark vulnerable packages in graph.

        :param arguments: dictionary with task arguments
        :return: None
        """
        self._strict_assert(arguments.get('ecosystem'))

        cve_filter = set(arguments.get('cve_filter', []))
        # With an explicit CVE filter, work on the filtered DB variant.
        db_cls = FilteredVictimsDB if cve_filter else VictimsDB

        storage = StoragePool.get_connected_storage('BayesianPostgres')
        ecosystem = Ecosystem.by_name(storage.session, arguments.get('ecosystem'))

        with db_cls.build_from_git(wanted=cve_filter) as db:

            self.log.info('Storing the VictimsDB zip on S3')
            db.store_on_s3()

            vulnerable = self.get_vulnerable_packages(db, ecosystem)
            self.create_in_graph(vulnerable, ecosystem)
            self.mark_in_graph(vulnerable, ecosystem)
            self.notify_gemini(vulnerable, ecosystem)
コード例 #7
0
    def retrieve_bookkeeping_for_epv(self, ecosystem, package, version):
        """Retrieve BookKeeping data for the given ecosystem, package, and version.

        :param ecosystem: ecosystem for which the data should be retrieved
        :param package: package for which the data should be retrieved
        :param version: package version for which the data should be retrieved
        """
        eco = Ecosystem.by_name(self.db, ecosystem)
        pkg = Package.by_name(self.db, package)
        ver = (self.db.query(Version)
               .join(Package).join(Ecosystem)
               .filter(Package.ecosystem == eco)
               .filter(Version.package == pkg)
               .filter(Version.identifier == version)
               .one())

        results = (self.db.query(WorkerResult)
                   .join(Analysis).join(Version)
                   .filter(Analysis.version == ver))
        worker_stats = [
            {"worker_name": worker_result.worker,
             "has_error": worker_result.error,
             "task_result": worker_result.task_result,
             "started_at": worker_result.started_at,
             "ended_at": worker_result.ended_at}
            for worker_result in results.all()
        ]

        return {"ecosystem": eco.name,
                "package": pkg.name,
                "version": ver.identifier,
                "workers": worker_stats}
コード例 #8
0
    def test_execute(self, tmpdir):
        """Verify DigesterTask digests match what IndianaJones computed."""
        artifact_digest, artifact_path = IndianaJones.fetch_artifact(
            Ecosystem(name='pypi', backend=EcosystemBackend.pypi),
            artifact=PYPI_MODULE_NAME,
            version=PYPI_MODULE_VERSION,
            target_dir=str(tmpdir))

        args = {key: 'some-value' for key in ('ecosystem', 'name', 'version')}
        flexmock(EPVCache).should_receive('get_source_tarball').and_return(
            artifact_path)
        task = DigesterTask.create_test_instance(task_name='digests')
        results = task.execute(arguments=args)

        assert results is not None
        assert isinstance(results, dict)
        assert set(results.keys()) == {'details', 'status', 'summary'}

        required_keys = {'sha256', 'sha1', 'md5', 'ssdeep', 'path'}
        artifact_details = None
        for entry in results['details']:
            assert required_keys.issubset(set(entry.keys()))
            if entry.get('artifact'):
                artifact_details = entry
        # there are artifact details
        assert artifact_details is not None
        # the artifact digest which Indy returns is the same as the one from DigesterTask
        assert artifact_digest == artifact_details['sha256'] == compute_digest(
            artifact_path)
        assert artifact_details['path'] == 'six-1.0.0.tar.gz'
コード例 #9
0
def rubygems(rdb):
    """Prepare database with Ruby gems ecosystem."""
    rubygems = Ecosystem(name='rubygems',
                         backend=EcosystemBackend.rubygems,
                         fetch_url='https://rubygems.org/api/v1')
    rdb.add(rubygems)
    rdb.commit()
    return rubygems
コード例 #10
0
def nuget(rdb):
    """Prepare database with Nuget ecosystem."""
    nuget = Ecosystem(name='nuget',
                      backend=EcosystemBackend.nuget,
                      fetch_url='https://api.nuget.org/packages/')
    rdb.add(nuget)
    rdb.commit()
    return nuget
コード例 #11
0
def pypi(rdb):
    """Prepare database with Pypi ecosystem."""
    pypi = Ecosystem(name='pypi',
                     backend=EcosystemBackend.pypi,
                     fetch_url='https://pypi.python.org/pypi')
    rdb.add(pypi)
    rdb.commit()
    return pypi
コード例 #12
0
def npm(rdb):
    """Prepare database with NPM ecosystem."""
    npm = Ecosystem(name='npm',
                    backend=EcosystemBackend.npm,
                    fetch_url='https://registry.npmjs.org/')
    rdb.add(npm)
    rdb.commit()
    return npm
コード例 #13
0
def maven(rdb):
    """Prepare database with Maven ecosystem."""
    maven = Ecosystem(name='maven',
                      backend=EcosystemBackend.maven,
                      fetch_url='')
    rdb.add(maven)
    rdb.commit()
    return maven
コード例 #14
0
def fill_packages_for_paging(app, request):
    """Create and store set of packages used by unit tests."""
    e = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    app.rdb.session.add(e)
    # Eleven packages named '0'..'10' — enough to span more than one page.
    for p in range(0, 11):
        app.rdb.session.add(Package(ecosystem=e, name=str(p)))

    app.rdb.session.commit()
コード例 #15
0
def maven(rdb):
    """Prepare database with Maven ecosystem."""
    ecosystem = Ecosystem(name='maven', backend=EcosystemBackend.maven,
                          fetch_url='')
    rdb.add(ecosystem)
    rdb.commit()
    return ecosystem
コード例 #16
0
def npm(rdb):
    """Prepare database with NPM ecosystem."""
    ecosystem = Ecosystem(name='npm', backend=EcosystemBackend.npm,
                          fetch_url='https://registry.npmjs.org/')
    rdb.add(ecosystem)
    rdb.commit()
    return ecosystem
コード例 #17
0
ファイル: foreach.py プロジェクト: pombredanne/worker
def iter_unknown_dependencies(storage_pool, node_args):
    """Collect unknown dependencies.

    :param storage_pool: Selinon storage pool holding task results
    :param node_args: node arguments (unused here, kept for the dispatcher API)
    :return: list of analysis-argument dicts for the next flows, [] on failure
    """
    # Be safe here as fatal errors will cause errors in Dispatcher
    try:
        aggregated = storage_pool.get('UnknownDependencyFetcherTask')
        # Hoist the DB session lookup out of the loop; it does not change
        # per element.
        session = StoragePool.get_connected_storage('BayesianPostgres').session

        arguments = []
        for element in aggregated["result"]:
            epv = element.split(':')
            ecosystem = epv[0]
            # Maven coordinates carry groupId:artifactId, so the name spans
            # two of the colon-separated fields.
            if Ecosystem.by_name(session, ecosystem).is_backed_by(
                    EcosystemBackend.maven):
                name = '{}:{}'.format(epv[1], epv[2])
                version = epv[3]
            else:
                name = epv[1]
                version = epv[2]
            analysis_arguments = _create_analysis_arguments(
                ecosystem, name, version)
            # TODO: Remove force=True once data-importer is smart enough
            # to ingest missing packages from s3.
            analysis_arguments.update({"recursive_limit": 0, "force": True})
            arguments.append(analysis_arguments)

        # Use the logger with lazy %-args instead of the stray debug print
        # and eager string formatting that were here before.
        logger.info("Arguments for next flows: %s", arguments)
        return arguments
    except Exception:
        # Deliberate best-effort: any failure yields no extra flows.
        logger.exception("Failed to collect unknown dependencies")
        return []
コード例 #18
0
def retrieve_bookkeeping_for_ecosystem(ecosystem):
    """Retrieve BookKeeping data for given Ecosystem.

    :param ecosystem: ecosystem for which the data should be retrieved
    :return: dict with a "summary" key on success, an "error" key otherwise
    """
    rdb = StoragePool.get_connected_storage('BayesianPostgres')
    db = rdb.session
    try:
        e = Ecosystem.by_name(db, ecosystem)
        package_count = _count(
            db,
            db.query(Package).filter(Package.ecosystem == e))
        pv_count = _count(
            db,
            db.query(Version).join(Package).filter(Package.ecosystem == e))
        result = {
            "summary": {
                "ecosystem": e.name,
                "package_count": package_count,
                "package_version_count": pv_count
            }
        }
    # The previous `as e` bindings were unused and shadowed the `e`
    # Ecosystem variable above; drop them.
    except NoResultFound:
        result = {"error": "No such ecosystem: %s" % ecosystem}
    except SQLAlchemyError:
        result = {
            "error":
            "Error encountered while fetching data. Please check logs."
        }

    return result
コード例 #19
0
    def execute(self, arguments):
        """Task code.

        :param arguments: dictionary with task arguments
        :return: {}, results
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))

        rdb_session = StoragePool.get_connected_storage(
            'BayesianPostgres').session

        name = arguments['name']
        ecosystem = arguments['ecosystem']
        if ecosystem == 'go':
            # go package name is the host+path part of a URL, so it has to
            # be percent-encoded before being placed in the request URL
            name = quote(name, safe='')

        eco_model = Ecosystem.by_name(rdb_session, ecosystem)
        project_url = self.configuration.libraries_io_project_url(eco_model, name)
        project = get_response(project_url)
        versions = project['versions']

        details = {
            'dependent_repositories': {'count': project['dependent_repos_count']},
            'dependents': {'count': project['dependents_count']},
            'releases': {'count': len(versions),
                         'recent': self.recent_releases(versions)},
        }

        return {'status': 'success', 'summary': [], 'details': details}
コード例 #20
0
def rubygems(rdb):
    """Prepare database with Ruby gems ecosystem."""
    ecosystem = Ecosystem(name='rubygems', backend=EcosystemBackend.rubygems,
                          fetch_url='https://rubygems.org/api/v1')
    rdb.add(ecosystem)
    rdb.commit()
    return ecosystem
コード例 #21
0
def nuget(rdb):
    """Prepare database with Nuget ecosystem."""
    ecosystem = Ecosystem(name='nuget', backend=EcosystemBackend.nuget,
                          fetch_url='https://api.nuget.org/packages/')
    rdb.add(ecosystem)
    rdb.commit()
    return ecosystem
コード例 #22
0
def pypi(rdb):
    """Prepare database with Pypi ecosystem."""
    ecosystem = Ecosystem(name='pypi', backend=EcosystemBackend.pypi,
                          fetch_url='https://pypi.python.org/pypi')
    rdb.add(ecosystem)
    rdb.commit()
    return ecosystem
コード例 #23
0
    def retrieve_bookkeeping_for_ecosystem_package(self, ecosystem, package):
        """Retrieve BookKeeping data for given Package and Ecosystem.

        :param ecosystem: ecosystem for which the data should be retrieved
        :param package: package for which the data should be retrieved
        """
        e = Ecosystem.by_name(self.db, ecosystem)
        p = Package.by_name(self.db, package)

        stat = self.db.query(PackageWorkerResult).\
            join(PackageAnalysis).\
            filter(PackageAnalysis.package == p)
        worker_stats = []
        for package_worker_result in stat.all():
            entry = {"worker_name": package_worker_result.worker,
                     "has_error": package_worker_result.error,
                     "task_result": package_worker_result.task_result,
                     "started_at": package_worker_result.started_at,
                     "ended_at": package_worker_result.ended_at}
            worker_stats.append(entry)

        p_versions = self.db.query(Version).join(Package).join(Ecosystem).\
            filter(Package.ecosystem == e).\
            filter(Version.package == p)
        # The version count previously duplicated this query nearly verbatim;
        # reuse the single query for both the count and the identifier list.
        version_count = p_versions.count()

        return {"ecosystem": e.name,
                "package": p.name,
                "package_version_count": version_count,
                "package_level_workers": worker_stats,
                "analysed_versions": [v.identifier for v in p_versions]}
コード例 #24
0
def fill_packages_for_paging(app, request):
    """Create and store set of packages used by unit tests."""
    session = app.rdb.session
    ecosystem = Ecosystem(name='pypi', backend=EcosystemBackend.pypi)
    session.add(ecosystem)
    # Eleven packages named '0'..'10' — more than one page worth.
    for index in range(11):
        session.add(Package(ecosystem=ecosystem, name=str(index)))
    session.commit()
コード例 #25
0
def maven(rdb):
    """Prepare database with Maven ecosystem."""
    ecosystem = Ecosystem(name='maven', backend=EcosystemBackend.maven,
                          fetch_url='https://repo.maven.apache.org/maven2/')
    rdb.add(ecosystem)
    rdb.commit()
    return ecosystem
コード例 #26
0
def _create_analysis_arguments(ecosystem, name, version):
    """Create arguments for analysis."""
    session = StoragePool.get_connected_storage('BayesianPostgres').session
    # Maven coordinates need normalization; other ecosystems keep the name.
    if Ecosystem.by_name(session, ecosystem).is_backed_by(EcosystemBackend.maven):
        package_name = MavenCoordinates.normalize_str(name)
    else:
        package_name = name
    return {
        'ecosystem': ecosystem,
        'name': package_name,
        'version': version
    }
コード例 #27
0
def normalize_package_name(ecosystem, name):
    """Normalize package name based on ecosystem.

    :param ecosystem: name of the ecosystem the package belongs to
    :param name: package name to normalize
    :return: normalized package name
    """
    normalized_name = name
    if Ecosystem.by_name(
            StoragePool.get_connected_storage('BayesianPostgres').session,
            ecosystem).is_backed_by(EcosystemBackend.pypi):
        # Bug fix: the transformed name was previously computed and then
        # discarded, so PyPI names were never actually normalized.
        normalized_name = case_sensitivity_transform(ecosystem, name)
    elif ecosystem == 'go':
        # go package name is the host+path part of a URL, thus it can be URL encoded
        normalized_name = unquote(name)
    return normalized_name
コード例 #28
0
    def _normalize_package_name(self, node_args):
        """Normalize package name in node arguments."""
        if not node_args:
            return

        # Nothing to do unless both the name and its ecosystem are present.
        if 'name' not in node_args or 'ecosystem' not in node_args:
            return

        ecosystem = Ecosystem.by_name(self.postgres.session,
                                      node_args['ecosystem'])
        node_args['name'] = normalize_package_name(
            ecosystem_backend=ecosystem.backend.name,
            name=node_args['name'])
コード例 #29
0
    def execute(self, arguments):
        """Prepare a package-level analysis for the given package.

        Resolves the ecosystem and package rows, records the upstream URL,
        and creates a new PackageAnalysis unless a recent upstream check
        makes re-scheduling unnecessary.

        :param arguments: dictionary with task arguments ('name' and
            'ecosystem' are required)
        :return: the arguments dict, augmented with 'url' and (when a new
            analysis was created) 'document_id'
        :raises FatalTaskError: if the ecosystem is not known
        """
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('ecosystem'))

        # get rid of version if scheduled from the core analyses
        arguments.pop('version', None)
        arguments.pop('document_id', None)

        db = self.storage.session
        try:
            ecosystem = Ecosystem.by_name(db, arguments['ecosystem'])
        except NoResultFound:
            raise FatalTaskError('Unknown ecosystem: %r' %
                                 arguments['ecosystem'])
        # Package row is created on first sight of this ecosystem/name pair.
        package = Package.get_or_create(db,
                                        ecosystem_id=ecosystem.id,
                                        name=arguments['name'])
        url = self.get_upstream_url(arguments)
        upstream = self.get_upstream_entry(package, url)
        if upstream is None:
            upstream = self.add_or_update_upstream(package, url)
        arguments['url'] = upstream.url

        if not arguments.get('force'):
            # can potentially schedule two flows of a same type at the same
            # time as there is no lock, but let's say it's OK
            if upstream.updated_at is not None \
                    and datetime.datetime.utcnow() - upstream.updated_at < self._UPDATE_INTERVAL:
                self.log.info(
                    'Skipping upstream package check as data are considered as recent - '
                    'last update %s.', upstream.updated_at)
                # keep track of start, but do not schedule anything more;
                # discard changes like updates
                db.rollback()
                return arguments

        # if this fails, it's actually OK, as there could be concurrency
        package_analysis = PackageAnalysis(
            package_id=package.id,
            started_at=datetime.datetime.utcnow(),
            finished_at=None)
        db.add(package_analysis)

        # keep track of updates
        upstream.updated_at = datetime.datetime.utcnow()

        db.commit()
        arguments['document_id'] = package_analysis.id
        return arguments
コード例 #30
0
    def retrieve_bookkeeping_for_ecosystem(self, ecosystem):
        """Retrieve BookKeeping data for given Ecosystem.

        :param ecosystem: ecosystem for which the data should be retrieved
        """
        eco = Ecosystem.by_name(self.db, ecosystem)
        packages = self.db.query(Package).filter(Package.ecosystem == eco)
        versions = (self.db.query(Version)
                    .join(Package)
                    .filter(Package.ecosystem == eco))
        return {"ecosystem": eco.name,
                "package_count": packages.count(),
                "package_version_count": versions.count()}