def test_from_str(self, coords, from_str, is_from_str_ok, to_str, to_str_omit_version, to_repo_url):
    """Verify MavenCoordinates.from_str parses valid inputs and rejects invalid ones."""
    candidates = [from_str] if not isinstance(from_str, list) else from_str
    for candidate in candidates:
        if not is_from_str_ok:
            with pytest.raises(ValueError):
                MavenCoordinates.from_str(candidate)
        else:
            assert MavenCoordinates.from_str(candidate) == coords
def server_create_analysis(ecosystem,
                           package,
                           version,
                           api_flow=True,
                           force=False,
                           force_graph_sync=False):
    """Create bayesianApiFlow handling analyses for specified EPV.

    :param ecosystem: ecosystem for which the flow should be run
    :param package: package for which should be flow run
    :param version: package version
    :param api_flow: when True run 'bayesianApiFlow', otherwise 'bayesianFlow'
    :param force: force run flow even specified EPV exists
    :param force_graph_sync: force synchronization to graph
    :return: dispatcher ID handling flow
    """
    # Maven package names have to be normalized before they are used as keys.
    name = MavenCoordinates.normalize_str(package) if ecosystem == 'maven' else package
    args = {
        'ecosystem': ecosystem,
        'name': name,
        'version': version,
        'force': force,
        'force_graph_sync': force_graph_sync,
    }

    flow_name = 'bayesianApiFlow' if api_flow else 'bayesianFlow'
    return server_run_flow(flow_name, args)
# Example 3
    def get(self, ecosystem, package, version):
        """Return graph analysis for an EPV, scheduling a new analysis when unknown.

        :param ecosystem: ecosystem name
        :param package: package name (normalized when ecosystem is maven)
        :param version: package version
        :return: analysis result from the graph when already known
        :raises HTTPError: 202 when an API-flow analysis was scheduled,
                           404 when no data exists and a plain flow was scheduled
        """
        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)
        result = get_analyses_from_graph(ecosystem, package, version)
        # logger.warn() is a deprecated alias of logger.warning()
        current_app.logger.warning("%r" % result)

        if result is not None:
            # Known component for Bayesian
            return result

        if os.environ.get("INVOKE_API_WORKERS", "") == "1":
            # Enter the unknown path: schedule an API-flow analysis and ask
            # the caller to retry later.
            server_create_analysis(ecosystem,
                                   package,
                                   version,
                                   api_flow=True,
                                   force=False,
                                   force_graph_sync=True)
            msg = "{ecosystem} Package {package}/{version} is unavailable. The package will be available shortly,"\
                    " please retry after some time.".format(ecosystem=ecosystem, package=package, version=version)
            raise HTTPError(202, msg)
        else:
            # Schedule a regular (non-API) flow and report the data as missing.
            server_create_analysis(ecosystem,
                                   package,
                                   version,
                                   api_flow=False,
                                   force=False,
                                   force_graph_sync=True)
            msg = "No data found for {ecosystem} Package {package}/{version}".format(ecosystem=ecosystem,\
                    package=package, version=version)
            raise HTTPError(404, msg)
# Example 4
 def get(self, ecosystem, package):
     """Return a paginated list of known versions for a tracked package.

     :param ecosystem: ecosystem name
     :param package: (possibly URL-encoded) package name
     :return: dict with total count and a page of {ecosystem, package, version} items
     :raises HTTPError: 404 when the package is not tracked
     """
     args = pagination_parser.parse_args()
     package = urllib.parse.unquote(package)
     if ecosystem == 'maven':
         package = MavenCoordinates.normalize_str(package)
     package_found = rdb.session.query(Package).\
         join(Ecosystem).\
         filter(Ecosystem.name == ecosystem, Package.name == package).\
         count()
     if package_found == 0:
         raise HTTPError(404,
                         error="Package '{e}/{p}' not tracked".format(
                             p=package, e=ecosystem))
     query = rdb.session.query(Version).\
         join(Package).join(Ecosystem).\
         filter(Ecosystem.name == ecosystem, Package.name == package)
     count = query.count()
     # The ecosystem/package filter is already part of `query`;
     # applying it a second time here was redundant.
     versions = query.\
         order_by(Version.identifier.asc()).\
         offset(get_item_skip(args['page'], args['per_page'])).\
         limit(get_item_relative_limit(args['page'], args['per_page']))
     items = [{
         'ecosystem': ecosystem,
         'package': package,
         'version': v.identifier
     } for v in versions]
     return {TOTAL_COUNT_KEY: count, 'items': items}
def get_latest_analysis_for(ecosystem, package, version):
    """Return the most recent Analysis for the given EPV, or None.

    Note: has to be called inside flask request context.

    :param ecosystem: ecosystem name
    :param package: package name (normalized when ecosystem is maven)
    :param version: package version
    :return: latest Analysis row or None when nothing is found
    """
    if ecosystem == 'maven':
        package = MavenCoordinates.normalize_str(package)
    # Query.first() returns None when there is no matching row and never
    # raises NoResultFound, so the previous try/except was dead code.
    return rdb.session.query(Analysis).\
        join(Version).join(Package).join(Ecosystem).\
        filter(Ecosystem.name == ecosystem).\
        filter(Package.name == package).\
        filter(Version.identifier == version).\
        order_by(Analysis.started_at.desc()).\
        first()
# Example 6
    def get_analysis_count(self, ecosystem, package):
        """Get count of previously scheduled analyses for given ecosystem-package.

        :param ecosystem: str, Ecosystem name
        :param package: str, Package name
        :return: analysis count
        """
        name = MavenCoordinates.normalize_str(package) if ecosystem == 'maven' else package

        query = PostgresBase.session.query(PackageAnalysis).\
            join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == name)

        return query.count()
    def get_analysis_by_id(self, ecosystem, package, analysis_id):
        """Get result of previously scheduled analysis for given ecosystem-package triplet by analysis ID

        :param ecosystem: str, Ecosystem name
        :param package: str, Package name
        :param analysis_id: str, ID of analysis
        :return: analysis result
        :raises sqlalchemy.orm.exc.NoResultFound: when no matching analysis exists
        """
        if ecosystem == 'maven':
            package = MavenCoordinates.normalize_str(package)

        # BUGFIX: join Package and Ecosystem explicitly; filtering on their
        # columns without the joins produced an implicit cross join (the
        # sibling query methods here all join before filtering).
        found = self.session.query(PackageAnalysis).\
            join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == package).\
            filter(PackageAnalysis.id == analysis_id).\
            one()

        return found
    def get_analysis_count(self, ecosystem, package, version):
        """Get count of previously scheduled analysis for given EPV triplet

        :param ecosystem: str, Ecosystem name
        :param package: str, Package name
        :param version: str, Package version
        :return: analysis count
        """
        name = MavenCoordinates.normalize_str(package) if ecosystem == 'maven' else package

        return self.session.query(Analysis).\
            join(Version).join(Package).join(Ecosystem).\
            filter(Ecosystem.name == ecosystem).\
            filter(Package.name == name).\
            filter(Version.identifier == version).\
            count()
    def fetch_artifact(ecosystem=None,
                       artifact=None,
                       version=None,
                       target_dir='.'):
        """
        download artifact from registry and process it

        Dispatches on the ecosystem backend (pypi/npm/rubygems/maven/scm),
        downloads the artifact into target_dir, extracts it and, for package
        backends, commits the content into a fresh git repository there.

        :param ecosystem: Ecosystem object queried via is_backed_by()
        :param artifact: package name, maven coordinates string, or SCM URL
        :param version: version to fetch; when None the latest is resolved
        :param target_dir: directory to download/extract into
        :return: tuple: (digest, artifact_path)
        """
        parsed = urlparse(artifact)
        digest = None
        artifact_path = None

        if ecosystem.is_backed_by(EcosystemBackend.pypi):
            git = Git.create_git(target_dir)
            # NOTE: we can't download Python packages via pip, because it runs setup.py
            #  even with `pip download`. Therefore we could always get syntax errors
            #  because of older/newer syntax.
            res = requests.get(
                'https://pypi.python.org/pypi/{a}/json'.format(a=artifact))
            res.raise_for_status()
            if not version:
                # fall back to the latest version reported by the PyPI JSON API
                version = res.json()['info']['version']
            release_files = res.json()['releases'][version]

            # sort releases by order in which we'd like to download:
            #  1) sdist
            #  2) wheels
            #  3) eggs
            #  4) anything else (creepy stuff)
            def release_key(rel):
                # unknown package types sort last (rank 3)
                return {
                    'sdist': 0,
                    'bdist_wheel': 1,
                    'bdist_egg': 2
                }.get(rel['packagetype'], 3)

            # pick the most preferred release file after the stable sort
            release_files = list(sorted(release_files, key=release_key))
            file_url = release_files[0]['url']
            local_filename = IndianaJones.download_file(file_url, target_dir)
            artifact_path = os.path.join(target_dir, local_filename)
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.npm):
            git = Git.create_git(target_dir)

            # $ npm config get cache
            # /root/.npm
            cache_path = TimedCommand.get_command_output(
                ['npm', 'config', 'get', 'cache'], graceful=False).pop()

            # add package to cache:
            # /root/.npm/express/
            # └── 4.13.4
            #      ├── package
            #      │   ├── History.md
            #      │   ├── index.js
            #      │   ├── lib
            #      │   ├── LICENSE
            #      │   ├── package.json
            #      │   └── Readme.md
            #      └── package.tgz
            # 3 directories, 6 files
            name_ver = artifact
            if version:
                name_ver = "{}@{}".format(artifact, version)
            # make sure the artifact is not in the cache yet
            TimedCommand.get_command_output(
                ['npm', 'cache', 'clean', artifact], graceful=False)
            logger.info("downloading npm module %s", name_ver)
            npm_command = ['npm', 'cache', 'add', name_ver]
            TimedCommand.get_command_output(npm_command, graceful=False)

            # copy tarball to workpath
            tarball_name = "package.tgz"
            glob_path = os.path.join(cache_path, artifact, "*")
            # NOTE(review): .pop() raises IndexError if the cache dir is
            # empty — presumably 'npm cache add' guarantees it exists
            cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
            artifact_path = os.path.join(cache_abs_path, tarball_name)
            logger.debug("[cache] tarball path = %s", artifact_path)
            artifact_path = shutil.copy(artifact_path, target_dir)

            logger.debug("[workdir] tarball path = %s", artifact_path)
            # Prior to npm-2.x.x (Fedora 24)
            # npm client was repackaging modules on download. It modified file permissions inside
            # package.tgz so they matched UID/GID of a user running npm command. Therefore its
            # digest was different then of a tarball downloaded directly from registry.npmjs.org.
            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)

            # copy package/package.json over the extracted one,
            # because it contains (since npm >= 2.x.x) more information.
            npm_package_json = os.path.join(cache_abs_path, 'package',
                                            'package.json')
            shutil.copy(npm_package_json, target_dir)
            # copy package/npm-shrinkwrap.json to target_dir
            npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                               'npm-shrinkwrap.json')
            if os.path.isfile(npm_shrinkwrap_json):
                shutil.copy(npm_shrinkwrap_json, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.rubygems):
            git = Git.create_git(target_dir)
            logger.info("downloading rubygems package %s-%s", artifact,
                        version)
            version_arg = []
            if version:
                version_arg = ['--version', version]
            gem_command = ['gem', 'fetch', artifact]
            gem_command.extend(version_arg)
            with cwd(target_dir):
                TimedCommand.get_command_output(gem_command, graceful=False)

            if not version:
                # if version is None we need to glob for the version that was downloaded
                artifact_path = os.path.abspath(
                    glob.glob(os.path.join(target_dir, artifact + '*')).pop())
            else:
                artifact_path = os.path.join(
                    target_dir, '{n}-{v}.gem'.format(n=artifact, v=version))

            digest = compute_digest(artifact_path)
            Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.maven):
            git = Git.create_git(target_dir)
            artifact_coords = MavenCoordinates.from_str(artifact)
            # lxml can't handle HTTPS URLs
            maven_url = "http://repo1.maven.org/maven2/"
            if not version:
                version = mvn_find_latest_version(maven_url, artifact_coords)
            artifact_coords.version = version
            logger.info("downloading maven package %s",
                        artifact_coords.to_str())

            if not artifact_coords.is_valid():
                raise ValueError("Invalid Maven coordinates: {a}".format(
                    a=artifact_coords.to_str()))

            artifact_url = urljoin(maven_url, artifact_coords.to_repo_url())
            local_filename = IndianaJones.download_file(
                artifact_url, target_dir)
            if local_filename is None:
                raise RuntimeError("Unable to download: %s" % artifact_url)
            artifact_path = os.path.join(
                target_dir,
                os.path.split(artifact_coords.to_repo_url())[1])
            digest = compute_digest(artifact_path)
            # a pom artifact is a plain XML file, nothing to extract
            if artifact_coords.packaging != 'pom':
                Archive.extract(artifact_path, target_dir)
            git.add_and_commit_everything()
        elif ecosystem.is_backed_by(EcosystemBackend.scm):
            git = Git.clone(artifact, target_dir)
            digest = IndianaJones.get_revision(target_dir)
            artifact_path = git.archive(artifact)
        elif parsed:
            # NOTE(review): urlparse() always returns a (possibly empty)
            # ParseResult tuple, which is truthy, so this branch acts as the
            # catch-all for unbacked ecosystems — TODO confirm intended
            if parsed[0] == 'git' or parsed[2].endswith('.git'):
                git = Git.clone(artifact, target_dir)
                digest = IndianaJones.get_revision(target_dir)
                artifact_path = git.archive(artifact)

        # (digest, artifact_path) stay (None, None) when nothing matched
        return digest, artifact_path
def _create_analysis_arguments(ecosystem, name, version):
    return {
        'ecosystem': ecosystem,
        'name': MavenCoordinates.normalize_str(name) if ecosystem == 'maven' else name,
        'version': version
    }
        def foo(x):
            s.add(x)

        original = set(range(0, 10))
        tp = ThreadPool(foo)
        for i in original:
            tp.add_task(i)
        tp.start()
        tp.join()
        assert s == original


example_coordinates = [
    # MavenCoordinates(), from_str, is_from_str_ok, to_str, to_str(omit_version=True), to_repo_url
    (MavenCoordinates('g', 'a'), 'g:a', True, 'g:a', 'g:a', None),
    (MavenCoordinates('g', 'a',
                      '1'), 'g:a:1', True, 'g:a:1', 'g:a', 'g/a/1/a-1.jar'),
    (MavenCoordinates('g', 'a', packaging='war'), 'g:a:war:', True, 'g:a:war:',
     'g:a:war:', None),
    (MavenCoordinates('g', 'a', '1',
                      packaging='war'), ['g:a:war:1', 'g:a:war::1'], True,
     'g:a:war:1', 'g:a:war:', 'g/a/1/a-1.war'),
    (MavenCoordinates('g', 'a', classifier='sources'), 'g:a::sources:', True,
     'g:a::sources:', 'g:a::sources:', None),
    (MavenCoordinates('g', 'a', '1', classifier='sources'), 'g:a::sources:1',
     True, 'g:a::sources:1', 'g:a::sources:', 'g/a/1/a-1-sources.jar'),
    (MavenCoordinates('g', 'a', packaging='war', classifier='sources'),
     'g:a:war:sources:', True, 'g:a:war:sources:', 'g:a:war:sources:', None),
    (MavenCoordinates('g', 'a', '1', packaging='war',
                      classifier='sources'), 'g:a:war:sources:1', True,
 def test_mvn_find_latest_version(self):
     """mvn_find_latest_version should report 4.12 for junit in the bundled test repo."""
     data_repo = os.path.join(os.path.dirname(__file__), 'data/maven/')
     coords = MavenCoordinates('org.junit', 'junit')
     assert mvn_find_latest_version(data_repo, coords) == '4.12'