예제 #1
0
 def reset(self, revision, hard=False):
     """Reset the working tree to *revision* via ``git reset``.

     :param revision: str, commit-ish to reset to
     :param hard: bool, pass ``--hard`` to also discard working-tree changes
     """
     reset_cmd = ["git", "reset", revision]
     if hard:
         reset_cmd.append("--hard")
     with cwd(self.repo_path):
         TimedCommand.get_command_output(reset_cmd, graceful=False)
예제 #2
0
    def add(self, path):
        """Stage the given path in the repository index (``git add <path>``).

        :param path: str, path to add to the index
        """
        add_cmd = ["git", "add", path]
        with cwd(self.repo_path):
            TimedCommand.get_command_output(add_cmd, graceful=False)
예제 #3
0
    def clone(cls, url, path, depth=None, branch=None, single_branch=False):
        """Clone the repository at *url* into *path*.

        :param url: str, repository URL (git+ssh/git+http/git+https accepted)
        :param path: str, destination directory
        :param depth: str, optional clone depth
        :param branch: str, optional branch to check out
        :param single_branch: bool, clone only a single branch
        :return: instance of Git()
        :raises TaskError: when cloning fails
        """
        original_url = url
        cls.config()
        # git clone doesn't understand urls starting with: git+ssh, git+http, git+https
        clone_cmd = ["git", "clone", url2git_repo(url), path]
        if depth is not None:
            clone_cmd.extend(["--depth", depth])
        if branch is not None:
            clone_cmd.extend(["--branch", branch])
        if single_branch:
            clone_cmd.append("--single-branch")
        try:
            TimedCommand.get_command_output(clone_cmd, graceful=False)
        except TaskError as exc:
            raise TaskError("Unable to clone: %s" % original_url) from exc
        return cls(path=path)
예제 #4
0
    def _generate_pom_xml(to_solve):
        """Create pom.xml with dependencies from to_solve.

        And run 'mvn versions:resolve-ranges',
        which resolves the version ranges (overwrites the pom.xml).

        :param to_solve: {"groupId:artifactId": "version-range"}
        """
        # Minimal pom skeleton; the project coordinates are dummies, only the
        # <dependencies> section matters for version-range resolution.
        project = etree.Element('project')
        etree.SubElement(project, 'modelVersion').text = '4.0.0'
        etree.SubElement(project, 'groupId').text = 'foo.bar.baz'
        etree.SubElement(project, 'artifactId').text = 'testing'
        etree.SubElement(project, 'version').text = '1.0.0'
        dependencies = etree.SubElement(project, 'dependencies')
        for name, version_range in to_solve.items():
            # tolerate a trailing ':' before splitting "groupId:artifactId"
            group_id, artifact_id = name.rstrip(':').split(':')
            dependency = etree.SubElement(dependencies, 'dependency')
            etree.SubElement(dependency, 'groupId').text = group_id
            etree.SubElement(dependency, 'artifactId').text = artifact_id
            etree.SubElement(dependency, 'version').text = version_range
        # writes pom.xml into the current working directory
        # (xml_declaration/pretty_print suggest lxml's etree — TODO confirm)
        with open('pom.xml', 'wb') as pom:
            pom.write(
                etree.tostring(project,
                               xml_declaration=True,
                               pretty_print=True))
        # mvn rewrites pom.xml in place with the resolved versions
        TimedCommand.get_command_output(['mvn', 'versions:resolve-ranges'],
                                        graceful=False)
예제 #5
0
 def zip_file(file, archive, junk_paths=False):
     """Zip *file* recursively into *archive* using the system 'zip' tool.

     :param file: str, file or directory to add to the archive
     :param archive: str, path of the resulting zip archive
     :param junk_paths: bool, store bare file names instead of full paths
     """
     zip_cmd = ['zip', '-r', archive, file]
     if junk_paths:
         # Store just the name of a saved file (junk the path), not directory names.
         # By default, zip will store the full path (relative to the current directory).
         zip_cmd.append('--junk-paths')
     TimedCommand.get_command_output(zip_cmd, graceful=False)
예제 #6
0
    def fetch_npm_artifact(name, version, target_dir):
        """Fetch npm artifact using system 'npm' tool.

        Adds the package to the local npm cache, copies the tarball and its
        package.json (plus npm-shrinkwrap.json if present) into target_dir,
        extracts the tarball there and commits everything into a fresh git
        repository created at target_dir.

        :param name: str, npm package name
        :param version: str, package version; falsy fetches whatever npm resolves
        :param target_dir: str, directory for the git repo and extracted files
        :return: tuple (digest of the tarball, path to the copied tarball)
        """
        git = Git.create_git(target_dir)

        # $ npm config get cache
        # /root/.npm
        cache_path = TimedCommand.get_command_output(
            ['npm', 'config', 'get', 'cache'], graceful=False).pop()

        # add package to cache:
        # /root/.npm/express/
        # └── 4.13.4
        #      ├── package
        #      │   ├── History.md
        #      │   ├── index.js
        #      │   ├── lib
        #      │   ├── LICENSE
        #      │   ├── package.json
        #      │   └── Readme.md
        #      └── package.tgz
        # 3 directories, 6 files
        name_ver = name
        if version:
            name_ver = "{}@{}".format(name, version)
        # make sure the artifact is not in the cache yet
        TimedCommand.get_command_output(['npm', 'cache', 'clean', name],
                                        graceful=False)
        logger.info("downloading npm module %s", name_ver)
        npm_command = ['npm', 'cache', 'add', name_ver]
        TimedCommand.get_command_output(npm_command, graceful=False)

        # copy tarball to workpath
        tarball_name = "package.tgz"
        glob_path = os.path.join(cache_path, name, "*")
        # assumes exactly one version dir exists under the cache for this
        # package (we just cleaned it above) — TODO confirm
        cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
        artifact_path = os.path.join(cache_abs_path, tarball_name)
        logger.debug("[cache] tarball path = %s", artifact_path)
        artifact_path = shutil.copy(artifact_path, target_dir)

        logger.debug("[workdir] tarball path = %s", artifact_path)
        # Prior to npm-2.x.x (Fedora 24)
        # npm client was repackaging modules on download. It modified file permissions inside
        # package.tgz so they matched UID/GID of a user running npm command. Therefore its
        # digest was different then of a tarball downloaded directly from registry.npmjs.org.
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)

        # copy package/package.json over the extracted one,
        # because it contains (since npm >= 2.x.x) more information.
        npm_package_json = os.path.join(cache_abs_path, 'package',
                                        'package.json')
        shutil.copy(npm_package_json, target_dir)
        # copy package/npm-shrinkwrap.json to target_dir
        npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                           'npm-shrinkwrap.json')
        if os.path.isfile(npm_shrinkwrap_json):
            shutil.copy(npm_shrinkwrap_json, target_dir)
        git.add_and_commit_everything()
        return digest, artifact_path
예제 #7
0
    def commit(self, message='blank'):
        """Commit git repository.

        :param message: str, commit message
        """
        # --git-dir is #$%^&&
        # http://stackoverflow.com/questions/1386291/git-git-dir-not-working-as-expected
        commit_cmd = ["git", "commit", "-m", message]
        with cwd(self.repo_path):
            TimedCommand.get_command_output(commit_cmd, graceful=False)
예제 #8
0
    def create_git(cls, path):
        """Initialize new git repository at path.

        :param path: str, directory in which the repository is created
        :return: instance of Git()
        """
        cls.config()
        init_cmd = ["git", "init", path]
        TimedCommand.get_command_output(init_cmd, graceful=False)
        return cls(path=path)
예제 #9
0
 def extract_zip(target, dest, mkdest=False):
     """Extract target zip archive into dest using system 'unzip' command.

     :param target: str, path to the zip archive
     :param dest: str, destination directory
     :param mkdest: bool, create dest (mode 0o775) if it does not exist yet
     """
     if mkdest:
         try:
             os.mkdir(dest, mode=0o775)
         except FileExistsError:
             # destination already present; nothing to do
             pass
     # -o: overwrite existing files without prompting
     unzip_cmd = ['unzip', '-q', '-o', '-d', dest, target]
     TimedCommand.get_command_output(unzip_cmd)
예제 #10
0
 def extract_zip(target, dest, mkdest=False):
     """Extract target zip into dest via 'unzip', then normalize permissions.

     :param target: str, path to the zip archive
     :param dest: str, destination directory
     :param mkdest: bool, create dest (mode 0o775) if it does not exist yet
     """
     if mkdest:
         try:
             os.mkdir(dest, mode=0o775)
         except FileExistsError:
             # destination already present; nothing to do
             pass
     # -o: overwrite existing files without prompting
     extract_cmd = ['unzip', '-q', '-o', '-d', dest, target]
     TimedCommand.get_command_output(extract_cmd)
     # Fix possibly wrong permissions in zip files that would prevent us from deleting files.
     fix_perms_cmd = ['chmod', '-R', 'u+rwX,g+rwX', dest]
     TimedCommand.get_command_output(fix_perms_cmd)
예제 #11
0
 def update_depcheck_db_on_s3():
     """Update OWASP Dependency-check DB on S3."""
     s3 = StoragePool.get_connected_storage('S3VulnDB')
     depcheck = os.path.join(configuration.OWASP_DEP_CHECK_PATH, 'bin',
                             'dependency-check.sh')
     with tempdir() as temp_data_dir:
         s3.retrieve_depcheck_db_if_exists(temp_data_dir)
         update_cmd = [depcheck, '--updateonly', '--data', temp_data_dir]
         # give DependencyCheck 25 minutes to download the DB
         TimedCommand.get_command_output(update_cmd, timeout=1500)
         s3.store_depcheck_db(temp_data_dir)
예제 #12
0
    def clone(cls,
              url,
              path,
              timeout=300,
              depth=None,
              branch=None,
              single_branch=False):
        """Clone repository provided as url to specific path.

        :param url: str
        :param path: str
        :param timeout: int
        :param depth: str
        :param branch: str
        :param single_branch: bool, only checkout single branch
        :return: instance of Git()
        :raises TaskError: when cloning fails
        """
        orig_url = url
        # git clone doesn't understand urls starting with: git+ssh, git+http, git+https
        url = url2git_repo(url)

        orig_path = path
        path = Path(path)
        # remember the mode of a pre-existing destination dir so it can be
        # recreated if git removes it on a failed clone (see the except below)
        mode = 0
        if path.is_dir():
            mode = path.stat().st_mode

        cmd = ["git", "clone", url, orig_path]
        if depth is not None:
            cmd.extend(["--depth", depth])
        if branch is not None:
            cmd.extend(["--branch", branch])
        if single_branch:
            cmd.extend(["--single-branch"])
        try:
            cls.config()
            TimedCommand.get_command_output(cmd,
                                            graceful=False,
                                            timeout=timeout)
        except TaskError as exc:
            if not path.is_dir() and mode:
                # 'git clone repo dir/' deletes (no way to turn this off) dir/ if cloning fails.
                # This might confuse caller of this method, so we recreate the dir on error here.
                try:
                    path.mkdir(mode)
                except OSError:
                    logger.error("Unable to re-create dir: %s", str(path))
            raise TaskError("Unable to clone: %s" % orig_url) from exc
        return cls(path=orig_path)
예제 #13
0
 def fetch_scm_artifact(name, version, target_dir):
     """Fetch an SCM artifact with 'go get' and archive the requested revision.

     :param name: str, package import path passed to 'go get'
     :param version: str, git revision to reset to before archiving
     :param target_dir: str, used as GOPATH; sources land in <target_dir>/src/<name>
     :return: tuple (digest of the archive, path to the archive)
     """
     env = dict(os.environ)
     env['GOPATH'] = target_dir
     # graceful=True: 'go get' failures are tolerated here; presumably the
     # subsequent git operations fail if the sources are missing — TODO confirm
     TimedCommand.get_command_output(['go', 'get', '-d', name],
                                     timeout=300,
                                     env=env,
                                     graceful=True)
     package_dir = os.path.join(target_dir, 'src', name)
     with cwd(package_dir):
         git = Git(package_dir)
         # pin the working tree to the requested revision before archiving
         git.reset(version, hard=True)
         artifact_filename = git.archive(version)
         artifact_path = os.path.join(package_dir, artifact_filename)
         digest = compute_digest(artifact_path)
         return digest, artifact_path
예제 #14
0
    def add_and_commit_everything(self, message="blank"):
        """Add and commit.

        git add .
        git commit -m everything

        :param message: str, commit message
        """
        # first we need to remove any .git dirs/files from the archive, they could contain
        #  directions that would break adding (e.g. Flask 0.10 contains .git with gitpath
        #  pointing to Mitsuhiko's home dir)
        find_cmd = ['find', self.repo_path, '-mindepth', '2', '-name', '.git',
                    '-exec', 'rm', '-rf', '{}', ';']
        TimedCommand.get_command_output(find_cmd)
        # add everything
        self.add(self.repo_path)
        self.commit(message=message)
예제 #15
0
    def _run_victims_cve_db_cli(self, arguments):
        """Run Victims CVE DB CLI.

        :param arguments: dict, requires 'name' and 'version' keys
        :return: parsed JSON output of the CLI, or [] when the run fails
        """
        s3 = StoragePool.get_connected_storage('S3VulnDB')
        output = []

        with TemporaryDirectory() as temp_victims_db_dir:
            # bootstrap the DB from github if S3 does not have it yet,
            # then retrieve the freshly stored copy
            if not s3.retrieve_victims_db_if_exists(temp_victims_db_dir):
                self.log.debug('No Victims CVE DB found on S3, cloning from github')
                self.update_victims_cve_db_on_s3()
                s3.retrieve_victims_db_if_exists(temp_victims_db_dir)

            try:
                cli = os.path.join(temp_victims_db_dir, 'victims-cve-db-cli.py')
                command = [cli, 'search',
                           '--ecosystem', 'java',
                           '--name', arguments['name'],
                           '--version', arguments['version']]
                output = TimedCommand.get_command_output(command,
                                                         graceful=False,
                                                         is_json=True,
                                                         timeout=60)  # 1 minute
            except TaskError as e:
                # best-effort: log the failure and fall through to return []
                self.log.exception(e)

        return output
    def execute(self, arguments):
        """Run oscryptocatcher tool for matching crypto algorithms.

        :param arguments: dict, requires 'ecosystem', 'name' and 'version' keys
        :return: dict with 'status', 'summary' and 'details' keys
        :raises FatalTaskError: when running oscryptocatcher or parsing its output fails
        """
        self._strict_assert(arguments.get('ecosystem'))
        self._strict_assert(arguments.get('name'))
        self._strict_assert(arguments.get('version'))

        cache_path = ObjectCache.get_from_dict(
            arguments).get_extracted_source_tarball()

        results = {'status': 'unknown', 'summary': {}, 'details': []}

        try:
            oscc = TimedCommand.get_command_output(
                ['oscryptocatcher', '--subdir-in-result', cache_path],
                graceful=False,
                is_json=True)

            self.log.debug("oscryptocatcher %s output: %s", cache_path, oscc)
            results['details'] = oscc['details']
            results['summary'] = oscc['summary']
            results['status'] = 'success'
        except Exception as exc:
            # chain the original exception so the root cause stays in the traceback
            raise FatalTaskError('oscryptocatcher failed') from exc

        return results
예제 #17
0
        def worker(path):
            """Collect mime type, file type and linguist output for one path."""
            # 'file -b -i' prints just the mime type, no filename prefix
            mime = TimedCommand.get_command_output(['file', path, '-b', '-i']).pop()
            self.log.debug("%s mime = %s", path, mime)
            # NOTE(review): unlike 'mime', 'typ' keeps the full list of output
            # lines (no .pop()) — confirm this asymmetry is intended
            typ = TimedCommand.get_command_output(['file', path, '-b'])
            self.log.debug("%s filetype = %s", path, typ)

            linguist = None
            # skip linguist for binary content
            if 'charset=binary' not in mime:
                linguist = self._parse_linguist(
                    TimedCommand.get_command_output(['linguist', path])
                )
                self.log.debug("%s linguist output = %s", path, linguist)

            # 'results' and 'cache_path' come from the enclosing scope
            results.append({
                "type": typ,
                "output": linguist,
                "path": os.path.relpath(path, cache_path),
            })
예제 #18
0
 def compute_ssdeep(self, target):
     """Compute SSdeep piece-wise linear hash of target."""
     # ssdeep output: line 0 is the header, line 1 is "hash,filename"
     output = TimedCommand.get_command_output(['ssdeep', '-c', '-s', target])
     try:
         hash_line = output[1]
     except IndexError as exc:
         self.log.error("unable to compute ssdeep of %r", target)
         raise RuntimeError("can't compute digest of %r" % target) from exc
     return hash_line.split(',')[0].strip()
예제 #19
0
    def extract(target, dest):
        """Detect archive type and extracts it."""
        # Make sure that the destination directory exists
        try:
            Path(dest).mkdir(mode=0o777, parents=True)
        except FileExistsError:
            pass

        zip_suffixes = ('.zip', '.whl', '.egg', '.jar', '.war', '.aar', '.nupkg')
        if target.endswith(zip_suffixes):
            Archive.extract_zip(target, dest)
        elif Archive.TarMatcher.search(target) or target.endswith(('.tgz', '.bz2')):
            Archive.extract_tar(target, dest)
        else:
            raise ValueError('Unknown archive for {0}'.format(target))

        # Fix possibly wrong permissions in zip files that would prevent us from deleting files.
        TimedCommand.get_command_output(['chmod', '-R', 'u+rwX,g+rwX', dest])
예제 #20
0
 def config():
     """Configure git."""
     user_name = configuration.GIT_USER_NAME
     user_email = configuration.GIT_USER_EMAIL
     run = TimedCommand.get_command_output
     # only set identity globally when it is not configured yet
     if not run(["git", "config", "--get", "user.name"]):
         run(["git", "config", "--global", "user.name", user_name])
     if not run(["git", "config", "--get", "user.email"]):
         run(["git", "config", "--global", "user.email", user_email])
     # Use 'true' as external program to ask for credentials, i.e. don't ask
     # Better would be GIT_TERMINAL_PROMPT=0, but that requires git >= 2.3
     run(["git", "config", "--global", "core.askpass", "/usr/bin/true"])
예제 #21
0
 def fetch_go_artifact(name, version, target_dir):
     """Fetch go artifact using 'go get' command.

     :param name: str, package import path passed to 'go get'
     :param version: str, git revision to reset to before archiving
     :param target_dir: str, used as GOPATH; sources land in <target_dir>/src/<name>
     :return: tuple (digest of the archive, path to the archive)
     :raises NotABugTaskError: when 'go get' fails
     """
     env = dict(os.environ)
     env['GOPATH'] = target_dir
     Git.config()
     try:
         TimedCommand.get_command_output(['go', 'get', '-d', name],
                                         timeout=300,
                                         env=env,
                                         graceful=False)
     except TaskError as exc:
         # chain the TaskError so the original 'go get' failure stays in the traceback
         raise NotABugTaskError('Unable to go-get {n}'.format(n=name)) from exc
     package_dir = os.path.join(target_dir, 'src', name)
     with cwd(package_dir):
         git = Git(package_dir)
         # pin the working tree to the requested revision before archiving
         git.reset(version, hard=True)
         artifact_filename = git.archive(version)
         artifact_path = os.path.join(package_dir, artifact_filename)
         digest = compute_digest(artifact_path)
         return digest, artifact_path
예제 #22
0
    def archive(self, basename, sub_path=None):
        """Create an archive; simply calls `git archive`.

        :param basename: str, name of the resulting archive, without file extension (suffix)
        :param sub_path: str, only add files found under this path to the archive;
                          default: add all files from the repository (.git/ is always excluded)
        :return: str, filename
        """
        suffix = "tar.gz"
        filename = "{}.{}".format(basename, suffix)
        archive_cmd = ["git", "archive",
                       "--format={}".format(suffix),
                       "--output={}".format(filename),
                       "HEAD"]
        if sub_path:
            archive_cmd.append(sub_path)
        with cwd(self.repo_path):
            TimedCommand.get_command_output(archive_cmd)

        return filename
예제 #23
0
    def archive(self, basename, basedir=None, sub_path=None, format="tar.gz"):
        """Create an archive; simply calls `git archive`.

        :param basename: str, name of the resulting archive, without file extension (suffix)
        :param basedir: str, path to a directory where to store the resulting archive
        :param sub_path: str, only add files found under this path to the archive;
                          default: add all files from the repository (.git/ is always excluded)
        :param format: str, format of the resulting archive, default: 'tar.gz'
        :return: str, filename
        """
        filename = os.path.join(basedir or "", "{}.{}".format(basename, format))
        archive_cmd = ["git", "archive",
                       "--format={}".format(format),
                       "--output={}".format(filename),
                       "HEAD"]
        if sub_path:
            archive_cmd.append(sub_path)
        with cwd(self.repo_path):
            TimedCommand.get_command_output(archive_cmd)

        return filename
예제 #24
0
    def rev_parse(self, args=None):
        """Run git rev-parse.

        :param args: arguments to pass to `git rev-parse`
        :return: [str], output from `git rev-parse`
        """
        rev_parse_cmd = ["git", "rev-parse"] + list(args or [])
        with cwd(self.repo_path):
            return TimedCommand.get_command_output(rev_parse_cmd, graceful=False)
예제 #25
0
 def update_depcheck_db_on_s3():
     """Update OWASP Dependency-check DB on S3.

     Retrieves the existing DB (if any), runs dependency-check's update with a
     JVM memory limit applied via JAVA_OPTS, and stores the refreshed DB back
     to S3 when the update command reports output.
     """
     s3 = StoragePool.get_connected_storage('S3VulnDB')
     depcheck = configuration.dependency_check_script_path
     with TemporaryDirectory() as temp_data_dir:
         s3.retrieve_depcheck_db_if_exists(temp_data_dir)
         old_java_opts = os.getenv('JAVA_OPTS', '')
         os.environ['JAVA_OPTS'] = CVEcheckerTask.dependency_check_jvm_mem_limit
         try:
             # give DependencyCheck 25 minutes to download the DB
             if TimedCommand.get_command_output(
                     [depcheck, '--updateonly', '--data', temp_data_dir],
                     timeout=1500):
                 s3.store_depcheck_db(temp_data_dir)
         finally:
             # always restore JAVA_OPTS, even when the update command raises
             os.environ['JAVA_OPTS'] = old_java_opts
예제 #26
0
    def fetch_rubygems_artifact(name, version, target_dir):
        """Fetch a rubygems artifact using the system 'gem' tool.

        Downloads the .gem into target_dir, extracts it there and commits
        everything into a fresh git repository created at target_dir.

        :param name: str, gem name
        :param version: str, gem version; falsy fetches whatever 'gem fetch' resolves
        :param target_dir: str, directory for the git repo and extracted files
        :return: tuple (digest of the .gem file, path to the .gem file)
        """
        git = Git.create_git(target_dir)
        logger.info("downloading rubygems package %s-%s", name, version)
        version_arg = []
        if version:
            version_arg = ['--version', version]
        gem_command = ['gem', 'fetch', name]
        gem_command.extend(version_arg)
        with cwd(target_dir):
            TimedCommand.get_command_output(gem_command, graceful=False)

        if not version:
            # if version is None we need to glob for the version that was downloaded
            artifact_path = os.path.abspath(
                glob.glob(os.path.join(target_dir, name + '*')).pop())
        else:
            artifact_path = os.path.join(
                target_dir, '{n}-{v}.gem'.format(n=name, v=version))

        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        git.add_and_commit_everything()
        return digest, artifact_path
예제 #27
0
    def ls_remote(repository, refs=None, args=None):
        """Get output of `git ls-remote <args> <repo> <refs>` command.

        :param repository: str, remote git repository
        :param refs: list, list of git references
        :param args: list, list of additional arguments for the command
        :return: command output
        """
        # order matters: git ls-remote <args> <repository> <refs>
        command = ["git", "ls-remote"]
        command.extend(args or [])
        command.append(repository)
        command.extend(refs or [])
        return TimedCommand.get_command_output(command, graceful=False)
예제 #28
0
    def extract_gem(target, dest):
        """Extract target gem and gemspec.

        Gem into $dest/sources
        Gemspec (renamed to rubygems-metadata.yaml) into $dest/metadata/
        """
        sources = os.path.join(dest, 'sources')
        metadata = os.path.join(dest, 'metadata')
        TimedCommand.get_command_output(['mkdir', '-p', sources, metadata])
        unpack_cmd = ['gem', 'unpack', target]
        TimedCommand.get_command_output(unpack_cmd + ['--target', sources])
        with cwd(metadata):
            # --spec ignores --target, so we need to cwd
            TimedCommand.get_command_output(unpack_cmd + ['--spec'])
            gemspec_file = glob.glob('*.gemspec').pop()
            os.rename(gemspec_file, 'rubygems-metadata.yaml')
    def execute(self):
        """Check the maven index for new releases and schedule their analyses.

        Fetches a pre-built maven index from S3 (if available), runs the
        maven-index-checker jar to count entries, stores the refreshed index
        back to S3 when its timestamp changed, and schedules a 'bayesianFlow'
        for each package released since the last recorded offset.
        """
        self.log.info("Checking maven index for new releases")
        maven_index_checker_dir = os.getenv('MAVEN_INDEX_CHECKER_PATH')
        target_dir = os.path.join(maven_index_checker_dir, 'target')
        central_index_dir = os.path.join(target_dir, 'central-index')
        timestamp_path = os.path.join(central_index_dir, 'timestamp')

        s3 = StoragePool.get_connected_storage('S3MavenIndex')
        self.log.info('Fetching pre-built maven index from S3, if available.')
        s3.retrieve_index_if_exists(target_dir)

        # old_timestamp == 0 doubles as the "first run / no index yet" marker
        old_timestamp = 0
        try:
            old_timestamp = int(os.stat(timestamp_path).st_mtime)
        except OSError:
            self.log.info(
                'Timestamp is missing, we need to build the index from scratch.'
            )
            pass

        last_offset = s3.get_last_offset()
        with tempdir() as java_temp_dir:
            # '-c' run: produces JSON with a 'count' of index entries
            cmd = [
                'java', '-Xmx768m',
                '-Djava.io.tmpdir={}'.format(java_temp_dir), '-jar',
                'maven-index-checker.jar', '-c'
            ]

            with cwd(maven_index_checker_dir):
                output = TimedCommand.get_command_output(cmd,
                                                         is_json=True,
                                                         graceful=False,
                                                         timeout=1200)

                current_count = output['count']
                new_timestamp = int(os.stat(timestamp_path).st_mtime)
                if old_timestamp != new_timestamp:
                    self.log.info('Storing pre-built maven index to S3...')
                    s3.store_index(target_dir)
                    self.log.debug('Stored. Index in S3 is up-to-date.')
                    if old_timestamp == 0:
                        s3.set_last_offset(current_count)
                        self.log.info(
                            'This is first run, i.e. all packages are considered new. '
                            'Skipping scheduling to not analyze all packages in index.'
                        )
                        return
                else:
                    self.log.info('Index in S3 is up-to-date.')

                self.log.debug(
                    "Number of entries in maven indexer: %d, "
                    "last offset used: %d", current_count, last_offset)
                to_schedule_count = current_count - last_offset
                if to_schedule_count == 0:
                    self.log.info("No new packages to schedule, exiting...")
                    return

                # '-r 0-N' run: lists index entries 0..N as JSON objects with
                # groupId/artifactId/version — presumably newest first; TODO confirm
                cmd = [
                    'java', '-Xmx768m',
                    '-Djava.io.tmpdir={}'.format(java_temp_dir), '-jar',
                    'maven-index-checker.jar', '-r',
                    '0-{}'.format(to_schedule_count)
                ]
                output = TimedCommand.get_command_output(cmd,
                                                         is_json=True,
                                                         graceful=False,
                                                         timeout=1200)

                self.log.info(
                    "Found %d new packages to analyse, scheduling analyses...",
                    len(output))
                for entry in output:
                    self.run_selinon_flow(
                        'bayesianFlow', {
                            'ecosystem': 'maven',
                            'name': '{groupId}:{artifactId}'.format(**entry),
                            'version': entry['version'],
                            'recursive_limit': 0
                        })

        # record the new offset only after all flows were scheduled
        s3.set_last_offset(current_count)
        self.log.info(
            "All new maven releases scheduled for analysis, exiting..")
예제 #30
0
    def fetch_npm_artifact(ecosystem, name, version, target_dir):
        """Fetch npm artifact using system 'npm' tool.

        Validates the requested version against the registry's release list,
        adds the package to the local npm cache, copies the tarball and its
        package.json (plus npm-shrinkwrap.json if present) into target_dir,
        extracts the tarball there and commits everything into a fresh git
        repository created at target_dir.

        :param ecosystem: ecosystem object providing fetch_url (npm registry)
        :param name: str, npm package name
        :param version: str, package version to fetch
        :param target_dir: str, directory for the git repo and extracted files
        :return: tuple (digest of the tarball, path to the copied tarball)
        :raises NotABugTaskError: when the version is unknown or releases cannot be fetched
        """
        git = Git.create_git(target_dir)

        npm_cmd = ['npm', '--registry', ecosystem.fetch_url]

        # $ npm config get cache
        # /root/.npm
        cache_path = TimedCommand.get_command_output(
            npm_cmd + ['config', 'get', 'cache'], graceful=False).pop()

        # add package to cache:
        # /root/.npm/express/
        # └── 4.13.4
        #      ├── package
        #      │   ├── History.md
        #      │   ├── index.js
        #      │   ├── lib
        #      │   ├── LICENSE
        #      │   ├── package.json
        #      │   └── Readme.md
        #      └── package.tgz
        # 3 directories, 6 files
        name_ver = name

        try:
            # importing here to avoid circular dependency
            from f8a_worker.solver import NpmReleasesFetcher

            version_list = NpmReleasesFetcher(ecosystem).fetch_releases(
                name_ver)[1]
            if version not in version_list:
                raise NotABugTaskError(
                    "Provided version is not supported '%s'" % name)
            else:
                name_ver = "{}@{}".format(name, version)
        except ValueError as e:
            raise NotABugTaskError(
                'No versions for package NPM package {p} ({e})'.format(
                    p=name, e=str(e)))

        # make sure the artifact is not in the cache yet
        TimedCommand.get_command_output(npm_cmd + ['cache', 'clean', name],
                                        graceful=False)
        logger.info("downloading npm module %s", name_ver)
        cmd = npm_cmd + ['cache', 'add', name_ver]
        TimedCommand.get_command_output(cmd, graceful=False)

        # copy tarball to workpath
        tarball_name = "package.tgz"
        glob_path = os.path.join(cache_path, name, "*")
        # assumes exactly one version dir exists under the cache for this
        # package (we just cleaned it above) — TODO confirm
        cache_abs_path = os.path.abspath(glob.glob(glob_path).pop())
        artifact_path = os.path.join(cache_abs_path, tarball_name)
        logger.debug("[cache] tarball path = %s", artifact_path)
        artifact_path = shutil.copy(artifact_path, target_dir)

        logger.debug("[workdir] tarball path = %s", artifact_path)
        # Prior to npm-2.x.x (Fedora 24)
        # npm client was repackaging modules on download. It modified file permissions inside
        # package.tgz so they matched UID/GID of a user running npm command. Therefore its
        # digest was different then of a tarball downloaded directly from registry.npmjs.org.
        digest = compute_digest(artifact_path)
        Archive.extract(artifact_path, target_dir)
        Archive.fix_permissions(os.path.join(cache_abs_path, 'package'))

        # copy package/package.json over the extracted one,
        # because it contains (since npm >= 2.x.x) more information.
        npm_package_json = os.path.join(cache_abs_path, 'package',
                                        'package.json')
        shutil.copy(npm_package_json, target_dir)
        # copy package/npm-shrinkwrap.json to target_dir
        npm_shrinkwrap_json = os.path.join(target_dir, 'package',
                                           'npm-shrinkwrap.json')
        if os.path.isfile(npm_shrinkwrap_json):
            shutil.copy(npm_shrinkwrap_json, target_dir)
        git.add_and_commit_everything()
        return digest, artifact_path