Example #1
def test_extractcode_command_works_with_relative_paths_verbose(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, i.e. the scancode-toolkit/ dir.
    # To use relative paths, we use our tmp dir at the root of the code tree.
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, ['--verbose', test_src_file])
        assert result.exit_code == 0
        # extract the path from the second line of the output
        # check that the path is relative and not absolute
        lines = result.output.splitlines(False)
        line = lines[1]
        line_path = line.split(':', 1)[-1].strip()
        if on_windows:
            drive = test_file[:2]
            assert not line_path.startswith(drive)
        else:
            assert not line_path.startswith('/')
    finally:
        fileutils.delete(test_src_dir)
Example #2
def test_extractcode_command_works_with_relative_paths(monkeypatch):
    # The setup is a tad complex because we want to have a relative dir
    # to the base dir where we run tests from, i.e. the scancode-toolkit/ dir.
    # To use relative paths, we use our tmp dir at the root of the code tree.
    from os.path import dirname, join, abspath
    from commoncode import fileutils
    import extractcode
    import tempfile
    import shutil

    try:
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = test_env.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
        result = runner.invoke(extract_cli.extractcode, [test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.resource_iter(test_tgt_dir, with_dirs=False)]
        assert sorted(expected) == sorted(file_result)
    finally:
        fileutils.delete(test_src_dir)
Example #3
    def setup(self):
        """
        Setup the cache: must be called at least once globally after cache
        initialization.
        """
        fileutils.create_dir(self.cache_infos_dir)
        fileutils.create_dir(self.cache_scans_dir)
Example #4
    def get_temp_dir(self, sub_dir_path=None):
        """
        Create a unique new temporary directory location. Create directories
        identified by sub_dir_path if provided in this temporary directory.
        Return the location for this unique directory joined with the
        sub_dir_path if any.
        """
        # ensure that we have a new unique temp directory for each test run
        global test_run_temp_dir
        if not test_run_temp_dir:
            from scancode_config import scancode_root_dir
            test_tmp_root_dir = path.join(scancode_root_dir, 'tmp')
            # now we add a space in the path for testing path with spaces
            test_run_temp_dir = fileutils.get_temp_dir(
                base_dir=test_tmp_root_dir, prefix='scancode-tk-tests -')
        if on_linux and py2:
            test_run_temp_dir = fsencode(test_run_temp_dir)

        test_run_temp_subdir = fileutils.get_temp_dir(
            base_dir=test_run_temp_dir, prefix='')

        if sub_dir_path:
            # create a sub directory hierarchy if requested
            sub_dir_path = to_os_native_path(sub_dir_path)
            test_run_temp_subdir = path.join(test_run_temp_subdir,
                                             sub_dir_path)
            fileutils.create_dir(test_run_temp_subdir)
        return test_run_temp_subdir
Example #6
def remove_backslashes_and_dotdots(directory):
    """
    Walk a directory and rename the files if their names contain backslashes.
    Return a list of errors if any.
    """
    if on_linux:
        directory = path_to_bytes(directory)
    errors = []
    for top, _, files in os.walk(directory):
        for filename in files:
            if not (WIN_PATH_SEP in filename or DOTDOT in filename):
                continue
            try:
                new_path = fileutils.as_posixpath(filename)
                new_path = new_path.strip(POSIX_PATH_SEP)
                new_path = posixpath.normpath(new_path)
                new_path = new_path.replace(DOTDOT, POSIX_PATH_SEP)
                new_path = new_path.strip(POSIX_PATH_SEP)
                new_path = posixpath.normpath(new_path)
                segments = new_path.split(POSIX_PATH_SEP)
                directory = os.path.join(top, *segments[:-1])
                fileutils.create_dir(directory)
                shutil.move(os.path.join(top, filename), os.path.join(top, *segments))
            except Exception:
                errors.append(os.path.join(top, filename))
    return errors
Example #7
def remove_backslashes_and_dotdots(directory):
    """
    Walk a directory and rename the files if their names contain backslashes.
    Return a list of errors if any.
    """
    if on_linux:
        directory = fsencode(directory)
    errors = []
    for top, _, files in os.walk(directory):
        for filename in files:
            if not (WIN_PATH_SEP in filename or DOTDOT in filename):
                continue
            try:
                new_path = as_posixpath(filename)
                new_path = new_path.strip(POSIX_PATH_SEP)
                new_path = posixpath.normpath(new_path)
                new_path = new_path.replace(DOTDOT, POSIX_PATH_SEP)
                new_path = new_path.strip(POSIX_PATH_SEP)
                new_path = posixpath.normpath(new_path)
                segments = new_path.split(POSIX_PATH_SEP)
                directory = join(top, *segments[:-1])
                create_dir(directory)
                shutil.move(join(top, filename), join(top, *segments))
            except Exception:
                errors.append(join(top, filename))
    return errors
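
A minimal, self-contained sketch of the same normalization pipeline, using only the stdlib (the constants below stand in for the module-level WIN_PATH_SEP, DOTDOT and POSIX_PATH_SEP, and the backslash replacement mimics as_posixpath):

import posixpath

WIN_PATH_SEP = '\\'
DOTDOT = '..'
POSIX_PATH_SEP = '/'

def clean_segments(filename):
    # mirror the steps in remove_backslashes_and_dotdots above
    new_path = filename.replace(WIN_PATH_SEP, POSIX_PATH_SEP)  # as_posixpath
    new_path = new_path.strip(POSIX_PATH_SEP)
    new_path = posixpath.normpath(new_path)
    new_path = new_path.replace(DOTDOT, POSIX_PATH_SEP)
    new_path = new_path.strip(POSIX_PATH_SEP)
    new_path = posixpath.normpath(new_path)
    return new_path.split(POSIX_PATH_SEP)

print(clean_segments('a\\b\\..\\c.txt'))  # ['a', 'c.txt']
print(clean_segments('..\\evil.txt'))     # ['evil.txt']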
Example #8
    def test_extract_option_works_with_relative_paths(self):
        # The setup is a tad complex because we want to have a relative dir
        # to the base dir where we run tests from, i.e. the scancode-toolkit/ dir.
        # To use relative paths, we use our tmp dir at the root of the code tree.
        from os.path import dirname, join, abspath
        from commoncode import fileutils
        import extractcode
        import tempfile
        import shutil

        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = self.get_test_loc('extract_relative_path/basic.zip')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX

        runner = CliRunner()
        result = runner.invoke(cli.scancode, ['--extract', test_src_file])
        assert result.exit_code == 0
        assert 'Extracting done' in result.output
        assert 'WARNING' not in result.output
        assert 'ERROR' not in result.output
        expected = ['/c/a/a.txt', '/c/b/a.txt', '/c/c/a.txt']
        file_result = [as_posixpath(f.replace(test_tgt_dir, '')) for f in fileutils.file_iter(test_tgt_dir)]
        assert sorted(expected) == sorted(file_result)
Example #9
def remove_backslashes(directory):
    """
    Walk a directory and rename the files if their names contain backslashes.
    Return a list of errors if any.
    """
    errors = []
    for top, _, files in os.walk(str(directory)):
        for filename in files:
            if '\\' in filename or '..' in filename:
                try:
                    new_path = fileutils.as_posixpath(filename)
                    new_path = new_path.strip('/')
                    new_path = posixpath.normpath(new_path)
                    new_path = new_path.replace('..', '/')
                    new_path = new_path.strip('/')
                    new_path = posixpath.normpath(new_path)
                    segments = new_path.split('/')
                    directory = os.path.join(top, *segments[:-1])
                    fileutils.create_dir(directory)
                    shutil.move(os.path.join(top, filename),
                                os.path.join(top, *segments))
                except Exception:
                    errors.append(os.path.join(top, filename))
    return errors
Example #10
File: cli.py Project: hv7214/conan
def collect_and_rebuild_rootfs_v10(location,
                                   echo=print,
                                   layerid_len=DEFAULT_ID_LEN):
    """
    Collect all images in a directory tree. Extract/merge the layers
    side-by-side with the image directory, using an extract suffix.
    """
    import extractcode
    all_wh = {}
    # FIXME: we should instead receive a list of images....

    for loc, image in collect_images_v10(location,
                                         echo,
                                         layerid_len=layerid_len).items():
        extract_target = loc.rstrip('\\/') + extractcode.EXTRACT_SUFFIX
        fileutils.create_dir(extract_target)
        echo(
            'Extracting/merging and building rootfs from layers for Docker image %(loc)r \n  to: %(extract_target)r'
            % locals())
        errors, whiteouts = rebuild_rootfs(image,
                                           extract_target,
                                           layerid_len=layerid_len)
        if whiteouts:
            echo(
                'Files deleted while extract/merging layers for Docker image %(loc)r:'
                % locals())
            all_wh[loc] = whiteouts
            for w in whiteouts:
                echo(' ' + w)
        if errors:
            echo('Extraction error for layers of Docker image %(loc)r:' %
                 locals())
            for e in errors:
                echo(' ' + e)
    return all_wh
Example #12
    def test_tree_checksum_ignores_some_files_and_directories(self):
        test_dir = self.get_test_loc('cache/tree', copy=True)
        before = cache.tree_checksum(test_dir)
        # create some new pyc file and a dir
        with open(os.path.join(test_dir, 'some.pyc'), 'wb') as pyc:
            pyc.write('')
        fileutils.create_dir(os.path.join(test_dir, 'some dir'))

        after = cache.tree_checksum(test_dir)
        assert before == after

        with open(os.path.join(test_dir, 'some.py'), 'wb') as py:
            py.write(' ')
        after = cache.tree_checksum(test_dir)
        assert before != after

        before = after
        with open(os.path.join(test_dir, 'some.LICENSE'), 'wb') as f:
            f.write(' ')
        after = cache.tree_checksum(test_dir)
        assert before != after

        before = after
        with open(os.path.join(test_dir, 'some.LICENSE~'), 'wb') as f:
            f.write(' ')
        after = cache.tree_checksum(test_dir)
        assert before == after

        with open(os.path.join(test_dir, 'some.LICENSE.swp'), 'wb') as f:
            f.write(' ')
        after = cache.tree_checksum(test_dir)
        assert before == after
Example #14
def paths_from_keys(base_path, keys):
    """
    Return a tuple of (parent dir path, filename) built from a cache keys
    triple and a base path. Ensure that the parent directory exists.
    """
    dir1, dir2, fname = keys
    parent = os.path.join(base_path, dir1, dir2)
    fileutils.create_dir(parent)
    return parent, fname
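
A minimal, self-contained sketch of the same keys-to-path mapping, with os.makedirs standing in for fileutils.create_dir and an illustrative, made-up keys triple:

import os
import tempfile

def paths_from_keys(base_path, keys):
    # a cache key triple maps to two nested directory names plus a file name
    dir1, dir2, fname = keys
    parent = os.path.join(base_path, dir1, dir2)
    os.makedirs(parent, exist_ok=True)  # create-if-needed, like create_dir
    return parent, fname

base = tempfile.mkdtemp()
parent, fname = paths_from_keys(base, ('ab', 'cd', 'ef0123'))
print(parent)  # <base>/ab/cd, now created on disk
print(fname)   # ef0123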
Example #15
    def test_tree_checksum_does_not_ignore_the_index_cache(self):
        # this is stored in the code tree as package data and we should not
        # ignore it
        test_dir = self.get_test_loc('cache/tree', copy=True)
        before = cache.tree_checksum(test_dir)
        # create a file named like the index
        with open(os.path.join(test_dir, cache.LICENSE_INDEX_FILENAME), 'w') as pyc:
            pyc.write(' ')
        fileutils.create_dir(os.path.join(test_dir, 'some dir'))
        after = cache.tree_checksum(test_dir)
        assert after != before
Example #16
def get_scans_cache_class(cache_dir=scans_cache_dir):
    """
    Return a new persistent cache class configured with a unique storage directory.
    """
    # create a unique temp directory in cache_dir
    fileutils.create_dir(cache_dir)
    # ensure that the cache dir is always unicode
    cache_dir = fileutils.get_temp_dir(unicode(cache_dir), prefix=unicode(timeutils.time2tstamp()) + u'-')
    sc = ScanFileCache(cache_dir)
    sc.setup()
    return partial(ScanFileCache, cache_dir)
Example #17
    def write(self, target_dir, transform_path=lambda x: x):
        """
        Write entry to a file or directory saved relatively to the `target_dir` and
        return the path where the file or directory was written or None if nothing
        was written to disk. `transform_path` is a callable taking a path and
        returning a transformed path such as resolving relative paths,
        transliterating non-portable characters or other path transformations.
        The default is a no-op lambda.
        """
        if not self.archive.archive_struct:
            raise ArchiveErrorIllegalOperationOnClosedArchive()
        # skip links and special files
        if not (self.isfile or self.isdir):
            return
        abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
        # TODO: return some warning when original path has been transformed
        clean_path = transform_path(self.path)

        if self.isdir:
            # TODO: also rename directories to a new name if needed segment by segment
            dir_path = os.path.join(abs_target_dir, clean_path)
            fileutils.create_dir(dir_path)
            return dir_path

        # note: here isfile=True
        try:
            # create parent directories if needed
            target_path = os.path.join(abs_target_dir, clean_path)
            parent_path = os.path.dirname(target_path)

            # TODO: also rename directories to a new name if needed segment by segment
            fileutils.create_dir(parent_path)

            # TODO: return some warning when original path has been renamed?
            unique_path = extractcode.new_name(target_path, is_dir=False)

            chunk_len = 10240
            sbuffer = create_string_buffer(chunk_len)
            with open(unique_path, 'wb') as target:
                chunk_size = 1
                while chunk_size:
                    chunk_size = read_entry_data(self.archive.archive_struct,
                                                 sbuffer, chunk_len)
                    data = sbuffer.raw[0:chunk_size]
                    target.write(data)
            os.utime(unique_path, (self.time, self.time))
            return target_path

        except ArchiveWarning as aw:
            msg = aw.args and '\n'.join(aw.args) or 'No message provided.'
            if msg not in self.warnings:
                self.warnings.append(msg)
            return target_path
Example #18
    def write(self, target_dir, transform_path=lambda x: x):
        """
        Write entry to a file or directory saved relatively to the `target_dir` and
        return the path where the file or directory was written or None if nothing
        was written to disk. `transform_path` is a callable taking a path and
        returning a transformed path such as resolving relative paths,
        transliterating non-portable characters or other path transformations.
        The default is a no-op lambda.
        """
        if not self.archive.archive_struct:
            raise ArchiveErrorIllegalOperationOnClosedArchive()
        # skip links and special files
        if not (self.isfile or self.isdir):
            return
        abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
        # TODO: return some warning when original path has been transformed
        clean_path = transform_path(self.path)

        if self.isdir:
            # TODO: also rename directories to a new name if needed segment by segment
            dir_path = os.path.join(abs_target_dir, clean_path)
            fileutils.create_dir(dir_path)
            return dir_path

        # note: here isfile=True
        try:
            # create parent directories if needed
            target_path = os.path.join(abs_target_dir, clean_path)
            parent_path = os.path.dirname(target_path)

            # TODO: also rename directories to a new name if needed segment by segment
            fileutils.create_dir(parent_path)

            # TODO: return some warning when original path has been renamed?
            unique_path = extractcode.new_name(target_path, is_dir=False)

            chunk_len = 10240
            sbuffer = create_string_buffer(chunk_len)
            with open(unique_path, 'wb') as target:
                chunk_size = 1
                while chunk_size:
                    chunk_size = read_entry_data(self.archive.archive_struct,
                                                 sbuffer, chunk_len)
                    data = sbuffer.raw[0:chunk_size]
                    target.write(data)
            os.utime(unique_path, (self.time, self.time))
            return target_path

        except ArchiveWarning as aw:
            msg = aw.args and '\n'.join(aw.args) or 'No message provided.'
            if msg not in self.warnings:
                self.warnings.append(msg)
            return target_path
Example #19
def get_license_cache_paths(cache_dir=scancode_cache_dir):
    """
    Return a tuple of index cache files given a master `cache_dir`.
    """
    idx_cache_dir = join(cache_dir, 'license_index')
    create_dir(idx_cache_dir)

    lock_file = join(idx_cache_dir, 'lockfile')
    checksum_file = join(idx_cache_dir, 'tree_checksums')
    cache_file = join(idx_cache_dir, 'index_cache')

    return lock_file, checksum_file, cache_file
Example #20
def get_scans_cache_class(cache_dir=scans_cache_dir):
    """
    Return a new persistent cache class configured with a unique storage directory.
    """
    # create a unique temp directory in cache_dir
    fileutils.create_dir(cache_dir)
    prefix = timeutils.time2tstamp() + u'-'
    cache_dir = fileutils.get_temp_dir(cache_dir, prefix=prefix)
    if on_linux:
        cache_dir = path_to_bytes(cache_dir)
    sc = ScanFileCache(cache_dir)
    sc.setup()
    return partial(ScanFileCache, cache_dir)
Example #22
def extract(location, target_dir):
    """
    Extract each patch of a patch file at `location` as files in a target_dir
    directory tree mimicking the directory in which the patches would be
    applied with the patch command.

    This treats a patch file as if it were an archive containing one file for
    each patch applied to a file to be patched.

    Return a list of warning messages. Raise Exception on errors.
    """
    for source, target, text in patch_info(location):
        # prefer the target path for writing the patch text to a subfile
        # unless target is /dev/null (a deletion)
        if '/dev/null' in target:
            patch_subfile_path = source
        else:
            patch_subfile_path = target

        # make the path safe to use as a subfile path
        # ensure this is a good and clean posix relative path
        patch_subfile_path = paths.safe_path(patch_subfile_path)

        # create directories
        parent_dir = posixpath.dirname(patch_subfile_path)
        parent_target_dir = os.path.join(target_dir, parent_dir)
        fileutils.create_dir(parent_target_dir)

        # find a unique name using a simple counter
        base_subfile_path = os.path.join(target_dir, patch_subfile_path)
        counter = 0
        fp = base_subfile_path
        while os.path.exists(fp + extractcode.EXTRACT_SUFFIX):
            fp = base_subfile_path + '_%d' % counter
            counter += 1
        base_subfile_path = fp

        # write the location proper, with a suffix extension to avoid
        # recursive extraction
        if py2:
            mode = 'wb'
            eol = b'\n'
        elif py3:
            mode = 'w'
            eol = u'\n'
        subfile_path = base_subfile_path + extractcode.EXTRACT_SUFFIX
        with open(subfile_path, mode) as subfile:
            subfile.write(eol.join(text))

    return []
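
The unique-name search in this function is a simple probe loop. A standalone sketch of the same idea, with a placeholder '-extract' suffix standing in for extractcode.EXTRACT_SUFFIX:

import os

EXTRACT_SUFFIX = '-extract'  # placeholder for extractcode.EXTRACT_SUFFIX

def unique_subfile_base(base_subfile_path):
    # probe base, base_0, base_1, ... until the suffixed name is unused
    counter = 0
    fp = base_subfile_path
    while os.path.exists(fp + EXTRACT_SUFFIX):
        fp = base_subfile_path + '_%d' % counter
        counter += 1
    return fp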
Example #23
def save_results(scanners, only_findings, files_count, results, format,
                 options, input, output_file):
    """
    Save scan results to file or screen.
    """

    if only_findings:
        # Find all scans that are both enabled and have a valid function reference.
        # This deliberately filters out the "info" scan (which always has a "None"
        # function reference) as there is no dedicated "infos" key in the results
        # that "has_findings()" could check.
        active_scans = [k for k, v in scanners.items() if v[0] and v[1]]

        # FIXME: this is forcing all the scan results to be loaded in memory
        # and defeats lazy loading from cache
        results = [
            file_data for file_data in results
            if has_findings(active_scans, file_data)
        ]
        # FIXME: computing len beforehand will need a list and therefore needs
        # loading it all ahead of time
        files_count = len(results)

    # note: in tests, sys.stdout is not used, but rather some io wrapper with
    # no name attribute
    is_real_file = hasattr(output_file, 'name')

    if output_file != sys.stdout and is_real_file:
        parent_dir = os.path.dirname(output_file.name)
        if parent_dir:
            fileutils.create_dir(abspath(expanduser(parent_dir)))

    if format not in formats:
        # render using a user-provided custom format template
        if not os.path.isfile(format):
            echo_stderr('\nInvalid template passed.', fg='red')
        else:
            for template_chunk in as_template(results, template=format):
                try:
                    output_file.write(template_chunk)
                except Exception as e:
                    extra_context = 'ERROR: Failed to write output to HTML for: ' + repr(
                        template_chunk)
                    echo_stderr(extra_context, fg='red')
                    e.args += (extra_context, )
                    raise e
        return

    write_formatted_output(scanners, files_count, version, notice, results,
                           format, options, input, output_file, echo_stderr)
Example #24
def save_results(scanners, files_count, results, format, options, input,
                 output_file):
    """
    Save scan results to file or screen.
    """

    # note: in tests, sys.stdout is not used, but is instead some io
    # wrapper with no name attributes. We use this to check if this is a
    # real filesystem file or not.
    # note: sys.stdout.name == '<stdout>' so it has a name.
    is_real_file = hasattr(output_file, 'name')

    if output_file != sys.stdout and is_real_file:
        # we are writing to a real filesystem file: create directories!
        parent_dir = os.path.dirname(output_file.name)
        if parent_dir:
            fileutils.create_dir(abspath(expanduser(parent_dir)))

    # Write scan results to file or screen as a formatted output ...
    # ... using a user-provided custom format template
    format_plugins = plugincode.output.get_format_plugins()
    if format not in format_plugins:
        # format may be a custom template file path
        if not os.path.isfile(format):
            # this check was done before in the CLI validation, but this
            # is done again if the function is used directly
            echo_stderr('\nInvalid template: must be a file.', fg='red')
        else:
            from formattedcode import format_templated
            # FIXME: carrying an echo function does not make sense
            format_templated.write_custom(results,
                                          output_file,
                                          _echo=echo_stderr,
                                          version=version,
                                          template_path=format)

    # ... or using the selected format plugin
    else:
        writer = format_plugins[format]
        # FIXME: carrying an echo function does not make sense
        # FIXME: do not use input as a variable name
        writer(files_count=files_count,
               version=version,
               notice=notice,
               scanned_files=results,
               options=options,
               input=input,
               output_file=output_file,
               _echo=echo_stderr)
Example #25
def setup_vscode():
    """
    Add base settings for .vscode
    """
    from scancode_config import scancode_root_dir
    from commoncode.fileutils import create_dir
    from commoncode.fileutils import copyfile

    settings = os.path.join(scancode_root_dir, 'etc', 'vscode',
                            'settings.json')

    if os.path.exists(settings):
        vscode = os.path.join(scancode_root_dir, '.vscode')
        create_dir(vscode)
        copyfile(settings, vscode)
Example #26
def paths_from_keys(base_path, keys):
    """
    Return a tuple of (parent dir path, filename) for a cache entry built from
    a cache keys triple and a base path. Ensure that the parent directory
    exists.
    """
    if on_linux:
        keys = [path_to_bytes(k) for k in keys]
        base_path = path_to_bytes(base_path)
    else:
        keys = [path_to_unicode(k) for k in keys]
        base_path = path_to_unicode(base_path)

    dir1, dir2, file_name = keys
    parent = os.path.join(base_path, dir1, dir2)
    fileutils.create_dir(parent)
    return parent, file_name
Example #28
def get_license_cache_paths(
    licensedcode_cache_dir=licensedcode_cache_dir,
    scancode_cache_dir=scancode_cache_dir,
):
    """
    Return a tuple of (lock_file, checksum_file, cache_file) index cache paths.
    """
    idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR)
    create_dir(idx_cache_dir)
    cache_file = os.path.join(idx_cache_dir, LICENSE_INDEX_FILENAME)

    lock_file = os.path.join(scancode_cache_dir,
                             'scancode_license_index_lockfile')
    checksum_file = os.path.join(scancode_cache_dir,
                                 'scancode_license_index_tree_checksums')

    return lock_file, checksum_file, cache_file
Example #29
def extract(location, target_dir):
    """
    Extract each patch of a patch file at `location` as files in a target_dir
    directory tree mimicking the directory in which the patches would be
    applied with the patch command.

    This treats a patch file as if it were an archive containing one file for
    each patch applied to a file to be patched.

    Return a list of warning messages. Raise Exception on errors.
    """
    for source, target, text in patch_info(location):
        # prefer the target path for writing the patch text to a subfile
        # unless target is /dev/null (a deletion)
        if '/dev/null' in target:
            patch_subfile_path = source
        else:
            patch_subfile_path = target

        # make the path safe to use as a subfile path
        # ensure this is a good and clean posix relative path
        patch_subfile_path = paths.safe_path(patch_subfile_path)

        # create directories
        parent_dir = posixpath.dirname(patch_subfile_path)
        parent_target_dir = os.path.join(target_dir, parent_dir)
        fileutils.create_dir(parent_target_dir)

        # find a unique name using a simple counter
        base_subfile_path = os.path.join(target_dir, patch_subfile_path)
        counter = 0
        fp = base_subfile_path
        while os.path.exists(fp + extractcode.EXTRACT_SUFFIX):
            fp = base_subfile_path + '_%d' % counter
            counter += 1
        base_subfile_path = fp

        # write the location proper, with a suffix extension to avoid
        # recursive extraction
        subfile_path = base_subfile_path + extractcode.EXTRACT_SUFFIX
        with open(subfile_path, 'wb') as subfile:
            subfile.write(u'\n'.join(text))

    return []
Example #30
def save_results(scanners, files_count, results, format, options, input, output_file):
    """
    Save scan results to file or screen.
    """

    # note: in tests, sys.stdout is not used, but is instead some io
    # wrapper with no name attributes. We use this to check if this is a
    # real filesystem file or not.
    # note: sys.stdout.name == '<stdout>' so it has a name.
    is_real_file = hasattr(output_file, 'name')

    if output_file != sys.stdout and is_real_file:
        # we are writing to a real filesystem file: create directories!
        parent_dir = os.path.dirname(output_file.name)
        if parent_dir:
            fileutils.create_dir(abspath(expanduser(parent_dir)))

    # Write scan results to file or screen as a formatted output ...
    # ... using a user-provided custom format template
    format_plugins = plugincode.output.get_format_plugins()
    if format not in format_plugins:
        # format may be a custom template file path
        if not os.path.isfile(format):
            # this check was done before in the CLI validation, but this
            # is done again if the function is used directly
            echo_stderr('\nInvalid template: must be a file.', fg='red')
        else:
            from formattedcode import format_templated
            # FIXME: carrying an echo function does not make sense
            format_templated.write_custom(
                results, output_file, _echo=echo_stderr, template_path=format)

    # ... or using the selected format plugin
    else:
        writer = format_plugins[format]
        # FIXME: carrying an echo function does not make sense
        # FIXME: do not use input as a variable name
        writer(files_count=files_count, version=version, notice=notice,
               scanned_files=results,
               options=options,
               input=input, output_file=output_file, _echo=echo_stderr)
Example #31
    def get_temp_dir(self, sub_dir_path=None):
        """
        Create a unique new temporary directory location. Create directories
        identified by sub_dir_path if provided in this temporary directory.
        Return the location for this unique directory joined with the
        sub_dir_path if any.
        """
        # ensure that we have a new unique temp directory for each test run
        global test_run_temp_dir
        if not test_run_temp_dir:
            test_run_temp_dir = fileutils.get_temp_dir(base_dir='tst',
                                                       prefix=' ')

        new_temp_dir = fileutils.get_temp_dir(base_dir=test_run_temp_dir)

        if sub_dir_path:
            # create a sub directory hierarchy if requested
            sub_dir_path = to_os_native_path(sub_dir_path)
            new_temp_dir = os.path.join(new_temp_dir, sub_dir_path)
            fileutils.create_dir(new_temp_dir)
        return new_temp_dir
Example #33
    def test_extract_can_extract_to_relative_paths(self):
        # The setup is a tad complex because we want to have a relative dir
        # to the base dir where we run tests from, i.e. the scancode-toolkit/ dir.
        # To use relative paths, we use our tmp dir at the root of the code tree.
        from os.path import dirname, join, abspath
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        import tempfile
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(scancode_root_abs, '').strip('\\/')
        test_file = self.get_test_loc('extract/relative_path/basic.zip')
        import shutil
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        test_tgt_dir = join(scancode_root, test_src_file) + extractcode.EXTRACT_SUFFIX
        result = list(extract.extract(test_src_file))
        expected = ['c/a/a.txt', 'c/b/a.txt', 'c/c/a.txt']
        check_files(test_tgt_dir, expected)
        for r in result:
            assert [] == r.warnings
            assert [] == r.errors
Example #34
    def test_libarchive_extract_can_extract_to_relative_paths(self):
        # The setup is a tad complex because we want to have a relative dir
        # to the base dir where we run tests from, i.e. the scancode-toolkit/ dir.
        # To use relative paths, we use our tmp dir at the root of the code tree.
        from os.path import dirname, join, abspath
        import tempfile
        import shutil
        from extractcode.libarchive2 import extract

        test_file = self.get_test_loc('archive/relative_path/basic.zip')
        scancode_root = dirname(dirname(dirname(__file__)))
        scancode_tmp = join(scancode_root, 'tmp')
        fileutils.create_dir(scancode_tmp)
        scancode_root_abs = abspath(scancode_root)
        test_src_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(
            scancode_root_abs, '').strip('\\/')
        test_tgt_dir = tempfile.mkdtemp(dir=scancode_tmp).replace(
            scancode_root_abs, '').strip('\\/')
        shutil.copy(test_file, test_src_dir)
        test_src_file = join(test_src_dir, 'basic.zip')
        result = list(extract(test_src_file, test_tgt_dir))
        assert [] == result
        expected = ['c/a/a.txt', 'c/b/a.txt', 'c/c/a.txt']
        check_files(test_tgt_dir, expected)
Example #36
def extract_file_by_file(location,
                         target_dir,
                         arch_type='*',
                         skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported archive
    file at location in the `target_dir` directory.

    Return a list of warning messages if any or an empty list.
    Raise exception on errors.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option.
    Can be None.
    """
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    entries, errors_msgs = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is to
    # check if we have files that are in the same dir and have the same name
    # when the case is ignored. We take a simpler approach: we check if all
    # paths are unique when we ignore the case: for that we only check that the
    # length of two paths sets are the same: one set as-is and the other
    # lowercased.

    paths_as_is = set(e.path for e in entries)
    paths_no_case = set(p.lower() for p in paths_as_is)
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # use regular extract
        return extract_all_files_at_once(location=location,
                                         target_dir=target_dir,
                                         arch_type=arch_type)

    # Now we are extracting one file at a time. This is a tad painful because
    # we are dealing with a full command execution each time.

    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')
    for i, entry in enumerate(entries):

        if not entry.is_file:
            continue

        tmp_extract_dir = os.path.join(tmp_dir, str(i))
        fileutils.create_dir(tmp_extract_dir)

        ex_args = build_7z_extract_command(
            location=location,
            target_dir=tmp_extract_dir,
            single_entry=entry,
            arch_type=arch_type,
        )
        rc, stdout, stderr = command.execute2(**ex_args)

        error = get_7z_errors(stdout, stderr)
        if error or rc != 0:
            error = error or UNKNOWN_ERROR
            if TRACE:
                logger.debug(
                    'extract: failure: {rc}\n'
                    'stderr: {stderr}\nstdout: {stdout}'.format(**locals()))
            errors[entry.path] = error
            continue

        # these are all for a single file path
        warns = get_7z_warnings(stdout) or {}
        wmsg = '\n'.join(warns.values())
        if wmsg:
            if entry.path in warnings:
                warnings[entry.path] += '\n' + wmsg
            else:
                warnings[entry.path] = wmsg

        # finally move that extracted file to its target location, possibly renamed
        source_file_name = fileutils.file_name(entry.path)
        source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
        if not os.path.exists(source_file_loc):
            if entry.path in errors:
                errors[entry.path] += '\nNo file name extracted.'
            else:
                errors[entry.path] = 'No file name extracted.'
            continue

        safe_path = paths.safe_path(entry.path, posix=True)
        target_file_loc = os.path.join(target_dir, safe_path)
        target_file_dir = os.path.dirname(target_file_loc)
        fileutils.create_dir(target_file_dir)

        unique_target_file_loc = extractcode.new_name(target_file_loc,
                                                      is_dir=False)

        if TRACE:
            logger.debug(
                'extract: unique_target_file_loc: from {} to {}'.format(
                    target_file_loc, unique_target_file_loc))

        if os.path.isfile(source_file_loc):
            fileutils.copyfile(source_file_loc, unique_target_file_loc)
        else:
            fileutils.copytree(source_file_loc, unique_target_file_loc)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)
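
The one-by-one trigger above boils down to a cheap set-size comparison rather than grouping entries per directory. A tiny self-contained illustration of that heuristic:

paths_as_is = set(['README.txt', 'readme.TXT', 'src/main.c'])
paths_no_case = set(p.lower() for p in paths_as_is)
# two entries collapse into one when lowercased, so a case-insensitive
# filesystem could clobber them: extract file by file
need_by_file = len(paths_as_is) != len(paths_no_case)
print(need_by_file)  # True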
Example #37
from os.path import dirname
from os.path import abspath
from os.path import getsize
from os.path import getmtime
from os.path import join
from os.path import exists

from commoncode import fileutils


lic_src_dir = abspath(dirname(__file__))
src_dir = dirname(lic_src_dir)
data_dir = join(lic_src_dir, 'data')
licenses_data_dir = join(data_dir, 'licenses')
rules_data_dir = join(data_dir, 'rules')
root_dir = dirname(src_dir)
cache_dir = join(root_dir, '.cache')
license_index_cache_dir = join(cache_dir, 'license_index')

if not exists(license_index_cache_dir):
    fileutils.create_dir(license_index_cache_dir)

# minimum number of tokens a match should have to be considered worth keeping
MIN_MATCH_LENGTH = 4
MIN_MATCH_HIGH_LENGTH = 3

# maximum distance between two matches to merge
MAX_DIST = 120

Example #38
from __future__ import print_function
from __future__ import absolute_import

from os.path import dirname
from os.path import abspath
from os.path import getsize
from os.path import getmtime
from os.path import join
from os.path import exists

from commoncode import fileutils


scan_src_dir = abspath(dirname(__file__))
src_dir = dirname(scan_src_dir)
root_dir = dirname(src_dir)
cache_dir = join(root_dir, '.cache')
scans_cache_dir = join(cache_dir, 'scan_results_caches')

if not exists(scans_cache_dir):
    fileutils.create_dir(scans_cache_dir)


from pkg_resources import get_distribution, DistributionNotFound
try:
    __version__ = get_distribution('scancode-toolkit').version
except DistributionNotFound:
    # package is not installed ??
    __version__ = '2.0.0'
Example #39
from __future__ import print_function
from __future__ import absolute_import

from os.path import dirname
from os.path import abspath
from os.path import getsize
from os.path import getmtime
from os.path import join
from os.path import exists

from commoncode import fileutils


scan_src_dir = abspath(dirname(__file__))
src_dir = dirname(scan_src_dir)
root_dir = dirname(src_dir)
cache_dir = join(root_dir, '.cache')
scans_cache_dir = join(cache_dir, 'scan_results_caches')

if not exists(scans_cache_dir):
    fileutils.create_dir(scans_cache_dir)


from pkg_resources import get_distribution, DistributionNotFound
try:
    __version__ = get_distribution('scancode-toolkit').version
except DistributionNotFound:
    # package is not installed ??
    __version__ = '2.2.1'
Example #40
    def load_or_build(
        licensedcode_cache_dir=licensedcode_cache_dir,
        scancode_cache_dir=scancode_cache_dir,
        check_consistency=SCANCODE_DEV_MODE,
        # used for testing only
        timeout=LICENSE_INDEX_LOCK_TIMEOUT,
        tree_base_dir=scancode_src_dir,
        licenses_data_dir=None,
        rules_data_dir=None,
    ):
        """
        Load or build and save and return a LicenseCache object.

        We either load a cached LicenseIndex or build and cache the index.
        On the side, we load cached or build license db, SPDX symbols and other
        license-related data structures.

        - If the cache does not exist, a new index is built and cached.
        - If `check_consistency` is True, the cache is checked for consistency and
          rebuilt if inconsistent or stale.
        - If `check_consistency` is False, the cache is NOT checked for consistency and
          if the cache files exist but ARE stale, the cache WILL NOT be rebuilt.
        """
        idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR)
        create_dir(idx_cache_dir)
        cache_file = os.path.join(idx_cache_dir, LICENSE_INDEX_FILENAME)

        has_cache = os.path.exists(cache_file) and os.path.getsize(cache_file)

        # bypass check if no consistency check is needed
        if has_cache and not check_consistency:
            try:
                return load_cache_file(cache_file)
            except Exception as e:
                # work around some rare Windows quirks
                import traceback
                print(
                    'Inconsistent License cache: checking and rebuilding index.'
                )
                print(str(e))
                print(traceback.format_exc())

        from licensedcode.models import licenses_data_dir as ldd
        from licensedcode.models import rules_data_dir as rdd
        from licensedcode.models import load_licenses
        from scancode import lockfile

        licenses_data_dir = licenses_data_dir or ldd
        rules_data_dir = rules_data_dir or rdd

        lock_file = os.path.join(scancode_cache_dir, LICENSE_LOCKFILE_NAME)
        checksum_file = os.path.join(scancode_cache_dir, LICENSE_CHECKSUM_FILE)

        has_tree_checksum = os.path.exists(checksum_file)

        # here, we have no cache or we want a validity check: lock, check
        # and build or rebuild as needed
        try:
            # acquire lock and wait until timeout to get a lock or die
            with lockfile.FileLock(lock_file).locked(timeout=timeout):
                current_checksum = None
                # is the current cache consistent or stale?
                if has_cache and has_tree_checksum:
                    # if we have a saved cached index
                    # load saved tree_checksum and compare with current tree_checksum
                    with open(checksum_file) as etcs:
                        existing_checksum = etcs.read()

                    current_checksum = tree_checksum(
                        tree_base_dir=tree_base_dir)
                    if current_checksum == existing_checksum:
                        # The cache is consistent with the latest code and data
                        # load and return
                        return load_cache_file(cache_file)

                # Here, the cache is not consistent with the latest code and
                # data: It is either stale or non-existing: we need to
                # rebuild all cached data (e.g. mostly the index) and cache it

                licenses_db = load_licenses(
                    licenses_data_dir=licenses_data_dir)
                index = build_index(
                    licenses_db=licenses_db,
                    licenses_data_dir=licenses_data_dir,
                    rules_data_dir=rules_data_dir,
                )
                spdx_symbols = build_spdx_symbols(licenses_db=licenses_db)
                unknown_spdx_symbol = build_unknown_spdx_symbol(
                    licenses_db=licenses_db)
                licensing = build_licensing(licenses_db=licenses_db)

                license_cache = LicenseCache(
                    db=licenses_db,
                    index=index,
                    licensing=licensing,
                    spdx_symbols=spdx_symbols,
                    unknown_spdx_symbol=unknown_spdx_symbol,
                )

                # save the cache as a pickle and save the new tree checksum
                with open(cache_file, 'wb') as fn:
                    pickle.dump(license_cache, fn, protocol=PICKLE_PROTOCOL)

                current_checksum = tree_checksum(tree_base_dir=tree_base_dir)
                with open(checksum_file, 'w') as ctcs:
                    ctcs.write(current_checksum)

                return license_cache

        except lockfile.LockTimeout:
            # TODO: handle unable to lock in a nicer way
            raise
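
Stripped of the file lock and the license-specific builders, the cache-validity logic above reduces to comparing a saved tree checksum with a freshly computed one. A reduced sketch of that pattern, where compute_checksum, build, load and save are placeholder callables:

import os

def load_or_rebuild(cache_file, checksum_file, compute_checksum, build, load, save):
    # reuse the cached object only when the saved checksum still matches
    if os.path.exists(cache_file) and os.path.exists(checksum_file):
        with open(checksum_file) as f:
            if f.read() == compute_checksum():
                return load(cache_file)
    # stale or missing cache: rebuild, then persist the object and checksum
    obj = build()
    save(obj, cache_file)
    with open(checksum_file, 'w') as f:
        f.write(compute_checksum())
    return obj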
Example #41
            )

        if spdx_text:
            yield Rule(
                text_file=join(license_obj.src_dir, license_obj.spdx_file),
                licenses=[license_key],
            )


text_tknzr, template_tknzr, _ = index.tokenizers()


# token caching
cache_dir = join(dirname(dirname(src_dir)), '.cache', 'license_tokens')
if not os.path.exists(cache_dir):
    fileutils.create_dir(cache_dir)


class RuleWithNoTokenError(Exception):
    pass


def get_tokens(location, template, use_cache=False):
    """
    Return a list of tokens from a file at location using the tokenizer
    function.
    """
    location = os.path.abspath(location)
    if not exists(location):
        raise RuleWithNoTokenError('Rule text location does not exist: %(location)r' % locals())
#        return []
Example #42
    def dump(self):
        parent = fileutils.parent_directory(self.data_file)
        if not exists(parent):
            fileutils.create_dir(parent)
        with open(self.data_file, 'w') as df:
            df.write(saneyaml.dump(self.to_dict()))
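
The pattern in dump (ensure the parent directory exists, then write) maps directly onto the stdlib; a minimal equivalent without commoncode or saneyaml:

import os

def dump_text(path, text):
    # create the parent directory if needed, then write the file
    parent = os.path.dirname(path)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)
    with open(path, 'w') as f:
        f.write(text)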
Example #43
    def write(self, target_dir, transform_path=lambda x: x, skip_links=True):
        """
        Write entry to a file or directory saved relatively to the `target_dir` and
        return the path where the file or directory was written or None if nothing
        was written to disk. `transform_path` is a callable taking a path and
        returning a transformed path such as resolving relative paths,
        transliterating non-portable characters or other path transformations.
        The default is a no-op lambda.
        """
        if TRACE:
            logger.debug('writing entry: {}'.format(self))

        if not self.archive.archive_struct:
            raise ArchiveErrorIllegalOperationOnClosedArchive()
        # skip links and special files
        if not (self.isfile or self.isdir):
            return

        if skip_links and self.issym:
            return
        if not skip_links and self.issym:
            raise NotImplementedError(
                'extraction of symlinks with libarchive is not yet implemented.'
            )

        abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
        # TODO: return some warning when original path has been transformed
        clean_path = transform_path(self.path)

        if self.isdir:
            # TODO: also rename directories to a new name if needed segment by segment
            dir_path = os.path.join(abs_target_dir, clean_path)
            fileutils.create_dir(dir_path)
            return dir_path

        # note: here isfile=True
        # create parent directories if needed
        target_path = os.path.join(abs_target_dir, clean_path)
        parent_path = os.path.dirname(target_path)

        # TODO: also rename directories to a new name if needed segment by segment
        fileutils.create_dir(parent_path)

        # TODO: return some warning when original path has been renamed?
        unique_path = extractcode.new_name(target_path, is_dir=False)
        if TRACE:
            logger.debug('path: \ntarget_path: {}\nunique_path: {}'.format(
                target_path, unique_path))

        with open(unique_path, 'wb') as target:
            for content in self.get_content():
                if TRACE_DEEP:
                    logger.debug('    chunk: {}'.format(repr(content)))
                target.write(content)

        os.utime(unique_path, (self.time, self.time))

        return target_path
Example #44
from os.path import dirname
from os.path import abspath
from os.path import getsize
from os.path import getmtime
from os.path import join
from os.path import exists

from commoncode import fileutils


lic_src_dir = abspath(dirname(__file__))
src_dir = dirname(lic_src_dir)
data_dir = join(lic_src_dir, 'data')
licenses_data_dir = join(data_dir, 'licenses')
rules_data_dir = join(data_dir, 'rules')
root_dir = dirname(src_dir)
cache_dir = join(root_dir, '.cache')
license_index_cache_dir = join(cache_dir, 'license_index')

if not exists(license_index_cache_dir):
    fileutils.create_dir(license_index_cache_dir)

# minimum number of tokens a match should have to be considered worth keeping
MIN_MATCH_LENGTH = 4
MIN_MATCH_HIGH_LENGTH = 3

# FIXME: we should consider the length of two rules and two matches when considering MAX_DIST
# eventually this should be skipped early right during the matching too
# maximum distance between two matches to merge
MAX_DIST = 120