Code example #1
    def test_parent_directory_on_path_and_location_10(self):
        test_dir = self.get_test_loc('fileutils/basename')
        test_file = 'tst'
        expected_name = '/'
        result = fileutils.parent_directory(test_file)
        result = fileutils.as_posixpath(result)
        assert expected_name == result

        result = fileutils.parent_directory(os.path.join(test_dir, test_file))
        result = fileutils.as_posixpath(result)
        assert result.endswith(expected_name)
Code example #2
def check_files(test_dir, expected):
    """
    Walk test_dir.
    Check that all dirs are readable.
    Check that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.
    """
    result = []
    locs = []
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)

    test_dir_path = fileutils.as_posixpath(test_dir)
    for top, _, files in os.walk(test_dir):
        for f in files:
            location = os.path.join(top, f)
            locs.append(location)
            path = fileutils.as_posixpath(location)
            path = path.replace(test_dir_path, '').strip('/')
            result.append(path)

    assert sorted(expected) == sorted(result)

    for location in locs:
        assert filetype.is_file(location)
        assert not filetype.is_special(location)
        assert filetype.is_readable(location)
Code example #3
    def test_extract_file_function(self):
        test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
        base = fileutils.parent_directory(test_file)
        expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
        cleaned_test_file = test_file.replace(base, '')
        expected_event = [
            extract.ExtractEvent(
                source=cleaned_test_file,
                target=extractcode.get_extraction_path(cleaned_test_file),
                done=False, warnings=[], errors=[]
            ),
            extract.ExtractEvent(
                source=cleaned_test_file,
                target=extractcode.get_extraction_path(cleaned_test_file),
                done=True, warnings=[], errors=[]
            )
        ]

        target = extractcode.get_extraction_path(test_file)
        result = list(extract.extract_file(test_file, target))
        result = [r._replace(
                    source=cleaned_test_file,
                    target=extractcode.get_extraction_path(cleaned_test_file))
                  for r in result]
        assert expected_event == result
        check_files(target, expected)
Code example #4
    def test_fileutils_walk_can_walk_a_single_file(self):
        test_file = self.get_test_loc('fileutils/walk/f')
        result = list(fileutils.walk(test_file))
        expected = [
            (fileutils.parent_directory(test_file), [], ['f'])
        ]
        assert expected == result
Code example #5
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a file
    or create directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the filename.

    The case of the filename is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        location = path_to_bytes(location)
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = fileutils.parent_directory(location)

    # all existing sibling files and directories, lower-cased
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    filename = fileutils.file_name(location)

    # corner case: a name of '.' or '..' cannot be used as-is
    if filename in (DOT, DOT + DOT):
        filename = UNDERSCORE

    # if unique, return this
    if filename.lower() not in siblings_lower:
        return os.path.join(parent, filename)

    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING

    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while True:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, filename)
Code example #6
def test_paths_are_posix_paths_in_html_app_format_output(monkeypatch):
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('posix_path', copy=True)
    runner = CliRunner()
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    result = runner.invoke(cli.scancode, ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json file
    data_file = os.path.join(fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    assert '/posix_path/copyright_acme_c-c.c' in open(data_file).read()
Code example #7
def test_paths_are_posix_paths_in_html_app_format_output():
    test_dir = test_env.get_test_loc('templated/simple')
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')

    result = run_scan_click(['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output

    # the data we want to test is in the data.json file
    data_file = os.path.join(fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    assert '/copyright_acme_c-c.c' in open(data_file).read()
Code example #8
    def test_parent_directory_on_path_and_location(self):
        test_dir = self.get_test_loc("fileutils/basename", copy=True)
        tests = [
            ("a/.a/file", "a/.a/"),
            ("a/.a/", "a/"),
            ("a/b/.a.b", "a/b/"),
            ("a/b/a.tag.gz", "a/b/"),
            ("a/b/", "a/"),
            ("a/f.a", "a/"),
            ("a/", "/"),
            ("f.a/a.c", "f.a/"),
            ("f.a/", "/"),
            ("tst", "/"),
        ]
        for test_file, name in tests:
            result = fileutils.parent_directory(test_file)
            assert name == result
            # also test on location
            result = fileutils.parent_directory(os.path.join(test_dir, test_file))
            assert result.endswith(name)
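
Taken together, the expected values in this table spell out the general contract of fileutils.parent_directory: the returned parent keeps a trailing slash, and a bare name with no directory component falls back to '/'. A minimal sketch of that contract, assuming `fileutils` is scancode-toolkit's `commoncode.fileutils` module:

from commoncode import fileutils  # assumption: scancode-toolkit's fileutils module

# Expected values taken from the test table above.
assert fileutils.parent_directory('a/b/a.tag.gz') == 'a/b/'  # parent keeps its trailing slash
assert fileutils.parent_directory('a/') == '/'               # parent of a top-level directory
assert fileutils.parent_directory('tst') == '/'              # bare name with no parent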
Code example #9
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return

    with codecs.open(location, encoding='utf-8') as loc:
        package_data = json.load(loc, object_pairs_hook=OrderedDict)

    base_dir = fileutils.parent_directory(location)
    metafile_name = fileutils.file_name(location)

    return build_package(package_data, base_dir, metafile_name)
Code example #10
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name.
    The case of the name is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    
    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    if (not name or name == '.'
        # windows bare drive path as in c: or z:
        or (name and len(name) == 2 and name.endswith(':'))):
        name = 'file'

    parent = fileutils.parent_directory(location)
    # all existing sibling files and directories, lower-cased
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    if name.lower() not in siblings_lower:
        return posixpath.join(parent, name)

    ext = fileutils.file_extension(name)
    base_name = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension
        ext = ''
        base_name = name

    counter = 1
    while True:
        new_name = base_name + '_' + str(counter) + ext
        if new_name.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, new_name)
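
Both `new_name` variants above implement the same counter-padding algorithm described in their docstrings. For illustration only, here is a simplified, self-contained sketch of that idea using just the standard library; it is not the scancode-toolkit implementation, and unlike the first variant it keeps only the last extension (e.g. '.gz' rather than '.tar.gz'):

import os


def unique_sibling_name(parent, filename, is_dir=False):
    # Pad the base name with _1, _2, ... until the lower-cased candidate no
    # longer collides with an existing sibling (case-insensitive check, as in
    # both new_name variants above).
    siblings_lower = set(s.lower() for s in os.listdir(parent))
    if filename.lower() not in siblings_lower:
        return os.path.join(parent, filename)
    base_name, ext = (filename, '') if is_dir else os.path.splitext(filename)
    counter = 1
    while True:
        candidate = '%s_%d%s' % (base_name, counter, ext)
        if candidate.lower() not in siblings_lower:
            return os.path.join(parent, candidate)
        counter += 1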
Code example #11
File: cli.py Project: ocabrisses/scancode-toolkit
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return

    scanned_path = os.path.abspath(os.path.normpath(os.path.expanduser(input_path)))
    scanned_path = fileutils.as_posixpath(scanned_path)
    if filetype.is_dir(scanned_path):
        root_dir = scanned_path
    else:
        root_dir = fileutils.parent_directory(scanned_path)
        root_dir = fileutils.as_posixpath(root_dir)

    if full_root:
        return root_dir
    else:
        return fileutils.file_name(root_dir)
Code example #12
File: format.py Project: 10imaging/scancode-toolkit
def as_template(scan_data, template='html'):
    """
    Return a string built from a list of results and the provided template.
    The template defaults to the standard HTML template format or can point to
    the path of a custom template file.
    """
    from licensedcode.models import get_license

    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(abspath(expanduser(template)))
        assert isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    licenses = {}

    # Create a flattened data dict keyed by location
    for scan_result in scan_data:
        location = scan_result['location']
        results = []
        if 'copyrights' in scan_result:
            for entry in scan_result['copyrights']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if 'licenses' in scan_result:
            for entry in scan_result['licenses']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })

                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_license(entry['key'])
        if results:
            converted[location] = sorted(results, key=itemgetter('start'))

        if 'infos' in scan_result:
            converted_infos[location] = scan_result['infos']

        if 'packages' in scan_result:
            converted_packages[location] = scan_result['packages']

        licenses = OrderedDict(sorted(licenses.items()))

    results = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }

    return template.render(results=results, licenses=licenses)
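
The custom-template branch above splits the template path into a directory and a bare file name before handing both to the template loader. A minimal sketch of that split, assuming `fileutils` is scancode-toolkit's `commoncode.fileutils`; the template path used here is hypothetical:

from os.path import abspath, expanduser

from commoncode import fileutils  # assumption: scancode-toolkit's fileutils module

template = '~/templates/my_scan.html'  # hypothetical custom template path
tpath = fileutils.as_posixpath(abspath(expanduser(template)))
tdir = fileutils.parent_directory(tpath)  # directory handed to the template loader
tfile = fileutils.file_name(tpath)        # bare file name, e.g. 'my_scan.html'
# get_template(tdir, tfile) would then load the custom template from that directory.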
Code example #13
def as_template(scanned_files, template):
    """
    Return a string built from a list of `scanned_files` results and
    the provided `template` identifier. The template defaults to the standard HTML
    template format or can point to the path of a custom template file.
    """
    # FIXME: This code is highly coupled with actual scans and may not
    # support adding new scans at all

    from licensedcode.cache import get_licenses_db

    # FIXME: factor out the html vs custom from this function: we should get a template path
    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(template)))
        assert os.path.isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    licenses = {}

    LICENSES = 'licenses'
    COPYRIGHTS = 'copyrights'
    PACKAGES = 'packages'
    URLS = 'urls'
    EMAILS = 'emails'

    # Create a flattened data dict keyed by path
    for scanned_file in scanned_files:
        path = scanned_file['path']
        results = []
        if COPYRIGHTS in scanned_file:
            for entry in scanned_file[COPYRIGHTS]:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if LICENSES in scanned_file:
            for entry in scanned_file[LICENSES]:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })

                # FIXME: we should NOT rely on license objects: only use what is in the JSON instead
                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_licenses_db().get(entry['key'])
        if results:
            converted[path] = sorted(results, key=itemgetter('start'))

        # TODO: this is clunky: we should either drop templates entirely or
        # just pass the list of files from the scan results and let the
        # template handle this, rather than denormalizing the list here.
        converted_infos[path] = OrderedDict()
        for name, value in scanned_file.items():
            if name in (LICENSES, PACKAGES, COPYRIGHTS, EMAILS, URLS):
                continue
            converted_infos[path][name] = value

        if PACKAGES in scanned_file:
            converted_packages[path] = scanned_file[PACKAGES]

        licenses = OrderedDict(sorted(licenses.items()))

    files = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }

    return template.generate(files=files, licenses=licenses)
Code example #14
File: npm.py Project: 10imaging/scancode-toolkit
def parse(location):
    """
    Return a Package object from a package.json
    """
    if not is_package_json(location):
        return

    # mapping of top level package.json items to the Package object field name
    plain_fields = OrderedDict([
        ('name', 'name'),
        ('version', 'version'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('homepage', 'homepage_url'),
    ])

    # mapping of top level package.json items to a function accepting two
    # arguments: the package.json element value and the Package object to update
    field_mappers = OrderedDict([
        ('author', author_mapper),
        ('bugs', bugs_mapper),
        ('contributors', contributors_mapper),
        ('maintainers', maintainers_mapper),
        ('license', licensing_mapper),
        ('licenses', licensing_mapper),
        ('dependencies', dependencies_mapper),
        ('devDependencies', dev_dependencies_mapper),
        ('peerDependencies', peer_dependencies_mapper),
        ('optionalDependencies', optional_dependencies_mapper),
        ('url', url_mapper),
        ('dist', dist_mapper),
        ('repository', repository_mapper),
    ])

    with codecs.open(location, encoding='utf-8') as loc:
        data = json.load(loc, object_pairs_hook=OrderedDict)

    if not data.get('name') or not data.get('version'):
        # a package.json without name and version is not a usable NPM package
        return

    # a package.json is at the root of an NPM package
    base_dir = fileutils.parent_directory(location)
    package = NpmPackage(location=base_dir)
    package.metafile_locations = [location]

    for source, target in plain_fields.items():
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                setattr(package, target, value)

    for source, func in field_mappers.items():
        logger.debug('parse: %(source)r, %(func)r' % locals())
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                func(value, package)

    package.download_urls.append(public_download_url(package.name, package.version))
    return package
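
A hypothetical usage sketch for this parser; the package.json path is an assumption, and the attributes read back are the ones populated by the `plain_fields` mapping and the `location=base_dir` construction above:

package = parse('/tmp/example-npm-package/package.json')  # hypothetical location
if package:
    print(package.name)      # populated via the plain_fields mapping
    print(package.version)
    print(package.location)  # the parent directory that holds package.json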