Exemple #1
0
    def test_parent_directory_on_path_and_location_10(self):
        """A bare file name with no directory component has '/' as parent."""
        test_dir = self.get_test_loc('fileutils/basename')
        test_file = 'tst'
        expected_name = '/'

        # check on the plain relative path
        parent = fileutils.as_posixpath(fileutils.parent_directory(test_file))
        assert expected_name == parent

        # and again on a real location under the test directory
        location = os.path.join(test_dir, test_file)
        parent = fileutils.as_posixpath(fileutils.parent_directory(location))
        assert parent.endswith(expected_name)
    def recognize(cls, location):
        """
        Yield one or more package objects built from the Starlark build
        manifest file at ``location``.

        Build rules are collected from top-level calls (and assignments whose
        value is a call) to plain names accepted by
        ``check_rule_name_ending``. If no usable rule is found, yield a
        single package named after the parent directory.
        """
        if not cls.is_manifest(location):
            return

        # Thanks to Starlark being a Python dialect, we can use the `ast`
        # library to parse it
        with open(location, 'rb') as f:
            tree = ast.parse(f.read())

        build_rules = defaultdict(list)
        for statement in tree.body:
            # We only care about expression statements and assignments whose
            # value is a call to a plain name.
            # BUG FIX: the original condition `isinstance(Expr) or
            # isinstance(Call) or (isinstance(Assign) and ...)` accepted ANY
            # Expr (e.g. a module docstring) because `and` binds tighter than
            # `or`, then crashed reading `statement.value.func`. The Call/Name
            # checks must apply in every case. (A bare ast.Call is never a
            # statement: it is always wrapped in an ast.Expr.)
            if not isinstance(statement, (ast.Expr, ast.Assign)):
                continue
            value = statement.value
            if not (isinstance(value, ast.Call)
                    and isinstance(value.func, ast.Name)):
                continue
            rule_name = value.func.id
            # Ensure that we are only creating packages from the proper
            # build rules
            if not check_rule_name_ending(rule_name):
                continue
            # Process the rule arguments, keeping string and list-of-string
            # keyword values only.
            args = {}
            for kw in value.keywords:
                arg_name = kw.arg
                if isinstance(kw.value, ast.Str):
                    args[arg_name] = kw.value.s
                if isinstance(kw.value, ast.List):
                    # Keep only plain string elements: the original excluded
                    # only calls and crashed on `elt.s` for other node types
                    # (e.g. a Name reference in the list).
                    args[arg_name] = [
                        elt.s for elt in kw.value.elts
                        if isinstance(elt, ast.Str)
                    ]
            if args:
                build_rules[rule_name].append(args)

        if build_rules:
            for rule_name, rule_instances_args in build_rules.items():
                for args in rule_instances_args:
                    name = args.get('name')
                    if not name:
                        continue
                    license_files = args.get('licenses')
                    yield cls(
                        name=name,
                        declared_license=license_files,
                        root_path=fileutils.parent_directory(location),
                    )
        else:
            # If we don't find anything in the manifest file, we yield a
            # Package with the parent directory as the name
            yield cls(
                name=fileutils.file_name(fileutils.parent_directory(location)))
Exemple #3
0
def get_maven_pom(location=None, text=None, check_is_pom=False, extra_properties=None):
    """
    Return a MavenPom object from a POM file at `location` or provided as a
    `text` string.
    """
    if location and check_is_pom and not is_pom(location):
        return

    pom = MavenPom(location, text)

    if not extra_properties:
        extra_properties = {}

    # merge in properties from a sibling pom.properties file when present
    if location and os.path.exists(location):
        props_file = os.path.join(
            fileutils.parent_directory(location), 'pom.properties')
        if os.path.exists(props_file):
            with open(props_file) as props:
                properties = javaproperties.load(props) or {}
            if TRACE:
                logger.debug('_get_mavenpom: properties: {}'.format(repr(properties)))
            extra_properties.update(properties)

    pom.resolve(**extra_properties)

    # TODO: we cannot do much without these??
    if check_is_pom and not has_basic_pom_attributes(pom):
        if TRACE:
            logger.debug('_get_mavenpom: has_basic_pom_attributes: {}'.format(
                has_basic_pom_attributes(pom)))
        return
    return pom
def check_files(test_dir, expected):
    """
    Walk `test_dir` and verify its files.
    Check that all dirs are readable and that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.
    """
    # a file location is checked against its parent directory tree
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)
    base = fileutils.as_posixpath(test_dir)

    locations = [
        os.path.join(top, f)
        for top, _, files in os.walk(test_dir)
        for f in files
    ]
    # paths relative to test_dir, in posix form
    found = [
        fileutils.as_posixpath(loc).replace(base, '').strip('/')
        for loc in locations
    ]

    assert sorted(expected) == sorted(found)

    for loc in locations:
        assert filetype.is_file(loc)
        assert not filetype.is_special(loc)
        assert filetype.is_readable(loc)
Exemple #5
0
def parse2(location):
    """
    Parse using the pkginfo library according the file types and return package.
    """
    if filetype.is_dir(location):
        # unpackaged source checkout: parse in place
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
            return package
        return

    # dispatch on the file name / suffix, first match wins
    handlers = (
        ('setup.py', parse_unpackaged_source),
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    file_name = fileutils.file_name(location)
    for suffix, handler in handlers:
        if not file_name.endswith(suffix):
            continue
        package = handler(location)
        if package:
            parse_dependencies(fileutils.parent_directory(location), package)
            return package
Exemple #6
0
def get_maven_pom(location=None):
    """
    Return a MavenPom object from a POM file at `location` or provided as a
    `text` string.
    """
    pom = MavenPom(location=location)

    extra_properties = {}

    # do we have a pom.properties file side-by-side?
    # FIXME: we should treat pom.properties as a datafile
    if location and os.path.exists(location):
        pom_properties = os.path.join(
            fileutils.parent_directory(location), 'pom.properties')
        if os.path.exists(pom_properties):
            with open(pom_properties) as props:
                properties = javaproperties.load(props) or {}
            if TRACE:
                logger.debug(f'get_maven_pom: properties: {properties!r}')
            extra_properties.update(properties)

    pom.resolve(**extra_properties)

    # TODO: we cannot do much without these??
    hbpa = has_basic_pom_attributes(pom)
    if not hbpa:
        if TRACE:
            logger.debug(f'get_maven_pom: has_basic_pom_attributes: {hbpa}')
        return
    return pom
Exemple #7
0
def parse(location):
    """
    Return a Package built from parsing a file or directory at 'location'
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
            return package
        return

    # dispatch on the file name / suffix, first match wins
    handlers = (
        ('setup.py', parse_setup_py),
        ('requirements.txt', parse_requirements_txt),
        ('requirements.in', parse_requirements_txt),
        ('Pipfile.lock', parse_pipfile_lock),
        ('metadata.json', parse_metadata),
        ('PKG-INFO', parse_unpackaged_source),
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    file_name = fileutils.file_name(location)
    for suffix, handler in handlers:
        if not file_name.endswith(suffix):
            continue
        package = handler(location)
        if package:
            parse_dependencies(fileutils.parent_directory(location), package)
            return package
Exemple #8
0
 def test_fileutils_walk_can_walk_a_single_file(self):
     """walk() on a single file yields its parent dir with that one file."""
     test_file = self.get_test_loc('fileutils/walk/f')
     parent = fileutils.parent_directory(test_file)
     assert [(parent, [], ['f'])] == list(fileutils.walk(test_file))
    def test_extract_file_function(self):
        """extract_file() yields a not-done then a done event and extracts all files."""
        test_file = self.get_test_loc('extract/basic_non_nested.tar.gz',
                                      copy=True)
        base = fileutils.parent_directory(test_file)
        cleaned_test_file = test_file.replace(base, '')
        expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']

        cleaned_target = extractcode.get_extraction_path(cleaned_test_file)
        # one event before extraction (done=False) and one after (done=True)
        expected_event = [
            extract.ExtractEvent(
                source=cleaned_test_file,
                target=cleaned_target,
                done=done,
                warnings=[],
                errors=[])
            for done in (False, True)
        ]

        target = extractcode.get_extraction_path(test_file)
        # normalize the machine-specific paths out of the observed events
        result = [
            r._replace(source=cleaned_test_file, target=cleaned_target)
            for r in extract.extract_file(test_file, target)
        ]
        assert expected_event == result
        check_files(target, expected)
def check_files(test_dir, expected):
    """
    Walk `test_dir` and verify its files.
    Check that all dirs are readable and that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.
    """
    # when given a file, check the tree of its parent directory
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)
    base_path = fileutils.as_posixpath(test_dir)

    collected = []
    for top, _, files in os.walk(test_dir):
        collected.extend(os.path.join(top, f) for f in files)

    # posix paths relative to test_dir
    relative = [
        fileutils.as_posixpath(loc).replace(base_path, '').strip('/')
        for loc in collected
    ]

    assert sorted(expected) == sorted(relative)

    for loc in collected:
        assert filetype.is_file(loc)
        assert not filetype.is_special(loc)
        assert filetype.is_readable(loc)
    def test_extract_file_function(self):
        """extract_file() yields a start and an end event with no errors."""
        test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
        base = fileutils.parent_directory(test_file)
        cleaned_test_file = test_file.replace(base, '')
        expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']

        cleaned_target = extractcode.get_extraction_path(cleaned_test_file)
        expected_event = [
            extract.ExtractEvent(
                source=cleaned_test_file, target=cleaned_target,
                done=done, warnings=[], errors=[])
            for done in (False, True)
        ]

        target = extractcode.get_extraction_path(test_file)
        # strip machine-specific path prefixes from the observed events
        result = [
            r._replace(source=cleaned_test_file, target=cleaned_target)
            for r in extract.extract_file(test_file, target)
        ]
        assert expected_event == result
        check_files(target, expected)
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return

    # top-level composer.json items copied verbatim to a Package field
    plain_fields = OrderedDict([
        ('name', 'name'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('version', 'version'),
        ('homepage', 'homepage_url'),
    ])

    # top-level composer.json items handled by a mapper function that takes
    # the item value and updates the Package accordingly
    field_mappers = OrderedDict([
        ('authors', author_mapper),
        ('license', licensing_mapper),
        ('require', dependencies_mapper),
        ('require-dev', dev_dependencies_mapper),
        ('repositories', repository_mapper),
        ('support', support_mapper),
    ])

    with codecs.open(location, encoding='utf-8') as loc:
        data = json.load(loc, object_pairs_hook=OrderedDict)

    if not data.get('name') or not data.get('description'):
        # a composer.json without name and description is not a usable PHP composer package
        # name and description fields are required: https://getcomposer.org/doc/04-schema.md#name
        return

    package = PHPComposerPackage()
    # a composer.json is at the root of a PHP composer package
    package.location = fileutils.parent_directory(location)
    package.metafile_locations = [location]

    def cleaned(value):
        # strip plain strings; pass other values through unchanged
        if isinstance(value, basestring):
            return value.strip()
        return value

    for source, target in plain_fields.items():
        value = cleaned(data.get(source))
        if value:
            setattr(package, target, value)

    for source, func in field_mappers.items():
        logger.debug('parse: %(source)r, %(func)r' % locals())
        value = cleaned(data.get(source))
        if value:
            func(value, package)

    vendor_mapper(package)  # Parse vendor from name value
    return package
Exemple #13
0
 def is_datafile(cls, location, filetypes=tuple()):
     """
     Return True if `location` path is for a Chef metadata.json file.
     The metadata.json is/was also used in Python legacy wheels in a
     'dist-info' directory; those are excluded.
     """
     if not super().is_datafile(location, filetypes=filetypes):
         return
     parent = fileutils.file_name(fileutils.parent_directory(location))
     return not parent.endswith('dist-info')
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a file
    or create directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the filename.

    The case of the filename is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        location = path_to_bytes(location)
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = fileutils.parent_directory(location)

    # all existing files or directories, lowercased for case-insensitive tests
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    filename = fileutils.file_name(location)

    # corner case: a bare dot is not a usable file name.
    # BUG FIX: the original tested `filename in (DOT, DOT)` -- a duplicated
    # tuple member. NOTE(review): '..' (a DOTDOT constant) may have been
    # intended as the second member -- TODO confirm; this keeps the original
    # behavior while removing the redundancy.
    if filename == DOT:
        filename = UNDERSCORE

    # if unique, return this
    if filename.lower() not in siblings_lower:
        return os.path.join(parent, filename)

    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        # split on the FIRST dot so multi-part extensions (.tar.gz) survive
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING

    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while True:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, filename)
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a file
    or create directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the filename.

    The case of the filename is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        location = fsencode(location)
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = parent_directory(location)

    # all existing files or directories, lowercased for case-insensitive tests
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    filename = file_name(location)

    # corner case: a bare dot is not a usable file name.
    # BUG FIX: the original tested `filename in (DOT, DOT)` -- a duplicated
    # tuple member. NOTE(review): '..' (a DOTDOT constant) may have been
    # intended as the second member -- TODO confirm; this keeps the original
    # behavior while removing the redundancy.
    if filename == DOT:
        filename = UNDERSCORE

    # if unique, return this
    if filename.lower() not in siblings_lower:
        return join(parent, filename)

    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        # split on the FIRST dot so multi-part extensions (.tar.gz) survive
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING

    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while True:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return join(parent, filename)
def is_metadata_json(location):
    """
    Return True if `location` path is for a Chef metadata.json file.
    The metadata.json name is also used in Python installed packages in a
    'dist-info' directory; those are excluded.
    """
    if not filetype.is_file(location):
        return False
    if fileutils.file_name(location).lower() != 'metadata.json':
        return False
    parent = fileutils.file_name(fileutils.parent_directory(location))
    return not parent.lower().endswith('dist-info')
 def test_parent_directory_on_path_and_location(self):
     """parent_directory returns the parent path, ending with a slash."""
     test_dir = self.get_test_loc("fileutils/basename", copy=True)
     tests = [
         ("a/.a/file", "a/.a/"),
         ("a/.a/", "a/"),
         ("a/b/.a.b", "a/b/"),
         ("a/b/a.tag.gz", "a/b/"),
         ("a/b/", "a/"),
         ("a/f.a", "a/"),
         ("a/", "/"),
         ("f.a/a.c", "f.a/"),
         ("f.a/", "/"),
         ("tst", "/"),
     ]
     for test_path, expected in tests:
         # check on the plain relative path...
         assert expected == fileutils.parent_directory(test_path)
         # ... and on a real location under the test directory
         location = os.path.join(test_dir, test_path)
         assert fileutils.parent_directory(location).endswith(expected)
def test_paths_are_posix_paths_in_html_app_format_output(monkeypatch):
    # pretend stdout is a tty for the CLI run
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('posix_path', copy=True)
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    result = CliRunner().invoke(
        cli.scancode,
        ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json side file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    with open(data_file) as df:
        assert '/posix_path/copyright_acme_c-c.c' in df.read()
def check_files(test_dir, expected, regen=False):
    """
    Walk test_dir.
    Check that all dirs are readable.
    Check that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.

    `expected` is either a list/tuple of relative posix paths, or a string
    path to a JSON file containing such a list. With `regen` True and a JSON
    `expected`, the JSON file is rewritten from the actual walk results
    instead of being checked against.
    """
    result = []
    locs = []
    # when given a file, check the tree of its parent directory instead
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)

    test_dir_path = fileutils.as_posixpath(test_dir)
    for top, _, files in os.walk(test_dir):
        for f in files:
            location = os.path.join(top, f)
            locs.append(location)
            path = fileutils.as_posixpath(location)
            # keep only the path relative to test_dir
            path = path.replace(test_dir_path, '').strip('/')
            result.append(path)

    expected_is_json_file = False
    if not isinstance(expected, (list, tuple)) and expected.endswith('.json'):
        expected_is_json_file = True
        # this is a path to a JSON file
        if regen:
            # binary mode on Python 2, text mode on Python 3
            wmode = 'wb' if py2 else 'w'
            with open(expected, wmode) as ex:
                json.dump(result, ex, indent=2, separators=(',', ':'))
            expected_content = result
        else:
            with open(expected, 'rb') as ex:
                expected_content = json.load(ex, encoding='utf-8', object_pairs_hook=OrderedDict)
    else:
        expected_content = expected

    expected_content = sorted(expected_content)
    result = sorted(result)

    try:
        assert expected_content == result
    except AssertionError:
        # re-assert with clickable file:// links prepended so a failure
        # message points at the on-disk fixtures for easier debugging
        files = [
            'test_dir: file://{}'.format(test_dir),
            'expected: file://{}'.format(expected if expected_is_json_file else ''),
        ]
        assert files + expected_content == result

    for location in locs:
        assert filetype.is_file(location)
        assert not filetype.is_special(location)
        assert filetype.is_readable(location)
def test_paths_are_posix_paths_in_html_app_format_output():
    test_dir = test_env.get_test_loc('templated/simple')
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')

    result = run_scan_click(
        ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output

    # the data we want to test is in the data.json side file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    with open(data_file) as df:
        assert '/copyright_acme_c-c.c' in df.read()
 def test_parent_directory_on_path_and_location(self):
     """parent_directory returns the parent path, ending with a slash."""
     test_dir = self.get_test_loc('fileutils/basename', copy=True)
     tests = [
         ('a/.a/file', 'a/.a/'),
         ('a/.a/', 'a/'),
         ('a/b/.a.b', 'a/b/'),
         ('a/b/a.tag.gz', 'a/b/'),
         ('a/b/', 'a/'),
         ('a/f.a', 'a/'),
         ('a/', '/'),
         ('f.a/a.c', 'f.a/'),
         ('f.a/', '/'),
         ('tst', '/'),
     ]
     for test_path, expected in tests:
         # check on the plain relative path...
         assert expected == fileutils.parent_directory(test_path)
         # ... and on a real location under the test directory
         location = os.path.join(test_dir, test_path)
         assert fileutils.parent_directory(location).endswith(expected)
Exemple #22
0
def parse_metadata(location):
    """
    Return a PythonPackage object built from the Python wheel 'metadata.json'
    file at 'location' or None. Check that the parent directory of 'location'
    contains both a 'METADATA' and a 'DESCRIPTION.rst' file to ensure
    this is a proper metadata.json file.
    """
    if not location or not location.endswith('metadata.json'):
        if TRACE: logger_debug('parse_metadata: not metadata.json:', location)
        return
    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    paths = [
        os.path.join(parent_dir, n) for n in ('METADATA', 'DESCRIPTION.rst')
    ]
    if not all(os.path.exists(p) for p in paths):
        if TRACE: logger_debug('parse_metadata: not extra paths', paths)
        return

    with open(location, 'rb') as infs:
        infos = json.load(infs)

    extensions = infos.get('extensions')
    if TRACE: logger_debug('parse_metadata: extensions:', extensions)
    details = extensions and extensions.get('python.details')
    urls = details and details.get('project_urls')
    homepage_url = urls and urls.get('Home')

    parties = []
    # BUG FIX: the original trace call read `details.get('contacts')`
    # unguarded and raised AttributeError when TRACE was on and `details`
    # was None/missing; compute the guarded value first and log that.
    contacts = details and details.get('contacts') or []
    if TRACE:
        logger_debug('parse_metadata: contacts:', contacts)
    for contact in contacts:
        if TRACE: logger_debug('parse_metadata: contact:', contact)
        name = contact and contact.get('name')
        if not name:
            if TRACE: logger_debug('parse_metadata: no name:', contact)
            continue
        parties.append(
            models.Party(type=models.party_person, name=name, role='contact'))

    description = build_description(infos.get('summary'),
                                    infos.get('description'))

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        description=description or None,
        declared_license=infos.get('license') or None,
        homepage_url=homepage_url or None,
        parties=parties,
    )
    return package
def build_package_name(input_path):
    """
    Return a package name string built from an ``input_path`` path, or a
    fixed default name when ``input_path`` is empty or None.

    For a file path, the name is derived from the parent directory of the
    file; for a directory path, from the directory itself.
    """
    if not input_path:
        return 'scancode-toolkit-analyzed-package'
    # BUG FIX: the original had a doubled `absinput = absinput = ...`
    # assignment; a plain single assignment is intended.
    absinput = os.path.abspath(input_path)
    if os.path.isfile(absinput):
        input_path = parent_directory(absinput)
    return python_safe_name(file_name(input_path))
def test_paths_are_posix_paths_in_html_app_format_output():
    test_dir = test_env.get_test_loc('templated/simple')
    result_file = test_env.get_temp_file(extension='html',
                                         file_name='test_html')
    run_scan_click(['--copyright', test_dir, '--html-app', result_file])

    # the paths we want to check are in the data.js side file
    data_file = os.path.join(fileutils.parent_directory(result_file),
                             'test_html_files', 'data.js')
    with io.open(data_file, encoding='utf-8') as res:
        assert '/copyright_acme_c-c.c' in res.read()

    with open(result_file) as rf:
        assert __version__ in rf.read()
Exemple #25
0
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return

    with codecs.open(location, encoding='utf-8') as loc:
        package_data = json.load(loc, object_pairs_hook=OrderedDict)

    return build_package(
        package_data,
        fileutils.parent_directory(location),
        fileutils.file_name(location),
    )
Exemple #26
0
    def parse(cls, location):
        """Yield one package data object built from the README manifest at `location`."""
        with open(location, encoding='utf-8') as loc:
            readme_manifest = loc.read()

        package_data = build_package(readme_manifest)

        if not package_data.name:
            # fall back to the basename of the parent directory as the name
            package_data.name = fileutils.file_base_name(
                fileutils.parent_directory(location))

        yield package_data
Exemple #27
0
def test_paths_are_posix_paths_in_html_app_format_output():
    test_dir = test_env.get_test_loc('posix_path')
    result_file = test_env.get_temp_file(extension='html',
                                         file_name='test_html')

    result = run_scan_click(
        ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output

    # the data we want to test is in the data.json side file
    data_file = os.path.join(fileutils.parent_directory(result_file),
                             'test_html_files', 'data.json')
    with open(data_file) as df:
        assert 'copyright_acme_c-c.c' in df.read()
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return

    with codecs.open(location, encoding='utf-8') as loc:
        package_data = json.load(loc, object_pairs_hook=OrderedDict)

    base_dir = fileutils.parent_directory(location)
    return build_package(
        package_data, base_dir, fileutils.file_name(location))
Exemple #29
0
 def populate(self, base_dir):
     """
     Collect the `base_dir` for image repositories.
     """
     # FIXME: we are only looking at V11 repos for now.
     for fil in fileutils.file_iter(base_dir):
         fn = fileutils.file_name(fil)
         if fn != MANIFEST_JSON_FILE:
             continue
         repo_dir = parent_directory(fil)
         repo = Repository()
         repo.load_manifest(repo_dir)
         logger_debug('populate: path: %(fn)r' % locals())
         self.repositories[repo_dir] = repo
    def recognize(cls, location):
        """Yield a single package named after the parent directory of `location`."""
        if not cls.is_manifest(location):
            return

        parent = fileutils.parent_directory(location)
        # we could use checksums as version in the future;
        # there is an optional array of license file names in targets, and
        # dependencies, that we could also use eventually
        yield cls(name=fileutils.file_name(parent), version=None)
Exemple #31
0
def get_template(location):
    """
    Return a Jinja template object loaded from the file at `location`.
    """
    from jinja2 import Environment, FileSystemLoader

    location = as_posixpath(abspath(expanduser(location)))
    assert isfile(location)

    # serve templates from the template file's own directory
    env = Environment(loader=FileSystemLoader(parent_directory(location)))
    return env.get_template(file_name(location))
Exemple #32
0
def test_paths_are_posix_paths_in_html_app_format_output(monkeypatch):
    # pretend stdout is a tty for the CLI run
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('posix_path', copy=True)
    result_file = test_env.get_temp_file(extension='html',
                                         file_name='test_html')
    result = CliRunner().invoke(
        cli.scancode,
        ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json side file
    data_file = os.path.join(fileutils.parent_directory(result_file),
                             'test_html_files', 'data.json')
    with open(data_file) as df:
        assert '/posix_path/copyright_acme_c-c.c' in df.read()
Exemple #33
0
def _get_root_dir(input_path, strip_root=False):
    """
    Return a root dir name or None.
    """
    if strip_root:
        root_dir = None
    else:
        _scanned_path = os.path.abspath(os.path.normpath(os.path.expanduser(input_path)))
        if filetype.is_dir(_scanned_path):
            root_dir = _scanned_path
        else:
            root_dir = fileutils.parent_directory(_scanned_path)
        root_dir = fileutils.file_name(root_dir)

    return root_dir
Exemple #34
0
def get_description(metainfo, location=None):
    """
    Return a list of keywords found in a ``metainfo`` object or mapping.
    """
    # newer metadata versions carry the description in the payload
    description = metainfo.get_payload() if hasattr(metainfo, 'get_payload') else None

    if not description:
        # legacy metadata versions use the Description attribute instead
        description = get_attribute(metainfo, 'Description')
        if not description and location:
            # older metadata versions can ship a DESCRIPTION.rst file
            # next to the metadata file
            description = get_legacy_description(
                fileutils.parent_directory(location))

    summary = get_attribute(metainfo, 'Summary')
    return build_description(summary, description)
Exemple #35
0
    def parse(cls, location):
        """
        Yield a single PackageData for the manifest at ``location``, using
        the name of the parent directory as the package name.
        """
        parent_dir = fileutils.parent_directory(location)
        name = fileutils.file_name(parent_dir)

        # we could use checksums as version in the future
        version = None

        # there is an optional array of license file names in targets that we
        # could use as a declared_license, and there are dependencies we could
        # collect too

        yield models.PackageData(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=name,
            version=version,
        )
    def recognize(cls, location):
        """
        Yield one or more Package manifest objects given a file ``location`` pointing to a
        package archive, manifest or similar.
        """
        with open(location, encoding='utf-8') as readme_file:
            readme_manifest = readme_file.read()

        package = build_package(cls, readme_manifest)

        # If no name was detected for the Package, fall back to the basename
        # of the parent directory as the Package name
        if not package.name:
            package.name = fileutils.file_base_name(
                fileutils.parent_directory(location))

        yield package
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name.
    The case of the name is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location

    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    is_empty = not name or name == '.'
    # a bare windows drive path such as c: or z: is not a usable name
    is_bare_drive = bool(name) and len(name) == 2 and name.endswith(':')
    if is_empty or is_bare_drive:
        name = 'file'

    parent = fileutils.parent_directory(location)
    # compare lowercased names so the result is stable across case
    # sensitive and case insensitive file systems
    siblings_lower = {sibling.lower() for sibling in os.listdir(parent)}

    if name.lower() not in siblings_lower:
        return posixpath.join(parent, name)

    if is_dir:
        # directories have no extension: pad the full name
        base_name = name
        ext = ''
    else:
        base_name = fileutils.file_base_name(name)
        ext = fileutils.file_extension(name)

    counter = 1
    while True:
        candidate = base_name + '_' + str(counter) + ext
        if candidate.lower() not in siblings_lower:
            return os.path.join(parent, candidate)
        counter += 1
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name.
    The case of the name is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location

    trimmed = location.rstrip('\\/')
    name = fileutils.file_name(trimmed).strip()
    unusable = (
        not name
        or name == '.'
        # windows bare drive path as in c: or z:
        or (len(name) == 2 and name.endswith(':'))
    )
    if unusable:
        name = 'file'

    parent = fileutils.parent_directory(trimmed)
    # all existing files or directories, lowercased for case-insensitive
    # comparison
    taken = set(existing.lower() for existing in os.listdir(parent))

    if name.lower() not in taken:
        return posixpath.join(parent, name)

    if is_dir:
        # directories have no extension: pad the whole name
        base_name, ext = name, ''
    else:
        base_name = fileutils.file_base_name(name)
        ext = fileutils.file_extension(name)

    counter = 0
    while True:
        counter += 1
        candidate = '%s_%d%s' % (base_name, counter, ext)
        if candidate.lower() not in taken:
            return os.path.join(parent, candidate)
Exemple #39
0
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return None

    # resolve the input path fully, then normalize to POSIX separators
    expanded = os.path.expanduser(input_path)
    scanned_path = fileutils.as_posixpath(
        os.path.abspath(os.path.normpath(expanded)))

    if filetype.is_dir(scanned_path):
        root_dir = scanned_path
    else:
        # for a file input, the root is its containing directory
        root_dir = fileutils.as_posixpath(
            fileutils.parent_directory(scanned_path))

    return root_dir if full_root else fileutils.file_name(root_dir)
Exemple #40
0
def as_template(scan_data, template='html'):
    """
    Return an string built from a list of results and the provided template.
    The template defaults to the standard HTML template format or can point to
    the path of a custom template file.
    """
    from licensedcode.models import get_license

    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(abspath(expanduser(template)))
        assert isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    # license entries seen so far, keyed by license key
    licenses = {}

    # Create a flattened data dict keyed by location
    for scan_result in scan_data:
        location = scan_result['location']
        results = []
        if 'copyrights' in scan_result:
            for entry in scan_result['copyrights']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if 'licenses' in scan_result:
            for entry in scan_result['licenses']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })

                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_license(entry['key'])
        if results:
            converted[location] = sorted(results, key=itemgetter('start'))

        if 'infos' in scan_result:
            converted_infos[location] = scan_result['infos']

        if 'packages' in scan_result:
            converted_packages[location] = scan_result['packages']

    # sort the collected licenses by key once, after all scan results were
    # processed (the original re-sorted on every loop iteration, which was
    # wasted loop-invariant work)
    licenses = OrderedDict(sorted(licenses.items()))

    results = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }

    return template.render(results=results, licenses=licenses)
Exemple #41
0
def parse(location):
    """
    Return a Package object built from the package.json file at `location`,
    or None if this is not a usable NPM package.json.
    """
    if not is_package_json(location):
        return

    # mapping of top level package.json items to the Package object field name
    plain_fields = OrderedDict([
        ('name', 'name'),
        ('version', 'version'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('homepage', 'homepage_url'),
    ])

    # mapping of top level package.json items to a function accepting as arguments:
    # - the package.json element value and a Package Object to update
    field_mappers = OrderedDict([
        ('author', author_mapper),
        ('bugs', bugs_mapper),
        ('contributors', contributors_mapper),
        ('maintainers', maintainers_mapper),
        ('license', licensing_mapper),
        ('licenses', licensing_mapper),
        ('dependencies', dependencies_mapper),
        ('devDependencies', dev_dependencies_mapper),
        ('peerDependencies', peer_dependencies_mapper),
        ('optionalDependencies', optional_dependencies_mapper),
        ('url', url_mapper),
        ('dist', dist_mapper),
        ('repository', repository_mapper),
    ])

    with codecs.open(location, encoding='utf-8') as loc:
        # preserve the original key ordering of the JSON document
        data = json.load(loc, object_pairs_hook=OrderedDict)

    if not data.get('name') or not data.get('version'):
        # a package.json without name and version is not a usable NPM package
        return

    # a package.json is at the root of an NPM package
    base_dir = fileutils.parent_directory(location)
    package = NpmPackage(location=base_dir)
    # note: `location` is recorded here once; the original code also appended
    # it again at the end, duplicating the entry in metafile_locations
    package.metafile_locations = [location]

    for source, target in plain_fields.items():
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                setattr(package, target, value)

    for source, func in field_mappers.items():
        logger.debug('parse: %(source)r, %(func)r' % locals())
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                func(value, package)

    package.download_urls.append(public_download_url(package.name, package.version))
    return package
def as_template(scanned_files, template):
    """
    Return an string built from a list of `scanned_files` results and
    the provided `template` identifier. The template defaults to the standard HTML
    template format or can point to the path of a custom template file.
    """
    # FIXME: This code is highly coupled with actual scans and may not
    # support adding new scans at all

    from licensedcode.cache import get_licenses_db

    # FIXME: factor out the html vs custom from this function: we should get a template path
    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(template)))
        assert os.path.isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    # license entries seen so far, keyed by license key
    licenses = {}

    LICENSES = 'licenses'
    COPYRIGHTS = 'copyrights'
    PACKAGES = 'packages'
    URLS = 'urls'
    EMAILS = 'emails'

    # Create a flattened data dict keyed by path
    for scanned_file in scanned_files:
        path = scanned_file['path']
        results = []
        if COPYRIGHTS in scanned_file:
            for entry in scanned_file[COPYRIGHTS]:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if LICENSES in scanned_file:
            for entry in scanned_file[LICENSES]:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })

                # FIXME: we should NOT rely on license objects: only use what is in the JSON instead
                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_licenses_db().get(entry['key'])
        if results:
            converted[path] = sorted(results, key=itemgetter('start'))

        # TODO: this is klunky: we need to drop templates entirely or we
        # should rather just pass a the list of files from the scan
        # results and let the template handle this rather than
        # denormalizing the list here??
        converted_infos[path] = OrderedDict()
        for name, value in scanned_file.items():
            if name in (LICENSES, PACKAGES, COPYRIGHTS, EMAILS, URLS):
                continue
            converted_infos[path][name] = value

        if PACKAGES in scanned_file:
            converted_packages[path] = scanned_file[PACKAGES]

    # sort the collected licenses by key once, after all files were processed
    # (the original re-sorted on every loop iteration, which was wasted
    # loop-invariant work)
    licenses = OrderedDict(sorted(licenses.items()))

    files = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }

    return template.generate(files=files, licenses=licenses)