def test_new_name_with_extensions(self):
        """Check new_name() proposals for files and dirs that have extensions."""
        test_dir = self.get_test_loc("new_name/ext", copy=True)
        # (filename, is_dir, expected new basename); counting is case-insensitive
        # and the extension is preserved for files, suffixed for directories.
        expectations = [
            ("test.txt", False, "test_3.txt"),
            ("TEST.txt", False, "TEST_3.txt"),
            ("TEST.tXt", False, "TEST_3.tXt"),
            ("test.txt", True, "test.txt_2"),
            ("teST.txt", True, "teST.txt_2"),
        ]
        for name, is_dir, expected in expectations:
            renamed = new_name(join(test_dir, name), is_dir=is_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
Example #2
0
    def test_new_name_with_empties(self):
        """new_name() rejects empty paths and maps '.'/trailing-slash inputs sanely."""
        base_dir = self.get_temp_dir()
        # An empty location is a programming error and must raise.
        self.assertRaises(AssertionError, new_name, '', is_dir=False)

        # A trailing slash still yields a usable, non-existing name.
        for as_dir in (False, True):
            renamed = new_name(base_dir + '/', is_dir=as_dir)
            assert renamed
            assert not exists(renamed)
            assert fileutils.file_name(renamed)

        # A '.' filename is replaced by an underscore.
        for as_dir in (False, True):
            renamed = new_name(join(base_dir, '.'), is_dir=as_dir)
            assert not exists(renamed)
            assert '_' == fileutils.file_name(renamed)
    def test_new_name_with_empties(self):
        """new_name() must raise on empty input and normalize '.' and trailing '/'."""
        base_dir = self.get_temp_dir()
        # Empty location: hard failure by contract.
        self.assertRaises(AssertionError, new_name, '', is_dir=False)

        slashed = base_dir + '/'
        dotted = join(base_dir, '.')

        for as_dir in (False, True):
            # Trailing slash: some non-existing name is still produced.
            renamed = new_name(slashed, is_dir=as_dir)
            assert renamed
            assert not exists(renamed)
            assert fileutils.file_name(renamed)
            # '.' basename becomes '_'.
            renamed = new_name(dotted, is_dir=as_dir)
            assert not exists(renamed)
            assert '_' == fileutils.file_name(renamed)
    def test_new_name_with_extensions(self):
        """Counting is case-insensitive; extensions are kept for files, suffixed for dirs."""
        test_dir = self.get_test_loc('new_name/ext', copy=True)
        cases = (
            ('test.txt', False, 'test_3.txt'),
            ('TEST.txt', False, 'TEST_3.txt'),
            ('TEST.tXt', False, 'TEST_3.tXt'),
            ('test.txt', True, 'test.txt_2'),
            ('teST.txt', True, 'teST.txt_2'),
        )
        for name, as_dir, expected in cases:
            renamed = new_name(join(test_dir, name), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
Example #5
0
    def test_new_name_with_extensions(self):
        """new_name() pads the base name and keeps extensions, ignoring name case."""
        test_dir = self.get_test_loc('new_name/ext', copy=True)

        def check(name, as_dir, expected):
            renamed = new_name(join(test_dir, name), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)

        check('test.txt', False, 'test_3.txt')
        check('TEST.txt', False, 'TEST_3.txt')
        check('TEST.tXt', False, 'TEST_3.tXt')
        check('test.txt', True, 'test.txt_2')
        check('teST.txt', True, 'teST.txt_2')
Example #6
0
 def test_file_name_on_path_and_location_10(self):
     # file_name() must return the bare name whether given the bare name
     # itself or a full path ending with that name.
     test_dir = self.get_test_loc('fileutils/basename')
     test_file = 'tst'
     expected_name = 'tst'
     assert expected_name == fileutils.file_name(test_file)
     assert expected_name == fileutils.file_name(os.path.join(test_dir, test_file))
def is_metadata_json(location):
    """
    Return True if `location` path is for a Chef metadata.json file.
    The metadata.json name is also used in Python installed packages inside a
    'dist-info' directory; those are excluded here.
    """
    if not filetype.is_file(location):
        return False
    if fileutils.file_name(location).lower() != 'metadata.json':
        return False
    parent = fileutils.file_name(fileutils.parent_directory(location))
    return not parent.lower().endswith('dist-info')
 def make_locations_relative(self, package_dict):
     """
     Helper to transform absolute locations to a simple file name.
     """
     # Values are rewritten in place; iterating the keys is safe because
     # only values change, never the key set.
     for key in package_dict:
         value = package_dict[key]
         if not value:
             continue
         if key.endswith('location'):
             package_dict[key] = value and fileutils.file_name(value) or None
         if key.endswith('locations'):
             package_dict[key] = [v and fileutils.file_name(v) or None for v in value]
     return package_dict
 def make_locations_relative(self, package_dict):
     """
     Helper to transform absolute locations to a simple file name.
     """
     for key, value in package_dict.items():
         if not value:
             continue
         # Only mutate values, never keys: iterating items() stays valid.
         if key.endswith('location'):
             package_dict[key] = value and fileutils.file_name(value) or None
         if key.endswith('locations'):
             new_values = [v and fileutils.file_name(v) or None for v in value]
             package_dict[key] = new_values
     return package_dict
Example #10
0
def convert_to_utf8(location):
    """
    Convert the file at `location` to UTF-8 text.
    Return the location of the converted file, `location` itself when the file
    is not text or no encoding could be detected, or None.
    """
    if not get_type(location).is_text:
        return location
    # Sniff only the first 4K: enough for chardet to guess without reading
    # a potentially huge file. Use a context manager so the handle is closed
    # (the original `open(...).read(...)` leaked the file descriptor).
    with open(location, 'rb') as sniff:
        start = sniff.read(4096)
    encoding = chardet.detect(start)
    if encoding:
        encoding = encoding.get('encoding', None)
        if encoding:
            # Re-encode to UTF-8 in a temp dir, keeping the same file name.
            target = os.path.join(fileutils.get_temp_dir('markup'),
                                  fileutils.file_name(location))
            with codecs.open(location,
                             'rb',
                             encoding=encoding,
                             errors='replace',
                             buffering=16384) as inf:
                with codecs.open(target, 'wb', encoding='utf-8') as outf:
                    outf.write(inf.read())
            return target
        else:
            # chardet failed somehow to detect an encoding
            return location
Example #11
0
 def is_manifest(cls, location):
     """
     Return True if the file at ``location`` is likely a manifest of this type.
     """
     if not filetype.is_file(location):
         return False
     name = fileutils.file_name(location).lower()
     return name.endswith('.yaml') or name.endswith('.yml')
Example #12
0
def fixed_width_file_name(path, max_length=25):
    """
    Return a fixed width file name of at most `max_length` characters
    extracted from the `path` string and usable for fixed width display.
    If the file_name is longer than `max_length`, it is truncated in the
    middle with using three dots "..." as an ellipsis and the extension
    is kept.

    For example:
    >>> short = fixed_width_file_name('0123456789012345678901234.c')
    >>> assert '0123456789...5678901234.c' == short
    """
    if not path:
        return ''

    # get the path as unicode for display!
    filename = fileutils.file_name(path_to_unicode(path))
    if len(filename) <= max_length:
        return filename

    base_name, extension = fileutils.splitext(filename)
    dots = '...'
    # Budget left for the base name once extension and ellipsis are accounted for.
    keep = max_length - len(extension) - len(dots)
    if keep < (len(extension) + len(dots)) or keep < 5:
        # Not enough room for a meaningful display name.
        return ''

    half = keep // 2
    return base_name[:half] + dots + base_name[-half:] + extension
    def test_new_name_without_extensions(self):
        """new_name() pads extension-less names with an incrementing _N suffix."""
        test_dir = self.get_test_loc('new_name/noext', copy=True)
        for name, as_dir, expected in (
            ('test', False, 'test_4'),
            ('TEST', False, 'TEST_4'),
            ('test_1', True, 'test_1_1'),
        ):
            renamed = new_name(join(test_dir, name), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
Example #14
0
def parse_with_dparse(location):
    """
    Parse the pip/conda/tox/Pipfile dependency file at `location` with dparse.
    Return a list of DependentPackage, or None for directories and files that
    are not a supported dependency file type.
    """
    if filetype.is_dir(location):
        return
    file_name = fileutils.file_name(location)
    supported = (
        filetypes.requirements_txt,
        filetypes.conda_yml,
        filetypes.tox_ini,
        filetypes.pipfile,
        filetypes.pipfile_lock,
    )
    if file_name not in supported:
        return
    # Text mode on Python 3, bytes on Python 2.
    mode = 'rb' if py2 else 'r'
    with open(location, mode) as f:
        content = f.read()
        df = dparse.parse(content, file_type=file_name)
        df_dependencies = df.dependencies
        if not df_dependencies:
            return
        package_dependencies = []
        for df_dependency in df_dependencies:
            specs = df_dependency.specs
            requirement = str(specs) if specs else None
            purl = PackageURL(type='pypi', name=df_dependency.name).to_string()
            package_dependencies.append(
                models.DependentPackage(
                    purl=purl,
                    scope='dependencies',
                    is_runtime=True,
                    is_optional=False,
                    requirement=requirement,
                ))
        return package_dependencies
Example #15
0
    def test_new_name_without_extensions(self):
        """Extension-less names get an incrementing _N counter appended."""
        test_dir = self.get_test_loc('new_name/noext', copy=True)

        def check(name, as_dir, expected):
            renamed = new_name(join(test_dir, name), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)

        check('test', False, 'test_4')
        check('TEST', False, 'TEST_4')
        check('test_1', True, 'test_1_1')
Example #16
0
def get_file_infos(location):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    T = contenttype.get_type(location)
    is_file = T.is_file
    is_dir = T.is_dir
    # NOTE: the `flag and value or None` form is deliberate: it short-circuits
    # (no call made for the wrong kind) and maps falsy results to None.
    infos = OrderedDict([
        ('type', filetype.get_type(location, short=False)),
        ('name', fileutils.file_name(location)),
        ('extension', is_file and fileutils.file_extension(location) or ''),
        ('date', is_file and filetype.get_last_modified_date(location) or None),
        ('size', T.size),
        ('sha1', is_file and sha1(location) or None),
        ('md5', is_file and md5(location) or None),
        ('files_count', is_dir and filetype.get_file_count(location) or None),
        ('mime_type', is_file and T.mimetype_file or None),
        ('file_type', is_file and T.filetype_file or None),
        ('programming_language', is_file and T.programming_language or None),
        ('is_binary', is_file and T.is_binary or None),
        ('is_text', is_file and T.is_text or None),
        ('is_archive', is_file and T.is_archive or None),
        ('is_media', is_file and T.is_media or None),
        ('is_source', is_file and T.is_source or None),
        ('is_script', is_file and T.is_script or None),
    ])
    return [infos]
def fixed_width_file_name(path, max_length=25):
    """
    Return a fixed width file name of at most `max_length` characters
    extracted from the `path` string and usable for fixed width display.
    If the file_name is longer than `max_length`, it is truncated in the
    middle with using three dots "..." as an ellipsis and the extension
    is kept.

    For example:
    >>> short = fixed_width_file_name('0123456789012345678901234.c')
    >>> assert '0123456789...5678901234.c' == short
    """
    if not path:
        return ''

    filename = fileutils.file_name(path)
    if len(filename) <= max_length:
        return filename

    base_name, extension = fileutils.splitext(filename)
    dots = '...'
    # Characters available for the truncated base name.
    keep = max_length - len(extension) - len(dots)
    if keep < (len(extension) + len(dots)) or keep < 5:
        # Too short to display anything meaningful.
        return ''

    half = keep // 2
    return base_name[:half] + dots + base_name[-half:] + extension
    def test_new_name_without_extensions(self):
        """Names without extensions are padded with an incrementing counter."""
        test_dir = self.get_test_loc("new_name/noext", copy=True)
        cases = [
            ("test", False, "test_4"),
            ("TEST", False, "TEST_4"),
            ("test_1", True, "test_1_1"),
        ]
        for name, as_dir, expected in cases:
            renamed = new_name(join(test_dir, name), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
Example #19
0
def parse(location):
    """
    Return a Package built from parsing a file or directory at 'location'
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
            return package
        return

    file_name = fileutils.file_name(location)
    # (suffix, parser) pairs tried in order; first producing a package wins.
    parsers = (
        ('setup.py', parse_setup_py),
        ('requirements.txt', parse_requirements_txt),
        ('requirements.in', parse_requirements_txt),
        ('Pipfile.lock', parse_pipfile_lock),
        ('metadata.json', parse_metadata),
        ('PKG-INFO', parse_unpackaged_source),
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    for suffix, parser in parsers:
        if not file_name.endswith(suffix):
            continue
        package = parser(location)
        if package:
            parent_directory = fileutils.parent_directory(location)
            parse_dependencies(parent_directory, package)
            return package
Example #20
0
def parse2(location):
    """
    Parse using the pkginfo library according the file types and return package.
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
            return package
        return

    file_name = fileutils.file_name(location)
    # Ordered (suffix, parser) table; the first matching parser that yields
    # a package wins.
    parsers = (
        ('setup.py', parse_unpackaged_source),
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    for suffix, parser in parsers:
        if not file_name.endswith(suffix):
            continue
        package = parser(location)
        if package:
            parse_dependencies(fileutils.parent_directory(location), package)
            return package
def get_file_infos(location):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    ftype = contenttype.get_type(location)
    is_file = ftype.is_file
    is_dir = ftype.is_dir
    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    # The `flag and value or None` idiom short-circuits the call for the
    # wrong kind of entry and maps falsy results to None ('' for extension).
    infos['extension'] = is_file and fileutils.file_extension(location) or ''
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = ftype.size
    infos['sha1'] = is_file and sha1(location) or None
    infos['md5'] = is_file and md5(location) or None
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and ftype.mimetype_file or None
    infos['file_type'] = is_file and ftype.filetype_file or None
    infos['programming_language'] = is_file and ftype.programming_language or None
    infos['is_binary'] = is_file and ftype.is_binary or None
    infos['is_text'] = is_file and ftype.is_text or None
    infos['is_archive'] = is_file and ftype.is_archive or None
    infos['is_media'] = is_file and ftype.is_media or None
    infos['is_source'] = is_file and ftype.is_source or None
    infos['is_script'] = is_file and ftype.is_script or None
    return [infos]
Example #22
0
def download_url(url, file_name=None, verify=True, timeout=10):
    """
    Fetch `url` and return the temporary location where the fetched content was
    saved. Use `file_name` if provided or create a new `file_name` base on the last
    url segment. If `verify` is True, SSL certification is performed. Otherwise, no
    verification is done but a warning will be printed.
    `timeout` is the timeout in seconds.
    """
    if not file_name:
        file_name = fileutils.file_name(url)

    try:
        response = requests.get(url, timeout=timeout, verify=verify)
    except (ConnectionError, InvalidSchema):
        logger.error('download_url: Download failed for %(url)r' % locals())
        raise

    status = response.status_code
    if status != 200:
        msg = 'download_url: Download failed for %(url)r with %(status)r' % locals(
        )
        logger.error(msg)
        raise Exception(msg)

    # Save the payload under the chosen name in a fresh temp dir.
    output_file = os.path.join(fileutils.get_temp_dir(prefix='fetch-'), file_name)
    with open(output_file, 'wb') as out:
        out.write(response.content)
    return output_file
Example #23
0
def download_url(url, file_name=None, verify=True):
    """
    Return the temporary location of the file fetched at the remote url. Use
    file_name if provided or create a file name base on the last url segment. If
    verify is True, SSL certification is performed. Otherwise, no verification
    is done but a warning will be printed.
    """
    if not file_name:
        file_name = fileutils.file_name(url)

    try:
        response = requests.get(url, timeout=10, verify=verify)
    except (ConnectionError, InvalidSchema):
        logger.error('fetch: Download failed for %(url)r' % locals())
        raise

    status = response.status_code
    if status != 200:
        msg = 'fetch: Download failed for %(url)r with %(status)r' % locals()
        logger.error(msg)
        raise Exception(msg)

    # Write the payload to a temp dir under the chosen file name.
    output_file = os.path.join(fileutils.get_temp_dir(base_dir='fetch'), file_name)
    with open(output_file, 'wb') as out:
        out.write(response.content)
    return output_file
def get_source_file_path_references(location):
    """
    Yield unique references to source file paths extracted from DWARF debug symbols
    from the Elf file at `location`.

    If there are errors when processing Elfs, these are returned as well as paths
    prefixed with 'ERROR: '.
    """
    if not os.path.exists(location):
        return
    T = contenttype.get_type(location)
    if not T.is_elf:
        return
    unique_files = set()
    unique_paths = set()
    errors = []

    def _collect(path):
        # A path with no slash is a bare file name; keep these apart so they
        # can later be de-duplicated against the basenames of full paths.
        if '/' not in path:
            unique_files.add(path)
        else:
            unique_paths.add(path)

    try:
        with_libdwarf = dwarf.Dwarf(location)
        for path in with_libdwarf.included_source_files:
            _collect(path)
        for path in with_libdwarf.original_source_files:
            _collect(path)
    except Exception as lde:
        msg = str(lde)
        # Keep only the message from 'dwarfdump' onward.
        _, m1, m2 = msg.partition('dwarfdump')
        errors.append(''.join([m1, m2]))

    try:
        with_binutils_nm = dwarf2.get_dwarfs(location)
        for entry in with_binutils_nm:
            _collect(entry.path)
    except Exception as lde:
        # BUG FIX: the original did `errors.append(str)`, appending the `str`
        # builtin itself; this crashed below at `'ERROR: ' + error`.
        errors.append(str(lde))

    # Keep only bare file names not already present as a path basename and
    # not explicitly ignored.
    seen_file_names = set(file_name(p) for p in unique_paths)
    for fn in unique_files:
        if fn not in seen_file_names and fn not in ignores:
            unique_paths.add(fn)

    for error in errors:
        yield 'ERROR: ' + error

    for path in sorted(unique_paths):
        yield path
Example #25
0
def is_requirements_file(location):
    """
    Return True if the ``location`` is likely for a pip requirements file.

    For example::
    >>> is_requirements_file('dev-requirements.txt')
    True
    >>> is_requirements_file('requirements.txt')
    True
    >>> is_requirements_file('requirements.in')
    True
    >>> is_requirements_file('requirements.pip')
    True
    >>> is_requirements_file('requirements-dev.txt')
    True
    >>> is_requirements_file('some-requirements-dev.txt')
    True
    >>> is_requirements_file('reqs.txt')
    False
    >>> is_requirements_file('requires.txt')
    True
    """
    filename = fileutils.file_name(location)
    if filename == 'requires.txt':
        return True
    # Case-sensitive glob matching, as in pip's own conventions.
    patterns = ('*requirements*.txt', '*requirements*.pip', '*requirements*.in')
    return any(fnmatch.fnmatchcase(filename, pattern) for pattern in patterns)
def dwarf_source_path(location):
    """
    Collect unique paths to compiled source code found in Elf binaries DWARF
    sections for D2D.

    Yield each full path once, then bare file names that never appeared as a
    full path. (Removed a no-op `location = location` self-assignment.)
    """
    T = contenttype.get_type(location)
    if not (T.is_elf or T.is_stripped_elf):
        return
    seen_paths = set()
    path_file_names = set()
    bare_file_names = set()
    for dpath in chain(get_dwarf1(location), get_dwarf2(location)):
        if dpath in seen_paths:
            continue
        fn = fileutils.file_name(dpath)
        if fn == dpath:
            # No directory part: a bare file name, held back for later.
            bare_file_names.add(fn)
            continue
        path_file_names.add(fn)
        seen_paths.add(dpath)
        yield dpath
    # only yield filename that do not exist as full paths
    for bfn in sorted(bare_file_names):
        if bfn not in path_file_names and bfn not in seen_paths:
            yield bfn
            seen_paths.add(bfn)
def parse_with_dparse(location):
    """
    Parse the dependency file at `location` with dparse and return a list of
    DependentPackage, or None for directories and unsupported file types.
    """
    is_dir = filetype.is_dir(location)
    if is_dir:
        return
    file_name = fileutils.file_name(location)

    # Map the file name to a dparse file-type constant.
    dependency_type = get_dependency_type(file_name)

    if dependency_type not in (filetypes.requirements_txt,
                         filetypes.conda_yml,
                         filetypes.tox_ini,
                         filetypes.pipfile,
                         filetypes.pipfile_lock):
        return
    # Bytes mode on Python 2, text mode on Python 3.
    if py2:
        mode = 'rb'
    else:
        mode = 'r'
    with open(location, mode) as f:
        content = f.read()

    df = dparse.parse(content, file_type=dependency_type)
    df_dependencies = df.dependencies

    if not df_dependencies:
        return

    package_dependencies = []
    for df_dependency in df_dependencies:
        # NOTE(review): `_specs` is a private attribute of the specifier set;
        # this may break across dparse/packaging versions -- confirm.
        specs = list(df_dependency.specs._specs)
        is_resolved = False
        requirement = None
        # Default purl has no version; replaced below when the spec pins one.
        purl = PackageURL(
            type='pypi',
            name=df_dependency.name
        ).to_string()
        if specs:
            requirement = str(df_dependency.specs)
            for spec in specs:
                operator = spec.operator
                version = spec.version
                # A '==' or '===' pin means the dependency is fully resolved:
                # include the version in the purl.
                if any(operator == element for element in ('==', '===')):
                    is_resolved = True
                    purl = PackageURL(
                        type='pypi',
                        name=df_dependency.name,
                        version=version
                    ).to_string()
        package_dependencies.append(
            models.DependentPackage(
                purl=purl,
                scope='dependencies',
                is_runtime=True,
                is_optional=False,
                is_resolved=is_resolved,
                requirement=requirement
            )
        )

    return package_dependencies
 def is_manifest(cls, location):
     """
     Return True if `location` path is for a Chef metadata.rb file.

     NOTE(review): the original docstring said "metadata.json" (and mentioned
     Python dist-info packages) but the code checks for 'metadata.rb'.
     """
     return (filetype.is_file(location)
         and fileutils.file_name(location).lower() == 'metadata.rb')
Example #29
0
 def recognize(cls, location):
     """Yield a Go module package built from a go.mod or go.sum file."""
     filename = fileutils.file_name(location).lower()
     if filename == 'go.mod':
         yield build_gomod_package(go_mod.parse_gomod(location))
     elif filename == 'go.sum':
         yield build_gosum_package(go_mod.parse_gosum(location))
Example #30
0
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or
    directory at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    # Reuse `name` instead of calling file_name() a second time (the original
    # computed it twice).
    infos['name'] = name
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        # Directories have no extension.
        base_name = name
        extension = ''
    infos['base_name'] = base_name
    infos['extension'] = extension
    infos['date'] = is_file and filetype.get_last_modified_date(
        location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, (
        'sha1',
        'md5',
    )))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    return infos
 def is_manifest(cls, location):
     """
     Return True if the file at ``location`` is likely a manifest of this type.
     """
     known_names = (
         'readme.android',
         'readme.chromium',
         'readme.facebook',
         'readme.google',
         'readme.thirdparty',
     )
     return (filetype.is_file(location)
             and fileutils.file_name(location).lower() in known_names)
Example #32
0
 def is_datafile(cls, location, filetypes=tuple()):
     """
     Return True if `location` path is for a Chef metadata.json file. The
     metadata.json is/was also used in Python legacy wheels in a 'dist-info'
     directory.
     """
     if not super().is_datafile(location, filetypes=filetypes):
         return
     # Exclude Python wheel metadata living under a '*dist-info' directory.
     parent_name = fileutils.file_name(fileutils.parent_directory(location))
     return not parent_name.endswith('dist-info')
Example #33
0
def parse(location):
    """
    Return a Package object from a Cargo.toml/Cargo.lock file.
    Return None for other files and for non-file locations.
    """
    handlers = {
        'cargo.toml': build_cargo_toml_package,
        'cargo.lock': build_cargo_lock_package,
    }
    # `filename` is False for non-files, a lowercased name otherwise;
    # dict.get(False) is simply None.
    filename = filetype.is_file(location) and fileutils.file_name(location).lower()
    handler = handlers.get(filename)
    if handler:
        # The original `handler and handler(...)` guard was redundant:
        # `handler` is already known truthy inside this branch.
        return handler(toml.load(location, _dict=dict))
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a file
    or create directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the filename.

    The case of the filename is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        # Paths are handled as bytes on Linux.
        location = path_to_bytes(location)
    # Strip trailing path separators so the last segment is the actual name.
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = fileutils.parent_directory(location)

    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    filename = fileutils.file_name(location)

    # corner case
    # NOTE(review): the tuple repeats DOT twice; this was likely meant to also
    # cover '..' -- confirm the intended constant.
    if filename in (DOT, DOT):
        filename = UNDERSCORE

    # if unique, return this
    if filename.lower() not in siblings_lower:
        return os.path.join(parent, filename)

    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        # split on the FIRST dot: everything after it is kept as the extension
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING

    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while 1:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, filename)
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a file
    or create directory without overwriting existing files or directories in the
    same parent directory, ignoring the case of the filename.

    The case of the filename is ignored to ensure that similar results are
    returned across case sensitive (*nix) and case insensitive file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location
    if on_linux:
        location = fsencode(location)
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = parent_directory(location)

    # case-folded names of everything already present beside `location`
    taken = set(sib.lower() for sib in os.listdir(parent))

    name = file_name(location)
    if name == DOT:
        # corner case: a bare dot is not a usable name
        name = UNDERSCORE

    if name.lower() not in taken:
        # already unique: nothing more to do
        return join(parent, name)

    # split into a stem and extension; a directory keeps its full name as stem
    if is_dir:
        stem, suffix = name, EMPTY_STRING
    else:
        stem, sep, suffix = name.partition(DOT)
        if sep:
            suffix = sep + suffix
        else:
            stem, suffix = name, EMPTY_STRING

    # append _1, _2, ... to the stem until the candidate name is unique
    counter = 0
    while True:
        counter += 1
        candidate = stem + UNDERSCORE + str(counter) + suffix
        if candidate.lower() not in taken:
            return join(parent, candidate)
    def is_manifest(cls, location):
        """
        Return True if the file at ``location`` is likely a manifest of this type.

        Sub-classes should override to implement their own manifest recognition.
        """
        if not filetype.is_file(location):
            return

        filename = file_name(location)

        # a direct file-name pattern match is decisive on its own
        if any(fnmatch.fnmatchcase(filename, pattern)
               for pattern in cls.file_patterns):
            return True

        T = contenttype.get_type(location)
        ftype = T.filetype_file.lower()
        mtype = T.mimetype_file

        _base_name, extension = splitext_name(location, is_file=True)
        extension = extension.lower()

        if TRACE:
            logger_debug(
                'is_manifest: ftype:', ftype,
                'mtype:', mtype,
                'pygtype:', T.filetype_pygment,
                'fname:', filename,
                'ext:', extension,
            )

        # otherwise require the filetype, mimetype and extension to all match
        has_type = bool(cls.filetypes) and any(t in ftype for t in cls.filetypes)
        has_mime = bool(cls.mimetypes) and any(m in mtype for m in cls.mimetypes)

        has_extension = False
        if cls.extensions:
            lowered = (e.lower() for e in cls.extensions)
            has_extension = any(
                fnmatch.fnmatchcase(extension, ext_pat)
                for ext_pat in lowered)

        if has_type and has_mime and has_extension:
            return True
 def test_file_name_on_path_and_location(self):
     """
     fileutils.file_name() must return the last path segment for both a
     relative path string and an absolute on-disk location, with or
     without a trailing slash.
     """
     test_dir = self.get_test_loc("fileutils/basename", copy=True)
     # pairs of (input path, expected file name)
     tests = [
         ("a/.a/file", "file"),
         ("a/.a/", ".a"),
         ("a/b/.a.b", ".a.b"),
         ("a/b/a.tag.gz", "a.tag.gz"),
         ("a/b/", "b"),
         ("a/f.a", "f.a"),
         ("a/", "a"),
         ("f.a/a.c", "a.c"),
         ("f.a/", "f.a"),
         ("tst", "tst"),
     ]
     for test_file, name in tests:
         result = fileutils.file_name(test_file)
         assert name == result
         # also test on location
         result = fileutils.file_name((os.path.join(test_dir, test_file)))
         assert name == result
 def test_file_name_on_path_and_location(self):
     """
     fileutils.file_name() must return the last path segment for both a
     relative path string and an absolute on-disk location, with or
     without a trailing slash.
     """
     test_dir = self.get_test_loc('fileutils/basename', copy=True)
     # pairs of (input path, expected file name)
     tests = [
         ('a/.a/file', 'file'),
         ('a/.a/', '.a'),
         ('a/b/.a.b', '.a.b'),
         ('a/b/a.tag.gz', 'a.tag.gz'),
         ('a/b/', 'b'),
         ('a/f.a', 'f.a'),
         ('a/', 'a'),
         ('f.a/a.c', 'a.c'),
         ('f.a/', 'f.a'),
         ('tst', 'tst'),
     ]
     for test_file, name in tests:
         result = fileutils.file_name(test_file)
         assert name == result
         # also test on location
         result = fileutils.file_name((os.path.join(test_dir, test_file)))
         assert name == result
Example #39
0
def parse_dependency_file(location):
    """
    Return a PythonPackage built from a dparse-supported dependency file at
    location.
    """
    if not location:
        return

    # the dependency type is recognized from the file name alone
    dependency_type = get_dparse_dependency_type(fileutils.file_name(location))
    if not dependency_type:
        return

    dependencies = parse_with_dparse(location)
    return PythonPackage(dependencies=dependencies)
 def is_java_source(self):
     """
     Return True if this resource is a file whose name matches a common
     Java or AspectJ source extension.

     FIXME: Check the filetype.
     """
     if self.is_file is not True:
         return False
     name = fileutils.file_name(self.location)
     # collapse the verbose if/else True/False chain into one boolean
     return any(fnmatch.fnmatch(name, pat) for pat in ("*.java", "*.aj", "*.ajt"))
Example #41
0
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or
    directory at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    # normalize the location encoding per-platform before probing it
    if on_linux:
        location = path_to_bytes(location)
    else:
        location = path_to_unicode(location)

    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)

    name = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        # directories have no extension
        base_name, extension = name, ''

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)

    if on_linux:
        infos['name'] = path_to_unicode(name)
        infos['base_name'] = path_to_unicode(base_name)
        infos['extension'] = path_to_unicode(extension)
    else:
        infos['name'] = name
        infos['base_name'] = base_name
        infos['extension'] = extension

    # falsy values (empty strings, zero counts) are normalized to None,
    # matching the original and/or expressions
    infos['date'] = (filetype.get_last_modified_date(location) or None) if is_file else None
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = (filetype.get_file_count(location) or None) if is_dir else None
    infos['mime_type'] = (T.mimetype_file or None) if is_file else None
    infos['file_type'] = (T.filetype_file or None) if is_file else None
    infos['programming_language'] = (T.programming_language or None) if is_file else None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    return infos
 def is_java_class(self):
     """
     Return True if this resource is a file whose name looks like a
     compiled Java class file.

     FIXME: Check the filetype.
     """
     if self.is_file is not True:
         return False
     name = fileutils.file_name(self.location)
     # "*?.class" requires at least one character before the ".class" suffix
     return fnmatch.fnmatch(name, "*?.class")
    def recognize(cls, location):
        """
        Yield Package objects recognized from the Starlark BUILD file at
        ``location``, or nothing when this is not a matching manifest.
        """
        if not cls.is_manifest(location):
            return

        # Thanks to Starlark being a Python dialect, we can use the `ast`
        # library to parse it
        with open(location, 'rb') as f:
            tree = ast.parse(f.read())

        build_rules = defaultdict(list)
        for statement in tree.body:
            # We only care about expression statements or assignments whose
            # value is a call to a plain named function.
            # NOTE(fix): the original condition mixed `or`/`and` without
            # parentheses, so any bare expression statement (e.g. a docstring)
            # passed the check and then crashed on `statement.value.func.id`.
            if not isinstance(statement, (ast.Expr, ast.Assign)):
                continue
            value = statement.value
            if not (isinstance(value, ast.Call)
                    and isinstance(value.func, ast.Name)):
                continue

            rule_name = value.func.id
            # Ensure that we are only creating packages from the proper
            # build rules
            if not check_rule_name_ending(rule_name):
                continue

            # Process the rule arguments
            args = {}
            for kw in value.keywords:
                arg_name = kw.arg
                if isinstance(kw.value, ast.Str):
                    args[arg_name] = kw.value.s
                if isinstance(kw.value, ast.List):
                    # We collect the elements of a list if the element is not a
                    # function call
                    args[arg_name] = [
                        elt.s for elt in kw.value.elts
                        if not isinstance(elt, ast.Call)
                    ]
            if args:
                build_rules[rule_name].append(args)

        if build_rules:
            for rule_name, rule_instances_args in build_rules.items():
                for args in rule_instances_args:
                    name = args.get('name')
                    if not name:
                        continue
                    license_files = args.get('licenses')
                    yield cls(name=name,
                              declared_license=license_files,
                              root_path=fileutils.parent_directory(location))
        else:
            # If we don't find anything in the manifest file, we yield a Package
            # with the parent directory as the name
            yield cls(
                name=fileutils.file_name(fileutils.parent_directory(location)))
Example #44
0
def get_relative_path(path, len_base_path, base_is_dir):
    """
    Return a posix relative path from the posix 'path' relative to a
    base path of `len_base_path` length where the base is a directory if
    `base_is_dir` True or a file otherwise.
    """
    path = path_to_unicode(path)
    # for a directory base, strip the base prefix; for a file base, keep
    # only the file name itself
    relative = path[len_base_path:] if base_is_dir else fileutils.file_name(path)
    return relative.lstrip('/')
 def extract_event(item):
     """
     Return a display string for an extract event, or an empty string when
     there is nothing to show.
     """
     if not item:
         return ''
     # `line` must keep this exact name: the format string below pulls it
     # from locals()
     if not verbose:
         line = fileutils.file_name(item.source) or ''
     else:
         if item.done:
             return ''
         line = utils.get_relative_path(original_input, abs_input, as_posixpath(item.source)) or ''
     return 'Extracting: %(line)s' % locals()
    def __init__(self, data_file=None, test_file=None):
        """
        Initialize a test from an optional YAML `data_file` and an optional
        `test_file` path.
        """
        self.data_file = data_file
        self.test_file = test_file
        if self.test_file:
            self.test_file_name = fileutils.file_name(test_file)

        # NOTE(fix): default to an empty mapping; previously `data` was left
        # unbound when there was no data_file, and the attribute assignments
        # below raised a NameError.
        data = {}
        if self.data_file:
            with codecs.open(data_file, mode='rb', encoding='utf-8') as df:
                data = saneyaml.load(df.read())

        self.licenses = data.get('licenses', [])
        self.notes = data.get('notes')
        self.sort = data.get('sort')
        self.expected_failure = data.get('expected_failure', False)
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return

    with codecs.open(location, encoding='utf-8') as loc:
        # preserve the key order of the JSON document
        package_data = json.load(loc, object_pairs_hook=OrderedDict)

    return build_package(
        package_data,
        fileutils.parent_directory(location),
        fileutils.file_name(location),
    )
    def test_new_name_with_empties(self):
        """
        new_name() must refuse an empty location and fall back to the name
        "file" when the location's file name is empty or a bare dot.
        """
        test_dir = self.get_temp_dir()
        self.assertRaises(AssertionError, new_name, "", is_dir=False)

        # nothing is created on disk, so each call is independent and the
        # four combinations can be checked in a loop
        for tail in ("/", "."):
            for as_dir in (False, True):
                renamed = new_name(join(test_dir, tail), is_dir=as_dir)
                assert not exists(renamed)
                assert "file" == fileutils.file_name(renamed)
    def __init__(self, data_file=None, test_file=None):
        """
        Initialize a test from an optional YAML `data_file` and an optional
        `test_file` path.
        """
        self.data_file = data_file
        self.test_file = test_file
        if self.test_file:
            self.test_file_name = fileutils.file_name(test_file)

        # NOTE(fix): default to an empty mapping; previously `data` was left
        # unbound when there was no data_file, and the attribute assignments
        # below raised a NameError.
        data = {}
        if self.data_file:
            with codecs.open(data_file, mode="rb", encoding="utf-8") as df:
                data = saneyaml.load(df.read())

        self.licenses = data.get("licenses", [])
        # TODO: this is for future support of license expressions
        self.license = data.get("license", None)
        self.notes = data.get("notes")
        self.expected_failure = data.get("expected_failure", False)
Example #50
0
def is_patch(location, include_extracted=False):
    """
    Test if a file is a possible patch file. May return True for some files
    that are not patches. Extracted patch files are ignored by default.
    """
    T = typecode.contenttype.get_type(location)
    name = fileutils.file_name(location)

    # a patch is recognized loosely by its filetype or by its name
    looks_like_patch = (
        "diff " in T.filetype_file.lower()
        or ".diff" in name
        or ".patch" in name
    )
    if not looks_like_patch:
        return False

    if extractcode.is_extraction_path(name) and not include_extracted:
        return False

    return True
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name.
    The case of the name is ignored to ensure that similar results are returned
    across case sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and keep
       the extension unchanged.
    """
    assert location

    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    if (not name or name == '.'
        # windows bare drive path as in c: or z:
        or (name and len(name) == 2 and name.endswith(':'))):
        name = 'file'

    parent = fileutils.parent_directory(location)
    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    if name.lower() not in siblings_lower:
        # NOTE(fix): use os.path.join consistently; the original used
        # posixpath.join here but os.path.join below, producing mixed
        # separators on Windows.
        return os.path.join(parent, name)

    ext = fileutils.file_extension(name)
    base_name = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension
        ext = ''
        base_name = name

    counter = 1
    while True:
        # renamed from `new_name` to avoid shadowing this function's own name
        candidate = base_name + '_' + str(counter) + ext
        if candidate.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, candidate)
Example #52
0
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return

    # expand, absolutize and posix-normalize the scanned path
    scanned = os.path.expanduser(input_path)
    scanned = fileutils.as_posixpath(os.path.abspath(os.path.normpath(scanned)))

    if filetype.is_dir(scanned):
        root_dir = scanned
    else:
        root_dir = fileutils.as_posixpath(fileutils.parent_directory(scanned))

    return root_dir if full_root else fileutils.file_name(root_dir)
 def extract_event(item):
     """
     Display an extract event.
     Return an empty string in quiet mode, when there is no item, or when a
     verbose item is already done.
     """
     if quiet:
         return ''
     if not item:
         return ''
     source = item.source
     # Python 2: transliterate non-unicode sources into a safe unicode string
     if not isinstance(source, unicode):
         source = toascii(source, translit=True).decode('utf-8', 'replace')
     if verbose:
         if item.done:
             return ''
         line = source and utils.get_relative_path(path=source, len_base_path=len_base_path, base_is_dir=base_is_dir) or ''
     else:
         line = source and fileutils.file_name(source) or ''
     # the displayed line itself may still need the same transliteration
     if not isinstance(line, unicode):
         line = toascii(line, translit=True).decode('utf-8', 'replace')
     return 'Extracting: %(line)s' % locals()
Example #54
0
    def recon(self, location):
        """
        Scan the files directly under the directory at `location` for Maven
        POM files and sketch component-root recognition for each layout.
        """
        for f in os.listdir(location):
            loc = join(location, f)
            if not filetype.is_file(loc):
                continue
            # a pom is an xml doc
            # NOTE(fix): test the candidate file, not the directory: the
            # original called `is_pom(location)` which never varies per file.
            if not is_pom(loc):
                continue

            if f == 'pom.xml':
                # first case: a maven pom.xml inside a META-INF directory
                # such as in META-INF/maven/log4j/log4j/pom.xml
                # the directory tree has a fixed depth
                # as is: META-INF/maven/groupid/artifactid/pom.xml
                # this will typically be inside a binary jar, so we should find
                # a typical structure above
                try:
                    gggp = dirname(dirname(dirname(dirname(loc))))
                    if fileutils.file_name(gggp) == 'META-INF':
                        # recon here: the root of the component is the parent of
                        # META-INF, return that, with a type and the POM
                        # metafile to parse.
                        pass
                except Exception:
                    # best-effort: the path may be too shallow to have a
                    # great-great-grandparent; never let this abort the scan
                    pass

                # second case: a maven pom.xml at the root of component
                # development tree we should find a few extra clues in the
                # conventional directory structure below for now we take this as
                # being the component root. return that, with a type and the POM
                # metafile to parse.

                pass
            elif f.endswith('.pom'):
                # first case: a maven repo layout
                # the jars are side-by-side with the pom
                # check if there are side-by-side artifacts
                jar = loc.replace('.pom', '.jar')
                if os.path.exists(jar):
                    # return that, with a type and the POM metafile to parse.
                    pass
Example #55
0
def convert_to_utf8(location):
    """
    Convert the file at location to UTF-8 text.
    Return the location of the converted file or None.
    """
    if not contenttype.get_type(location).is_text:
        return location
    # NOTE(fix): close the probe handle; the original leaked the file object
    # from a bare open(...).read() call.
    with open(location, 'rb') as probe:
        start = probe.read(4096)
    encoding = chardet.detect(start)
    if encoding:
        encoding = encoding.get('encoding', None)
        if encoding:
            target = os.path.join(fileutils.get_temp_dir('markup'),
                                  fileutils.file_name(location))
            # re-read with the detected encoding and write back as UTF-8
            with codecs.open(location, 'rb', encoding=encoding,
                             errors='replace', buffering=16384) as inf:
                with codecs.open(target, 'wb', encoding='utf-8') as outf:
                    outf.write(inf.read())
            return target
        else:
            # chardet failed somehow to detect an encoding
            return location
Example #56
0
def get_tokens(location, template, use_cache=False):
    """
    Return a list of tokens from a file at `location` using the template
    tokenizer if `template` is truthy, the plain text tokenizer otherwise.
    Tokens may be loaded from or saved to an on-disk cache when `use_cache`
    is True.

    Raise RuleWithNoTokenError if `location` does not exist.
    """
    location = os.path.abspath(location)
    if not exists(location):
        raise RuleWithNoTokenError('Rule text location does not exist: %(location)r' % locals())

    file_name = fileutils.file_name(location)
    cached_tokens = os.path.join(cache_dir, file_name)
    if use_cache and os.path.exists(cached_tokens):
        # TODO: improve cache check
        tokens = list(load_tokens(cached_tokens))
    else:
        # NOTE(fix): replaced the fragile `x and a or b` idiom with a
        # conditional expression; also removed dead commented-out code.
        tokenizr = template_tknzr if template else text_tknzr
        lines = analysis.unicode_text_lines(location)
        tokens = list(tokenizr(lines))
        if use_cache:
            dump_tokens(cached_tokens, tokens)
    return tokens
    def __init__(self, data_file=None, test_file=None):
        """
        Initialize a test from an optional YAML `data_file` and an optional
        `test_file` path.
        """
        self.data_file = data_file
        self.test_file = test_file
        if self.test_file:
            self.test_file_name = fileutils.file_name(test_file)

        # NOTE(fix): default to an empty mapping; previously `data` was left
        # unbound when there was no data_file, and the attribute assignments
        # below raised a NameError.
        data = {}
        if self.data_file:
            with codecs.open(data_file, mode='rb', encoding='utf-8') as df:
                data = saneyaml.load(df.read())

        self.licenses = data.get('licenses', [])

        # TODO: this is for future support of license expressions
        self.license = data.get('license')
        self.license_choice = data.get('license_choice')

        self.notes = data.get('notes')

        # True if the test is expected to fail
        self.expected_failure = data.get('expected_failure', False)

        # True if the test should be skipped
        self.skip = data.get('skip', False)
Example #58
0
def as_template(scan_data, template='html'):
    """
    Return an string built from a list of results and the provided template.
    The template defaults to the standard HTML template format or can point to
    the path of a custom template file.
    """
    from licensedcode.models import get_license

    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(abspath(expanduser(template)))
        assert isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    licenses = {}

    # Create a flattened data dict keyed by location
    for scan_result in scan_data:
        location = scan_result['location']
        results = []
        if 'copyrights' in scan_result:
            for entry in scan_result['copyrights']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if 'licenses' in scan_result:
            for entry in scan_result['licenses']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })

                # keep one enriched entry per license key
                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_license(entry['key'])
        if results:
            converted[location] = sorted(results, key=itemgetter('start'))

        if 'infos' in scan_result:
            converted_infos[location] = scan_result['infos']

        if 'packages' in scan_result:
            converted_packages[location] = scan_result['packages']

    # NOTE(fix): sort the collected licenses once after the loop; the original
    # rebuilt this sorted OrderedDict on every iteration for no effect.
    licenses = OrderedDict(sorted(licenses.items()))

    results = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }

    return template.render(results=results, licenses=licenses)