def test_parent_directory_on_path_and_location_10(self):
    """Check parent_directory() on a bare relative filename and on a real location."""
    test_dir = self.get_test_loc('fileutils/basename')
    test_file = 'tst'
    expected_name = '/'
    # a bare filename has the filesystem root as its parent
    posix_parent = fileutils.as_posixpath(fileutils.parent_directory(test_file))
    assert expected_name == posix_parent
    # the same filename joined under a real directory: parent ends with '/'
    located = os.path.join(test_dir, test_file)
    posix_parent = fileutils.as_posixpath(fileutils.parent_directory(located))
    assert posix_parent.endswith(expected_name)
def recognize(cls, location):
    """
    Yield Package objects built from the build rules found in the Starlark
    BUILD/manifest file at ``location``. If no usable rule is found, yield a
    single Package named after the parent directory.
    """
    if not cls.is_manifest(location):
        return

    # Thanks to Starlark being a Python dialect, we can use the `ast`
    # library to parse it
    with open(location, 'rb') as f:
        tree = ast.parse(f.read())

    build_rules = defaultdict(list)
    for statement in tree.body:
        # We only care about top-level function calls: either bare
        # expression statements or the right-hand side of an assignment.
        # NOTE: the previous condition `isinstance(statement, ast.Expr) or
        # isinstance(statement, ast.Call) or isinstance(statement, ast.Assign)
        # and ...` bound the Call/Name checks only to the Assign branch
        # (because `and` binds tighter than `or`), so any bare expression
        # (e.g. a module docstring) passed the test and then crashed on
        # `statement.value.func`. The checks below apply to both kinds.
        if isinstance(statement, (ast.Expr, ast.Assign)):
            call = statement.value
        else:
            continue
        if not (isinstance(call, ast.Call) and isinstance(call.func, ast.Name)):
            continue
        rule_name = call.func.id
        # Ensure that we are only creating packages from the proper
        # build rules
        if not check_rule_name_ending(rule_name):
            continue
        # Process the rule arguments
        args = {}
        for kw in call.keywords:
            arg_name = kw.arg
            if isinstance(kw.value, ast.Str):
                args[arg_name] = kw.value.s
            if isinstance(kw.value, ast.List):
                # We collect the elements of a list if the element is not a function call
                args[arg_name] = [
                    elt.s for elt in kw.value.elts
                    if not isinstance(elt, ast.Call)
                ]
        if args:
            build_rules[rule_name].append(args)

    if build_rules:
        for rule_name, rule_instances_args in build_rules.items():
            for args in rule_instances_args:
                name = args.get('name')
                if not name:
                    continue
                license_files = args.get('licenses')
                yield cls(
                    name=name,
                    declared_license=license_files,
                    root_path=fileutils.parent_directory(location),
                )
    else:
        # If we don't find anything in the manifest file, we yield a Package with
        # the parent directory as the name
        yield cls(
            name=fileutils.file_name(fileutils.parent_directory(location)))
def get_maven_pom(location=None, text=None, check_is_pom=False, extra_properties=None):
    """
    Return a MavenPom object built from a POM file at `location` or provided
    as a `text` string. Return None when `check_is_pom` is True and the input
    is not a POM or lacks basic POM attributes.
    """
    if location and check_is_pom and not is_pom(location):
        return

    pom = MavenPom(location, text)

    if not extra_properties:
        extra_properties = {}

    # merge in properties from a pom.properties file sitting next to the POM
    if location and os.path.exists(location):
        pom_properties = os.path.join(
            fileutils.parent_directory(location), 'pom.properties')
        if os.path.exists(pom_properties):
            with open(pom_properties) as props:
                properties = javaproperties.load(props) or {}
                if TRACE:
                    logger.debug('_get_mavenpom: properties: {}'.format(repr(properties)))
                extra_properties.update(properties)

    pom.resolve(**extra_properties)

    # TODO: we cannot do much without these??
    if check_is_pom and not has_basic_pom_attributes(pom):
        if TRACE:
            logger.debug('_get_mavenpom: has_basic_pom_attributes: {}'.format(
                has_basic_pom_attributes(pom)))
        return
    return pom
def check_files(test_dir, expected):
    """
    Walk test_dir. Check that all dirs are readable. Check that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.
    """
    found_paths = []
    found_locations = []
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)
    base_posix = fileutils.as_posixpath(test_dir)
    for top, _, files in os.walk(test_dir):
        for fname in files:
            loc = os.path.join(top, fname)
            found_locations.append(loc)
            # path relative to test_dir, in posix form
            rel = fileutils.as_posixpath(loc).replace(base_posix, '').strip('/')
            found_paths.append(rel)

    assert sorted(expected) == sorted(found_paths)

    for loc in found_locations:
        assert filetype.is_file(loc)
        assert not filetype.is_special(loc)
        assert filetype.is_readable(loc)
def parse2(location):
    """
    Parse using the pkginfo library according the file types and return package.
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
        return package

    filename = fileutils.file_name(location)
    handlers = (
        ('setup.py', parse_unpackaged_source),
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    for suffix, handler in handlers:
        if filename.endswith(suffix):
            package = handler(location)
            if package:
                parse_dependencies(
                    fileutils.parent_directory(location), package)
                return package
def get_maven_pom(location=None):
    """
    Return a MavenPom object from a POM file at `location` or provided as a
    `text` string. Return None when the resolved POM lacks basic attributes.
    """
    pom = MavenPom(location=location)

    extra_properties = {}
    # do we have a pom.properties file side-by-side?
    # FIXME: we should treat pom.properties as a datafile
    if location and os.path.exists(location):
        props_file = os.path.join(
            fileutils.parent_directory(location), 'pom.properties')
        if os.path.exists(props_file):
            with open(props_file) as props:
                properties = javaproperties.load(props) or {}
                if TRACE:
                    logger.debug(f'get_maven_pom: properties: {properties!r}')
                extra_properties.update(properties)

    pom.resolve(**extra_properties)

    # TODO: we cannot do much without these??
    hbpa = has_basic_pom_attributes(pom)
    if not hbpa:
        if TRACE:
            logger.debug(f'get_maven_pom: has_basic_pom_attributes: {hbpa}')
        return
    return pom
def parse(location):
    """
    Return a Package built from parsing a file or directory at 'location'
    """
    if filetype.is_dir(location):
        package = parse_unpackaged_source(location)
        if package:
            parse_dependencies(location, package)
            return package
        return

    filename = fileutils.file_name(location)
    # map a filename suffix to its parser, checked in order
    handlers = (
        ('setup.py', parse_setup_py),
        ('requirements.txt', parse_requirements_txt),
        ('requirements.in', parse_requirements_txt),
        ('Pipfile.lock', parse_pipfile_lock),
        ('metadata.json', parse_metadata),
        ('PKG-INFO', parse_unpackaged_source),
        ('.whl', parse_wheel),
        ('.egg', parse_egg_binary),
        ('.tar.gz', parse_source_distribution),
        ('.zip', parse_source_distribution),
    )
    for suffix, handler in handlers:
        if filename.endswith(suffix):
            package = handler(location)
            if package:
                parse_dependencies(
                    fileutils.parent_directory(location), package)
                return package
def test_fileutils_walk_can_walk_a_single_file(self):
    """walk() on a single file yields its parent dir with only that file listed."""
    test_file = self.get_test_loc('fileutils/walk/f')
    expected = [(fileutils.parent_directory(test_file), [], ['f'])]
    assert expected == list(fileutils.walk(test_file))
def test_extract_file_function(self):
    """extract_file yields a not-done then a done event and extracts all entries."""
    test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    base = fileutils.parent_directory(test_file)
    expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
    cleaned_test_file = test_file.replace(base, '')
    cleaned_target = extractcode.get_extraction_path(cleaned_test_file)
    # events use the base-stripped source/target so they compare stably
    expected_event = [
        extract.ExtractEvent(
            source=cleaned_test_file,
            target=cleaned_target,
            done=done, warnings=[], errors=[])
        for done in (False, True)
    ]
    target = extractcode.get_extraction_path(test_file)
    events = [
        e._replace(source=cleaned_test_file, target=cleaned_target)
        for e in extract.extract_file(test_file, target)
    ]
    assert expected_event == events
    check_files(target, expected)
def test_extract_file_function(self):
    """extract_file emits start/done ExtractEvents and extracts every entry."""
    test_file = self.get_test_loc('extract/basic_non_nested.tar.gz', copy=True)
    base = fileutils.parent_directory(test_file)
    expected = ['a/b/a.txt', 'a/b/b.txt', 'a/c/c.txt']
    stripped_source = test_file.replace(base, '')
    stripped_target = extractcode.get_extraction_path(stripped_source)

    def make_event(done):
        # both events share source/target; only the done flag differs
        return extract.ExtractEvent(
            source=stripped_source,
            target=stripped_target,
            done=done,
            warnings=[],
            errors=[],
        )

    expected_event = [make_event(False), make_event(True)]
    target = extractcode.get_extraction_path(test_file)
    observed = []
    for event in extract.extract_file(test_file, target):
        observed.append(
            event._replace(source=stripped_source, target=stripped_target))
    assert expected_event == observed
    check_files(target, expected)
def parse(location):
    """
    Return a Package object from a composer.json file at ``location`` or None
    if this is not a usable composer.json (missing, or lacking the required
    name and description fields).
    """
    if not is_phpcomposer_json(location):
        return

    # mapping of top level composer.json items to the Package object field name
    plain_fields = OrderedDict([
        ('name', 'name'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('version', 'version'),
        ('homepage', 'homepage_url'),
    ])

    # mapping of top level composer.json items to a function accepting as arguments
    # the composer.json element value and returning an iterable of key, values Package Object to update
    field_mappers = OrderedDict([
        ('authors', author_mapper),
        ('license', licensing_mapper),
        ('require', dependencies_mapper),
        ('require-dev', dev_dependencies_mapper),
        ('repositories', repository_mapper),
        ('support', support_mapper),
    ])

    # object_pairs_hook=OrderedDict preserves the JSON key order
    with codecs.open(location, encoding='utf-8') as loc:
        data = json.load(loc, object_pairs_hook=OrderedDict)

    if not data.get('name') or not data.get('description'):
        # a composer.json without name and description is not a usable PHP composer package
        # name and description fields are required: https://getcomposer.org/doc/04-schema.md#name
        return

    package = PHPComposerPackage()
    # a composer.json is at the root of a PHP composer package
    base_dir = fileutils.parent_directory(location)
    package.location = base_dir
    package.metafile_locations = [location]

    # copy simple scalar fields directly onto the package, skipping
    # empty/whitespace-only values
    for source, target in plain_fields.items():
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                setattr(package, target, value)

    # delegate structured fields to their mapper functions, which mutate
    # the package in place
    for source, func in field_mappers.items():
        logger.debug('parse: %(source)r, %(func)r' % locals())
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                func(value, package)

    # Parse vendor from name value
    vendor_mapper(package)
    return package
def is_datafile(cls, location, filetypes=tuple()):
    """
    Return True if `location` path is for a Chef metadata.json file. The
    metadata.json is/was also used in Python legacy wheels in a 'dist-info'
    directory; those are excluded here.
    """
    if not super().is_datafile(location, filetypes=filetypes):
        return
    parent_name = fileutils.file_name(fileutils.parent_directory(location))
    return not parent_name.endswith('dist-info')
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a
    file or create directory without overwriting existing files or
    directories in the same parent directory, ignoring the case of the
    filename. The case of the filename is ignored to ensure that similar
    results are returned across case sensitive (*nix) and case insensitive
    file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and
       keep the extension unchanged.
    """
    assert location
    if on_linux:
        location = path_to_bytes(location)
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = fileutils.parent_directory(location)

    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    filename = fileutils.file_name(location)

    # corner case: neither '.' nor '..' can be used as a new name.
    # NOTE: this previously tested `(DOT, DOT)` which listed '.' twice and
    # therefore never matched '..'
    if filename in (DOT, DOT + DOT):
        filename = UNDERSCORE

    # if unique, return this
    if filename.lower() not in siblings_lower:
        return os.path.join(parent, filename)

    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING

    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while 1:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, filename)
def new_name(location, is_dir=False):
    """
    Return a new non-existing location from a `location` usable to write a
    file or create directory without overwriting existing files or
    directories in the same parent directory, ignoring the case of the
    filename. The case of the filename is ignored to ensure that similar
    results are returned across case sensitive (*nix) and case insensitive
    file systems.

    To find a new unique filename, this tries new names this way:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and
       keep the extension unchanged.
    """
    assert location
    if on_linux:
        location = fsencode(location)
    location = location.rstrip(PATHS_SEPS)
    assert location

    parent = parent_directory(location)

    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))

    filename = file_name(location)

    # corner case: neither '.' nor '..' can be used as a new name.
    # NOTE: this previously tested `(DOT, DOT)` which listed '.' twice and
    # therefore never matched '..'
    if filename in (DOT, DOT + DOT):
        filename = UNDERSCORE

    # if unique, return this
    if filename.lower() not in siblings_lower:
        return join(parent, filename)

    # otherwise seek a unique name
    if is_dir:
        # directories do not have an "extension"
        base_name = filename
        ext = EMPTY_STRING
    else:
        base_name, dot, ext = filename.partition(DOT)
        if dot:
            ext = dot + ext
        else:
            base_name = filename
            ext = EMPTY_STRING

    # find a unique filename, adding a counter int to the base_name
    counter = 1
    while 1:
        filename = base_name + UNDERSCORE + str(counter) + ext
        if filename.lower() not in siblings_lower:
            break
        counter += 1
    return join(parent, filename)
def is_metadata_json(location):
    """
    Return True if `location` path is for a Chef metadata.json file. The
    metadata.json is also used in Python installed packages in a 'dist-info'
    directory; those are excluded.
    """
    if not filetype.is_file(location):
        return False
    if fileutils.file_name(location).lower() != 'metadata.json':
        return False
    parent_name = fileutils.file_name(fileutils.parent_directory(location))
    return not parent_name.lower().endswith('dist-info')
def test_parent_directory_on_path_and_location(self):
    """parent_directory returns the same trailing parent on plain paths and locations."""
    test_dir = self.get_test_loc("fileutils/basename", copy=True)
    tests = [
        ("a/.a/file", "a/.a/"),
        ("a/.a/", "a/"),
        ("a/b/.a.b", "a/b/"),
        ("a/b/a.tag.gz", "a/b/"),
        ("a/b/", "a/"),
        ("a/f.a", "a/"),
        ("a/", "/"),
        ("f.a/a.c", "f.a/"),
        ("f.a/", "/"),
        ("tst", "/"),
    ]
    for path, expected_parent in tests:
        assert expected_parent == fileutils.parent_directory(path)
        # also test on a real on-disk location
        location = os.path.join(test_dir, path)
        assert fileutils.parent_directory(location).endswith(expected_parent)
def test_paths_are_posix_paths_in_html_app_format_output(monkeypatch):
    """html-app output stores POSIX paths in its data.json side file."""
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('posix_path', copy=True)
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    runner = CliRunner()
    result = runner.invoke(cli.scancode, [
        '--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    with open(data_file) as df:
        assert '/posix_path/copyright_acme_c-c.c' in df.read()
def check_files(test_dir, expected, regen=False):
    """
    Walk test_dir. Check that all dirs are readable. Check that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.

    `expected` is either a list/tuple of relative posix paths, or the path to
    a JSON file holding such a list. When `regen` is True and `expected` is a
    JSON file path, the file is rewritten from the walk results instead of
    being checked against.
    """
    result = []
    locs = []
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)
    test_dir_path = fileutils.as_posixpath(test_dir)
    for top, _, files in os.walk(test_dir):
        for f in files:
            location = os.path.join(top, f)
            locs.append(location)
            path = fileutils.as_posixpath(location)
            # make the walked path relative to test_dir
            path = path.replace(test_dir_path, '').strip('/')
            result.append(path)

    expected_is_json_file = False
    if not isinstance(expected, (list, tuple)) and expected.endswith('.json'):
        expected_is_json_file = True
        # this is a path to a JSON file
        if regen:
            # regenerate the expectation file from the current results
            wmode = 'wb' if py2 else 'w'
            with open(expected, wmode) as ex:
                json.dump(result, ex, indent=2, separators=(',', ':'))
            expected_content = result
        else:
            with open(expected, 'rb') as ex:
                expected_content = json.load(
                    ex, encoding='utf-8', object_pairs_hook=OrderedDict)
    else:
        expected_content = expected

    expected_content = sorted(expected_content)
    result = sorted(result)

    try:
        assert expected_content == result
    except AssertionError:
        # on failure, re-assert with clickable file:// URLs prepended so the
        # test output points at the inputs being compared
        files = [
            'test_dir: file://{}'.format(test_dir),
            'expected: file://{}'.format(expected if expected_is_json_file else ''),
        ]
        assert files + expected_content == result

    for location in locs:
        assert filetype.is_file(location)
        assert not filetype.is_special(location)
        assert filetype.is_readable(location)
def test_paths_are_posix_paths_in_html_app_format_output():
    """html-app output stores POSIX paths in its data.json side file."""
    test_dir = test_env.get_test_loc('templated/simple')
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    result = run_scan_click(
        ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    with open(data_file) as df:
        assert '/copyright_acme_c-c.c' in df.read()
def test_parent_directory_on_path_and_location(self):
    """parent_directory agrees between plain relative paths and real locations."""
    test_dir = self.get_test_loc('fileutils/basename', copy=True)
    cases = (
        ('a/.a/file', 'a/.a/'),
        ('a/.a/', 'a/'),
        ('a/b/.a.b', 'a/b/'),
        ('a/b/a.tag.gz', 'a/b/'),
        ('a/b/', 'a/'),
        ('a/f.a', 'a/'),
        ('a/', '/'),
        ('f.a/a.c', 'f.a/'),
        ('f.a/', '/'),
        ('tst', '/'),
    )
    for rel_path, parent in cases:
        assert parent == fileutils.parent_directory(rel_path)
        # also test on location
        on_disk = fileutils.parent_directory(os.path.join(test_dir, rel_path))
        assert on_disk.endswith(parent)
def parse_metadata(location):
    """
    Return a Package object from the Python wheel 'metadata.json' file at
    'location' or None. Check if the parent directory of 'location' contains
    both a 'METADATA' and a 'DESCRIPTION.rst' file to ensure this is a proper
    metadata.json file.
    """
    if not location or not location.endswith('metadata.json'):
        if TRACE:
            logger_debug('parse_metadata: not metadata.json:', location)
        return
    parent_dir = fileutils.parent_directory(location)
    # FIXME: is the absence of these two files a show stopper?
    paths = [
        os.path.join(parent_dir, n)
        for n in ('METADATA', 'DESCRIPTION.rst')
    ]
    if not all(os.path.exists(p) for p in paths):
        if TRACE:
            logger_debug('parse_metadata: not extra paths', paths)
        return

    with open(location, 'rb') as infs:
        infos = json.load(infs)

    # navigate the optional nested JSON structure with truthy-guarded lookups
    extensions = infos.get('extensions')
    if TRACE:
        logger_debug('parse_metadata: extensions:', extensions)
    details = extensions and extensions.get('python.details')
    urls = details and details.get('project_urls')
    homepage_url = urls and urls.get('Home')

    parties = []
    if TRACE:
        # NOTE(review): this accesses details.get() without the truthy guard
        # used elsewhere; presumably crashes under TRACE when 'python.details'
        # is absent — confirm
        logger_debug('parse_metadata: contacts:', details.get('contacts'))
    contacts = details and details.get('contacts') or []
    for contact in contacts:
        if TRACE:
            logger_debug('parse_metadata: contact:', contact)
        name = contact and contact.get('name')
        if not name:
            if TRACE:
                logger_debug('parse_metadata: no name:', contact)
            continue
        # each named contact becomes a person Party with a 'contact' role
        parties.append(
            models.Party(type=models.party_person, name=name, role='contact'))

    description = build_description(
        infos.get('summary'),
        infos.get('description'))

    package = PythonPackage(
        name=infos.get('name'),
        version=infos.get('version'),
        description=description or None,
        declared_license=infos.get('license') or None,
        homepage_url=homepage_url or None,
        parties=parties,
    )
    return package
def build_package_name(input_path):
    """
    Return a package name built from an ``input_path`` path: the safe name of
    the input directory (or of the parent directory when the input is a
    file), or a fixed fallback name when ``input_path`` is empty.
    """
    if input_path:
        # NOTE: fixed a duplicated assignment (`absinput = absinput = ...`)
        absinput = os.path.abspath(input_path)
        if os.path.isfile(absinput):
            input_path = parent_directory(absinput)
        return python_safe_name(file_name(input_path))
    return 'scancode-toolkit-analyzed-package'
def test_paths_are_posix_paths_in_html_app_format_output():
    """--html-app output stores POSIX paths in its data.js side file."""
    test_dir = test_env.get_test_loc('templated/simple')
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    run_scan_click(['--copyright', test_dir, '--html-app', result_file])
    # the data we want to test is in the data.js file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.js')
    with io.open(data_file, encoding='utf-8') as res:
        assert '/copyright_acme_c-c.c' in res.read()
    with open(result_file) as html:
        assert __version__ in html.read()
def parse(location):
    """
    Return a Package object from a composer.json file or None.
    """
    if not is_phpcomposer_json(location):
        return
    with codecs.open(location, encoding='utf-8') as loc:
        package_data = json.load(loc, object_pairs_hook=OrderedDict)
    # a composer.json lives at the root of a composer package
    return build_package(
        package_data,
        fileutils.parent_directory(location),
        fileutils.file_name(location),
    )
def parse(cls, location):
    """Yield package data parsed from the README manifest at `location`."""
    with open(location, encoding='utf-8') as loc:
        manifest_text = loc.read()
    package_data = build_package(manifest_text)
    if not package_data.name:
        # If no name was detected for the Package, fall back to the basename
        # of the parent directory as the Package name
        package_data.name = fileutils.file_base_name(
            fileutils.parent_directory(location))
    yield package_data
def test_paths_are_posix_paths_in_html_app_format_output():
    """html-app format writes scanned paths into a data.json side file."""
    test_dir = test_env.get_test_loc('posix_path')
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    result = run_scan_click(
        ['--copyright', '--format', 'html-app', test_dir, result_file])
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    with open(data_file) as df:
        assert 'copyright_acme_c-c.c' in df.read()
def populate(self, base_dir):
    """
    Collect the `base_dir` for image repositories.
    """
    # FIXME: we are only looking at V11 repos for now.
    for location in fileutils.file_iter(base_dir):
        fn = fileutils.file_name(location)
        if fn != MANIFEST_JSON_FILE:
            continue
        repo_dir = parent_directory(location)
        repo = Repository()
        repo.load_manifest(repo_dir)
        # `fn` is interpolated by name via locals() below
        logger_debug('populate: path: %(fn)r' % locals())
        self.repositories[repo_dir] = repo
def recognize(cls, location):
    """Yield a single package named after the parent directory of `location`."""
    if not cls.is_manifest(location):
        return
    # we use the parent directory as a name;
    # we could use checksums as version in the future.
    # there is an optional array of license file names in targets that we
    # could use, and there are dependencies we could collect too.
    parent_name = fileutils.file_name(fileutils.parent_directory(location))
    yield cls(name=parent_name, version=None)
def get_template(location):
    """
    Return a Jinja template object loaded from the file at `location`.
    """
    from jinja2 import Environment, FileSystemLoader
    location = as_posixpath(abspath(expanduser(location)))
    assert isfile(location)
    # the loader root is the template's own directory
    loader = FileSystemLoader(parent_directory(location))
    return Environment(loader=loader).get_template(file_name(location))
def test_paths_are_posix_paths_in_html_app_format_output(monkeypatch):
    """html-app format writes POSIX paths into the generated data.json."""
    monkeypatch.setattr(click._termui_impl, 'isatty', lambda _: True)
    test_dir = test_env.get_test_loc('posix_path', copy=True)
    result_file = test_env.get_temp_file(extension='html', file_name='test_html')
    runner = CliRunner()
    args = ['--copyright', '--format', 'html-app', test_dir, result_file]
    result = runner.invoke(cli.scancode, args)
    assert result.exit_code == 0
    assert 'Scanning done' in result.output
    # the data we want to test is in the data.json file
    data_file = os.path.join(
        fileutils.parent_directory(result_file), 'test_html_files', 'data.json')
    with open(data_file) as df:
        assert '/posix_path/copyright_acme_c-c.c' in df.read()
def _get_root_dir(input_path, strip_root=False):
    """
    Return a root dir name or None.
    """
    if strip_root:
        return None
    scanned = os.path.abspath(os.path.normpath(os.path.expanduser(input_path)))
    # for a file input, the root is its containing directory
    if not filetype.is_dir(scanned):
        scanned = fileutils.parent_directory(scanned)
    return fileutils.file_name(scanned)
def get_description(metainfo, location=None):
    """
    Return a description string built from a ``metainfo`` object or mapping,
    combining its Summary with the best available long description.
    """
    # newer metadata versions use the payload for the description
    if hasattr(metainfo, 'get_payload'):
        description = metainfo.get_payload()
    else:
        description = None
    if not description:
        # legacymetadata versions use the Description for the description
        description = get_attribute(metainfo, 'Description')
        if not description and location:
            # older metadata versions can use a DESCRIPTION.rst file
            description = get_legacy_description(
                fileutils.parent_directory(location))
    summary = get_attribute(metainfo, 'Summary')
    return build_description(summary, description)
def parse(cls, location):
    """Yield a PackageData named after the parent directory of `location`."""
    # we use the parent directory as a package name;
    # we could use checksums as version in the future.
    # there is an optional array of license file names in targets, and
    # dependencies, that we could also collect.
    parent_name = fileutils.file_name(fileutils.parent_directory(location))
    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=parent_name,
        version=None,
    )
def recognize(cls, location):
    """
    Yield one or more Package manifest objects given a file ``location``
    pointing to a package archive, manifest or similar.
    """
    with open(location, encoding='utf-8') as loc:
        manifest_text = loc.read()
    package = build_package(cls, manifest_text)
    if not package.name:
        # If no name was detected for the Package, fall back to the basename
        # of the parent directory as the Package name
        package.name = fileutils.file_base_name(
            fileutils.parent_directory(location))
    yield package
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name. The case of the name is
    ignored to ensure that similar results are returned across case
    sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and
       keep the extension unchanged.
    """
    assert location
    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    if (not name or name == '.'
            # windows bare drive path as in c: or z:
            or (name and len(name) == 2 and name.endswith(':'))):
        name = 'file'
    parent = fileutils.parent_directory(location)
    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))
    if name.lower() not in siblings_lower:
        # NOTE: this branch used posixpath.join while the collision branch
        # below used os.path.join, yielding forward slashes for native
        # Windows locations; use os.path.join consistently.
        return os.path.join(parent, name)
    ext = fileutils.file_extension(name)
    base_name = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension
        ext = ''
        base_name = name
    counter = 1
    while True:
        new_name = base_name + '_' + str(counter) + ext
        if new_name.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, new_name)
def new_name(location, is_dir=False):
    """
    Return a new non-existing location usable to write a file or create
    directory without overwriting existing files or directories in the same
    parent directory, ignoring the case of the name. The case of the name is
    ignored to ensure that similar results are returned across case
    sensitive (*nix) and case insensitive file systems.

    To find a new unique name:
     * pad a directory name with _X where X is an incremented number.
     * pad a file base name with _X where X is an incremented number and
       keep the extension unchanged.
    """
    assert location
    location = location.rstrip('\\/')
    name = fileutils.file_name(location).strip()
    if (not name or name == '.'
            # windows bare drive path as in c: or z:
            or (name and len(name) == 2 and name.endswith(':'))):
        name = 'file'
    parent = fileutils.parent_directory(location)
    # all existing files or directory as lower case
    siblings_lower = set(s.lower() for s in os.listdir(parent))
    if name.lower() not in siblings_lower:
        # NOTE: this branch used posixpath.join while the collision branch
        # below used os.path.join, yielding forward slashes for native
        # Windows locations; use os.path.join consistently.
        return os.path.join(parent, name)
    ext = fileutils.file_extension(name)
    base_name = fileutils.file_base_name(name)
    if is_dir:
        # directories have no extension
        ext = ''
        base_name = name
    counter = 1
    while True:
        new_name = base_name + '_' + str(counter) + ext
        if new_name.lower() not in siblings_lower:
            break
        counter += 1
    return os.path.join(parent, new_name)
def _get_root_dir(input_path, strip_root=False, full_root=False):
    """
    Return a root dir name or None.
    On Windows, the path uses POSIX (forward slash) separators.
    """
    if strip_root:
        return
    scanned = os.path.abspath(os.path.normpath(os.path.expanduser(input_path)))
    scanned = fileutils.as_posixpath(scanned)
    # for a file input, the root is its containing directory
    if filetype.is_dir(scanned):
        root_dir = scanned
    else:
        root_dir = fileutils.parent_directory(scanned)
    root_dir = fileutils.as_posixpath(root_dir)
    if full_root:
        return root_dir
    return fileutils.file_name(root_dir)
def as_template(scan_data, template='html'):
    """
    Return an string built from a list of results and the provided template.
    The template defaults to the standard HTML template format or can point
    to the path of a custom template file.
    """
    from licensedcode.models import get_license

    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(abspath(expanduser(template)))
        assert isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    licenses = {}

    # Create a flattened data dict keyed by location
    for scan_result in scan_data:
        location = scan_result['location']
        results = []
        if 'copyrights' in scan_result:
            for entry in scan_result['copyrights']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if 'licenses' in scan_result:
            for entry in scan_result['licenses']:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })
                # attach the full license object the first time a key is seen
                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_license(entry['key'])
        if results:
            # sort detections by starting line for display
            converted[location] = sorted(results, key=itemgetter('start'))
        if 'infos' in scan_result:
            converted_infos[location] = scan_result['infos']
        if 'packages' in scan_result:
            converted_packages[location] = scan_result['packages']

    licenses = OrderedDict(sorted(licenses.items()))

    results = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }
    return template.render(results=results, licenses=licenses)
def parse(location):
    """
    Return a Package object built from a package.json file at ``location`` or
    None if this is not a usable package.json (missing, or lacking the
    required name and version fields).
    """
    if not is_package_json(location):
        return

    # mapping of top level package.json items to the Package object field name
    plain_fields = OrderedDict([
        ('name', 'name'),
        ('version', 'version'),
        ('description', 'summary'),
        ('keywords', 'keywords'),
        ('homepage', 'homepage_url'),
    ])

    # mapping of top level package.json items to a function accepting as arguments:
    # - the package.json element value and a Package Object to update
    field_mappers = OrderedDict([
        ('author', author_mapper),
        ('bugs', bugs_mapper),
        ('contributors', contributors_mapper),
        ('maintainers', maintainers_mapper),
        ('license', licensing_mapper),
        ('licenses', licensing_mapper),
        ('dependencies', dependencies_mapper),
        ('devDependencies', dev_dependencies_mapper),
        ('peerDependencies', peer_dependencies_mapper),
        ('optionalDependencies', optional_dependencies_mapper),
        ('url', url_mapper),
        ('dist', dist_mapper),
        ('repository', repository_mapper),
    ])

    # object_pairs_hook=OrderedDict preserves the JSON key order
    with codecs.open(location, encoding='utf-8') as loc:
        data = json.load(loc, object_pairs_hook=OrderedDict)

    if not data.get('name') or not data.get('version'):
        # a package.json without name and version is not a usable NPM package
        return

    # a package.json is at the root of an NPM package
    base_dir = fileutils.parent_directory(location)
    package = NpmPackage(location=base_dir)
    package.metafile_locations = [location]

    # copy simple scalar fields directly onto the package, skipping
    # empty/whitespace-only values
    for source, target in plain_fields.items():
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                setattr(package, target, value)

    # delegate structured fields to their mapper functions, which mutate
    # the package in place
    for source, func in field_mappers.items():
        logger.debug('parse: %(source)r, %(func)r' % locals())
        value = data.get(source)
        if value:
            if isinstance(value, basestring):
                value = value.strip()
            if value:
                func(value, package)

    package.download_urls.append(public_download_url(package.name, package.version))
    # NOTE(review): metafile_locations was already initialized to [location]
    # above; this append presumably duplicates the entry — confirm intent
    package.metafile_locations.append(location)
    return package
def as_template(scanned_files, template):
    """
    Return an string built from a list of `scanned_files` results and the
    provided `template` identifier. The template defaults to the standard
    HTML template format or can point to the path of a custom template file.
    """
    # FIXME: This code is highly coupled with actual scans and may not
    # support adding new scans at all
    from licensedcode.cache import get_licenses_db

    # FIXME: factor out the html vs custom from this function: we should get a template path
    if template == 'html':
        template = get_template(get_template_dir('html'))
    else:
        # load a custom template
        tpath = fileutils.as_posixpath(os.path.abspath(os.path.expanduser(template)))
        assert os.path.isfile(tpath)
        tdir = fileutils.parent_directory(tpath)
        tfile = fileutils.file_name(tpath)
        template = get_template(tdir, tfile)

    converted = OrderedDict()
    converted_infos = OrderedDict()
    converted_packages = OrderedDict()
    licenses = {}

    # scan-result keys handled specially below
    LICENSES = 'licenses'
    COPYRIGHTS = 'copyrights'
    PACKAGES = 'packages'
    URLS = 'urls'
    EMAILS = 'emails'

    # Create a flattened data dict keyed by path
    for scanned_file in scanned_files:
        path = scanned_file['path']
        results = []
        if COPYRIGHTS in scanned_file:
            for entry in scanned_file[COPYRIGHTS]:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'copyright',
                    # NOTE: we display one statement per line.
                    'value': '\n'.join(entry['statements']),
                })
        if LICENSES in scanned_file:
            for entry in scanned_file[LICENSES]:
                results.append({
                    'start': entry['start_line'],
                    'end': entry['end_line'],
                    'what': 'license',
                    'value': entry['key'],
                })
                # FIXME: we should NOT rely on license objects: only use what is in the JSON instead
                if entry['key'] not in licenses:
                    licenses[entry['key']] = entry
                    entry['object'] = get_licenses_db().get(entry['key'])
        if results:
            # sort detections by starting line for display
            converted[path] = sorted(results, key=itemgetter('start'))

        # TODO: this is klunky: we need to drop templates entirely or we
        # should rather just pass the list of files from the scan
        # results and let the template handle this rather than
        # denormalizing the list here??
        converted_infos[path] = OrderedDict()
        for name, value in scanned_file.items():
            if name in (LICENSES, PACKAGES, COPYRIGHTS, EMAILS, URLS):
                continue
            converted_infos[path][name] = value

        if PACKAGES in scanned_file:
            converted_packages[path] = scanned_file[PACKAGES]

    licenses = OrderedDict(sorted(licenses.items()))

    files = {
        'license_copyright': converted,
        'infos': converted_infos,
        'packages': converted_packages
    }
    return template.generate(files=files, licenses=licenses)