def check_files(test_dir, expected):
    """
    Walk test_dir.
    Check that all dirs are readable.
    Check that all files are:
     * non-special,
     * readable,
     * have a posix path that ends with one of the expected tuple paths.
    """
    result = []
    locs = []
    if filetype.is_file(test_dir):
        test_dir = fileutils.parent_directory(test_dir)

    test_dir_path = fileutils.as_posixpath(test_dir)
    for top, _, files in os.walk(test_dir):
        for f in files:
            location = os.path.join(top, f)
            locs.append(location)
            path = fileutils.as_posixpath(location)
            path = path.replace(test_dir_path, '').strip('/')
            result.append(path)

    assert sorted(expected) == sorted(result)

    for location in locs:
        assert filetype.is_file(location)
        assert not filetype.is_special(location)
        assert filetype.is_readable(location)
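# A minimal usage sketch for check_files, assuming a hypothetical test fixture
# directory; the directory name and expected paths below are illustrative only.
def test_extract_creates_expected_files():
    test_dir = 'tests/data/extracted_archive'  # hypothetical fixture location
    expected = (
        'docs/readme.txt',
        'src/main.c',
    )
    check_files(test_dir, expected)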
def is_pom(location):
    """
    Return True if the file at location is highly likely to be a POM.
    """
    if (not filetype.is_file(location)
        or not location.endswith(('.pom', 'pom.xml', 'project.xml',))):
        if TRACE:
            logger.debug('is_pom: not a POM on name: {}'.format(location))
        return

    T = contenttype.get_type(location)
    if T.is_text:
        # check the POM version in the first 150 lines
        with codecs.open(location, encoding='utf-8') as pom:
            for n, line in enumerate(pom):
                if n > 150:
                    break
                if any(x in line for x in
                       ('http://maven.apache.org/POM/4.0.0',
                        'http://maven.apache.org/xsd/maven-4.0.0.xsd',
                        '<modelVersion>',
                        # somehow we can still parse version 3 poms too
                        '<pomVersion>',)):
                    return True

    if TRACE:
        logger.debug('is_pom: not a POM based on type: {}: {}'.format(T, location))
def recognize_packaged_archives(location):
    """
    Return a Package object if one was recognized or None for this `location`.
    """
    if not filetype.is_file(location):
        return

    T = typecode.contenttype.get_type(location)
    ftype = T.filetype_file.lower()
    mtype = T.mimetype_file

    for package in PACKAGE_TYPES:
        if not package.packaging == models.Package.as_archive:
            continue

        # Note: default to True if there is nothing to match against
        if package.filetypes:
            type_matched = any(t in ftype for t in package.filetypes)
        else:
            type_matched = True

        if package.mimetypes:
            mime_matched = any(m in mtype for m in package.mimetypes)
        else:
            mime_matched = True

        if package.extensions:
            extension_matched = location.lower().endswith(package.extensions)
        else:
            extension_matched = True

        if type_matched and mime_matched and extension_matched:
            # we return the first match in the order of PACKAGE_TYPES
            return package(location=location)
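# A hedged usage sketch for recognize_packaged_archives: the archive path is
# hypothetical and the result depends on the PACKAGE_TYPES registry defined
# elsewhere in the codebase.
package = recognize_packaged_archives('/tmp/downloads/foo-1.0.rpm')
if package:
    print(package.location)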
def __init__(self, location):
    if (not location
        or (not os.path.exists(location)
            and not filetype.is_broken_link(location))):
        raise IOError("[Errno 2] No such file or directory: "
                      "'%(location)r'" % locals())

    self.location = location
    # flags and values
    self.is_file = filetype.is_file(location)
    self.is_dir = filetype.is_dir(location)
    self.is_regular = filetype.is_regular(location)
    self.is_special = filetype.is_special(location)
    self.date = filetype.get_last_modified_date(location)
    self.is_link = filetype.is_link(location)
    self.is_broken_link = filetype.is_broken_link(location)

    # computed on demand
    self._size = None
    self._link_target = None
    self._mimetype_python = None
    self._filetype_file = None
    self._mimetype_file = None
    self._filetype_pygments = None
    self._is_pdf_with_text = None
    self._is_text = None
    self._is_binary = None
def __init__(self, location):
    if not location or (
            not os.path.exists(location)
            and not filetype.is_broken_link(location)):
        raise IOError("[Errno 2] No such file or directory: "
                      "'%(location)r'" % locals())

    self.location = location
    # flags and values
    self.is_file = filetype.is_file(location)
    self.is_dir = filetype.is_dir(location)
    self.is_regular = filetype.is_regular(location)
    self.is_special = filetype.is_special(location)
    self.date = filetype.get_last_modified_date(location)
    self.is_link = filetype.is_link(location)
    self.is_broken_link = filetype.is_broken_link(location)

    # FIXME: the way the True and False values are checked in properties is
    # verbose and contrived at best and is due to use None/True/False as
    # different values
    # computed on demand
    self._size = None
    self._link_target = None
    self._mimetype_python = None
    self._filetype_file = None
    self._mimetype_file = None
    self._filetype_pygments = None
    self._is_pdf_with_text = None
    self._is_text = None
    self._is_binary = None
def get_test_loc(self, test_path, copy=False, debug=False):
    """
    Given a `test_path` relative to the self.test_data_dir directory, return
    the location to a test file or directory for this path. Copy to a temp
    test location if `copy` is True.
    """
    if debug:
        import inspect
        caller = inspect.stack()[1][3]
        print('\nself.get_test_loc,%(caller)s,"%(test_path)s"' % locals())

    test_loc = get_test_loc(test_path, self.test_data_dir, debug=debug)
    if copy:
        base_name = os.path.basename(test_loc)
        if filetype.is_file(test_loc):
            # target must be an existing dir
            target_dir = self.get_temp_dir()
            fileutils.copyfile(test_loc, target_dir)
            test_loc = os.path.join(target_dir, base_name)
        else:
            # target must be a NON existing dir
            target_dir = os.path.join(self.get_temp_dir(), base_name)
            fileutils.copytree(test_loc, target_dir)
            # cleanup of VCS that could be left over from checkouts
            self.remove_vcs(target_dir)
            test_loc = target_dir
    return test_loc
def walk(location, ignored=ignore_nothing):
    """
    Walk location returning the same tuples as os.walk but with a different
    behavior:
     - always walk top-down, breadth-first.
     - always ignore and never follow symlinks.
     - always ignore special files (FIFOs, etc.)
     - optionally ignore files and directories by invoking the `ignored`
       callable on files and directories returning True if it should be
       ignored.
     - location is a directory or a file: for a file, the file is returned.
    """
    if on_linux:
        location = path_to_bytes(location)
    # TODO: consider using the new "scandir" module for some speed-up.
    if TRACE:
        ign = ignored(location)
        logger_debug('walk: ignored:', location, ign)
    if ignored(location):
        return

    if filetype.is_file(location):
        yield parent_directory(location), [], [file_name(location)]

    elif filetype.is_dir(location):
        dirs = []
        files = []
        # TODO: consider using scandir
        for name in os.listdir(location):
            loc = os.path.join(location, name)
            if filetype.is_special(loc) or ignored(loc):
                if TRACE:
                    ign = ignored(loc)
                    logger_debug('walk: ignored:', loc, ign)
                continue
            # special files and symlinks are always ignored
            if filetype.is_dir(loc):
                dirs.append(name)
            elif filetype.is_file(loc):
                files.append(name)
        yield location, dirs, files

        for dr in dirs:
            for triple in walk(os.path.join(location, dr), ignored):
                yield triple
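# A small usage sketch for this walk variant: collect every file path under a
# directory, ignoring nothing. The starting directory is hypothetical.
import os

all_files = []
for top, dirs, files in walk('/tmp/some_codebase'):
    for name in files:
        all_files.append(os.path.join(top, name))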
def is_metadata_json(location):
    """
    Return True if `location` path is for a Chef metadata.json file. The
    metadata.json is also used in Python installed packages in a 'dist-info'
    directory.
    """
    return (filetype.is_file(location)
            and fileutils.file_name(location).lower() == 'metadata.json'
            and not fileutils.file_name(
                fileutils.parent_directory(location)).lower().endswith('dist-info'))
def walk(location, ignored=ignore_nothing):
    """
    Walk location returning the same tuples as os.walk but with a different
    behavior:
     - always walk top-down, breadth-first.
     - always ignore and never follow symlinks.
     - always ignore special files (FIFOs, etc.)
     - optionally ignore files and directories by invoking the `ignored`
       callable on files and directories returning True if it should be
       ignored.
     - location is a directory or a file: for a file, the file is returned.
    """
    # TODO: consider using the new "scandir" module for some speed-up.
    if DEBUG:
        ign = ignored(location)
        logger.debug('walk: ignored: %r %r', location, ign)
    if ignored(location):
        return

    if filetype.is_file(location):
        yield parent_directory(location), [], [file_name(location)]

    elif filetype.is_dir(location):
        dirs = []
        files = []
        # TODO: consider using scandir
        for name in os.listdir(location):
            loc = os.path.join(location, name)
            if filetype.is_special(loc) or ignored(loc):
                if DEBUG:
                    ign = ignored(loc)
                    logger.debug('walk: ignored: %r %r', loc, ign)
                continue
            # special files and symlinks are always ignored
            if filetype.is_dir(loc):
                dirs.append(name)
            elif filetype.is_file(loc):
                files.append(name)
        yield location, dirs, files

        for dr in dirs:
            for triple in walk(os.path.join(location, dr), ignored):
                yield triple
def get_best_handler(location, kinds=all_kinds):
    """
    Return the best handler or None for the file at location.
    """
    location = os.path.abspath(os.path.expanduser(location))
    if not filetype.is_file(location):
        return
    handlers = list(get_handlers(location))
    if handlers:
        candidates = score_handlers(handlers)
        return candidates and pick_best_handler(candidates, kinds)
def get_handlers(location):
    """
    Return an iterable of (handler, type_matched, mime_matched,
    extension_matched,) for this `location`.
    """
    if on_linux and py2:
        location = fileutils.fsencode(location)

    if filetype.is_file(location):
        T = contenttype.get_type(location)
        ftype = T.filetype_file.lower()
        mtype = T.mimetype_file

        if TRACE_DEEP:
            logger.debug(
                'get_handlers: processing %(location)s: '
                'ftype: %(ftype)s, mtype: %(mtype)s ' % locals())

        for handler in archive_handlers:
            if not handler.extractors:
                continue

            extractor_count = len(handler.extractors)
            if extractor_count > 2:
                raise Exception('Maximum level of archive nesting is two.')

            # default to False
            type_matched = handler.filetypes and any(
                t in ftype for t in handler.filetypes)
            mime_matched = handler.mimetypes and any(
                m in mtype for m in handler.mimetypes)

            exts = handler.extensions
            if exts:
                if on_linux and py2:
                    exts = tuple(fileutils.fsencode(e) for e in exts)
            extension_matched = exts and location.lower().endswith(exts)

            if TRACE_DEEP:
                logger.debug(
                    '  get_handlers: matched type: %(type_matched)s, '
                    'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())

            if handler.strict and not all(
                    [type_matched, mime_matched, extension_matched]):
                logger.debug('  get_handlers: skip strict' % locals())
                continue

            if type_matched or mime_matched or extension_matched:
                if TRACE_DEEP:
                    handler_name = handler.name
                    logger.debug(
                        '  get_handlers: yielding handler: %(handler_name)r' % locals())
                yield handler, type_matched, mime_matched, extension_matched
def recognize_package_data(location):
    """
    Return a list of Package objects if any package_data were recognized for
    this `location`, or None if there were no Packages found. Raises
    Exceptions on errors.
    """
    if not filetype.is_file(location):
        return []

    return list(_parse(location))
def parse_status_file(location, distro='debian'):
    """
    Yield Debian Package objects from a dpkg `status` file or None.
    """
    if not os.path.exists(location):
        raise FileNotFoundError(
            '[Errno 2] No such file or directory: {}'.format(repr(location)))
    if not filetype.is_file(location):
        raise Exception(f'Location is not a file: {location}')
    for debian_pkg_data in debcon.get_paragraphs_data_from_file(location):
        yield build_package(debian_pkg_data, distro)
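# A hedged usage sketch for parse_status_file: iterate the packages parsed
# from a dpkg status database. The path is the conventional dpkg location on
# Debian-based systems; adjust for your environment.
for package in parse_status_file('/var/lib/dpkg/status', distro='debian'):
    print(package)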
def copytree(src, dst):
    """
    Copy recursively the `src` directory to the `dst` directory. If `dst` is
    an existing directory, files in `dst` may be overwritten during the copy.
    Preserve timestamps.
    Ignores:
     - `src` permissions: `dst` files are created with the default permissions.
     - all special files such as FIFO or character devices and symlinks.

    Raise a shutil.Error with a list of reasons.

    This function is similar to and derived from the Python shutil.copytree
    function. See fileutils.py.ABOUT for details.
    """
    if on_linux and py2:
        src = fsencode(src)
        dst = fsencode(dst)

    if not filetype.is_readable(src):
        chmod(src, R, recurse=False)

    names = os.listdir(src)
    if not os.path.exists(dst):
        os.makedirs(dst)

    errors = []
    errors.extend(copytime(src, dst))

    for name in names:
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)

        # skip anything that is not a regular file, dir or link
        if not filetype.is_regular(srcname):
            continue

        if not filetype.is_readable(srcname):
            chmod(srcname, R, recurse=False)
        try:
            if os.path.isdir(srcname):
                copytree(srcname, dstname)
            elif filetype.is_file(srcname):
                copyfile(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except shutil.Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))

    if errors:
        raise shutil.Error(errors)
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or directory
    at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    if on_linux:
        location = path_to_bytes(location)
    else:
        location = path_to_unicode(location)

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)
    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        base_name = name
        extension = ''

    if on_linux:
        infos['name'] = path_to_unicode(name)
        infos['base_name'] = path_to_unicode(base_name)
        infos['extension'] = path_to_unicode(extension)
    else:
        infos['name'] = name
        infos['base_name'] = base_name
        infos['extension'] = extension

    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)
    return infos
def parse(location):
    """
    Return a Package object from a Cargo.toml/Cargo.lock file.
    """
    handlers = {
        'cargo.toml': build_cargo_toml_package,
        'cargo.lock': build_cargo_lock_package,
    }
    filename = filetype.is_file(location) and fileutils.file_name(location).lower()
    handler = handlers.get(filename)
    if handler:
        return handler and handler(toml.load(location, _dict=OrderedDict))
def copytree(src, dst):
    """
    Copy recursively the `src` directory to the `dst` directory. If `dst` is
    an existing directory, files in `dst` may be overwritten during the copy.
    Preserve timestamps.
    Ignores:
     - `src` permissions: `dst` files are created with the default permissions.
     - all special files such as FIFO or character devices and symlinks.

    Raise a shutil.Error with a list of reasons.

    This function is similar to and derived from the Python shutil.copytree
    function. See fileutils.py.ABOUT for details.
    """
    if on_linux:
        src = path_to_bytes(src)
        dst = path_to_bytes(dst)

    if not filetype.is_readable(src):
        chmod(src, R, recurse=False)

    names = os.listdir(src)
    if not os.path.exists(dst):
        os.makedirs(dst)

    errors = []
    errors.extend(copytime(src, dst))

    for name in names:
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)

        # skip anything that is not a regular file, dir or link
        if not filetype.is_regular(srcname):
            continue

        if not filetype.is_readable(srcname):
            chmod(srcname, R, recurse=False)
        try:
            if os.path.isdir(srcname):
                copytree(srcname, dstname)
            elif filetype.is_file(srcname):
                copyfile(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except shutil.Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))

    # surface the collected failures, as promised in the docstring
    if errors:
        raise shutil.Error(errors)
def get_extractors(location, kinds=all_kinds):
    """
    Return a list of extractors that can extract the file at location or an
    empty list.
    """
    location = os.path.abspath(os.path.expanduser(location))
    if filetype.is_file(location):
        handlers = list(get_handlers(location))
        if handlers:
            candidates = score_handlers(handlers)
            if candidates:
                best = pick_best_handler(candidates, kinds)
                if best:
                    return best.extractors
    return []
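# A hedged usage sketch for get_extractors. The archive path and target
# directory are hypothetical, and the extractor callable signature
# (location, target_dir) is an assumption, not a documented contract.
location = '/tmp/downloads/sources.tar.gz'
for extract in get_extractors(location):
    extract(location, '/tmp/extracted')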
def get_handlers(location):
    """
    Return an iterable of (handler, type_matched, mime_matched,
    extension_matched,) for this `location`.
    """
    if filetype.is_file(location):
        T = typecode.contenttype.get_type(location)
        ftype = T.filetype_file.lower()
        mtype = T.mimetype_file

        for handler in archive_handlers:
            if not handler.extractors:
                continue

            extractor_count = len(handler.extractors)
            if extractor_count > 2:
                raise Exception('Maximum level of archive nesting is two.')

            # default to False
            type_matched = handler.filetypes and any(
                t in ftype for t in handler.filetypes)
            mime_matched = handler.mimetypes and any(
                m in mtype for m in handler.mimetypes)
            extension_matched = handler.extensions and location.lower().endswith(
                handler.extensions)

            if TRACE_DEEP:
                handler_name = handler.name
                logger.debug(
                    'get_handlers: considering %(handler_name)r handler for '
                    '%(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
                logger.debug(
                    'get_handlers: %(location)s: matched type: %(type_matched)s, '
                    'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())

            if handler.strict and not all(
                    [type_matched, mime_matched, extension_matched]):
                continue

            if type_matched or mime_matched or extension_matched:
                if TRACE_DEEP:
                    logger.debug(
                        'get_handlers: %(location)s: matched type: %(type_matched)s, '
                        'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
                    logger.debug(
                        'get_handlers: %(location)s: handler: %(handler)r' % locals())
                yield handler, type_matched, mime_matched, extension_matched
def get_best_handler(location, kinds=all_kinds):
    """
    Return the best handler or None for the file at location.
    """
    if on_linux:
        location = fileutils.fsencode(location)
    location = os.path.abspath(os.path.expanduser(location))
    if not filetype.is_file(location):
        return
    handlers = list(get_handlers(location))
    if TRACE_DEEP:
        logger.debug('get_best_handler: handlers: %(handlers)r ' % locals())

    if handlers:
        candidates = score_handlers(handlers)
        return candidates and pick_best_handler(candidates, kinds)
def recognize_package(location):
    """
    Return a Package object if one was recognized or None for this `location`.
    """
    if not filetype.is_file(location):
        return

    T = contenttype.get_type(location)
    ftype = T.filetype_file.lower()
    mtype = T.mimetype_file

    for package_type in PACKAGE_TYPES:
        # Note: default to True if there is nothing to match against
        metafiles = package_type.metafiles
        if on_linux:
            metafiles = (path_to_bytes(m) for m in metafiles)
        if location.endswith(tuple(metafiles)):
            logger_debug('metafile matching: package_type is of type:', package_type)
            return package_type.recognize(location)

        if package_type.filetypes:
            type_matched = any(t in ftype for t in package_type.filetypes)
        else:
            type_matched = False

        if package_type.mimetypes:
            mime_matched = any(m in mtype for m in package_type.mimetypes)
        else:
            mime_matched = False

        extensions = package_type.extensions
        if extensions:
            if on_linux:
                extensions = tuple(path_to_bytes(e) for e in extensions)
            extension_matched = location.lower().endswith(extensions)
        else:
            extension_matched = False

        if type_matched and mime_matched and extension_matched:
            # we return the first match in the order of PACKAGE_TYPES
            logger_debug('all matching: package is of type:', package_type)
            recognized = package_type.recognize(location)
            logger_debug('all matching: recognized as:', repr(recognized))
            return recognized

        logger_debug('no match: package is not of known type:', package_type)
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or directory
    at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)
    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    infos['name'] = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        base_name = name
        extension = ''
    infos['base_name'] = base_name
    infos['extension'] = extension
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)
    return infos
def multi_checksums(location, checksum_names=('md5', 'sha1', 'sha256', 'sha512', 'sha1_git')):
    """
    Return a mapping of hexdigest checksums keyed by checksum name from the
    content of the file at `location`. Use the `checksum_names` list of
    checksum names. The mapping is guaranteed to contain all the requested
    names as keys. If the location is not a file, the values are None.
    """
    results = OrderedDict([(name, None) for name in checksum_names])
    if not filetype.is_file(location):
        return results

    # fixme: we should read in chunks?
    with open(location, 'rb') as f:
        hashable = f.read()

    for name in checksum_names:
        results[name] = _hashmodules_by_name[name](hashable).hexdigest()
    return results
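# A small usage sketch for multi_checksums: the file path is hypothetical, and
# a non-file location returns the same mapping with all values set to None.
sums = multi_checksums('/tmp/downloads/sources.tar.gz', ('md5', 'sha1'))
print(sums['md5'], sums['sha1'])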
def is_data(location, definitions=DATA_TYPE_DEFINITIONS):
    """
    Return True if the file at `location` is a data file.
    """
    if on_linux:
        location = fileutils.fsencode(location)

    if not filetype.is_file(location):
        return False

    T = get_type(location)
    ftype = T.filetype_file.lower()
    mtype = T.mimetype_file.lower()

    for ddef in definitions:
        type_matched = ddef.filetypes and any(t in ftype for t in ddef.filetypes)
        mime_matched = ddef.mimetypes and any(m in mtype for m in ddef.mimetypes)
        exts = ddef.extensions
        if exts:
            if on_linux:
                exts = tuple(fileutils.fsencode(e) for e in exts)
        extension_matched = exts and location.lower().endswith(exts)

        if TRACE:
            logger_debug(
                'is_data: considering def: %(ddef)r for %(location)s' % locals())
            logger_debug(
                'matched type: %(type_matched)s, mime: %(mime_matched)s, '
                'ext: %(extension_matched)s' % locals())

        if ddef.strict and not all([type_matched, mime_matched, extension_matched]):
            continue

        if type_matched or mime_matched or extension_matched:
            if TRACE:
                logger_debug('is_data: True: %(location)s: ' % locals())
            return True

    return False
def checksum(location, name, base64=False):
    """
    Return a checksum from the content of the file at `location`, computed
    with the `name` checksum algorithm. The checksum is a hexdigest, or is
    base64-encoded if `base64` is True.
    """
    if not filetype.is_file(location):
        return
    hasher = _hashmodules_by_name[name]

    # fixme: we should read in chunks?
    with open(location, 'rb') as f:
        hashable = f.read()

    hashed = hasher(hashable)
    if base64:
        return hashed.b64digest()
    return hashed.hexdigest()
def get_handlers(location):
    """
    Return an iterable of (handler, type_matched, mime_matched,
    extension_matched,) for this `location`.
    """
    if on_linux:
        location = path_to_bytes(location)

    if filetype.is_file(location):
        T = typecode.contenttype.get_type(location)
        ftype = T.filetype_file.lower()
        mtype = T.mimetype_file

        for handler in archive_handlers:
            if not handler.extractors:
                continue

            extractor_count = len(handler.extractors)
            if extractor_count > 2:
                raise Exception('Maximum level of archive nesting is two.')

            # default to False
            type_matched = handler.filetypes and any(
                t in ftype for t in handler.filetypes)
            mime_matched = handler.mimetypes and any(
                m in mtype for m in handler.mimetypes)
            exts = handler.extensions
            if exts:
                if on_linux:
                    exts = tuple(path_to_bytes(e) for e in exts)
            extension_matched = exts and location.lower().endswith(exts)

            if TRACE_DEEP:
                handler_name = handler.name
                logger.debug(
                    'get_handlers: considering %(handler_name)r handler for '
                    '%(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
                logger.debug(
                    'get_handlers: %(location)s: matched type: %(type_matched)s, '
                    'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())

            if handler.strict and not all(
                    [type_matched, mime_matched, extension_matched]):
                continue

            if type_matched or mime_matched or extension_matched:
                if TRACE_DEEP:
                    logger.debug(
                        'get_handlers: %(location)s: matched type: %(type_matched)s, '
                        'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
                    logger.debug(
                        'get_handlers: %(location)s: handler: %(handler)r' % locals())
                yield handler, type_matched, mime_matched, extension_matched
def recon(self, location):
    for f in os.listdir(location):
        loc = join(location, f)
        if not filetype.is_file(loc):
            continue
        # a pom is an xml doc
        pom_ver = pom_version(loc)
        if not pom_ver:
            continue

        if f == 'pom.xml':
            # first case: a maven pom.xml inside a META-INF directory
            # such as in META-INF/maven/log4j/log4j/pom.xml
            # the directory tree has a fixed depth
            # as is: META-INF/maven/groupid/artifactid/pom.xml
            # this will typically be inside a binary jar, so we should find
            # a typical structure above
            try:
                gggp = dirname(dirname(dirname(dirname(loc))))
                if fileutils.file_name(gggp) == 'META-INF':
                    # recon here: the root of the component is the parent of
                    # META-INF; return that, with a type and the POM
                    # metafile to parse.
                    pass
            except:
                pass

            # second case: a maven pom.xml at the root of a component
            # development tree: we should find a few extra clues in the
            # conventional directory structure below. for now we take this
            # as being the component root; return that, with a type and the
            # POM metafile to parse.
            pass

        elif f.endswith('.pom'):
            # first case: a maven repo layout
            # the jars are side-by-side with the pom
            # check if there are side-by-side artifacts
            jar = loc.replace('.pom', '.jar')
            if os.path.exists(jar):
                # return that, with a type and the POM metafile to parse.
                pass
def get_file_infos(location, as_list=True):
    """
    Return a list of dictionaries of information collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from scancode import utils
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = is_file and fileutils.file_extension(location) or ''
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    if as_list:
        return [infos]
    else:
        return infos
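# A hedged usage sketch combining get_file_infos (with as_list=True) and
# os.walk to report on every file under a hypothetical directory.
import os

for top, dirs, files in os.walk('/tmp/some_codebase'):
    for name in files:
        for info in get_file_infos(os.path.join(top, name)):
            print(info['name'], info['sha1'], info['mime_type'])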
def recon(self, location):
    for f in os.listdir(location):
        loc = join(location, f)
        if not filetype.is_file(loc):
            continue
        # a pom is an xml doc
        if not is_pom(loc):
            continue

        if f == 'pom.xml':
            # first case: a maven pom.xml inside a META-INF directory
            # such as in META-INF/maven/log4j/log4j/pom.xml
            # the directory tree has a fixed depth
            # as is: META-INF/maven/groupid/artifactid/pom.xml
            # this will typically be inside a binary jar, so we should find
            # a typical structure above
            try:
                gggp = dirname(dirname(dirname(dirname(loc))))
                if fileutils.file_name(gggp) == 'META-INF':
                    # recon here: the root of the component is the parent of
                    # META-INF; return that, with a type and the POM
                    # metafile to parse.
                    pass
            except:
                pass

            # second case: a maven pom.xml at the root of a component
            # development tree: we should find a few extra clues in the
            # conventional directory structure below. for now we take this
            # as being the component root; return that, with a type and the
            # POM metafile to parse.
            pass

        elif f.endswith('.pom'):
            # first case: a maven repo layout
            # the jars are side-by-side with the pom
            # check if there are side-by-side artifacts
            jar = loc.replace('.pom', '.jar')
            if os.path.exists(jar):
                # return that, with a type and the POM metafile to parse.
                pass
def get_handlers(location):
    """
    Return an iterable of (handler, type_matched, mime_matched,
    extension_matched,) for this `location`.
    """
    if filetype.is_file(location):
        T = typecode.contenttype.get_type(location)
        ftype = T.filetype_file.lower()
        mtype = T.mimetype_file

        for handler in archive_handlers:
            if not handler.extractors:
                continue

            extractor_count = len(handler.extractors)
            if extractor_count > 2:
                raise Exception("Maximum level of archive nesting is two.")

            # default to False
            type_matched = handler.filetypes and any(
                t in ftype for t in handler.filetypes)
            mime_matched = handler.mimetypes and any(
                m in mtype for m in handler.mimetypes)
            extension_matched = handler.extensions and location.lower().endswith(
                handler.extensions)

            if DEBUG_DEEP:
                logger.debug(
                    "get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s " % locals())
                logger.debug(
                    "get_handlers: %(location)s: matched type: %(type_matched)s, "
                    "mime: %(mime_matched)s, ext: %(extension_matched)s" % locals())

            if handler.strict and not all(
                    [type_matched, mime_matched, extension_matched]):
                continue

            if type_matched or mime_matched or extension_matched:
                if DEBUG_DEEP:
                    logger.debug(
                        "get_handlers: %(location)s: matched type: %(type_matched)s, "
                        "mime: %(mime_matched)s, ext: %(extension_matched)s" % locals())
                    logger.debug(
                        "get_handlers: %(location)s: handler: %(handler)r" % locals())
                yield handler, type_matched, mime_matched, extension_matched
def get_handlers(location):
    """
    Return an iterable of (handler, type_matched, mime_matched,
    extension_matched,) for this `location`.
    """
    if filetype.is_file(location):
        T = typecode.contenttype.get_type(location)
        ftype = T.filetype_file.lower()
        mtype = T.mimetype_file

        for handler in archive_handlers:
            if not handler.extractors:
                continue

            extractor_count = len(handler.extractors)
            if extractor_count > 2:
                raise Exception('Maximum level of archive nesting is two.')

            type_matched = None
            if handler.types:
                type_matched = any(t in ftype for t in handler.types)

            mime_matched = None
            if handler.mimes:
                mime_matched = any(m in mtype for m in handler.mimes)

            extension_matched = None
            if handler.exts:
                extension_matched = location.lower().endswith(handler.exts)

            if DEBUG_DEEP:
                logger.debug(
                    'get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
                logger.debug(
                    'get_handlers: %(location)s: matched type: %(type_matched)s, '
                    'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())

            if type_matched or mime_matched or extension_matched:
                if DEBUG_DEEP:
                    logger.debug(
                        'get_handlers: %(location)s: matched type: %(type_matched)s, '
                        'mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
                    logger.debug(
                        'get_handlers: %(location)s: handler: %(handler)r' % locals())
                yield handler, type_matched, mime_matched, extension_matched
def get_test_loc(self, test_path, copy=False):
    """
    Given a `test_path` relative to the self.test_data_dir directory, return
    the location to a test file or directory for this path. Copy to a temp
    test location if `copy` is True.
    """
    test_loc = get_test_loc(test_path, self.test_data_dir)
    if copy:
        base_name = os.path.basename(test_loc)
        if filetype.is_file(test_loc):
            # target must be an existing dir
            target_dir = self.get_temp_dir()
            fileutils.copyfile(test_loc, target_dir)
            test_loc = os.path.join(target_dir, base_name)
        else:
            # target must be a NON existing dir
            target_dir = os.path.join(self.get_temp_dir(), base_name)
            fileutils.copytree(test_loc, target_dir)
            # cleanup of VCS that could be left over from checkouts
            self.remove_vcs(target_dir)
            test_loc = target_dir
    return test_loc
def __init__(self, location):
    if (not location
        or (not os.path.exists(location)
            and not filetype.is_broken_link(location))):
        raise IOError("[Errno 2] No such file or directory: "
                      "'%(location)r'" % locals())

    self.location = location
    # flags and values
    self.is_file = filetype.is_file(location)
    self.is_dir = filetype.is_dir(location)
    self.is_regular = filetype.is_regular(location)
    self.is_special = filetype.is_special(location)
    self.date = filetype.get_last_modified_date(location)
    self.is_link = filetype.is_link(location)
    self.is_broken_link = filetype.is_broken_link(location)

    # FIXME: the way the True and False values are checked in properties is
    # verbose and contrived at best and is due to use None/True/False as
    # different values
    # computed on demand
    self._size = None
    self._link_target = None
    self._mimetype_python = None
    self._filetype_file = None
    self._mimetype_file = None
    self._filetype_pygments = None
    self._is_pdf_with_text = None
    self._is_text = None
    self._is_text_with_long_lines = None
    self._is_compact_js = None
    self._is_js_map = None
    self._is_binary = None
    self._is_data = None
    self._is_archive = None
    self._contains_text = None
def get_test_loc(self, test_path, copy=False, debug=False, must_exist=True):
    """
    Given a `test_path` relative to the self.test_data_dir directory, return
    the location to a test file or directory for this path. Copy to a temp
    test location if `copy` is True. Raise an IOError if `must_exist` is True
    and the `test_path` does not exist.
    """
    test_data_dir = self.test_data_dir
    if debug:
        import inspect
        caller = inspect.stack()[1][3]
        print('\nself.get_test_loc,%(caller)s,"%(test_path)s"' % locals())

    test_loc = get_test_loc(
        test_path,
        test_data_dir,
        debug=debug,
        must_exist=must_exist,
    )
    if copy:
        base_name = path.basename(test_loc)
        if filetype.is_file(test_loc):
            # target must be an existing dir
            target_dir = self.get_temp_dir()
            fileutils.copyfile(test_loc, target_dir)
            test_loc = path.join(target_dir, base_name)
        else:
            # target must be a NON existing dir
            target_dir = path.join(self.get_temp_dir(), base_name)
            fileutils.copytree(test_loc, target_dir)
            # cleanup of VCS that could be left over from checkouts
            self.remove_vcs(target_dir)
            test_loc = target_dir
    return test_loc
def is_podspec_json(location):
    """
    Checks if the file is actually a podspec.json metadata file
    """
    return (filetype.is_file(location)
            and location.endswith('.podspec.json'))
def is_podfile_lock(location):
    """
    Checks if the file is actually a podfile.lock file
    """
    return (filetype.is_file(location)
            and location.endswith(('podfile.lock', 'Podfile.lock')))
def is_podspec(location):
    """
    Checks if the file is actually a podspec file
    """
    return (filetype.is_file(location)
            and location.endswith('.podspec'))
def file_endswith(location, endswith):
    """
    Check if the file at ``location`` ends with ``endswith`` string or tuple.
    """
    return filetype.is_file(location) and location.endswith(endswith)
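# A brief usage sketch: file_endswith accepts either a single suffix string or
# a tuple of suffixes. The paths are hypothetical.
is_lock = file_endswith('/src/app/Podfile.lock', ('podfile.lock', 'Podfile.lock'))
is_spec = file_endswith('/src/app/foo.podspec', '.podspec')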
def is_manifest(cls, location):
    """
    Return True if the file at ``location`` is likely a manifest of this type.
    """
    return filetype.is_file(location) and location.endswith('Gemfile.lock')
def is_about_file(location):
    return (filetype.is_file(location)
            and location.lower().endswith(('.about',)))
def is_package_json(location):
    return (filetype.is_file(location)
            and fileutils.file_name(location).lower() == 'package.json')
def is_ignore_file(location):
    """
    Return True if the location is an ignore file.
    """
    return (filetype.is_file(location)
            and fileutils.file_name(location) == '.scancodeignore')
def _is_build_manifest(cls, location):
    if not filetype.is_file(location):
        return False
    fn = fileutils.file_name(location)
    return any(fn == mf for mf in cls.metafiles)
def is_haxelib_json(location):
    return (filetype.is_file(location)
            and fileutils.file_name(location).lower() == 'haxelib.json')
def is_phpcomposer_json(location):
    return (filetype.is_file(location)
            and fileutils.file_name(location).lower() == 'composer.json')
def test_get_file_count_with_single_file(self):
    test_file = self.get_temp_file()
    # write bytes: the file is opened in binary mode
    with open(test_file, 'wb') as f:
        f.write(b'')
    assert filetype.is_file(test_file)
    assert 1 == filetype.get_file_count(test_file)