def get_file_infos(location):
    """
    Collect information about the file or directory at `location` and
    return it as a single-item list holding one ordered mapping.

    The extension, date, checksums and content-type details are only
    computed for files, and `files_count` only for directories. Any value
    that is not computed, or that is falsy (including a False flag), is
    reported as None, except for the extension which is reported as an
    empty string.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    content = contenttype.get_type(location)
    is_file = content.is_file
    is_dir = content.is_dir

    def when_file(compute, fallback=None):
        # Only call `compute` for files; falsy results collapse to `fallback`.
        if not is_file:
            return fallback
        return compute() or fallback

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = when_file(
        lambda: fileutils.file_extension(location), '')
    infos['date'] = when_file(
        lambda: filetype.get_last_modified_date(location))
    infos['size'] = content.size
    infos['sha1'] = when_file(lambda: sha1(location))
    infos['md5'] = when_file(lambda: md5(location))

    if is_dir:
        infos['files_count'] = filetype.get_file_count(location) or None
    else:
        infos['files_count'] = None

    # output key -> attribute name on the content type object
    described = (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
    )
    for key, attribute in described:
        infos[key] = (getattr(content, attribute) or None) if is_file else None

    flags = (
        'is_binary', 'is_text', 'is_archive',
        'is_media', 'is_source', 'is_script',
    )
    for flag in flags:
        # a False flag is reported as None, not False
        infos[flag] = (getattr(content, flag) or None) if is_file else None

    return [infos]
def get_file_info(location, **kwargs):
    """
    Build and return an ordered mapping of details about the file at
    `location`: last modified date, size, SHA1/MD5/SHA256 checksums and
    content-type details.

    Extra keyword arguments are accepted but not used. Empty descriptive
    values are reported as None, an empty size as 0 and the `is_*` flags
    are always plain booleans.
    """
    info = OrderedDict()

    # TODO: move date and size to the inventory collection step???
    info['date'] = get_last_modified_date(location) or None
    info['size'] = getsize(location) or 0

    checksums = multi_checksums(location, ('sha1', 'md5', 'sha256'))
    # the values are unpacked positionally, in the order requested above
    info['sha1'], info['md5'], info['sha256'] = checksums.values()

    detected = get_type(location)

    # output key -> attribute name on the detected type object
    described = (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
    )
    for key, attribute in described:
        info[key] = getattr(detected, attribute) or None

    flags = (
        'is_binary', 'is_text', 'is_archive',
        'is_media', 'is_source', 'is_script',
    )
    for flag in flags:
        info[flag] = bool(getattr(detected, flag))

    return info
def get_file_infos(location):
    """
    Return a one-item list containing an ordered mapping of information
    collected from the file or directory at `location`.

    File-only entries (extension, date, checksums, content-type details)
    are None for anything that is not a file, with the extension being an
    empty string instead. `files_count` is None for anything that is not a
    directory. Falsy values, False flags included, are also reported as
    None.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    typed = contenttype.get_type(location)
    is_file = typed.is_file
    is_dir = typed.is_dir

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = (
        (fileutils.file_extension(location) or '') if is_file else '')
    infos['date'] = (
        (filetype.get_last_modified_date(location) or None)
        if is_file else None)
    infos['size'] = typed.size
    infos['sha1'] = (sha1(location) or None) if is_file else None
    infos['md5'] = (md5(location) or None) if is_file else None
    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if is_dir else None)

    # each entry pairs the reported key with the attribute it is read from
    attributes_by_key = (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
        ('is_binary', 'is_binary'),
        ('is_text', 'is_text'),
        ('is_archive', 'is_archive'),
        ('is_media', 'is_media'),
        ('is_source', 'is_source'),
        ('is_script', 'is_script'),
    )
    for key, attribute in attributes_by_key:
        if is_file:
            # a falsy attribute, such as a False flag, becomes None
            infos[key] = getattr(typed, attribute) or None
        else:
            infos[key] = None

    return [infos]
def __init__(self, location):
    """
    Initialize this object for the path at `location`.

    Raise an IOError if `location` is empty, or if it does not exist and
    is not a broken link either. Basic filesystem flags and the last
    modified date are collected right away; the remaining details start
    as None and are computed on demand.
    """
    usable = location and (
        os.path.exists(location) or filetype.is_broken_link(location))
    if not usable:
        # NOTE(review): %r already quotes its value, so the path shows up
        # wrapped in doubled quotes in this message.
        raise IOError(
            "[Errno 2] No such file or directory: '%(location)r'"
            % {'location': location})

    self.location = location

    # flags and values
    self.is_file = filetype.is_file(location)
    self.is_dir = filetype.is_dir(location)
    self.is_regular = filetype.is_regular(location)
    self.is_special = filetype.is_special(location)
    self.date = filetype.get_last_modified_date(location)
    self.is_link = filetype.is_link(location)
    self.is_broken_link = filetype.is_broken_link(location)

    # FIXME: the way True and False values are checked in the properties
    # is verbose and contrived at best; this comes from using None, True
    # and False as three distinct values.

    # computed on demand: None means "not computed yet"
    self._size = self._link_target = None
    self._mimetype_python = self._mimetype_file = None
    self._filetype_file = self._filetype_pygments = None
    self._is_pdf_with_text = None
    self._is_text = self._is_binary = self._contains_text = None
def __init__(self, location):
    """
    Initialize this object for the path at `location`.

    An IOError is raised when `location` is empty, or when it neither
    exists nor is a broken link. Filesystem flags and the last modified
    date are collected eagerly; every other detail starts as None and is
    computed on demand.
    """
    message = "[Errno 2] No such file or directory: '%(location)r'"
    if not location:
        raise IOError(message % {'location': location})
    if not (os.path.exists(location) or filetype.is_broken_link(location)):
        raise IOError(message % {'location': location})

    self.location = location

    # flags and values
    self.is_file = filetype.is_file(location)
    self.is_dir = filetype.is_dir(location)
    self.is_regular = filetype.is_regular(location)
    self.is_special = filetype.is_special(location)
    self.date = filetype.get_last_modified_date(location)
    self.is_link = filetype.is_link(location)
    self.is_broken_link = filetype.is_broken_link(location)

    # FIXME: the way True and False values are checked in the properties
    # is verbose and contrived at best; this comes from using None, True
    # and False as three distinct values.

    # computed on demand: None means "not computed yet"
    self._size = self._link_target = None
    self._mimetype_python = self._mimetype_file = None
    self._filetype_file = self._filetype_pygments = None
    self._is_pdf_with_text = None
    self._is_text = self._is_binary = None
def __init__(self, location):
    """
    Initialize this object for the path at `location`.

    Raise an IOError if `location` is empty, or if it does not exist and
    is not a broken link either. Basic filesystem flags and the last
    modified date are collected right away; the remaining details start
    as None and are computed on demand.
    """
    reachable = location and (
        os.path.exists(location) or filetype.is_broken_link(location))
    if not reachable:
        raise IOError(
            "[Errno 2] No such file or directory: '%(location)r'"
            % {'location': location})

    self.location = location

    # flags and values
    self.is_file = filetype.is_file(location)
    self.is_dir = filetype.is_dir(location)
    self.is_regular = filetype.is_regular(location)
    self.is_special = filetype.is_special(location)
    self.date = filetype.get_last_modified_date(location)
    self.is_link = filetype.is_link(location)
    self.is_broken_link = filetype.is_broken_link(location)

    # computed on demand: None means "not computed yet"
    self._size = self._link_target = None
    self._mimetype_python = self._mimetype_file = None
    self._filetype_file = self._filetype_pygments = None
    self._is_pdf_with_text = None
    self._is_text = self._is_binary = None
def get_file_infos(location):
    """
    Collect information about the file or directory at `location` and
    return it as an ordered mapping.

    The date and the content-type details are only computed for files and
    `files_count` only for directories; they are None otherwise. The
    `is_*` flags are always plain booleans. A directory has an empty
    extension and a base name equal to its name.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    # work on a bytes path on Linux and on a unicode path elsewhere
    location = path_to_bytes(location) if on_linux else path_to_unicode(location)

    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    content = contenttype.get_type(location)

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)

    name = fileutils.file_name(location)
    base_name, extension = (
        fileutils.splitext(location) if is_file else (name, ''))

    if on_linux:
        # convert the byte names back before reporting them
        name, base_name, extension = [
            path_to_unicode(part) for part in (name, base_name, extension)]
    infos['name'] = name
    infos['base_name'] = base_name
    infos['extension'] = extension

    infos['date'] = (
        (filetype.get_last_modified_date(location) or None)
        if is_file else None)
    infos['size'] = content.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if is_dir else None)

    # output key -> attribute name on the content type object
    described = (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
    )
    for key, attribute in described:
        infos[key] = (getattr(content, attribute) or None) if is_file else None

    flags = (
        'is_binary', 'is_text', 'is_archive',
        'is_media', 'is_source', 'is_script',
    )
    for flag in flags:
        infos[flag] = bool(is_file and getattr(content, flag))

    return infos
def get_file_infos(location):
    """
    Return an ordered mapping of information collected from the file or
    directory at `location`.

    File-only entries (date and content-type details) are None for
    anything that is not a file, and `files_count` is None for anything
    that is not a directory. The `is_*` flags are always plain booleans.
    For a non-file, the base name is the name and the extension is empty.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    # the path is handled as bytes on Linux and as unicode elsewhere
    to_native_path = path_to_bytes if on_linux else path_to_unicode
    location = to_native_path(location)

    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    typed = contenttype.get_type(location)

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)

    name = fileutils.file_name(location)
    if not is_file:
        base_name, extension = name, ''
    else:
        base_name, extension = fileutils.splitext(location)

    named_parts = (
        ('name', name),
        ('base_name', base_name),
        ('extension', extension),
    )
    for key, part in named_parts:
        # on Linux the byte names are converted back before being reported
        infos[key] = path_to_unicode(part) if on_linux else part

    if is_file:
        infos['date'] = filetype.get_last_modified_date(location) or None
    else:
        infos['date'] = None
    infos['size'] = typed.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    if is_dir:
        infos['files_count'] = filetype.get_file_count(location) or None
    else:
        infos['files_count'] = None

    # reported key -> attribute it is read from
    described = (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
    )
    for key, attribute in described:
        infos[key] = (getattr(typed, attribute) or None) if is_file else None

    flags = (
        'is_binary', 'is_text', 'is_archive',
        'is_media', 'is_source', 'is_script',
    )
    for flag in flags:
        infos[flag] = bool(getattr(typed, flag)) if is_file else False

    return infos
def get_file_infos(location, as_list=True):
    """
    Return information collected from the file or directory at `location`
    as an ordered mapping. The mapping is wrapped in a single-item list
    when `as_list` is True (the default) and returned bare otherwise.

    The extension, date and content-type details are only computed for
    files and `files_count` only for directories. Values that are not
    computed, or that are falsy, are reported as None, except for the
    extension which is reported as an empty string. The `is_*` flags are
    always plain booleans.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    # Explicit conditional expressions replace the `cond and a or b`
    # chains; the `or` fallbacks keep falsy values reported as before.
    infos['extension'] = (
        (fileutils.file_extension(location) or '') if is_file else '')
    infos['date'] = (
        (filetype.get_last_modified_date(location) or None)
        if is_file else None)
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if is_dir else None)

    infos['mime_type'] = (T.mimetype_file or None) if is_file else None
    infos['file_type'] = (T.filetype_file or None) if is_file else None
    infos['programming_language'] = (
        (T.programming_language or None) if is_file else None)

    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    return [infos] if as_list else infos