def test_get_type(self):
    """
    Check the single-letter type code that filetype.get_type reports for
    every directory and file extracted from 'filetype/types.tar'.
    """
    extracted_dir = self.extract_test_tar('filetype/types.tar', verbatim=True)

    # Collect (entry name, type code) pairs: for each directory visited by
    # os.walk, its sub-directories come first, then its files.
    found = []
    for top, dir_names, file_names in os.walk(extracted_dir):
        for entry in dir_names + file_names:
            entry_type = filetype.get_type(os.path.join(top, entry))
            found.append((entry, entry_type))

    very_long_name = (
        '0-REGTYPE-VEEEERY_LONG_NAME___________________________________'
        '______________________________________________________________'
        '____________________155'
    )
    expected = [
        ('5-DIRTYPE', 'd'),
        ('0-REGTYPE', 'f'),
        ('0-REGTYPE-TEXT', 'f'),
        (very_long_name, 'f'),
        ('1-LNKTYPE', 'f'),
        ('S-SPARSE', 'f'),
        ('S-SPARSE-WITH-NULLS', 'f'),
    ]

    # symlinks and special files are not supported on win
    if on_posix:
        expected.extend([
            ('2-SYMTYPE', 'l'),
            ('6-FIFOTYPE', 's'),
        ])

    # Walk order is not part of the contract: compare sorted lists.
    assert sorted(expected) == sorted(found)
def get_file_infos(location):
    """
    Collect information about the file or directory at `location` and
    return it as a one-item list holding a single ordered mapping.

    Entries that only apply to files (date, checksums, mime and file type,
    programming language and the `is_*` flags) are None when `location` is
    not a file, and `extension` is then an empty string. `files_count` is
    only computed for directories and is None otherwise.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    ctype = contenttype.get_type(location)
    is_file = ctype.is_file
    is_dir = ctype.is_dir

    # NOTE(review): every `(value or None)` below reports any falsy value
    # (False, 0, '') as None, so for a file a False flag cannot be told apart
    # from "not a file", and a directory whose count is 0 gets None. Confirm
    # whether callers rely on this before changing it.
    infos = OrderedDict([
        ('type', filetype.get_type(location, short=False)),
        ('name', fileutils.file_name(location)),
        ('extension',
         (fileutils.file_extension(location) or '') if is_file else ''),
        ('date',
         (filetype.get_last_modified_date(location) or None)
         if is_file else None),
        ('size', ctype.size),
        ('sha1', (sha1(location) or None) if is_file else None),
        ('md5', (md5(location) or None) if is_file else None),
        ('files_count',
         (filetype.get_file_count(location) or None) if is_dir else None),
        ('mime_type', (ctype.mimetype_file or None) if is_file else None),
        ('file_type', (ctype.filetype_file or None) if is_file else None),
        ('programming_language',
         (ctype.programming_language or None) if is_file else None),
        ('is_binary', (ctype.is_binary or None) if is_file else None),
        ('is_text', (ctype.is_text or None) if is_file else None),
        ('is_archive', (ctype.is_archive or None) if is_file else None),
        ('is_media', (ctype.is_media or None) if is_file else None),
        ('is_source', (ctype.is_source or None) if is_file else None),
        ('is_script', (ctype.is_script or None) if is_file else None),
    ])
    return [infos]
def get_file_infos(location):
    """
    Return a one-item list containing an ordered mapping of information
    about the file or directory at `location`.

    File-specific entries are None (or '' for `extension`) when `location`
    is not a file; `files_count` is None when it is not a directory.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    T = contenttype.get_type(location)
    is_file, is_dir = T.is_file, T.is_dir

    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)

    if is_file:
        infos['extension'] = fileutils.file_extension(location) or ''
        infos['date'] = filetype.get_last_modified_date(location) or None
    else:
        infos['extension'] = ''
        infos['date'] = None

    infos['size'] = T.size

    if is_file:
        infos['sha1'] = sha1(location) or None
        infos['md5'] = md5(location) or None
    else:
        infos['sha1'] = None
        infos['md5'] = None

    if is_dir:
        # NOTE(review): a falsy count (such as 0) is reported as None.
        infos['files_count'] = filetype.get_file_count(location) or None
    else:
        infos['files_count'] = None

    # Output key -> attribute of the content type object. The attribute is
    # only read for files, and any falsy value (including False) becomes
    # None. NOTE(review): confirm callers expect None rather than False.
    file_only_attributes = (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
        ('is_binary', 'is_binary'),
        ('is_text', 'is_text'),
        ('is_archive', 'is_archive'),
        ('is_media', 'is_media'),
        ('is_source', 'is_source'),
        ('is_script', 'is_script'),
    )
    for key, attribute in file_only_attributes:
        infos[key] = (getattr(T, attribute) or None) if is_file else None

    return [infos]
def get_file_infos(location):
    """
    Return an ordered mapping of information collected from the file or
    directory at `location`.

    `date`, `mime_type`, `file_type` and `programming_language` are None
    and the `is_*` flags are False when `location` is not a file.
    `files_count` is None when it is not a directory.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    # Paths are handled as bytes on Linux and as unicode elsewhere.
    location = path_to_bytes(location) if on_linux else path_to_unicode(location)

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)

    name = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        base_name, extension = name, ''

    name_values = [name, base_name, extension]
    if on_linux:
        # The path is bytes on Linux: convert the name parts for reporting.
        name_values = [path_to_unicode(value) for value in name_values]
    for key, value in zip(('name', 'base_name', 'extension'), name_values):
        infos[key] = value

    infos['date'] = (
        (filetype.get_last_modified_date(location) or None)
        if is_file else None
    )
    infos['size'] = T.size

    # Merge in whatever mapping multi_checksums returns for these two
    # checksum names (presumably keyed 'sha1' and 'md5' -- TODO confirm).
    infos.update(multi_checksums(location, ('sha1', 'md5',)))

    # NOTE(review): a falsy count (such as 0) is reported as None.
    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if is_dir else None
    )

    for key, attribute in (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
    ):
        infos[key] = (getattr(T, attribute) or None) if is_file else None

    # Flags are always real booleans; they are False for non-files.
    for flag in ('is_binary', 'is_text', 'is_archive',
                 'is_media', 'is_source', 'is_script'):
        infos[flag] = bool(is_file and getattr(T, flag))

    return infos
def get_file_infos(location):
    """
    Build and return an ordered mapping describing the file or directory at
    `location`: its type, name parts, date, size, checksums, file count,
    mime/file type, programming language and `is_*` boolean flags.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    # Use a bytes path on Linux and a unicode path everywhere else.
    if on_linux:
        location = path_to_bytes(location)
    else:
        location = path_to_unicode(location)

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)

    # Only files are split into a base name and an extension.
    name = fileutils.file_name(location)
    base_name, extension = (
        fileutils.splitext(location) if is_file else (name, '')
    )

    # On Linux the name parts come from a bytes path: convert them back.
    infos['name'] = path_to_unicode(name) if on_linux else name
    infos['base_name'] = path_to_unicode(base_name) if on_linux else base_name
    infos['extension'] = path_to_unicode(extension) if on_linux else extension

    if is_file:
        infos['date'] = filetype.get_last_modified_date(location) or None
    else:
        infos['date'] = None
    infos['size'] = T.size

    checksums = multi_checksums(location, ('sha1', 'md5',))
    infos.update(checksums)

    if is_dir:
        # NOTE(review): a falsy count (such as 0) is reported as None.
        infos['files_count'] = filetype.get_file_count(location) or None
    else:
        infos['files_count'] = None

    # Falsy values are reported as None, as are all of these for non-files.
    infos['mime_type'] = (T.mimetype_file or None) if is_file else None
    infos['file_type'] = (T.filetype_file or None) if is_file else None
    infos['programming_language'] = (
        (T.programming_language or None) if is_file else None
    )

    # Flags are always booleans: False for anything that is not a file.
    infos['is_binary'] = bool(T.is_binary) if is_file else False
    infos['is_text'] = bool(T.is_text) if is_file else False
    infos['is_archive'] = bool(T.is_archive) if is_file else False
    infos['is_media'] = bool(T.is_media) if is_file else False
    infos['is_source'] = bool(T.is_source) if is_file else False
    infos['is_script'] = bool(T.is_script) if is_file else False

    return infos
def get_file_infos(location, as_list=True):
    """
    Collect information about the file or directory at `location` into an
    ordered mapping.

    Return the mapping wrapped in a one-item list when `as_list` is true
    (the default), or the bare mapping otherwise.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    # NOTE(review): `utils` is not referenced in this function; the import
    # is kept in case it is relied on for side effects -- confirm before
    # removing.
    from scancode import utils
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)
    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)

    if is_file:
        infos['extension'] = fileutils.file_extension(location) or ''
        infos['date'] = filetype.get_last_modified_date(location) or None
    else:
        infos['extension'] = ''
        infos['date'] = None

    infos['size'] = T.size

    # Merge in whatever mapping multi_checksums returns for these two
    # checksum names (presumably keyed 'sha1' and 'md5' -- TODO confirm).
    infos.update(multi_checksums(location, ('sha1', 'md5',)))

    # NOTE(review): a falsy count (such as 0) is reported as None.
    infos['files_count'] = (
        (filetype.get_file_count(location) or None) if is_dir else None
    )

    # Descriptive values: falsy values and non-files are reported as None.
    for key, attribute in (
        ('mime_type', 'mimetype_file'),
        ('file_type', 'filetype_file'),
        ('programming_language', 'programming_language'),
    ):
        infos[key] = (getattr(T, attribute) or None) if is_file else None

    # Flags are always real booleans; they are False for non-files.
    for flag in ('is_binary', 'is_text', 'is_archive',
                 'is_media', 'is_source', 'is_script'):
        infos[flag] = bool(is_file and getattr(T, flag))

    return [infos] if as_list else infos