Exemple #1
0
def get_file_infos(location):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    T = contenttype.get_type(location)
    is_file = T.is_file
    is_dir = T.is_dir
    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = is_file and fileutils.file_extension(location) or ''
    infos['date'] = is_file and filetype.get_last_modified_date(
        location) or None
    infos['size'] = T.size
    infos['sha1'] = is_file and sha1(location) or None
    infos['md5'] = is_file and md5(location) or None
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = is_file and T.is_binary or None
    infos['is_text'] = is_file and T.is_text or None
    infos['is_archive'] = is_file and T.is_archive or None
    infos['is_media'] = is_file and T.is_media or None
    infos['is_source'] = is_file and T.is_source or None
    infos['is_script'] = is_file and T.is_script or None
    return [infos]
Exemple #2
0
def get_file_info(location, **kwargs):
    """
    Return a mapping of file information collected for the file at `location`.
    """
    result = OrderedDict()

    # TODO: move date and size these to the inventory collection step???
    result['date'] = get_last_modified_date(location) or None
    result['size'] = getsize(location) or 0

    sha1, md5, sha256 = multi_checksums(location, ('sha1', 'md5', 'sha256')).values()
    result['sha1'] = sha1
    result['md5'] = md5
    result['sha256'] = sha256

    collector = get_type(location)
    result['mime_type'] = collector.mimetype_file or None
    result['file_type'] = collector.filetype_file or None
    result['programming_language'] = collector.programming_language or None
    result['is_binary'] = bool(collector.is_binary)
    result['is_text'] = bool(collector.is_text)
    result['is_archive'] = bool(collector.is_archive)
    result['is_media'] = bool(collector.is_media)
    result['is_source'] = bool(collector.is_source)
    result['is_script'] = bool(collector.is_script)
    return result
def get_file_infos(location):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import sha1, md5
    from typecode import contenttype

    T = contenttype.get_type(location)
    is_file = T.is_file
    is_dir = T.is_dir
    infos = OrderedDict()
    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = is_file and fileutils.file_extension(location) or ''
    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    infos['sha1'] = is_file and sha1(location) or None
    infos['md5'] = is_file and md5(location) or None
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = is_file and T.is_binary or None
    infos['is_text'] = is_file and T.is_text or None
    infos['is_archive'] = is_file and T.is_archive or None
    infos['is_media'] = is_file and T.is_media or None
    infos['is_source'] = is_file and T.is_source or None
    infos['is_script'] = is_file and T.is_script or None
    return [infos]
    def __init__(self, location):
        if (not location
            or (not os.path.exists(location)
                and not filetype.is_broken_link(location))):
            raise IOError("[Errno 2] No such file or directory: "
                          "'%(location)r'" % locals())
        self.location = location
        # flags and values
        self.is_file = filetype.is_file(location)
        self.is_dir = filetype.is_dir(location)
        self.is_regular = filetype.is_regular(location)
        self.is_special = filetype.is_special(location)

        self.date = filetype.get_last_modified_date(location)

        self.is_link = filetype.is_link(location)
        self.is_broken_link = filetype.is_broken_link(location)

        # FIXME: the way the True and False values are checked in properties is verbose and contrived at best
        # and is due to use None/True/False as different values
        # computed on demand
        self._size = None
        self._link_target = None

        self._mimetype_python = None
        self._filetype_file = None
        self._mimetype_file = None
        self._filetype_pygments = None
        self._is_pdf_with_text = None
        self._is_text = None
        self._is_binary = None
        self._contains_text = None
    def __init__(self, location):
        if not location or (not os.path.exists(location) and not filetype.is_broken_link(location)):
            raise IOError("[Errno 2] No such file or directory: " "'%(location)r'" % locals())
        self.location = location
        # flags and values
        self.is_file = filetype.is_file(location)
        self.is_dir = filetype.is_dir(location)
        self.is_regular = filetype.is_regular(location)
        self.is_special = filetype.is_special(location)

        self.date = filetype.get_last_modified_date(location)

        self.is_link = filetype.is_link(location)
        self.is_broken_link = filetype.is_broken_link(location)

        # FIXME: the way the True and False values are checked in properties is verbose and contrived at best
        # and is due to use None/True/False as different values
        # computed on demand
        self._size = None
        self._link_target = None

        self._mimetype_python = None
        self._filetype_file = None
        self._mimetype_file = None
        self._filetype_pygments = None
        self._is_pdf_with_text = None
        self._is_text = None
        self._is_binary = None
Exemple #6
0
    def __init__(self, location):
        if (not location
            or (not os.path.exists(location)
                and not filetype.is_broken_link(location))):
            raise IOError("[Errno 2] No such file or directory: "
                          "'%(location)r'" % locals())
        self.location = location
        # flags and values
        self.is_file = filetype.is_file(location)
        self.is_dir = filetype.is_dir(location)
        self.is_regular = filetype.is_regular(location)
        self.is_special = filetype.is_special(location)

        self.date = filetype.get_last_modified_date(location)

        self.is_link = filetype.is_link(location)
        self.is_broken_link = filetype.is_broken_link(location)

        # computed on demand
        self._size = None
        self._link_target = None

        self._mimetype_python = None
        self._filetype_file = None
        self._mimetype_file = None
        self._filetype_pygments = None
        self._is_pdf_with_text = None
        self._is_text = None
        self._is_binary = None
Exemple #7
0
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or
    directory at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    if on_linux:
        location = path_to_bytes(location)
    else:
        location = path_to_unicode(location)

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        base_name = name
        extension = ''

    if on_linux:
        infos['name'] = path_to_unicode(name)
        infos['base_name'] = path_to_unicode(base_name)
        infos['extension'] = path_to_unicode(extension)
    else:
        infos['name'] = name
        infos['base_name'] = base_name
        infos['extension'] = extension

    infos['date'] = is_file and filetype.get_last_modified_date(
        location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, (
        'sha1',
        'md5',
    )))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    return infos
Exemple #8
0
def get_file_infos(location):
    """
    Return a mapping of file information collected from the file or
    directory at `location`.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from typecode import contenttype

    if on_linux:
        location = path_to_bytes(location)
    else:
        location = path_to_unicode(location)

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    name = fileutils.file_name(location)
    if is_file:
        base_name, extension = fileutils.splitext(location)
    else:
        base_name = name
        extension = ''

    if on_linux:
        infos['name'] = path_to_unicode(name)
        infos['base_name'] = path_to_unicode(base_name)
        infos['extension'] = path_to_unicode(extension)
    else:
        infos['name'] = name
        infos['base_name'] = base_name
        infos['extension'] = extension

    infos['date'] = is_file and filetype.get_last_modified_date(location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, ('sha1', 'md5',)))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    return infos
Exemple #9
0
def get_file_infos(location, as_list=True):
    """
    Return a list of dictionaries of informations collected from the file or
    directory at location.
    """
    from commoncode import fileutils
    from commoncode import filetype
    from commoncode.hash import multi_checksums
    from scancode import utils
    from typecode import contenttype

    infos = OrderedDict()
    is_file = filetype.is_file(location)
    is_dir = filetype.is_dir(location)

    T = contenttype.get_type(location)

    infos['type'] = filetype.get_type(location, short=False)
    infos['name'] = fileutils.file_name(location)
    infos['extension'] = is_file and fileutils.file_extension(location) or ''
    infos['date'] = is_file and filetype.get_last_modified_date(
        location) or None
    infos['size'] = T.size
    infos.update(multi_checksums(location, (
        'sha1',
        'md5',
    )))
    infos['files_count'] = is_dir and filetype.get_file_count(location) or None
    infos['mime_type'] = is_file and T.mimetype_file or None
    infos['file_type'] = is_file and T.filetype_file or None
    infos['programming_language'] = is_file and T.programming_language or None
    infos['is_binary'] = bool(is_file and T.is_binary)
    infos['is_text'] = bool(is_file and T.is_text)
    infos['is_archive'] = bool(is_file and T.is_archive)
    infos['is_media'] = bool(is_file and T.is_media)
    infos['is_source'] = bool(is_file and T.is_source)
    infos['is_script'] = bool(is_file and T.is_script)

    if as_list:
        return [infos]
    else:
        return infos