예제 #1
0
class IdentifyFileFormat(DBTask):
    """Task that identifies the format of a file using the fido library.

    Fido hands its findings to a ``handle_matches`` callback. ``run``
    replaces that callback with this task's own ``handle_matches`` so the
    detected format name can be captured in ``self.lastFmt`` and returned.
    """

    queue = 'file_operation'

    def handle_matches(self, fullname, matches, delta_t, matchtype=''):
        """
        Callback that stores the format name of the last reported match

        Args:
            fullname: Name of the file the matches belong to
            matches: Sequence of (format, signature name) pairs
            delta_t: Not used by this handler
            matchtype: Not used by this handler

        Raises:
            ValueError: If ``matches`` is empty
        """
        if not matches:
            raise ValueError("No matches for %s" % fullname)

        # Only the last pair is considered; its signature name is ignored.
        fmt, _sig_name = matches[-1]
        self.lastFmt = fmt.find('name').text

    def run(self, filename=None):
        """
        Identifies the format of the file using the fido library

        Args:
            filename: The filename to identify

        Returns:
            The format of the file

        Raises:
            ValueError: If no format was recorded for the file
        """
        # Drop any result left over from an earlier run so that it can never
        # be returned for a different file.
        self.lastFmt = None

        self.fid = Fido()
        self.fid.handle_matches = self.handle_matches
        self.fid.identify_file(filename)

        # If the callback never stored a format, fail the same way as the
        # "no matches" case instead of raising AttributeError or returning
        # stale data.
        if self.lastFmt is None:
            raise ValueError("No matches for %s" % filename)

        self.set_progress(100, total=100)

        return self.lastFmt

    def undo(self, filename=None):
        """Nothing to undo; identification does not modify anything here."""
        pass

    def event_outcome_success(self, filename=None, block_size=65536, algorithm='SHA-256'):
        """
        Builds the success message for this task

        Args:
            filename: The filename that was identified
            block_size: Not used by this method
            algorithm: Not used by this method

        Returns:
            The success message
        """
        return "Identified format of %s" % filename
예제 #2
0
def test_fido_cache_halting_file(fido_cache_halting_file):
    """Check that FidoDetector is not much slower (or faster) than raw Fido.

    The same file is identified once with a plain ``Fido`` object and once
    through ``FidoDetector``; the two wall-clock durations must lie within
    two seconds of each other. Note that constructing the ``Fido`` object is
    not timed, whereas constructing the ``FidoDetector`` is.
    """
    raw_fido = Fido(
        quiet=True,
        format_files=["formats-v95.xml", "format_extensions.xml"],
    )

    # Time the plain Fido identification.
    raw_started = time.time()
    raw_fido.identify_file(fido_cache_halting_file)
    raw_duration = time.time() - raw_started

    # Time construction plus detection through FidoDetector.
    detector_started = time.time()
    detector = FidoDetector(fido_cache_halting_file)
    detector.detect()
    detector_duration = time.time() - detector_started

    # For the given test file a gap of up to 2 seconds is tolerated.
    gap = abs(raw_duration - detector_duration)
    assert gap < 2
예제 #3
0
class FormatIdentification():
    """
    File Format Identification

    Wraps a Fido instance and replaces its ``handle_matches`` callback with
    ``print_matches`` so that the identifier of the matched format is kept in
    ``self.lastFmt``.
    """
    def __init__(self):
        self.fid = Fido()
        self.fid.handle_matches = self.print_matches
        # Identifier recorded for the most recently identified file.
        self.lastFmt = None

    def identify_file(self, entry):
        """
        This function identifies the file format of every file that is handed over.

        Returns the identifier recorded for ``entry``, or None when no match
        was reported for it.
        """
        # Forget the previous file's result; otherwise a file without any
        # match would be reported with the preceding file's identifier.
        self.lastFmt = None
        self.fid.identify_file(entry)
        return self.lastFmt

    def print_matches(self, fullname, matches, delta_t, matchtype=''):
        """
        Callback that records the identifier of every reported match.

        ``matches`` is iterated as (format, signature) pairs; each pair
        overwrites ``self.lastFmt``, so the last one wins. ``fullname``,
        ``delta_t`` and ``matchtype`` are not used.
        """
        for fmt, _sig in matches:
            self.lastFmt = self.fid.get_puid(fmt)
예제 #4
0
class FormatIdentification():
    """
    File Format Identification

    Wraps a Fido instance and replaces its ``handle_matches`` callback with
    ``print_matches`` so that the identifier of the matched format is kept in
    ``self.lastFmt``.
    """
    def __init__(self):
        self.fid = Fido()
        self.fid.handle_matches = self.print_matches
        # Identifier recorded for the most recently identified file.
        self.lastFmt = None

    def identify_file(self, entry):
        """
        This function identifies the file format of every file that is handed over.

        @rtype:     string or None
        @return:    identifier recorded for the entry, None if no match was reported
        """
        # Forget the previous file's result; otherwise a file without any
        # match would be reported with the preceding file's identifier.
        self.lastFmt = None
        self.fid.identify_file(entry)
        return self.lastFmt

    def get_mime_for_puid(self, puid):
        """
        Get mime type for a given puid

        The default is returned when the puid is unknown, when the format
        entry does not have exactly one child whose tag ends with "mime", or
        when that child has no text.

        @type       puid: string
        @param      puid: PRONOM Persistent Unique Identifier

        @rtype:     string
        @return:    mime type string (default: application/octet-stream)
        """
        default_mime = "application/octet-stream"
        mime_tag = "mime"
        try:
            fmtres = self.fid.puid_format_map[puid]
        except KeyError:
            # Unknown identifier: honour the documented default.
            return default_mime
        mime_nodes = [child for child in fmtres if child.tag.endswith(mime_tag)]
        if len(mime_nodes) != 1:
            return default_mime
        mime_text = mime_nodes[0].text
        if mime_text is None:
            return default_mime
        return mime_text.strip()

    def print_matches(self, fullname, matches, delta_t, matchtype=''):
        """
        Callback that records the identifier of every reported match.

        ``matches`` is iterated as (format, signature) pairs; each pair
        overwrites ``self.lastFmt``, so the last one wins. ``fullname``,
        ``delta_t`` and ``matchtype`` are not used.
        """
        for fmt, _sig in matches:
            self.lastFmt = self.fid.get_puid(fmt)
예제 #5
0
class FormatIdentification():
    """
    File Format Identification

    Wraps a Fido instance and replaces its ``handle_matches`` callback with
    ``print_matches`` so that the identifier of the matched format is kept in
    ``self.lastFmt``. Every operation refuses to run while the module-level
    ``fido_disabled`` flag (defined outside this class) is true.
    """

    def __init__(self):
        # Defined unconditionally so the attributes always exist, even when
        # fido is disabled and no Fido instance can be created.
        self.fid = None
        self.lastFmt = None
        if not fido_disabled:
            self.fid = Fido()
            self.fid.handle_matches = self.print_matches

    @staticmethod
    def _require_fido():
        """Raise AssertionError when fido is flagged as disabled."""
        # Raised explicitly rather than with an ``assert`` statement, which is
        # stripped when Python runs with -O; type and message are unchanged.
        if fido_disabled:
            raise AssertionError("Fido module is not available!")

    def identify_file(self, entry):
        """
        This function identifies the file format of every file that is handed over.

        :param entry: file handed to fido for identification
        :return: identifier recorded for the entry, None if no match was reported
        :raises AssertionError: if fido is disabled
        """
        self._require_fido()
        # Forget the previous file's result; otherwise a file without any
        # match would be reported with the preceding file's identifier.
        self.lastFmt = None
        self.fid.identify_file(entry)
        return self.lastFmt

    def get_mime_for_puid(self, puid):
        """
        Get mime type for a given puid

        The default is returned when the puid is unknown, when the format
        entry does not have exactly one child whose tag ends with "mime", or
        when that child has no text.

        :param puid: PRONOM Persistent Unique Identifier
        :return: mime type string (default: application/octet-stream)
        :raises AssertionError: if fido is disabled
        """
        self._require_fido()
        default_mime = "application/octet-stream"
        mime_tag = "mime"
        try:
            fmtres = self.fid.puid_format_map[puid]
        except KeyError:
            # Unknown identifier: honour the documented default.
            return default_mime
        mime_nodes = [child for child in fmtres if child.tag.endswith(mime_tag)]
        if len(mime_nodes) != 1:
            return default_mime
        mime_text = mime_nodes[0].text
        if mime_text is None:
            return default_mime
        return mime_text.strip()

    def print_matches(self, fullname, matches, delta_t, matchtype=''):
        """
        Callback that records the identifier of every reported match.

        ``matches`` is iterated as (format, signature) pairs; each pair
        overwrites ``self.lastFmt``, so the last one wins. ``fullname``,
        ``delta_t`` and ``matchtype`` are not used.

        :raises AssertionError: if fido is disabled
        """
        self._require_fido()
        for fmt, _sig in matches:
            self.lastFmt = self.fid.get_puid(fmt)