Example #1
0
    def extract(self, icon):
        """Build one ``image[N]`` metadata group for every icon header.

        Width, height, colour count and bit depth are read from each
        ``icon_header`` entry; compression and compression rate are added
        when the matching ``icon_data[N]`` fields exist in *icon*.
        """
        for position, entry in enumerate(icon.array("icon_header")):
            picture = Metadata(self)

            # Dimensions and colour information come from the header entry
            picture.width = entry["width"].value
            picture.height = entry["height"].value
            depth = entry["bpp"].value
            palette_size = entry["nb_color"].value
            if palette_size == 0:
                # No colour count and no depth stored: use 8 bits per pixel
                if depth == 0:
                    depth = 8
            else:
                picture.nb_colors = palette_size
                # Depth not stored: derive it from the colour count if known
                if depth == 0 and palette_size in self.color_to_bpp:
                    depth = self.color_to_bpp[palette_size]
            picture.bits_per_pixel = depth

            title_format = _("Icon #%u (%sx%s)")
            title_values = (1 + position,
                            picture.get("width", "?"),
                            picture.get("height", "?"))
            picture.setHeader(title_format % title_values)

            # Optional information stored next to the pixel data
            codec_path = "icon_data[%u]/header/codec" % position
            if codec_path in icon:
                picture.compression = icon[codec_path].display
            pixels_path = "icon_data[%u]/pixels" % position
            if pixels_path in icon:
                computeComprRate(picture, icon[pixels_path].size)

            # Register the finished group
            self.addGroup("image[%u]" % position, picture)
Example #2
0
    def extractAVI(self, headers, **kwargs):
        """Extract stream, header, bit-rate and index metadata from AVI headers.

        headers: field set providing the "stream" array and, optionally,
            the "avi_hdr", "/movie/size" and "/index" fields.
        kwargs: only "scan_index" is read. It defaults to True when omitted;
            a false value suppresses the "Has audio/video index" comment.
        """
        audio_index = 1
        for stream in headers.array("stream"):
            if "stream_hdr/stream_type" not in stream:
                continue
            stream_type = stream["stream_hdr/stream_type"].value
            if stream_type == "vids":
                if "stream_hdr" in stream:
                    meta = Metadata(self)
                    self.extractAVIVideo(stream["stream_hdr"], meta)
                    self.addGroup("video", meta, "Video stream")
            elif stream_type == "auds":
                if "stream_fmt" in stream:
                    meta = Metadata(self)
                    self.extractAVIAudio(stream["stream_fmt"], meta)
                    self.addGroup("audio[%u]" % audio_index, meta, "Audio stream")
                    # Audio groups are numbered from 1, in order of appearance
                    audio_index += 1
        if "avi_hdr" in headers:
            self.useAviHeader(headers["avi_hdr"])

        # Compute global bit rate
        # (presumably "/movie/size" is a byte count, hence the * 8 -- TODO confirm)
        if self.has("duration") and "/movie/size" in headers:
            self.bit_rate = float(
                headers["/movie/size"].value) * 8 / timedelta2seconds(self.get('duration'))

        # Video has index?
        # dict.get() is used because indexing kwargs directly raises KeyError
        # when the caller does not pass scan_index at all.
        scan_index = kwargs.get('scan_index', True)
        if scan_index and "/index" in headers:
            self.comment = _("Has audio/video index (%s)") \
                           % humanFilesize(headers["/index"].size // 8)
Example #3
0
 def processSubtitle(self, track):
     """Register a "subtitle[]" metadata group for *track*.

     The codec identifier is stored as the compression when the track
     provides one; a missing field is silently ignored.
     """
     subtitle = Metadata(self)
     self.trackCommon(track, subtitle)
     try:
         subtitle.compression = track["CodecID/string"].value
     except MissingField:
         # The codec identifier is optional
         pass
     self.addGroup("subtitle[]", subtitle, "Subtitle")
Example #4
0
 def processSubtitle(self, track):
     """Add the metadata of one subtitle track as a "subtitle[]" group.

     Common track attributes are filled in by trackCommon(); the codec
     identifier becomes the compression unless the field is missing.
     """
     track_meta = Metadata(self)
     self.trackCommon(track, track_meta)
     try:
         track_meta.compression = track["CodecID/string"].value
     except MissingField:
         # No codec identifier in this track: leave compression unset
         pass
     self.addGroup("subtitle[]", track_meta, "Subtitle")
Example #5
0
 def processVideo(self, track):
     """Register a "video[]" metadata group for *track*.

     Compression, width and height are read in that order; the first
     missing field stops the reading, keeping whatever was already set.
     """
     stream = Metadata(self)
     self.trackCommon(track, stream)
     try:
         stream.compression = track["CodecID/string"].value
         if "Video" in track:
             stream.width = track["Video/PixelWidth/unsigned"].value
             stream.height = track["Video/PixelHeight/unsigned"].value
     except MissingField:
         # Best effort: the group is still added with partial information
         pass
     self.addGroup("video[]", stream, "Video stream")
Example #6
0
 def processVideo(self, track):
     """Add the metadata of one video track as a "video[]" group.

     The codec identifier and, when a "Video" sub-tree exists, the pixel
     width and height are copied; a MissingField aborts the copy quietly.
     """
     video_meta = Metadata(self)
     self.trackCommon(track, video_meta)
     try:
         video_meta.compression = track["CodecID/string"].value
         if "Video" in track:
             video_meta.width = track["Video/PixelWidth/unsigned"].value
             video_meta.height = track["Video/PixelHeight/unsigned"].value
     except MissingField:
         # Fields read before the missing one stay set
         pass
     self.addGroup("video[]", video_meta, "Video stream")
Example #7
0
 def processAudio(self, track):
     """Register an "audio[]" metadata group for *track*.

     Sample rate, channel count and bit depth are taken from the "Audio"
     sub-tree when present; the codec identifier becomes the compression.
     """
     stream = Metadata(self)
     self.trackCommon(track, stream)
     if "Audio" in track:
         rate = self.getDouble(track, "Audio/SamplingFrequency")
         if rate is not None:
             stream.sample_rate = rate
         # Optional unsigned fields mapped onto their metadata attribute
         optional_fields = (
             ("Audio/Channels/unsigned", "nb_channel"),
             ("Audio/BitDepth/unsigned", "bits_per_sample"),
         )
         for path, attribute in optional_fields:
             if path in track:
                 setattr(stream, attribute, track[path].value)
     if "CodecID/string" in track:
         stream.compression = track["CodecID/string"].value
     self.addGroup("audio[]", stream, "Audio stream")
Example #8
0
 def extract(self, tar):
     """Add one metadata group per "file" entry of the TAR archive.

     Processing stops with a warning once the limit returned by
     maxNbFile() is reached (no limit when it returns None).
     """
     limit = maxNbFile(self)
     for position, entry in enumerate(tar.array("file")):
         if limit is not None and position >= limit:
             self.warning("TAR archive contains many files, "
                          "but only first %s files are processed" % limit)
             break
         entry_meta = Metadata(self)
         self.extractFile(entry, entry_meta)
         # Name the group after the file when a filename was extracted
         label = ('File "%s"' % entry_meta.getText('filename')
                  if entry_meta.has("filename") else "File")
         self.addGroup(entry.name, entry_meta, label)
Example #9
0
 def processFile(self, field):
     """Add a metadata group describing one archived file.

     Sizes are read from the "data_desc" sub-tree when it exists and from
     the entry itself otherwise; a zero compressed size is not stored.
     """
     entry_meta = Metadata(self)
     entry_meta.filename = field["filename"].value
     entry_meta.creation_date = field["last_mod"].value
     entry_meta.compression = field["compression"].display

     # Pick the pair of size fields that applies to this entry
     if "data_desc" in field:
         size_path = "data_desc/file_uncompressed_size"
         packed_path = "data_desc/file_compressed_size"
     else:
         size_path = "uncompressed_size"
         packed_path = "compressed_size"
     entry_meta.file_size = field[size_path].value
     packed_size = field[packed_path].value
     if packed_size:
         entry_meta.compr_size = packed_size

     computeCompressionRate(entry_meta)
     label = "File \"%s\"" % entry_meta.get('filename')
     self.addGroup(field.name, entry_meta, label)
Example #10
0
 def extract(self, tar):
     """Add one metadata group per "file" entry of the TAR archive.

     Stops with a warning once the limit returned by maxNbFile() is
     reached (no limit when it returns None). Group titles are translated.
     """
     limit = maxNbFile(self)
     for position, entry in enumerate(tar.array("file")):
         if limit is not None and position >= limit:
             self.warning("TAR archive contains many files, "
                          "but only first %s files are processed"
                          % limit)
             break
         entry_meta = Metadata(self)
         self.extractFile(entry, entry_meta)
         # Name the group after the file when a filename was extracted
         label = (_('File "%s"') % entry_meta.getText('filename')
                  if entry_meta.has("filename") else _("File"))
         self.addGroup(entry.name, entry_meta, label)
Example #11
0
 def extract(self, mar):
     """Describe a Microsoft Archive and each of its "file" entries.

     Sets the archive comment and format version, then adds one group per
     file until the limit returned by maxNbFile() is reached (None means
     no limit).
     """
     file_count = mar["nb_file"].value
     self.comment = "Contains %s files" % file_count
     version = mar["version"].value
     self.format_version = "Microsoft Archive version %s" % version

     limit = maxNbFile(self)
     for position, entry in enumerate(mar.array("file")):
         if limit is not None and position >= limit:
             self.warning("MAR archive contains many files, "
                          "but only first %s files are processed"
                          % limit)
             break
         entry_meta = Metadata(self)
         entry_meta.filename = entry["filename"].value
         entry_meta.compression = "None"
         entry_meta.file_size = entry["filesize"].value
         group_name = entry.name
         label = "File \"%s\"" % entry_meta.getText('filename')
         self.addGroup(group_name, entry_meta, label)
Example #12
0
    def traverse_dir(self, p_dir_entry, p_cur_path, p_list):
        """Walk a directory extent and add a "file[]" group for each file.

        p_dir_entry: directory record; "extent_loc" multiplied by
            SECTOR_SIZE gives the start offset and "size" the number of
            bytes to scan.
        p_cur_path: path prefix prepended to every filename found here.
        p_list: fields handed to find_entry() to look up the record
            located at a given offset.

        Entries whose "file_flags" value has flag 2 set are traversed
        recursively with their name appended to the path; every other
        named entry becomes a "file[]" metadata group. Entries with a
        name length of 1 or less are skipped.
        """
        l_loc = p_dir_entry["extent_loc"].value * SECTOR_SIZE
        l_len = p_dir_entry["size"].value

        l_read = 0

        if self.DEBUG:
            print(80 * "*")
            for l_index, l_field in enumerate(p_list):
                print("[%d] 0x%0.8x" %
                      (l_index, l_field.absolute_address // 8))
            print(80 * "*")

        while l_read < l_len:
            l_entry = self.find_entry(l_loc, p_list)
            if l_entry is not None:

                if self.DEBUG:
                    for l_field in l_entry:
                        print("%#x:%s=%s" % (l_field.absolute_address // 8,
                                             l_field.name, l_field.display))

                l_new_len = l_entry["rec_length"].value
                if l_new_len <= 0:
                    # A record without length can advance neither the
                    # offset nor the byte counter, which would loop
                    # forever. Handle it like a missing entry and resume
                    # at the next sector boundary.
                    l_skip = SECTOR_SIZE - (l_loc % SECTOR_SIZE)
                    if self.DEBUG:
                        print(
                            "zero-length entry at %#x, skipping %d bytes to sector boundary"
                            % (l_loc, l_skip))
                    l_loc += l_skip
                    l_read += l_skip
                    continue

                l_read += l_new_len
                if l_entry["name_length"].value > 1:
                    l_filename = self.get_filename(l_entry)
                    if l_entry["file_flags"].value & 2:
                        # Directory: descend with the extended path
                        if self.DEBUG:
                            print("entering directory %s" % l_filename)
                        self.traverse_dir(
                            l_entry, "%s%s%s" % (p_cur_path, l_filename, sep),
                            p_list)
                        if self.DEBUG:
                            print("leaving directory %s" % l_filename)
                    else:
                        # Regular file: the access time is read but not stored
                        (acc_time, crea_time,
                         mod_time) = self.get_dates(l_entry)
                        meta = Metadata(self)
                        meta.filename = "%s%s" % (p_cur_path, l_filename)
                        meta.last_modification = mod_time
                        meta.creation_date = crea_time
                        meta.file_size = l_entry["size"].value
                        self.addGroup("file[]", meta,
                                      "File \"%s\"" % meta.get('filename'))
                        if self.DEBUG:
                            print("adding file[] %s" % meta.get('filename'))
                l_loc = l_loc + l_new_len
            else:
                # Nothing at this offset: jump to the start of the next sector
                l_node_sec, l_node_rest = divmod(l_loc, SECTOR_SIZE)
                if self.DEBUG:
                    print(
                        "no entry found at %#x, skipping %d bytes to sector boundary"
                        % (l_loc, SECTOR_SIZE - l_node_rest))
                l_loc = (l_node_sec + 1) * SECTOR_SIZE
                l_read += (SECTOR_SIZE - l_node_rest)
Example #13
0
    def extract(self, flv):
        """Collect stream groups, format version and bit rate for an FLV file.

        The first video and audio chunks, when present, each produce one
        group. AMF metadata is read from "metadata/entry[1]" if it exists,
        and the bit rate is computed once a duration is known.
        """
        stream_readers = (
            ("video[0]", self.extractVideo, "video", "Video stream"),
            ("audio[0]", self.extractAudio, "audio", "Audio stream"),
        )
        for chunk_path, reader, group_key, label in stream_readers:
            if chunk_path not in flv:
                continue
            stream_meta = Metadata(self)
            reader(flv[chunk_path], stream_meta)
            self.addGroup(group_key, stream_meta, label)

        # TODO: compute the duration.
        # One technique: use the timestamp of the last video/audio chunk,
        # but this is very slow.
        self.format_version = flv.description

        amf_path = "metadata/entry[1]"
        if amf_path in flv:
            self.extractAMF(flv[amf_path])
        if self.has('duration'):
            seconds = timedelta2seconds(self.get('duration'))
            self.bit_rate = flv.size / seconds
Example #14
0
 def processFile(self, field):
     """Create the metadata group for a single archive entry.

     Filename, modification date and compression are always copied. The
     size fields come from "data_desc" when that sub-tree is present,
     otherwise from the entry; a zero compressed size is left unset.
     """
     info = Metadata(self)
     info.filename = field["filename"].value
     info.creation_date = field["last_mod"].value
     info.compression = field["compression"].display

     # Both size fields share a prefix depending on where they are stored
     has_descriptor = "data_desc" in field
     if has_descriptor:
         plain_key = "data_desc/file_uncompressed_size"
         packed_key = "data_desc/file_compressed_size"
     else:
         plain_key = "uncompressed_size"
         packed_key = "compressed_size"
     info.file_size = field[plain_key].value
     packed = field[packed_key].value
     if packed:
         info.compr_size = packed

     computeCompressionRate(info)
     self.addGroup(field.name, info, "File \"%s\"" % info.get('filename'))
Example #15
0
 def useFile(self, field):
     """Add a metadata group for one file entry.

     Filename, size and timestamp are always copied; the attributes are
     stored unless their value is the "(none)" placeholder.
     """
     entry_meta = Metadata(self)
     entry_meta.filename = field["filename"].value
     entry_meta.file_size = field["filesize"].value
     entry_meta.creation_date = field["timestamp"].value
     attributes = field["attributes"].value
     if attributes != "(none)":
         entry_meta.file_attr = attributes
     # Name the group after the file when the filename was accepted
     label = ("File \"%s\"" % entry_meta.getText('filename')
              if entry_meta.has("filename") else "File")
     self.addGroup(field.name, entry_meta, label)
Example #16
0
 def useStreamProp(self, stream, index):
     """Add a "stream[index]" group describing one stream.

     A stream whose MIME type is "logical-fileinfo" only feeds its
     properties to useFileInfoProp(); any other stream contributes its
     bit rate, duration and MIME type to the group.
     """
     details = Metadata(self)
     details.comment = "Start: %s" % stream["stream_start"].value
     is_file_info = getValue(stream, "mime_type") == "logical-fileinfo"
     if not is_file_info:
         details.bit_rate = stream["avg_bit_rate"].value
         # The stored duration is passed to timedelta as milliseconds
         duration_ms = stream["duration"].value
         details.duration = timedelta(milliseconds=duration_ms)
         details.mime_type = getValue(stream, "mime_type")
     else:
         for file_prop in stream.array("file_info/prop"):
             self.useFileInfoProp(file_prop)
     details.title = getValue(stream, "desc")
     group_key = "stream[%u]" % index
     self.addGroup(group_key, details, "Stream #%u" % (1 + index))
Example #17
0
    def extract(self, ogg):
        """Collect stream groups, comments and duration from an Ogg file.

        Pages are scanned until a page with index 3 or more that has
        segments has been processed. The duration is the last page's
        granule position divided by the first sample rate or frame rate
        found, and is only computed when self.quality is at least
        QUALITY_NORMAL.
        """
        units_per_second = None
        for page_number, page in enumerate(ogg.array("page")):
            if "segments" not in page:
                continue
            segments = page["segments"]

            if "vorbis_hdr" in segments:
                audio = Metadata(self)
                self.vorbisHeader(segments["vorbis_hdr"], audio)
                self.addGroup("audio[]", audio, "Audio")
                if not units_per_second and audio.has("sample_rate"):
                    units_per_second = audio.get('sample_rate')

            if "theora_hdr" in segments:
                theora = Metadata(self)
                self.theoraHeader(segments["theora_hdr"], theora)
                self.addGroup("video[]", theora, "Video")

            if "video_hdr" in segments:
                video = Metadata(self)
                self.videoHeader(segments["video_hdr"], video)
                self.addGroup("video[]", video, "Video")
                if not units_per_second and video.has("frame_rate"):
                    units_per_second = video.get('frame_rate')

            if "comment" in segments:
                readVorbisComment(self, segments["comment"])

            # Only pages 0..3 are processed
            if page_number >= 3:
                break

        # The duration needs a rate and a sufficient quality setting
        if not (units_per_second and QUALITY_NORMAL <= self.quality):
            return
        last_page = ogg.createLastPage()
        if not last_page or "abs_granule_pos" not in last_page:
            return
        try:
            position = float(last_page["abs_granule_pos"].value)
            self.duration = timedelta(seconds=position / units_per_second)
        except OverflowError:
            # Position too large to express as a duration: leave it unset
            pass
Example #18
0
 def useFile(self, field):
     """Add a metadata group for one file entry, with a translated title.

     Filename, size and timestamp are always copied; the attributes are
     stored unless their value is the "(none)" placeholder.
     """
     entry_meta = Metadata(self)
     entry_meta.filename = field["filename"].value
     entry_meta.file_size = field["filesize"].value
     entry_meta.creation_date = field["timestamp"].value
     attributes = field["attributes"].value
     if attributes != "(none)":
         entry_meta.file_attr = attributes
     # Name the group after the file when the filename was accepted
     label = (_("File \"%s\"") % entry_meta.getText('filename')
              if entry_meta.has("filename") else _("File"))
     self.addGroup(field.name, entry_meta, label)
Example #19
0
 def useStreamProp(self, stream, index):
     """Register the metadata of one stream as "stream[index]".

     For the "logical-fileinfo" MIME type each "file_info/prop" entry is
     forwarded to useFileInfoProp(); otherwise bit rate, duration and
     MIME type are stored in the group. The title is set in both cases.
     """
     stream_meta = Metadata(self)
     start = stream["stream_start"].value
     stream_meta.comment = "Start: %s" % start
     if getValue(stream, "mime_type") != "logical-fileinfo":
         stream_meta.bit_rate = stream["avg_bit_rate"].value
         # The stored duration is passed to timedelta as milliseconds
         stream_meta.duration = timedelta(
             milliseconds=stream["duration"].value)
         stream_meta.mime_type = getValue(stream, "mime_type")
     else:
         for info_prop in stream.array("file_info/prop"):
             self.useFileInfoProp(info_prop)
     stream_meta.title = getValue(stream, "desc")
     label = "Stream #%u" % (1 + index)
     self.addGroup("stream[%u]" % index, stream_meta, label)
Example #20
0
 def processAudio(self, track):
     """Add the metadata of one audio track as an "audio[]" group.

     When the track has an "Audio" sub-tree, the sampling frequency,
     channel count and bit depth are copied if available. The codec
     identifier, when present, is stored as the compression.
     """
     audio_meta = Metadata(self)
     self.trackCommon(track, audio_meta)
     if "Audio" in track:
         sampling = self.getDouble(track, "Audio/SamplingFrequency")
         if sampling is not None:
             audio_meta.sample_rate = sampling
         channels_path = "Audio/Channels/unsigned"
         if channels_path in track:
             audio_meta.nb_channel = track[channels_path].value
         depth_path = "Audio/BitDepth/unsigned"
         if depth_path in track:
             audio_meta.bits_per_sample = track[depth_path].value
     codec_path = "CodecID/string"
     if codec_path in track:
         audio_meta.compression = track[codec_path].value
     self.addGroup("audio[]", audio_meta, "Audio stream")
Example #21
0
 def extract(self, mar):
     """Fill in archive-level metadata and one group per MAR file entry.

     The comment reports the file count and the format version reports
     the archive version. File entries are processed until the limit
     returned by maxNbFile() is reached (None means no limit).
     """
     self.comment = "Contains %s files" % mar["nb_file"].value
     version_text = "Microsoft Archive version %s" % mar["version"].value
     self.format_version = version_text

     max_files = maxNbFile(self)
     for number, item in enumerate(mar.array("file")):
         if max_files is not None and number >= max_files:
             self.warning("MAR archive contains many files, "
                          "but only first %s files are processed" % max_files)
             break
         item_meta = Metadata(self)
         item_meta.filename = item["filename"].value
         item_meta.compression = "None"
         item_meta.file_size = item["filesize"].value
         group_name = item.name
         title = "File \"%s\"" % item_meta.getText('filename')
         self.addGroup(group_name, item_meta, title)
Example #22
0
    def extract(self, ogg):
        """Extract audio/video groups, comments and duration from Ogg pages.

        Scanning stops after a page with index 3 or more that has segments
        has been handled. The duration is the last page's granule position
        divided by the first sample rate or frame rate found; it is only
        computed when self.quality is at least QUALITY_NORMAL.
        """
        rate = None
        for number, ogg_page in enumerate(ogg.array("page")):
            if "segments" not in ogg_page:
                continue
            content = ogg_page["segments"]

            if "vorbis_hdr" in content:
                vorbis_meta = Metadata(self)
                self.vorbisHeader(content["vorbis_hdr"], vorbis_meta)
                self.addGroup("audio[]", vorbis_meta, "Audio")
                if not rate and vorbis_meta.has("sample_rate"):
                    rate = vorbis_meta.get('sample_rate')

            if "theora_hdr" in content:
                theora_meta = Metadata(self)
                self.theoraHeader(content["theora_hdr"], theora_meta)
                self.addGroup("video[]", theora_meta, "Video")

            if "video_hdr" in content:
                video_meta = Metadata(self)
                self.videoHeader(content["video_hdr"], video_meta)
                self.addGroup("video[]", video_meta, "Video")
                if not rate and video_meta.has("frame_rate"):
                    rate = video_meta.get('frame_rate')

            if "comment" in content:
                readVorbisComment(self, content["comment"])

            # Only pages 0..3 are processed
            if number >= 3:
                break

        # Compute the duration from the final page when possible
        if rate and QUALITY_NORMAL <= self.quality:
            final_page = ogg.createLastPage()
            if final_page and "abs_granule_pos" in final_page:
                try:
                    granule = float(final_page["abs_granule_pos"].value)
                    self.duration = timedelta(seconds=granule / rate)
                except OverflowError:
                    # Value too large for a duration: leave it unset
                    pass
Example #23
0
    def extract(self, rar):
        """Extract archive-level flags and per-file metadata from a RAR file.

        The format version is derived from the "signature" field; anything
        other than "1.5" triggers a warning. Archive flags are read from
        "/archive_start/flags", and "new_sub_block" entries named "AV" or
        "RR" additionally switch on the authenticity and recovery-record
        flags. One group is added per "file" entry until the limit
        returned by maxNbFile() is reached (None means no limit).
        """
        l_max_nb = maxNbFile(self)

        # Default for signatures matching none of the patterns below, so
        # the version string and warning can always be built.
        l_format_version = "unknown"
        l_rarformat = rar["signature"].value
        if l_rarformat == b"RE~^":
            l_format_version = "1.4"
        elif l_rarformat[0:6] == b"Rar!\x1A\x07":
            l_revision = l_rarformat[6:7]
            if l_revision == b"\x00":
                l_format_version = "1.5"  # RAR 4
            elif l_revision == b"\x01":
                l_format_version = "5.0"
            elif l_revision == b"\x02":
                l_format_version = "> 5.0"

        self.format_version = "RAR version %s" % l_format_version

        if l_format_version != "1.5":
            self.warning("RAR TODO: unknown format_version \"%s\" " %
                         l_format_version)

        if rar["/archive_start/flags/has_comment"].value:
            self.warning("RAR TODO: comment extraction not implemented")
            self.comment = "HACHOIR: comment extraction not implemented"

        l_has_recovery_record = rar[
            "/archive_start/flags/has_recovery_record"].value
        l_has_auth_verification = rar[
            "/archive_start/flags/has_auth_information"].value
        l_has_password = rar["/archive_start/flags/is_locked"].value
        l_is_multivolume = rar["/archive_start/flags/vol"].value
        l_is_solid = rar["/archive_start/flags/is_solid"].value
        is_first_vol = rar["/archive_start/flags/is_first_vol"].value

        # Sub-blocks may override the flags read from the archive header
        for l_field in rar.array("new_sub_block"):
            l_sub_name = l_field["filename"].value
            if l_sub_name == "CMT":
                self.warning("RAR TODO: comment unpacking not implemented")
                self.comment = "HACHOIR: comment unpacking not implemented"
            elif l_sub_name == "AV":
                l_has_auth_verification = True
            elif l_sub_name == "RR":
                l_has_recovery_record = True
            else:
                self.warning("RAR TODO: unknown sub_block \"%s\" " %
                             l_sub_name)

        self.has_recovery_record = l_has_recovery_record
        self.has_auth_verification = l_has_auth_verification
        self.has_password = l_has_password
        self.is_multivolume = l_is_multivolume
        self.is_solid = l_is_solid
        self.is_first_vol = is_first_vol

        for l_index, l_field in enumerate(rar.array("file")):
            if l_max_nb is not None and l_max_nb <= l_index:
                self.warning(
                    "RAR archive contains many files, but only first %s files are processed"
                    % l_max_nb)
                break
            l_meta = Metadata(self)
            l_meta.filename = l_field["filename"].value
            l_meta.last_modification = l_field["ftime"].value
            l_meta.os = l_field["host_os"].display
            l_meta.application_version = l_field["version"].display
            l_meta.compression = l_field["method"].display
            l_meta.file_size = l_field["uncompressed_size"].value
            l_meta.compr_size = l_field["compressed_size"].value
            self.addGroup(l_field.name, l_meta,
                          "File \"%s\"" % l_meta.get('filename'))
Example #24
0
    def extract(self, icon):
        """Add an ``image[N]`` metadata group for each icon header entry.

        Each group receives width, height, colour count and bits per
        pixel from the header, plus compression details when the matching
        ``icon_data[N]`` fields are present in *icon*.
        """
        for number, head in enumerate(icon.array("icon_header")):
            icon_meta = Metadata(self)

            # Size and colour information from the header entry
            icon_meta.width = head["width"].value
            icon_meta.height = head["height"].value
            bits = head["bpp"].value
            colors = head["nb_color"].value
            if colors == 0:
                # Nothing stored at all: use 8 bits per pixel
                if bits == 0:
                    bits = 8
            else:
                icon_meta.nb_colors = colors
                # Depth not stored: look it up from the colour count
                if bits == 0 and colors in self.color_to_bpp:
                    bits = self.color_to_bpp[colors]
            icon_meta.bits_per_pixel = bits

            shown_width = icon_meta.get("width", "?")
            shown_height = icon_meta.get("height", "?")
            icon_meta.setHeader(
                "Icon #%u (%sx%s)" % (1 + number, shown_width, shown_height))

            # Compression details, when the data section is available
            codec_key = "icon_data[%u]/header/codec" % number
            if codec_key in icon:
                icon_meta.compression = icon[codec_key].display
            pixels_key = "icon_data[%u]/pixels" % number
            if pixels_key in icon:
                computeComprRate(icon_meta, icon[pixels_key].size)

            # Register the finished group
            self.addGroup("image[%u]" % number, icon_meta)
Example #25
0
    def processHeader(self, header):
        """Extract metadata from the sub-objects of *header*.

        Handles, in order: extended descriptors ("ext_desc/content"), file
        properties ("file_prop/content"), the codec list
        ("codec_list/content"), each "stream_prop" entry (audio or video)
        and the "metadata/content" title/author/copyright fields. Every
        section is optional and skipped when absent from *header*.
        """
        # Codec descriptions collected from the codec list below
        compression = []
        # Stays None unless an "IsVBR" descriptor is found
        is_vbr = None

        if "ext_desc/content" in header:
            # Extract all data from ext_desc
            data = {}
            for desc in header.array("ext_desc/content/descriptor"):
                self.useExtDescItem(desc, data)

            # Have ToolName and ToolVersion? If yes, group them to producer key
            if "ToolName" in data and "ToolVersion" in data:
                self.producer = "%s (version %s)" % (data["ToolName"],
                                                     data["ToolVersion"])
                # Removed so they are not stored a second time below
                del data["ToolName"]
                del data["ToolVersion"]

            # "IsVBR" key
            if "IsVBR" in data:
                is_vbr = (data["IsVBR"] == 1)
                del data["IsVBR"]

            # Store data
            for key, value in data.items():
                if key in self.EXT_DESC_TO_ATTR:
                    # Known descriptor: store under the mapped attribute name
                    key = self.EXT_DESC_TO_ATTR[key]
                else:
                    # Unknown descriptor: keep it as a "key=value" comment
                    if isinstance(key, str):
                        key = makePrintable(key, "ISO-8859-1")
                    value = "%s=%s" % (key, value)
                    key = "comment"
                setattr(self, key, value)

        if "file_prop/content" in header:
            self.useFileProp(header["file_prop/content"], is_vbr)

        if "codec_list/content" in header:
            for codec in header.array("codec_list/content/codec"):
                if "name" in codec:
                    text = codec["name"].value
                    # Append the description only when it is non-empty
                    if "desc" in codec and codec["desc"].value:
                        text = "%s (%s)" % (text, codec["desc"].value)
                    compression.append(text)

        # Group numbering starts at 1 and only advances when addGroup()
        # returns a true value
        audio_index = 1
        video_index = 1
        for index, stream_prop in enumerate(header.array("stream_prop")):
            if "content/audio_header" in stream_prop:
                meta = Metadata(self)
                self.streamProperty(header, index, meta)
                self.streamAudioHeader(stream_prop["content/audio_header"],
                                       meta)
                if self.addGroup("audio[%u]" % audio_index, meta,
                                 "Audio stream #%u" % audio_index):
                    audio_index += 1
            elif "content/video_header" in stream_prop:
                meta = Metadata(self)
                self.streamProperty(header, index, meta)
                self.streamVideoHeader(stream_prop["content/video_header"],
                                       meta)
                if self.addGroup("video[%u]" % video_index, meta,
                                 "Video stream #%u" % video_index):
                    video_index += 1

        if "metadata/content" in header:
            info = header["metadata/content"]
            # The first missing field stops the copy; values already
            # assigned are kept
            try:
                self.title = info["title"].value
                self.author = info["author"].value
                self.copyright = info["copyright"].value
            except MissingField:
                pass