continue value = value.value if not value: continue name = properties.getFieldByAddress(offset0 + index["name_offset"].value * 8) if not name: continue name = name.value if name not in self.PROP_TO_KEY: warning("Skip %s=%r" % (name, value)) continue key = self.PROP_TO_KEY[name] setattr(self, key, value) class SwfMetadata(RootMetadata): def extract(self, swf): self.height = swf["rect/ymax"].value # twips self.width = swf["rect/xmax"].value # twips self.format_version = "flash version %s" % swf["version"].value self.frame_rate = swf["frame_rate"].value self.comment = "Frame count: %s" % swf["frame_count"].value registerExtractor(TorrentFile, TorrentMetadata) registerExtractor(TrueTypeFontFile, TTF_Metadata) registerExtractor(OLE2_File, OLE2_Metadata) registerExtractor(PcfFile, PcfMetadata) registerExtractor(SwfFile, SwfMetadata)
def usePE_OptHeader(self, hdr): self.comment = "Subsystem: %s" % hdr["subsystem"].display def readVersionInfo(self, info): values = {} for node in info.array("node"): if "value" not in node or "name" not in node: continue value = node["value"].value.strip(" \0") if not value: continue key = node["name"].value values[key] = value if "ProductName" in values and "FileDescription" in values: # Make sure that FileDescription is set before ProductName # as title value self.title = values["FileDescription"] self.title = values["ProductName"] del values["FileDescription"] del values["ProductName"] for key, value in values.items(): if key in self.KEY_TO_ATTR: setattr(self, self.KEY_TO_ATTR[key], value) elif key not in self.SKIP_KEY: self.comment = "%s=%s" % (key, value) registerExtractor(ExeFile, ExeMetadata)
# Compute global bit rate if self.has("duration") and "/movie/size" in headers: self.bit_rate = float( headers["/movie/size"].value) * 8 / timedelta2seconds( self.get('duration')) # Video has index? scan_index = (True, kwargs['scan_index'])['scan_index' in kwargs] if scan_index and "/index" in headers: self.comment = "Has audio/video index (%s)" \ % humanFilesize(headers["/index"].size // 8) @fault_tolerant def extractAnim(self, riff): if "anim_rate/rate[0]" in riff: count = 0 total = 0 for rate in riff.array("anim_rate/rate"): count += 1 if 100 < count: break total += rate.value / 60.0 if count and total: self.frame_rate = count / total if not self.has("frame_rate") and "anim_hdr/jiffie_rate" in riff: self.frame_rate = 60.0 / riff["anim_hdr/jiffie_rate"].value registerExtractor(RiffFile, RiffMetadata)
text = "%s (CBR)" % text else: text = "%s (max)" % text self.bit_rate = (value, text) def streamProperty(self, header, index, meta): key = "bit_rates/content/bit_rate[%u]/avg_bitrate" % index if key in header: meta.bit_rate = header[key].value # TODO: Use codec list # It doesn't work when the video uses /header/content/bitrate_mutex # since the codec list are shared between streams but... how is it # shared? # key = "codec_list/content/codec[%u]" % index # if key in header: # codec = header[key] # if "name" in codec: # text = codec["name"].value # if "desc" in codec and codec["desc"].value: # meta.compression = "%s (%s)" % (text, codec["desc"].value) # else: # meta.compression = text registerExtractor(MP4File, MP4Metadata) registerExtractor(AsfFile, AsfMetadata) registerExtractor(FlvFile, FlvMetadata) registerExtractor(MkvFile, MkvMetadata)
value = value.replace("\n", " ") # Skip unknown tag tag = field["tag"].value if tag == 55: datestr = value continue if tag == 60: hourstr = value continue if tag not in self.IPTC_KEY: if tag != 0: self.warning("Skip IPTC key %s: %s" % (field["tag"].display, makeUnicode(value))) continue setattr(self, self.IPTC_KEY[tag], value) if datestr and hourstr: try: year = int(datestr[0:4]) month = int(datestr[4:6]) day = int(datestr[6:8]) hour = int(hourstr[0:2]) min = int(hourstr[2:4]) sec = int(hourstr[4:6]) self.creation_date = datetime(year, month, day, hour, min, sec) except ValueError: pass registerExtractor(JpegFile, JpegMetadata)
self.addGroup(field.name, meta, title) class MarMetadata(MultipleMetadata): def extract(self, mar): self.comment = "Contains %s files" % mar["nb_file"].value self.format_version = ("Microsoft Archive version %s" % mar["version"].value) max_nb = maxNbFile(self) for index, field in enumerate(mar.array("file")): if max_nb is not None and max_nb <= index: self.warning("MAR archive contains many files, " "but only first %s files are processed" % max_nb) break meta = Metadata(self) meta.filename = field["filename"].value meta.compression = "None" meta.file_size = field["filesize"].value self.addGroup(field.name, meta, "File \"%s\"" % meta.getText('filename')) registerExtractor(CabFile, CabMetadata) registerExtractor(GzipParser, GzipMetadata) registerExtractor(Bzip2Parser, Bzip2Metadata) registerExtractor(TarFile, TarMetadata) registerExtractor(ZipFile, ZipMetadata) registerExtractor(MarFile, MarMetadata)
self.duration = timedelta(seconds=sec) if "codec" in info: self.compression = info["codec"].display class FlacMetadata(RootMetadata): def extract(self, flac): if "metadata/stream_info/content" in flac: self.useStreamInfo(flac["metadata/stream_info/content"]) if "metadata/comment/content" in flac: readVorbisComment(self, flac["metadata/comment/content"]) @fault_tolerant def useStreamInfo(self, info): self.nb_channel = info["nb_channel"].value + 1 self.bits_per_sample = info["bits_per_sample"].value + 1 self.sample_rate = info["sample_hertz"].value sec = info["total_samples"].value if sec: sec = float(sec) / info["sample_hertz"].value self.duration = timedelta(seconds=sec) registerExtractor(AuFile, AuMetadata) registerExtractor(MpegAudioFile, MpegAudioMetadata) registerExtractor(OggFile, OggMetadata) registerExtractor(RealMediaFile, RealMediaMetadata) registerExtractor(RealAudioFile, RealAudioMetadata) registerExtractor(AiffFile, AiffMetadata) registerExtractor(FlacParser, FlacMetadata)
self.producer = desc if emf["nb_colors"].value: self.nb_colors = emf["nb_colors"].value self.bits_per_pixel = 8 else: self.bits_per_pixel = 24 self.width = emf["width_px"].value self.height = emf["height_px"].value class PsdMetadata(RootMetadata): @fault_tolerant def extract(self, psd): self.width = psd["width"].value self.height = psd["height"].value self.bits_per_pixel = psd["depth"].value * psd["nb_channels"].value self.pixel_format = psd["color_mode"].display self.compression = psd["compression"].display registerExtractor(IcoFile, IcoMetadata) registerExtractor(GifFile, GifMetadata) registerExtractor(XcfFile, XcfMetadata) registerExtractor(TargaFile, TargaMetadata) registerExtractor(PcxFile, PcxMetadata) registerExtractor(BmpFile, BmpMetadata) registerExtractor(PngFile, PngMetadata) registerExtractor(TiffFile, TiffMetadata) registerExtractor(WMF_File, WmfMetadata) registerExtractor(PsdFile, PsdMetadata)
self.date_time_original = tiff["exif[0]"]["value[7]"].value self.date_time_digitized = tiff["exif[0]"]["value[8]"].value def useIFD(self, ifd): attr = {} for entry in ifd.array("entry"): self.processIfdEntry(ifd, entry, attr) if 'BitsPerSample' in attr and 'SamplesPerPixel' in attr: self.bits_per_pixel = attr['BitsPerSample'] * attr[ 'SamplesPerPixel'] @fault_tolerant def processIfdEntry(self, ifd, entry, attr): tag = entry["tag"].display if tag in {"BitsPerSample", "SamplesPerPixel"}: value = ifd.getEntryValues(entry)[0].value attr[tag] = value return try: attrname = self.key_to_attr[tag] except KeyError: return value = ifd.getEntryValues(entry)[0].value if tag in {"XResolution", "YResolution"}: value = round(value) setattr(self, attrname, value) registerExtractor(CR2File, CR2Metadata)
from datetime import datetime

from hachoir_py3.metadata.metadata import RootMetadata, registerExtractor
from hachoir_py3.metadata.safe import fault_tolerant
from hachoir_py3.parser.file_system import ISO9660


class ISO9660_Metadata(RootMetadata):
    """Metadata extractor registered below for ``ISO9660`` parsers."""

    def extract(self, iso):
        """Copy the text fields and timestamps of the first volume of *iso*.

        ``title`` and ``author`` are each assigned twice, from two different
        descriptor fields, in a fixed order.
        """
        volume = iso['volume[0]/content']

        # (metadata attribute, descriptor field), in assignment order.
        text_fields = (
            ('title', 'volume_id'),
            ('title', 'vol_set_id'),
            ('author', 'publisher'),
            ('author', 'data_preparer'),
            ('producer', 'application'),
            ('copyright', 'copyright'),
        )
        for attr_name, field_name in text_fields:
            setattr(self, attr_name, volume[field_name].value)

        timestamp_fields = (
            ('creation_date', 'creation_ts'),
            ('last_modification', 'modification_ts'),
        )
        for attr_name, field_name in timestamp_fields:
            self.readTimestamp(attr_name, volume[field_name].value)

    @fault_tolerant
    def readTimestamp(self, key, value):
        """Parse the digit string *value* and store it as attribute *key*.

        The string is read as fixed-width fields: year (4 digits), then
        month, day, hour, minute and second (2 digits each). A value that
        starts with "0000" is ignored.
        """
        if value.startswith("0000"):
            return
        # Slice boundaries of the six consecutive fields.
        bounds = (0, 4, 6, 8, 10, 12, 14)
        parts = [int(value[start:stop])
                 for start, stop in zip(bounds, bounds[1:])]
        setattr(self, key, datetime(*parts))


registerExtractor(ISO9660, ISO9660_Metadata)