def buildMetadata(self):
    """Record tar metadata and analyse each regular file in the archive.

    Appends a ``("tarformat", ...)`` entry to ``self.metadata``, then passes
    every regular-file member to ``self.processNestedTarEntry`` and registers
    the result on ``self.fileModel`` as a ``"contains"`` relationship.

    Returns:
        True when the whole archive was processed; False when any step
        raised (the error and its traceback are printed to stdout).
    """
    AnalysisFactory.buildMetadata(self)
    tf = None
    try:
        tf = tarfile.open(self.filename, "r")
        # ``TarFile.posix`` is deprecated since Python 2.6 and absent in
        # Python 3; ``format`` is the documented replacement.
        if tf.format == tarfile.USTAR_FORMAT:
            self.metadata.append(("tarformat", "USTAR_FORMAT"))
        else:
            self.metadata.append(("tarformat", "GNU_FORMAT"))

        for tarEntry in tf.getmembers():
            if not tarEntry.isfile():
                continue
            handle = tf.extractfile(tarEntry)
            try:
                contained = self.processNestedTarEntry(handle, tarEntry, self.fileModel)
            finally:
                # Release the member handle even when nested processing fails.
                files.filetype.qClose(handle)
            self.fileModel.addRelationship("contains", contained)
        return True
    except Exception as err:
        template = "{0} Arguments:\n{1!r}"
        message = template.format(type(err).__name__, err.args)
        print("Failed to process tarfile %s : %s" % (self.filename, message))
        traceback.print_exc(file=sys.stdout)
        return False
    finally:
        # The archive was previously left open on every path.
        if tf is not None:
            tf.close()
def buildMetadata(self):
    """Collect basic image properties and EXIF tags into ``self.metadata``.

    Records mode, format, width, height and (when present) the palette, then
    one entry per EXIF tag. When the image carries no EXIF data a single
    ``("EXIF", "None present")`` entry is added instead.

    Returns:
        True when extraction succeeded; False when opening or reading the
        image raised (the error is printed to stdout).
    """
    AnalysisFactory.buildMetadata(self)
    from PIL import Image
    from PIL.ExifTags import TAGS

    image = None
    success = True
    try:
        image = Image.open(self.filename)
        self.metadata.append(("mode", str(image.mode)))
        self.metadata.append(("format", str(image.format)))
        self.metadata.append(("width", str(image.size[0])))
        self.metadata.append(("height", str(image.size[1])))
        if image.palette is not None:
            self.metadata.append(("palette", str(image.palette)))

        # The private ``_getexif()`` helper is not defined on every PIL image
        # class; treat its absence as "no EXIF" rather than as a failure.
        read_exif = getattr(image, "_getexif", None)
        info = read_exif() if read_exif is not None else None
        if info is None:
            self.metadata.append(("EXIF", "None present"))
        else:
            for tag, value in info.items():
                # Fall back to the numeric tag when PIL has no name for it.
                decoded = TAGS.get(tag, tag)
                if decoded is not None and value is not None:
                    self.metadata.append((str(decoded), str(value)))
    except Exception as err:
        template = "{0} Arguments:\n{1!r}"
        message = template.format(type(err).__name__, err.args)
        print("Failed to extract image metadata from %s: %s" % (self.filename, message))
        success = False
    finally:
        # Guarded lookup: ``image`` is None when open failed, and older PIL
        # releases may not provide ``close`` at all.
        close_image = getattr(image, "close", None)
        if close_image is not None:
            close_image()
    return success
def buildMetadata(self):
    """Append the output of the external ID3 tool to ``self.metadata``.

    When ``files.filetype.ID3TOOL`` is None the analysis is skipped: a notice
    is printed and nothing is executed.

    Returns:
        True, whether the tool ran or the analysis was skipped.
    """
    AnalysisFactory.buildMetadata(self)
    id3_tool = files.filetype.ID3TOOL
    if id3_tool is None:
        # Previously the tool was still invoked with None as the program
        # even though the message claimed the analysis was being skipped.
        print("Skipping ID3 analysis, ID3TOOL not supported.")
        return True

    print("Doing ID3 analysis...")
    # NOTE(review): ": " is presumably the key/value separator in the tool's
    # output - confirm against runGenericExtractProgram.
    more = self.runGenericExtractProgram([id3_tool, self.filename], ": ")
    for item in more:
        self.metadata.append(item)
    return True
def buildMetadata(self):
    """Append the output of the external PDF info tool to ``self.metadata``.

    When ``PDFINFO`` is None the analysis is skipped: a notice is printed and
    nothing is executed.

    Returns:
        True, whether the tool ran or the analysis was skipped.
    """
    AnalysisFactory.buildMetadata(self)
    if PDFINFO is None:
        # Previously the tool was still invoked with None as the program
        # even though the message said the analysis was unsupported.
        print("PDF analysis unsupported.")
        return True

    print("Doing PDF analysis...")
    # NOTE(review): ": " is presumably the key/value separator in the tool's
    # output - confirm against runGenericExtractProgram.
    more = self.runGenericExtractProgram([PDFINFO, self.filename], ": ")
    for item in more:
        self.metadata.append(item)
    return True
def buildMetadata(self):
    """Analyse every file entry of a ZIP archive.

    A file that ``is_zipfile`` rejects only gets a "Data format warning"
    metadata entry. Otherwise each non-directory entry is opened, passed to
    ``self.processNestedZipEntry`` and registered on ``self.fileModel`` as a
    ``"contains"`` relationship. A failure on one entry is printed and the
    loop moves on, unless the error message mentions "encrypted": then a
    ``("ZIPEncrypted", "True")`` entry is recorded and processing stops.

    Returns:
        True on every path that completes. Errors raised while opening the
        archive itself are not caught here and propagate to the caller.
    """
    AnalysisFactory.buildMetadata(self)
    if not is_zipfile(self.filename):
        self.metadata.append(("Data format warning", "File cannot be processed as a ZIP file"))
        return True

    zf = ZipFile(self.filename, "r")
    try:
        for zipInfo in zf.infolist():
            handle = None
            try:
                if zipInfo.filename.endswith('/'):
                    # Don't process directories
                    continue
                handle = zf.open(zipInfo, "r")
                contained = self.processNestedZipEntry(handle, zipInfo, self.fileModel)
                self.fileModel.addRelationship("contains", contained)
            except Exception as err:
                template = "{0} Arguments:\n{1!r}"
                message = template.format(type(err).__name__, err.args)
                print("Failed to process zipfile item %s (%s): %s" % (zipInfo.orig_filename, self.filename, message))
                if "encrypted" in message:
                    # Stop at the first entry reported as encrypted.
                    self.metadata.append(("ZIPEncrypted", "True"))
                    return True
                traceback.print_exc(file=sys.stdout)
                exc_type, exc_value, exc_traceback = sys.exc_info()
                print(repr(traceback.format_exception(exc_type, exc_value, exc_traceback)))
            finally:
                # handle is still None for directory entries and failed opens.
                files.filetype.qClose(handle)
    finally:
        # The outer try previously had no closing clause and the archive
        # was never closed.
        zf.close()
    return True
def buildMetadata(self):
    """Parse the XML input and flag documents that declare a DTD or entities.

    The document is read from ``self.filename`` when that is set, otherwise
    from ``self.fh`` after rewinding it. The parsed document is not used;
    the parse serves only to detect forbidden constructs and malformed input.

    Returns:
        True when the document parsed, or when it was rejected for defining
        a DTD or entities (a metadata entry records which); False when
        seeking, opening or parsing failed for any other reason.
    """
    AnalysisFactory.buildMetadata(self)
    try:
        if self.filename is not None:
            minidom.parse(self.filename)
        else:
            # Rewind so the parser sees the stream from its first byte.
            self.fh.seek(0)
            minidom.parse(self.fh)
    except DTDForbidden:
        print("XML file defines DTD")
        self.metadata.append(("defines-dtd", "true"))
        return True
    except EntitiesForbidden:
        print("XML file defines entities")
        self.metadata.append(("defines-entities", "true"))
        return True
    except Exception as err:
        template = "Failed to seek/open/parse XML: {0} Arguments:\n{1!r}"
        message = template.format(type(err).__name__, err.args)
        print(message)
        return False
    # Previously the success path returned None, which is falsy like the
    # failure path.
    return True