def getOrCreateFileNameModel(self):
    """Fetch, or create and save, the FileName record for this location.

    Looks for an existing FileName whose ``location`` and ``basefile``
    match ``self.location`` and ``self.fileModel``. If none exists, a new
    FileName is built from those two values and saved. Either way the
    result is stored on ``self.fileNameModel``.

    @return: the existing or newly saved FileName model.
    @raise Exception: if ``self.fileModel`` has not been set yet.
    """
    # getattr() so a missing attribute gives the same clear error as an
    # explicit None, rather than an AttributeError.
    if getattr(self, "fileModel", None) is None:
        # Raising a bare string is not a valid exception; raise a real one.
        raise Exception("File model hasn't been created")

    matches = FileName.objects.filter(location=self.location,
                                      basefile=self.fileModel)
    # Slice before evaluating so at most one row is fetched, instead of
    # loading every match just to test for existence.
    existing = list(matches[:1])
    if existing:
        self.fileNameModel = existing[0]
    else:
        self.fileNameModel = FileName(basefile=self.fileModel,
                                      location=self.location)
        self.fileNameModel.save()
    return self.fileNameModel
class AnalysisFactory:
    """Create an object to analyze a file or input stream.

    @param filename: if provided, the filename to open and analyze.
        May not be combined with stream.
    @param stream: if provided, the file handle to analyze instead of
        opening a file. If you provide this stream, it will be read
        and closed. May not be combined with filename.
    @param location: if provided, the location will be used as the
        name/location of the file. Defaults to filename, and is
        therefore required when no filename is given.
    @param redoAnalysis: if true, analysis will take place even if
        the file has already been analyzed. If false, a previous
        analysis, if one has been performed, will be used.
    """

    def __init__(self, filename=None, stream=None, location=None,
                 redoAnalysis=False):
        """Store the settings and open the input when a filename is given.

        @raise Exception: if neither filename nor location is given, or
            if both filename and stream are given.
        """
        self.redoAnalysis = redoAnalysis
        self.firstAnalysis = True
        self.filename = filename
        self.fh = stream
        self.filemagic = None
        self.deleteFileOnCleanup = False
        # Set by getOrCreateFileModel() / getOrCreateFileNameModel();
        # initialised here so "not created yet" checks see None instead
        # of failing with AttributeError.
        self.fileModel = None
        self.fileNameModel = None
        self.writehandle = None
        # An array of two-item tuples; key/value metadata pairs.
        self.metadata = []

        self.location = filename if location is None else location

        if location is None and filename is None:
            raise Exception(
                "If processing a stream, you must provide a location")
        if filename is not None and stream is not None:
            raise Exception("You may only specify one of filename, stream")

        if filename is not None:
            # Binary mode: the content is hashed byte-for-byte, so no
            # newline translation or text decoding may be applied.
            self.fh = open(self.filename, "rb")

    def analyze(self):
        """Run the analysis steps in order.

        Creates (or looks up) the file and file-name models, then calls
        buildMetadata(), writeMetadata() and cleanup().

        @return: True once every step has completed.
        """
        self.getOrCreateFileModel()
        self.getOrCreateFileNameModel()
        self.buildMetadata()
        self.writeMetadata()
        self.cleanup()
        return True

    def getOrCreateFileNameModel(self):
        """Fetch, or create and save, the FileName record for this location.

        Looks for an existing FileName whose ``location`` and ``basefile``
        match ``self.location`` and ``self.fileModel``. If none exists, a
        new FileName is built from those two values and saved. Either way
        the result is stored on ``self.fileNameModel``.

        @return: the existing or newly saved FileName model.
        @raise Exception: if ``self.fileModel`` has not been set yet.
        """
        if getattr(self, "fileModel", None) is None:
            # Raising a bare string is not a valid exception; raise a
            # real one.
            raise Exception("File model hasn't been created")

        matches = FileName.objects.filter(location=self.location,
                                          basefile=self.fileModel)
        # Slice before evaluating so at most one row is fetched.
        existing = list(matches[:1])
        if existing:
            self.fileNameModel = existing[0]
        else:
            self.fileNameModel = FileName(basefile=self.fileModel,
                                          location=self.location)
            self.fileNameModel.save()
        return self.fileNameModel

    def getOrCreateFileModel(self):
        """Hash the input and fetch or create the matching File record.

        Reads ``self.fh`` in 64 KiB blocks, computing MD5, SHA-1 and
        CRC-32 over the content. The first block is also passed to
        runExtractorOnBuffer() and to magic.from_buffer(); a detected
        file magic is stored on ``self.filemagic`` and appended to
        ``self.metadata``. The input handle is closed afterwards.

        A File is looked up by MD5. If none exists, a new one is saved
        with the digests and the on-disk size, and ``self.firstAnalysis``
        is set to True; otherwise it is set to False.

        @return: the existing or newly saved File model.
        """
        import zlib

        md5 = hashlib.md5()
        sha1 = hashlib.sha1()
        # 0 is zlib.crc32's default starting value, so seeding with it
        # is the same as calling zlib.crc32(buf) for the first block.
        crc32 = 0

        # In some cases we'll be given a stream.
        # Some analysis tools will only work well on files,
        # which support seek(). So if we're given a stream,
        # while we're hashing it we'll be writing it to a temp
        # file so that we can use other tools on it subsequently.
        if self.filename is None:
            (tempFd, self.filename) = tempfile.mkstemp()
            self.deleteFileOnCleanup = True
            # Wrap the descriptor mkstemp() already opened rather than
            # closing it and re-opening the path.
            self.writehandle = os.fdopen(tempFd, "wb")
        else:
            self.writehandle = None

        try:
            firstBlock = True
            while True:
                buf = self.fh.read(1024 * 64)

                # Check the file magic for the first block. This runs
                # even for empty input, before the end-of-input test.
                if firstBlock:
                    firstBlock = False
                    self.runExtractorOnBuffer(buf, len(buf))
                    try:
                        self.filemagic = magic.from_buffer(buf)
                        self.metadata.append(("filemagic", self.filemagic))
                    except Exception as err:
                        # Best effort: a magic failure must not abort
                        # the hashing of the file.
                        template = "{0} Arguments:\n{1!r}"
                        message = template.format(type(err).__name__,
                                                  err.args)
                        print("Failed to extract file magic: %s" % message)

                if not buf:
                    break

                if self.writehandle is not None:
                    self.writehandle.write(buf)
                md5.update(buf)
                sha1.update(buf)
                crc32 = zlib.crc32(buf, crc32)
        finally:
            # Release both handles even if reading or hashing failed.
            filetype.qClose(self.fh)
            if self.writehandle is not None:
                self.writehandle.close()

        md5sum = md5.hexdigest().lower()
        sha1sum = sha1.hexdigest().lower()
        # zlib.crc32 is signed on Python 2 and unsigned on Python 3;
        # masking to 32 bits gives the same 8 big-endian hex digits on
        # both without overflowing a signed pack format.
        crc32sum = "%08x" % (crc32 & 0xffffffff)

        # self.filename is always set by now (the temp file for streams),
        # so the size is read from disk.
        size = os.stat(os.path.abspath(self.filename)).st_size

        try:
            self.fileModel = File.objects.get(md5=md5sum)
            self.firstAnalysis = False
        except File.DoesNotExist:
            self.fileModel = File(md5=md5sum,
                                  sha1=sha1sum,
                                  crc32=crc32sum,
                                  size=size)
            self.fileModel.save()
            self.firstAnalysis = True
        return self.fileModel