def extract(infile, dirname=None):
    """Extract every stream of the OLE file *infile* into *dirname*.

    Each stream is written to a file named after the stream inside
    *dirname* (defaults to the current working directory).

    Returns the stream list from ``ole.listdir()``.
    Exits the process with status 1 on any error.
    """
    if dirname is None:
        dirname = os.getcwd()
    try:
        if not OleFileIO_PL.isOleFile(infile):
            print('Error - %s is not a valid OLE file.' % infile, file=sys.stderr)
            sys.exit(1)
        ole = OleFileIO_PL.OleFileIO(infile)
        filelist = ole.listdir()
        for fname in filelist:
            # A zero-length stream usually indicates a truncated/corrupt bup.
            if not ole.get_size(fname[0]):
                print(
                    'Warning: The "%s" stream reports a size of 0. Possibly a corrupt bup.' % fname[0])
            data = ole.openstream(fname[0]).read()
            # Context manager guarantees the output file is closed even if
            # the write raises.
            with open(os.path.join(dirname, fname[0]), 'wb') as fp:
                fp.write(data)
        ole.close()
        return filelist
    except Exception as e:
        print('Error - %s' % e, file=sys.stderr)
        sys.exit(1)
def printDump(bupname, DumpFunction=IdentityFunction, allfiles=False):
    """Print a dump of the decrypted stream(s) of the bup file *bupname*.

    Every stream except "Details" is decrypted with decryptStream(), passed
    through *DumpFunction* (hex / hex-ascii / identity dump) and written to
    stdout in chunks. Only the first such stream is dumped unless *allfiles*
    is true. Exits with status 1 on any error.
    """
    # Raw (identity) output on Windows must not mangle \n into \r\n, so put
    # stdout into binary mode before writing.
    if sys.platform == 'win32' and DumpFunction == IdentityFunction:
        import msvcrt
        msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
    try:
        if OleFileIO_PL.isOleFile(bupname) is not True:
            print('Error - %s is not a valid OLE file.' % bupname, file=sys.stderr)
            sys.exit(1)
        ole = OleFileIO_PL.OleFileIO(bupname)
        printNewline = False
        for entry in ole.listdir():
            if entry[0] != "Details":
                # Blank line between consecutive stream dumps.
                if printNewline:
                    print()
                printNewline = True
                StdoutWriteChunked(
                    DumpFunction(decryptStream(
                        ole.openstream(entry[0]).read())))
                if not allfiles:
                    break
        ole.close()
    except Exception as e:
        print('Error - %s' % e, file=sys.stderr)
        sys.exit(1)
def readRSF(filePath, fileName):
    '''Parses the information stored in the RecoveryStore file.
    Accepts single argument: the file name of the RecoveryStore file.

    Prints session open/close times, the InPrivate flag, and the open and
    closed tabs (each tab GUID is resolved to a {GUID}.dat file handed to
    readTDF).'''
    fname = os.path.join(filePath, fileName)
    print("\n" + fileName + ":")
    try:
        # Check if file is the correct format
        if not OleFileIO_PL.isOleFile(fname):
            print(" Error: Unable to parse file '%s'. Incorrect format!" % fname)
            return
        rs = OleFileIO_PL.OleFileIO(fname)
        # Get list of streams
        streams = rs.listdir()
        # Property-set stream holding the session metadata.
        p = rs.getproperties('\x05KjjaqfajN2c0uzgv1l4qy5nfWe')
        # Get session times
        closed = buildTime(p[3])
        opened = buildTime(p[7])
        print(" Opened: " + opened + " UTC")
        if opened != closed:
            print(" Closed: " + closed + " UTC")
        else:
            print(" Closed: N/A")
        # Check for InPrivate Browsing (property id 5 present only then)
        if 5 in p:
            print(" InPrivate Browsing: YES")
        else:
            print(" InPrivate Browsing: No")
        # Get all open tabs (TS# streams)
        print("\n Open Tabs:")
        for s in streams:
            if s[0][:2] == "TS":
                _readTabGuids(rs, s[0], filePath)
        # Get all closed tabs. The original read the stream twice
        # back-to-back (copy-paste duplication) and duplicated the whole
        # GUID-walk loop; both are consolidated into _readTabGuids.
        print("\n Closed Tabs:")
        for s in streams:
            if s[0] == "ClosedTabList":
                _readTabGuids(rs, s[0], filePath)
    except Exception:
        print("\nError reading '" + fname + "': ", sys.exc_info()[1])


def _readTabGuids(rs, streamName, filePath):
    """Walk a tab-list stream: every 16 bytes is a GUID naming a {GUID}.dat
    tab-data file, which is handed to readTDF for printing."""
    data = rs.openstream(streamName).read()
    tsStr = "".join("{:02x}".format(c) for c in data)
    b = 0
    while b < len(tsStr):
        # 32 hex chars == 16 raw bytes == one GUID.
        tdName = "{" + buildGUID(tsStr[b:b + 32]) + "}.dat"
        readTDF(filePath, tdName, " ")
        b += 32
def bupextract():
    """Extract the quarantined file from a McAfee .bup and open a session on it.

    BUP containers XOR every stream with 0x6A (106 decimal). The 'File_0'
    stream is the quarantined payload; the 'Details' stream is parsed only to
    recover the original file name.
    """
    # Check for valid OLE
    if not OleFileIO_PL.isOleFile(__sessions__.current.file.path):
        print_error("Not a valid BUP File")
        return
    ole = OleFileIO_PL.OleFileIO(__sessions__.current.file.path)
    # We know that BUPS are xor'd with 6A which is dec 106 for the decoder
    print_info("Switching Session to Embedded File")
    data = xordata(ole.openstream('File_0').read(), 106)
    # This is a lot of work just to get a filename.
    data2 = xordata(ole.openstream('Details').read(), 106)
    ole.close()
    # Bug fix: 'filename' was unbound (NameError) when no 'OriginalName'
    # line existed; initialize it and check before use.
    filename = None
    for line in data2.split('\n'):
        if line.startswith('OriginalName'):
            fullpath = line.split('=')[1]
            pathsplit = fullpath.split('\\')
            # Drop the trailing carriage return of the CRLF line ending.
            filename = str(pathsplit[-1][:-1])
            break
    # Now write the data out to a file and get a session on it.
    if data and filename:
        tempName = os.path.join('/tmp', filename)
        with open(tempName, 'w') as temp:
            temp.write(data)
        __sessions__.new(tempName)
    else:
        # Message typo fixed ('Unble' -> 'Unable').
        print_error("Unable to Switch Session")
def zvi_read(fname, plane, ole=None):
    """Return the ZviItemTuple for image *plane* of the zvi file *fname*.

    An already-open OleFileIO may be supplied via *ole* to avoid reopening
    the container for every plane.
    """
    if ole is None:
        ole = OleFileIO_PL.OleFileIO(fname)
    item_path = ['Image', 'Item(%d)' % plane, 'Contents']
    return read_item_storage_content(ole.openstream(item_path))
def extract(filename):
    """Extract the letter date and explanation paragraph from a document.

    Converts *filename* to plain text with unoconv, scans the lines for
    Hebrew markers that delimit the body (the "subject" line opens it, the
    closing-salutation lines end it) and for a date matched by date_re.
    Falls back to the OLE metadata last-saved time when no date is found
    in the text. Returns (date, explanation) — explanation may be None.
    """
    # NOTE(review): filename is interpolated unquoted into a shell command —
    # shell-injection risk if filename is untrusted; confirm callers.
    # The absolute unoconv path also ties this to one machine.
    out = os.popen(
        "PYTHONPATH= python /Users/adam/code/unoconv/unoconv -T 3 -o tmp.txt -f txt %s"
        % filename).read()
    txt = file("tmp.txt").read().split('\n')
    end = 0
    start = len(txt)
    date = None
    for i, line in enumerate(txt):
        # Closing salutations mark the end of the explanation body.
        if 'בברכה' in line:
            end = i
        if 'בכבוד רב' in line:
            end = i
        # The "subject" line marks where the body starts.
        if 'הנדון' in line:
            start = i
        dates = date_re.findall(line)
        if len(dates) > 0:
            date = dates[0]
    if date is None:
        # No date in the text: fall back to the document's OLE metadata.
        ole = OleFileIO_PL.OleFileIO(filename)
        meta = ole.get_metadata()
        date = meta.last_saved_time
    else:
        date = parser.parse(date)
    explanation = None
    if start < end:
        explanation = "".join(txt[start + 1:end]).decode('utf8')
    return date, explanation
def getHashes(bupname, htype):
    """Return a dict mapping stream name -> hex digest for the bup *bupname*.

    Every stream except "Details" is decrypted with decryptStream() and
    hashed with the algorithm named by *htype* (e.g. 'md5', 'sha1',
    'sha256'). Exits with status 1 on any error, including an unknown
    algorithm name.
    """
    try:
        if OleFileIO_PL.isOleFile(bupname) is not True:
            print >> sys.stderr, 'Error - %s is not a valid OLE file.' % bupname
            sys.exit(1)
        ole = OleFileIO_PL.OleFileIO(bupname)
        hashes = {}
        for entry in ole.listdir():
            if entry[0] != "Details":
                fdata = ole.openstream(entry[0]).read()
                ptext = decryptStream(fdata)
                # hashlib.new() replaces the original if/elif chain; it also
                # raises a clear ValueError for unknown algorithms instead of
                # the original NameError from an unbound local 'm'.
                m = hashlib.new(htype)
                m.update(ptext)
                hashes[entry[0]] = m.hexdigest()
        ole.close()
        return hashes
    except Exception as e:
        print >> sys.stderr, 'Error - %s' % e
        sys.exit(1)
def makeMutationList_(self):
    """Populate self.mutationList with [offset, random-byte] candidates.

    Reads the BodyText and BinData storages of the target HWP/OLE file,
    keeps each stream's first 16 bytes (used as a magic to locate the
    stream inside self.data) together with its size, then samples
    0.1%-0.3% of each located stream's byte offsets as mutation positions.
    """
    fuzz_offset = []
    fuzzing_list = []
    mutate_position = []
    ole = OLE.OleFileIO(self.targetFile)
    for entry in ole.listdir():
        # BodyText and BinData were handled by two identical copy-pasted
        # loops in the original; merged here.
        if "BodyText" in entry or "BinData" in entry:
            for sec in entry[1:]:
                stream = entry[0] + "/" + sec
                size = ole.get_size(stream)
                fuzzing_list.append(
                    (ole.openstream(stream).read(16), size))
    ole.close()
    for magic, size in fuzzing_list:
        # Hoisted: find() was called twice per magic in the original.
        offset = self.data.find(magic)
        if offset != -1:
            mutate_position.append((offset, size))
    for offset, size in mutate_position:
        # 0.1% ~ 0.3% of the stream's bytes
        fuzz_offset += sample(xrange(offset, offset + size),
                              int(size * uniform(0.001, 0.003)))
    for index in fuzz_offset:
        # Same result as append([index]) + append-to-last in the original.
        self.mutationList.append([index, chr(randrange(256))])
def get_layer_count(file_name, ole=None):
    """Return the number of image planes stored in the zvi file.

    An already-open OleFileIO may be supplied via *ole* to avoid reopening
    the container.
    """
    if ole is None:
        ole = OleFileIO_PL.OleFileIO(file_name)
    contents = ole.openstream(['Image', 'Contents'])
    return read_image_container_content(contents).Count
def mutation(dest_file):
    """Fuzz (mutate) an HWP/XLS file in place.

    :param dest_file: path of the file to mutate
    :return: True on success, False if the file could not be rewritten
    """
    find_list = []
    mutate_position = []
    # Walk the BinData, BodyText and BinOLE storages of the OLE container
    # (plus the Workbook stream); record each stream's first 16 bytes as a
    # magic value together with the stream's size.
    ole = OLE.OleFileIO(dest_file)
    ole_list = ole.listdir()
    for entry in ole_list:
        if "BinData" in entry and entry[1].find("OLE") != -1:
            find_list.append((ole.openstream("BinData/" + entry[1]).read(16),
                              ole.get_size("BinData/" + entry[1])))
        if "BodyText" in entry:
            find_list.append((ole.openstream("BodyText/" + entry[1]).read(16),
                              ole.get_size("BodyText/" + entry[1])))
        if "BinOLE" in entry:
            find_list.append((ole.openstream("BinOLE/" + entry[1]).read(16),
                              ole.get_size("BinOLE/" + entry[1])))
        if "Workbook" in entry:
            find_list.append((ole.openstream("Workbook").read(16),
                              ole.get_size("Workbook")))
    ole.close()
    fuzz_offset = []
    fuzz_byte = xrange(256)
    with open(dest_file, 'rb') as f:
        hwp = f.read()
        hwp_write = bytearray(hwp)
        hwp_length = len(hwp)
    # Locate each magic in the raw file bytes and remember (offset, size).
    for magic, size in find_list:
        if hwp.find(magic) != -1:
            offset = hwp.find(magic)
            mutate_position.append((offset, size))
    # Pick 1% - 10% of each located stream's offsets to corrupt.
    for offset, size in mutate_position:
        fuzz_offset += sample(xrange(offset, offset + size),
                              int(size * uniform(0.01, 0.1)))
    # Corrupt the chosen offsets with random bytes.
    for index in fuzz_offset:
        if index >= hwp_length:
            continue
        hwp_write[index] = choice(fuzz_byte)
    # Write the mutated bytes back to the same path.
    try:
        with open(dest_file, 'wb') as f:
            f.write(hwp_write)
        return True
    except IOError as error:
        print error
        return False
def test_get_rootentry_name():
    """The sample .doc's root storage must be named 'Root Entry'."""
    # Arrange
    document = OleFileIO.OleFileIO("Tests/images/test-ole-file.doc")

    # Act
    root_name = document.get_rootentry_name()

    # Assert
    assert_equal(root_name, "Root Entry")
def test_get_size():
    """The 'worddocument' stream of the sample .doc must be non-empty."""
    # Arrange
    document = OleFileIO.OleFileIO("Tests/images/test-ole-file.doc")

    # Act
    stream_size = document.get_size('worddocument')

    # Assert
    assert_greater(stream_size, 0)
def test_get_type():
    """'worddocument' must be reported as a stream (not a storage)."""
    # Arrange
    document = OleFileIO.OleFileIO("Tests/images/test-ole-file.doc")

    # Act — local renamed from 'type', which shadowed the builtin.
    entry_type = document.get_type('worddocument')

    # Assert
    assert_equal(entry_type, OleFileIO.STGTY_STREAM)
def test_exists_no_vba_macros():
    """The sample .doc must not contain a 'macros/vba' storage."""
    # Arrange
    document = OleFileIO.OleFileIO("Tests/images/test-ole-file.doc")

    # Act
    has_macros = document.exists('macros/vba')

    # Assert
    assert_false(has_macros)
def test_exists_worddocument():
    """The sample .doc must contain a 'worddocument' stream."""
    # Arrange
    document = OleFileIO.OleFileIO("Tests/images/test-ole-file.doc")

    # Act
    has_worddocument = document.exists('worddocument')

    # Assert
    assert_true(has_worddocument)
def get_dir(file_name, ole=None):
    """Return the content structure (streams) of the zvi file together with
    the length of each stream, one formatted line per stream."""
    listing = []
    if ole is None:
        ole = OleFileIO_PL.OleFileIO(file_name)
    for path in ole.listdir():
        content = ole.openstream(path).read()
        listing.append('%10d %s' % (len(content), path))
    return listing
def load_image(path_img):
    """Load all image planes of a zvi file into one numpy array, planes first."""
    ole = OleFileIO_PL.OleFileIO(path_img)
    layer_total = get_layer_count('', ole=ole)
    planes = [zvi_read('', idx, ole=ole).Image.Array
              for idx in range(layer_total)]
    return np.array(planes)
def test_meta():
    """Author and page count must be read from the sample .doc's metadata."""
    # Arrange
    document = OleFileIO.OleFileIO("Tests/images/test-ole-file.doc")

    # Act
    metadata = document.get_metadata()

    # Assert
    assert_equal(metadata.author, b"Laurence Ipsum")
    assert_equal(metadata.num_pages, 1)
def OLEDump(filename):
    """Dump decompressed VBA macro source found in the streams of *filename*.

    Streams containing the '\\x00Attribut' marker hold compressed VBA
    source; they are decompressed via SearchAndDecompress and written to
    stdout in chunks. Exits with status 1 if *filename* is not an OLE file.
    """
    if OleFileIO_PL.isOleFile(filename) is not True:
        # Bug fix: the original referenced the undefined name 'infile'
        # here, raising NameError instead of printing the intended message.
        print >> sys.stderr, 'Error - %s is not a valid OLE file.' % filename
        sys.exit(1)
    ole = OleFileIO_PL.OleFileIO(filename)
    for fname in ole.listdir():
        stream = ole.openstream(fname).read()
        if '\x00Attribut' in stream:
            StdoutWriteChunked(SearchAndDecompress(stream))
    ole.close()
    return
def run(self):
    """Entry point of the Office viper module.

    Validates the session and the OleFileIO dependency, detects whether the
    current file is a legacy OLE document or an OOXML (zip) document, then
    dispatches to the handler matching the parsed CLI arguments. Exactly one
    of export/meta/streams/oleid/vba-code is handled per invocation.
    """
    super(Office, self).run()
    if self.args is None:
        return
    if not __sessions__.is_set():
        self.log('error', "No session opened")
        return
    if not HAVE_OLE:
        self.log(
            'error',
            "Missing dependency, install OleFileIO (`pip install OleFileIO_PL`)"
        )
        return
    # Tests to check for valid Office structures.
    OLE_FILE = OleFileIO_PL.isOleFile(__sessions__.current.file.path)
    XML_FILE = zipfile.is_zipfile(__sessions__.current.file.path)
    if OLE_FILE:
        ole = OleFileIO_PL.OleFileIO(__sessions__.current.file.path)
    elif XML_FILE:
        zip_xml = zipfile.ZipFile(__sessions__.current.file.path, 'r')
    else:
        self.log('error', "Not a valid office document")
        return
    # Dispatch: each argument has an OLE handler and (mostly) an XML one.
    if self.args.export is not None:
        if OLE_FILE:
            self.export(ole, self.args.export)
        elif XML_FILE:
            self.xml_export(zip_xml, self.args.export)
    elif self.args.meta:
        if OLE_FILE:
            self.metadata(ole)
        elif XML_FILE:
            self.xmlmeta(zip_xml)
    elif self.args.streams:
        if OLE_FILE:
            self.metatimes(ole)
        elif XML_FILE:
            self.xmlstruct(zip_xml)
    elif self.args.oleid:
        # oleid analysis only applies to legacy OLE containers.
        if OLE_FILE:
            self.oleid(ole)
        else:
            self.log('error', "Not an OLE file")
    elif self.args.vba or self.args.code:
        self.parse_vba(self.args.code)
    else:
        self.log('error', 'At least one of the parameters is required')
        self.usage()
def is_ole(self, f):
    """Summarize the OLE metadata of *f* into a categorized dict.

    Walks every summary/document-summary property and buckets non-empty
    values into display categories (Software, security, dates, users,
    Encoding, Statistics, ...) via self.addto. Values are rendered as
    small HTML fragments for the report.
    NOTE(review): the OleFileIO object is never closed — confirm whether
    callers rely on that or it simply leaks the handle.
    """
    meta = OleFileIO_PL.OleFileIO(f).get_metadata()
    ret = {}
    for prop in (meta.SUMMARY_ATTRIBS + meta.DOCSUM_ATTRIBS):
        value = getattr(meta, prop)
        if value:
            if prop == 'creating_application':
                ret = self.addto(ret, 'Software', value)
            elif prop == 'security':
                # Map the numeric security flag to its display string.
                ret = self.addto(ret, 'security', SEC_VALS[value])
            elif prop in ('create_time', 'last_printed', 'last_saved_time'):
                # Prettify the property name into a date label.
                prop = prop.replace('num_', '').replace('_', ' ').replace(' time', '').title()
                try:
                    prop += '</b>: ' + value.strftime("%m-%d-%Y %H:%M:%S")
                except:
                    # Non-datetime value (e.g. epoch placeholder) -> "Never".
                    prop += '</b>: Never'
                ret = self.addto(ret, 'dates', "<small><b>" + prop + "</small>")
            elif prop in ('author', 'last_saved_by'):
                ret = self.addto(ret, 'users', value)
            elif prop in ('codepage', 'codepage_doc'):
                try:
                    x = self.CODEPAGE_VALS[value]
                except:
                    x = 'Unknown: ' + str(value)
                ret = self.addto(ret, 'Encoding', x)
            elif prop in ('paragraphs', 'num_words', 'num_pages', 'num_chars',
                          'lines', 'chars_with_spaces', 'slides', 'notes'):
                ret = self.addto(
                    ret, 'Statistics',
                    "%s: %s " % (prop.replace('num_', '').replace('_', ' '), value))
            elif prop not in ('content_status', 'thumbnail', 'version',
                              'bytes', 'total_edit_time'):
                # don't care about the excluded props; everything else is
                # reported under its own name.
                ret = self.addto(ret, prop, str(value))
    return ret
def getDetails(bupname):
    """Return the decrypted contents of the "Details" stream of *bupname*.

    Exits with status 1 if *bupname* is not an OLE file or the stream
    cannot be read/decrypted.
    """
    try:
        if OleFileIO_PL.isOleFile(bupname) is not True:
            print >> sys.stderr, 'Error - %s is not a valid OLE file.' % bupname
            sys.exit(1)
        ole = OleFileIO_PL.OleFileIO(bupname)
        # A missing "Details" stream raises here and is reported below.
        ciphertext = ole.openstream("Details").read()
        plaintext = decryptStream(ciphertext)
        ole.close()
        return plaintext
    except Exception as e:
        print >> sys.stderr, 'Error - %s' % e
        sys.exit(1)
def get_dataframe(self):
    """Download the source file and load it into a pandas DataFrame.

    Falls back to reading the raw 'Workbook' OLE stream when the reader
    raises CompDocError (malformed compound-document wrapper).
    NOTE(review): `**self` is passed as read_excel keyword arguments, so
    this object apparently behaves as a mapping of reader options —
    confirm against the class definition.
    """
    self.download()
    df = None
    try:
        df = pd.read_excel(self.file_name, **self)
    except CompDocError:
        # Corrupt compound-document wrapper: the embedded 'Workbook'
        # stream itself is usually still readable.
        with open(self.file_name, 'rb') as file:
            ole = OleFileIO_PL.OleFileIO(file)
            if ole.exists('Workbook'):
                stream = ole.openstream('Workbook')
                df = pd.read_excel(stream, **self)
    # Optional post-processing driven by attributes set on self.
    if hasattr(self, 'columns'):
        df = df[self.columns]
    if hasattr(self, "remove_local_files"):
        self.remove_local_files()
    return df
def _process(self, filename):
    """Scan the 'Pictures' stream for embedded images and record each
    image's (offset, length) location in self._files, keyed by a generated
    file name. (Comments translated from Spanish.)"""
    olefile = OleFile.OleFileIO(filename)
    # As with pptx, a missing Pictures stream is not an error.
    if not olefile.exists("Pictures"):
        return
        #raise IOError("Pictures stream not found")
    self.__stream = olefile.openstream("Pictures")
    stream = self.__stream
    offset = 0
    # Count of images found so far (used to number the file names).
    n = 1
    while True:
        header = stream.read(self.headerlen)
        offset += self.headerlen
        if not header:
            break
        # Record header: instance, type and payload length (little-endian).
        recInstance, recType, recLen = struct.unpack_from("<HHL", header)
        # Seek past the payload to the next record header.
        stream.seek(recLen, 1)
        if DEBUG:
            print "%X %X %sb" % (recType, recInstance, recLen)
        # NOTE(review): formats.get() returns None for an unknown
        # (recType, recInstance) pair, which would crash this unpack —
        # confirm the formats table covers all record types encountered.
        extrabytes, ext = formats.get((recType, recInstance))
        # Skip format-specific extra bytes preceding the actual image data.
        recLen -= extrabytes
        offset += extrabytes
        # Generated image file name.
        filename = "{0}{1}{2}".format(self.basename, n, ext)
        self._files[filename] = (offset, recLen)
        offset += recLen
        n += 1
def _read_ole_file(data): print "_read_ole_file() called" f = StringIO.StringIO(data) try: doc = ofio.OleFileIO(f) except IOError as msg: print "_read_ole_file():", msg return (JWS_ERROR_INVALID_FILE, msg) if doc.exists('DataInfo'): try: str = doc.openstream('DataInfo') header_data = str.read() except IOError as msg: print "_read_ole_file():", msg return (JWS_ERROR_INVALID_FILE, msg) else: print "_read_ole_file(): no DataInfo section" return (JWS_ERROR_INVALID_FILE, "Invalid JWS OLE file.") try: header_obj = _unpack_ole_jws_header(header_data) except IOError as msg: print "_read_ole_file():", msg return (JWS_ERROR_INVALID_FILE, msg) print "_read_ole_file(): header read successfully" if doc.exists('Y-Data'): try: str = doc.openstream('Y-Data') ydata = str.read() except IOError as msg: return (JWS_ERROR_INVALID_FILE, "Could not read Y-Data.") if len(ydata) != header_obj.point_number * 4: return (JWS_ERROR_INVALID_FILE, "Wrong Y-Data length.") fmt = 'f' * header_obj.point_number values = unpack(fmt, ydata) channels = [ values, ] return (JWS_ERROR_SUCCESS, header_obj, channels) else: return (JWS_ERROR_INVALID_FILE, "The file does not contain Y-Data.")
def __rename_doc_file(self): try: ole = OleFileIO_PL.OleFileIO(self.__filename) except IOError: print " Error: %s is not a doc file or cprrupted."\ % self.__filename return # Each Word Binary File must contain a stream called # "WordDocument" stream, and this stream must start with a # File Information Block (FIB). docstream = ole.openstream('worddocument').read() if ole.exists('1Table'): table = ole.openstream('1Table').read() else: table = ole.openstream('0Table').read() print "Use 0Table" # return the first cp in the mainstream offset, size, utf_16 = self.__parse_fib(docstream, table) if utf_16: text = docstream[offset:offset + 2 * size] text = text.decode('utf-16').encode('utf-8') else: offset /= 2 text = docstream[offset:offset + size] # get the first non-empty paragraph paragraphs = text.split('\r') for paragraph in paragraphs: if paragraph: content = paragraph break author = ole.get_metadata().author ole.close() if content: filename = content if author is not None: filename += '-' + author self.__rename_file(filename)
def replace(self, output):
    """Mutate a random subset of streams of the input OLE2 document.

    Copies self.input to *output*, rewrites 1..len(dirs) randomly chosen
    stream directories (each at most once) with self.mutate_data output,
    and writes a companion '<output>.diff' file describing the changes.
    """
    # Create a copy of the input file, we need it because of how the
    # library OleFileIO_PL works (it edits the document in place).
    shutil.copyfile(self.input, output)
    # Open the OLE2 document and select a random stream
    ole = OleFileIO_PL.OleFileIO(output, write_mode=True)
    dirs = ole.listdir()
    # Select a number of directories to mutate
    dones = []
    for i in xrange(random.randint(1, len(dirs))):
        # Select a random dir to modify ensuring we're not modifying the
        # same one again and again... Allow only 1 change per execution to
        # each directory
        random_dir = random.choice(ole.listdir())
        if random_dir in dones:
            continue
        dones.append(random_dir)
        data = ole.openstream(random_dir).read()
        # Get the newly mutated data
        new_data = self.mutate_data(data)
        # Write the new stream data
        ole.write_stream(random_dir, new_data)
    # Close the modified OLE document
    ole.close()
    # Lazy (but likely the best) way to generate the differences file
    diff = self.get_diff(
        open(self.input, "rb").read(),
        open(output, "rb").read())
    # And, finaly, write the .diff file too
    f = open(output + ".diff", "wb")
    f.write("# Original file created by 'OLE file mutator' was %s\n" %
            self.input)
    f.write("\n".join(diff))
    f.close()
def mutations(dest_file): dest_file = os.getcwd()+"\\temp\\"+dest_file find_list = [] mutate_position = [] ole = OLE.OleFileIO(dest_file) ole_list = ole.listdir() for entry in ole_list: if "BinData" in entry and not ".OLE" in entry[1]: find_list.append((ole.openstream("BinData/"+entry[1]).read(16), ole.get_size("BinData/"+entry[1]))) ole.close() print find_list fuzz_offset = [] fuzz_byte = xrange(256) with open(dest_file, 'rb') as f: hwp = f.read() hwp_write = bytearray(hwp) hwp_length = len(hwp) for magic, size in find_list: if hwp.find(magic) != -1: offset = hwp.find(magic) mutate_position.append((offset, size)) for offset, size in mutate_position: fuzz_offset += sample(xrange(offset, offset+size), int(size*uniform(0.001, 0.03))) for index in fuzz_offset: if index >= hwp_length : continue hwp_write[index] = choice(fuzz_byte) try: with open(dest_file, 'wb') as f: f.write(hwp_write) return True except IOError as error: print error return False
def OLEDump(filename):
    """Extract, decompress and lightly deobfuscate VBA macro source.

    Streams containing the '\\x00Attribut' marker are decompressed with
    SearchAndDecompress, then common VBA string obfuscation is undone
    (Chr()/ChrW$() calls, '&' concatenation, quotes, StrReverse()) before
    the result is written to stdout. Exits with status 1 if *filename* is
    not an OLE file.
    """
    if OleFileIO_PL.isOleFile(filename) is not True:
        # Bug fix: the original referenced the undefined name 'infile'
        # here, raising NameError instead of printing the intended message.
        print >>sys.stderr, 'Error - %s is not a valid OLE file.' % filename
        sys.exit(1)
    ole = OleFileIO_PL.OleFileIO(filename)
    for fname in ole.listdir():
        stream = ole.openstream(fname).read()
        if '\x00Attribut' in stream:
            line = SearchAndDecompress(stream)
            # Replace Chr()/ChrW$() calls with their literal characters.
            line = re.sub(r'[cC][hH][rR][wW\$]*\(([\d+\+\-\s\.]*)\)', do_chr, line)
            # Drop VBA string-concatenation operators and quote characters.
            line = re.sub(" & ", "", line)
            line = re.sub("& ", "", line)
            line = line.replace("\"", "")
            # Undo StrReverse() obfuscation.
            line = re.sub(r'[sS][tT][rR][rR][eE][vV][eE][rR][sS][eE]\(([^)]*)\)', do_strme, line)
            line = line.rstrip()
            StdoutWriteChunked(line)
    ole.close()
    return
def extract(infile, dirname=None):
    """Extract every stream of the OLE file *infile* into *dirname*.

    Each stream is written to a file named after the stream inside
    *dirname* (defaults to the current working directory).

    Returns the stream list from ``ole.listdir()``.
    Exits the process with status 1 on any error.
    """
    if dirname is None:
        dirname = os.getcwd()
    try:
        if OleFileIO_PL.isOleFile(infile) is not True:
            print >> sys.stderr, 'Error - %s is not a valid OLE file.' % infile
            sys.exit(1)
        ole = OleFileIO_PL.OleFileIO(infile)
        filelist = ole.listdir()
        for fname in filelist:
            data = ole.openstream(fname[0]).read()
            # 'with' guarantees the output file is closed even if the
            # write raises (the original leaked the handle in that case).
            with open(os.path.join(dirname, fname[0]), 'wb') as fp:
                fp.write(data)
        ole.close()
        return filelist
    except Exception as e:
        print >> sys.stderr, 'Error - %s' % e
        sys.exit(1)