def get_streams():
    """Collect the stream objects of the currently opened PDF document.

    The file of the current session is parsed with ``PDFParser`` and every
    object whose type is ``"stream"`` is reported.  When ``arg_open`` or
    ``arg_dump`` (both taken from the enclosing scope) is set, the decoded
    content of each stream is also written to disk.

    Returns:
        list: one ``[oid, offset, size, type]`` list per stream, where
        ``type`` is the first 100 characters of ``get_type()`` applied to
        the decoded stream.  When the stream was dumped, the path of the
        dump file is appended as a fifth element.
    """
    # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

    # Initialize peepdf parser and parse the currently opened PDF document.
    parser = PDFParser()
    # Only the parsed document is used; the first returned item is ignored.
    _, pdf = parser.parse(__sessions__.current.file.path, True, False)

    results = []

    # Walk the parsed bodies directly rather than sizing the loop from the
    # statistics dictionary: every entry of ``pdf.body`` is visited exactly
    # once and the index can never run past the end of the list.
    for body in pdf.body:
        objects = body.objects
        for index in objects:
            entry = objects[index]
            details = entry.object
            if details.type != "stream":
                continue

            oid = entry.id
            decoded_stream = details.decodedStream
            result = [oid, entry.offset, entry.size, get_type(decoded_stream)[:100]]

            # If the stream needs to be dumped or opened, we do it
            # and expand the results with the path to the stream dump.
            if arg_open or arg_dump:
                if arg_dump:
                    # If instructed to dump, we already have a base folder.
                    folder = arg_dump
                else:
                    # Otherwise a fresh temporary folder is generated for
                    # each stream, which keeps streams sharing an object ID
                    # from overwriting each other in this mode.
                    folder = tempfile.mkdtemp()

                # TODO: sometimes there appear to be multiple streams
                # with the same object ID. Is that even possible?
                # It will cause conflicts when dumping into one folder.
                dump_path = "{0}/{1}_{2}_stream.bin".format(
                    folder, __sessions__.current.file.md5, oid)

                # NOTE(review): strip() drops leading/trailing whitespace
                # from the decoded data, so the dump may not be a
                # byte-exact copy of the stream -- confirm this is intended.
                with open(dump_path, "wb") as handle:
                    handle.write(decoded_stream.strip())

                # Add dump path to the stream attributes.
                result.append(dump_path)

            # Update list of streams.
            results.append(result)

    return results
def get_resources(pe):
    """List the resources found in the resource directory of a PE object.

    Args:
        pe: object exposing ``DIRECTORY_ENTRY_RESOURCE`` and ``get_data()``
            (presumably a ``pefile.PE`` instance -- confirm against caller).

    Returns:
        list: one list per resource language entry, shaped as
        ``[count, name, offset, md5, size, filetype, language, sublanguage]``
        where ``offset`` and ``size`` are hexadecimal strings.  When the
        resource was dumped to disk, the dump path is appended as a ninth
        element.  An empty list is returned when ``pe`` has no
        ``DIRECTORY_ENTRY_RESOURCE`` attribute.

    Any exception raised while processing a resource type is logged through
    ``self.log`` and the remaining entries of that type are skipped.
    """
    resources = []
    if not hasattr(pe, 'DIRECTORY_ENTRY_RESOURCE'):
        return resources

    # Temporary folder used when resources are opened without an explicit
    # dump folder.  It is created lazily, once, so a single call does not
    # leave one temporary directory behind per resource.
    temp_folder = None
    count = 1

    for resource_type in pe.DIRECTORY_ENTRY_RESOURCE.entries:
        try:
            if resource_type.name is not None:
                name = str(resource_type.name)
            else:
                # Look the type up first and stringify afterwards: calling
                # str() on a failed lookup yields the literal "None", which
                # would prevent the numeric Id fallback from ever running.
                name = pefile.RESOURCE_TYPE.get(resource_type.struct.Id)
                if name is None:
                    name = resource_type.struct.Id
                name = str(name)

            if not hasattr(resource_type, 'directory'):
                continue

            for resource_id in resource_type.directory.entries:
                if not hasattr(resource_id, 'directory'):
                    continue

                for resource_lang in resource_id.directory.entries:
                    data_struct = resource_lang.data.struct
                    data = pe.get_data(data_struct.OffsetToData, data_struct.Size)
                    filetype = get_type(data)
                    md5 = get_md5(data)
                    language = pefile.LANG.get(resource_lang.data.lang, None)
                    sublanguage = pefile.get_sublang_name_for_lang(
                        resource_lang.data.lang, resource_lang.data.sublang)
                    offset = hex(data_struct.OffsetToData)
                    size = hex(data_struct.Size)

                    resource = [count, name, offset, md5, size,
                                filetype, language, sublanguage]

                    # Dump resources if requested to and if the file
                    # currently being processed is the opened session file.
                    # This is to avoid that during a --scan all the
                    # resources being scanned are dumped as well.
                    if (self.args.open or self.args.dump) and pe == self.pe:
                        if self.args.dump:
                            folder = self.args.dump
                        else:
                            if temp_folder is None:
                                temp_folder = tempfile.mkdtemp()
                            folder = temp_folder

                        # NOTE(review): ``name`` comes from the analysed
                        # file and is used verbatim in the file name; a
                        # name containing path separators is not sanitised
                        # here -- confirm whether that needs handling.
                        resource_path = os.path.join(
                            folder,
                            '{0}_{1}_{2}'.format(
                                __sessions__.current.file.md5, offset, name))
                        resource.append(resource_path)

                        with open(resource_path, 'wb') as resource_handle:
                            resource_handle.write(data)

                    resources.append(resource)
                    count += 1
        except Exception as e:
            # Best effort: report the problem and move on to the next type.
            self.log('error', e)

    return resources
def get_streams():
    """Collect the stream objects of the currently opened PDF document.

    The file of the current session is parsed with ``PDFParser`` and every
    object whose type is ``'stream'`` is reported.  When ``arg_open`` or
    ``arg_dump`` (both taken from the enclosing scope) is set, the decoded
    content of each stream is also written to disk: into ``arg_dump`` when
    it is set, otherwise into the system temporary directory.

    Returns:
        list: one ``[counter, oid, offset, size, type]`` list per stream,
        where ``counter`` is a running number starting at 1 and ``type`` is
        the first 100 characters of ``get_type()`` applied to the decoded
        stream.  When the stream was dumped, the path of the dump file is
        appended as a sixth element.  If the dump folder cannot be created
        or is not a directory, an error is logged through ``self.log`` and
        the results gathered so far are returned.
    """
    # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

    # Initialize peepdf parser and parse the currently opened PDF document.
    parser = PDFParser()
    # Only the parsed document is used; the first returned item is ignored.
    _, pdf = parser.parse(__sessions__.current.file.path, True, False)

    results = []
    object_counter = 1
    # The dump folder does not change between streams, so it is resolved
    # and validated only once, when the first stream is about to be dumped.
    dump_folder = None

    for body in pdf.body:
        objects = body.objects
        for index in objects:
            entry = objects[index]
            details = entry.object
            if details.type != 'stream':
                continue

            decoded_stream = details.decodedStream
            result = [
                object_counter,
                entry.id,
                entry.offset,
                entry.size,
                get_type(decoded_stream)[:100],
            ]

            # If the stream needs to be dumped or opened, we do it
            # and expand the results with the path to the stream dump.
            if arg_open or arg_dump:
                if dump_folder is None:
                    # If instructed to dump, we already have a base folder;
                    # otherwise we just use the system temporary one.
                    folder = arg_dump if arg_dump else tempfile.gettempdir()

                    # Confirm the dump path.
                    if not os.path.exists(folder):
                        try:
                            os.makedirs(folder)
                        except Exception as e:
                            self.log('error', "Unable to create directory at {0}: {1}".format(folder, e))
                            return results
                    elif not os.path.isdir(folder):
                        self.log('error', "You need to specify a folder not a file")
                        return results

                    dump_folder = folder

                # The running counter, not the object ID, names the file,
                # so streams sharing an object ID get distinct dumps.
                dump_path = os.path.join(
                    dump_folder,
                    '{0}_{1}_pdf_stream.bin'.format(
                        __sessions__.current.file.md5, object_counter))

                # NOTE(review): strip() drops leading/trailing whitespace
                # from the decoded data, so the dump may not be a
                # byte-exact copy of the stream -- confirm this is intended.
                with open(dump_path, 'wb') as handle:
                    handle.write(decoded_stream.strip())

                # Add dump path to the stream attributes.
                result.append(dump_path)

            # Update list of streams.
            results.append(result)
            object_counter += 1

    return results
def get_streams():
    """Return a description of every stream in the currently opened PDF.

    The session's current file is parsed with ``PDFParser``; each object
    whose type is ``'stream'`` yields one result row.  When ``arg_open`` or
    ``arg_dump`` (free names from the enclosing scope) is set, the decoded
    stream is additionally written to a file inside ``arg_dump`` or, when
    that is not set, inside the system temporary directory.

    Returns:
        list: rows of ``[counter, oid, offset, size, type]``; ``counter``
        starts at 1 and increases by one per stream, ``type`` is the first
        100 characters of ``get_type()`` for the decoded stream.  A dumped
        stream has its dump path appended to the row.  When the dump folder
        is unusable, the problem is logged through ``self.log`` and the
        rows collected up to that point are returned.
    """
    # This function is brutally ripped from Brandon Dixon's swf_mastah.py.

    def _usable_dump_folder():
        """Return the dump directory, or None after logging why it is unusable."""
        # If instructed to dump, we already have a base folder;
        # otherwise we just use the system temporary one.
        folder = arg_dump if arg_dump else tempfile.gettempdir()

        if os.path.isdir(folder):
            return folder

        if os.path.exists(folder):
            # The path exists but is something other than a directory.
            self.log(
                'error',
                "You need to specify a folder not a file"
            )
            return None

        try:
            os.makedirs(folder)
        except Exception as e:
            self.log(
                'error',
                "Unable to create directory at {0}: {1}"
                .format(folder, e))
            return None
        return folder

    # Initialize peepdf parser and parse the currently opened PDF document.
    parser = PDFParser()
    # Only the parsed document is used; the first returned item is ignored.
    _, pdf = parser.parse(__sessions__.current.file.path, True, False)

    results = []
    object_counter = 1
    # Validated lazily on the first dump and then reused: the target folder
    # is the same for every stream of the document.
    dump_folder = None

    for body in pdf.body:
        objects = body.objects
        for index in objects:
            entry = objects[index]
            details = entry.object
            if details.type != 'stream':
                continue

            decoded_stream = details.decodedStream
            result = [
                object_counter,
                entry.id,
                entry.offset,
                entry.size,
                get_type(decoded_stream)[:100],
            ]

            # If the stream needs to be dumped or opened, we do it
            # and expand the results with the path to the stream dump.
            if arg_open or arg_dump:
                if dump_folder is None:
                    dump_folder = _usable_dump_folder()
                    if dump_folder is None:
                        return results

                # Files are named after the running counter rather than
                # the object ID, so streams that share an object ID do not
                # overwrite each other.
                dump_path = os.path.join(
                    dump_folder,
                    '{0}_{1}_pdf_stream.bin'.format(
                        __sessions__.current.file.md5, object_counter))

                # NOTE(review): strip() drops leading/trailing whitespace
                # from the decoded data, so the dump may not be a
                # byte-exact copy of the stream -- confirm this is intended.
                with open(dump_path, 'wb') as handle:
                    handle.write(decoded_stream.strip())

                # Add dump path to the stream attributes.
                result.append(dump_path)

            # Update list of streams.
            results.append(result)
            object_counter += 1

    return results