def testOpenSegmentByURN(self):
    try:
        resolver = data_store.MemoryDataStore()

        # This is required in order to load and parse metadata from this
        # volume into a fresh empty resolver.
        with zip.ZipFile.NewZipFile(resolver, Version(1, 1, "pyaff4"),
                                    self.filename_urn) as zip_file:
            segment_urn = zip_file.urn.Append(
                escaping.arnPathFragment_from_path(self.segment_name),
                quote=False)
            unc_segment_urn = zip_file.urn.Append(
                escaping.arnPathFragment_from_path(self.unc_segment_name),
                quote=False)
            period_start_segment_urn = self.volume_urn.Append(
                escaping.arnPathFragment_from_path(
                    self.period_start_segment_name),
                quote=False)

            with resolver.AFF4FactoryOpen(segment_urn) as segment:
                self.assertEquals(segment.Read(1000), self.data1)

            with resolver.AFF4FactoryOpen(unc_segment_urn) as segment:
                self.assertEquals(segment.Read(1000), self.data1)

            # Open the segment whose name starts with a period (written in
            # setUp) by its own URN.
            with resolver.AFF4FactoryOpen(
                    period_start_segment_urn) as segment:
                self.assertEquals(segment.Read(1000), self.data1)
    except Exception:
        traceback.print_exc()
        self.fail()
def setUp(self):
    with data_store.MemoryDataStore() as resolver:
        resolver.Set(lexicon.transient_graph, self.filename_urn,
                     lexicon.AFF4_STREAM_WRITE_MODE,
                     rdfvalue.XSDString("truncate"))

        with zip.ZipFile.NewZipFile(resolver, Version(1, 1, "pyaff4"),
                                    self.filename_urn) as zip_file:
            self.volume_urn = zip_file.urn

            segment_urn = self.volume_urn.Append(
                escaping.arnPathFragment_from_path(self.segment_name),
                quote=False)
            with zip_file.CreateMember(segment_urn) as segment:
                segment.Write(self.data1)

            unc_segment_urn = self.volume_urn.Append(
                escaping.arnPathFragment_from_path(self.unc_segment_name),
                quote=False)
            with zip_file.CreateMember(unc_segment_urn) as segment:
                segment.Write(self.data1)

            period_start_segment_urn = self.volume_urn.Append(
                self.period_start_segment_name, quote=False)
            with zip_file.CreateMember(period_start_segment_urn) as segment:
                segment.Write(self.data1)
def extract(container_name, imageURNs, destFolder):
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)

        with container.Container.openURNtoContainer(container_urn) as volume:
            printVolumeInfo(container_name, volume)
            resolver = volume.resolver

            for imageUrn in imageURNs:
                imageUrn = utils.SmartUnicode(imageUrn)
                pathName = next(resolver.QuerySubjectPredicate(
                    volume.urn, imageUrn, volume.lexicon.pathName))

                with resolver.AFF4FactoryOpen(imageUrn) as srcStream:
                    if destFolder != "-":
                        pathName = escaping.arnPathFragment_from_path(
                            pathName.value)
                        while pathName.startswith("/"):
                            pathName = pathName[1:]
                        destFile = os.path.join(destFolder, pathName)
                        if not os.path.exists(os.path.dirname(destFile)):
                            try:
                                os.makedirs(os.path.dirname(destFile))
                            except OSError as exc:
                                # Guard against a race with another process
                                # creating the same directory.
                                if exc.errno != errno.EEXIST:
                                    raise
                        # The source is a byte stream, so write in binary
                        # mode and copy in 32 KiB blocks.
                        with open(destFile, "wb") as destStream:
                            shutil.copyfileobj(srcStream, destStream,
                                               length=32 * 1024)
                        print("\tExtracted %s to %s" % (pathName, destFile))
                    else:
                        shutil.copyfileobj(srcStream, sys.stdout)
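
# A minimal usage sketch for extract(), assuming a container at a
# hypothetical path; the image URN below is illustrative and would normally
# be discovered from the container's metadata:
#
#   extract("/tmp/evidence.aff4",
#           [u"aff4://c2d9ea4a-dd24-4788-b5e2-3972856e8c33"],  # hypothetical
#           "/tmp/extracted")   # pass "-" to stream the image to stdout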
def containsLogicalImage(self, pathfragment):
    arn = self.urn.Append(
        escaping.arnPathFragment_from_path(pathfragment), quote=False)
    types = self.resolver.Get(lexicon.any, arn, lexicon.AFF4_TYPE)
    return lexicon.standard11.FileImage in types
def addPathNamesToVolume(resolver, volume, pathnames, recursive, hashbased):
    for pathname in pathnames:
        if not os.path.exists(pathname):
            print("Path %s not found. Skipping." % pathname)
            continue
        pathname = utils.SmartUnicode(pathname)
        print("\tAdding: %s" % pathname)
        fsmeta = logical.FSMetadata.create(pathname)
        if os.path.isdir(pathname):
            image_urn = None
            if volume.isAFF4Collision(pathname):
                image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
            else:
                image_urn = volume.urn.Append(
                    escaping.arnPathFragment_from_path(pathname), quote=False)

            fsmeta.urn = image_urn
            fsmeta.store(resolver)
            resolver.Set(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.standard11.pathName),
                         rdfvalue.XSDString(pathname))
            resolver.Add(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard11.FolderImage))
            resolver.Add(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard.Image))
            if recursive:
                for child in os.listdir(pathname):
                    pathnames.append(os.path.join(pathname, child))
        else:
            with open(pathname, "rb") as src:
                hasher = linear_hasher.StreamHasher(
                    src, [lexicon.HASH_SHA1, lexicon.HASH_MD5,
                          lexicon.HASH_SHA256])
                if not hashbased:
                    urn = volume.writeLogicalStream(
                        pathname, hasher, fsmeta.length)
                else:
                    urn = volume.writeLogicalStreamRabinHashBased(
                        pathname, hasher, fsmeta.length)
                fsmeta.urn = urn
                fsmeta.store(resolver)

                bc_writer = blockchain.BlockChainWriter.getBlockchainWriter()
                hash_dict = {}
                for h in hasher.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), hasher.hashToType[h])
                    resolver.Add(urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)
                    hash_dict[h.name] = hh

                if bc_writer:
                    bc_writer.Set_hash(hash_dict["md5"],
                                       hash_dict["sha1"],
                                       hash_dict["sha256"])
def testRemoveThenReAdd(self):
    try:
        os.unlink(self.filename)
    except (IOError, OSError):
        pass

    with data_store.MemoryDataStore() as resolver:
        resolver.Set(lexicon.transient_graph, self.filename_urn,
                     lexicon.AFF4_STREAM_WRITE_MODE,
                     rdfvalue.XSDString("truncate"))

        with zip.ZipFile.NewZipFile(resolver, Version(1, 1, "pyaff4"),
                                    self.filename_urn) as zip_file:
            self.volume_urn = zip_file.urn
            segment_urn = self.volume_urn.Append(
                escaping.arnPathFragment_from_path(self.segment_name),
                quote=False)

            with zip_file.CreateMember(segment_urn) as segment:
                segment.Write(self.data1)
                segment.Flush()

            zip_file.RemoveMember(segment_urn)

            with zip_file.CreateMember(segment_urn) as segment:
                segment.Write(self.data2)

    with data_store.MemoryDataStore() as resolver:
        with zip.ZipFile.NewZipFile(resolver, Version(1, 1, "pyaff4"),
                                    self.filename_urn) as zip_file:
            self.volume_urn = zip_file.urn
            segment_urn = self.volume_urn.Append(
                escaping.arnPathFragment_from_path(self.segment_name),
                quote=False)

            self.assertTrue(zip_file.ContainsMember(segment_urn))
            with zip_file.OpenMember(segment_urn) as segment:
                self.assertEquals(self.data2,
                                  segment.Read(len(self.data2)))

    self.assertEquals(629, os.stat(self.filename).st_size)
def addPathNames(container_name, pathnames, recursive, append, hashbased):
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None
        if not append:
            volume = container.Container.createURN(resolver, container_urn)
            print("Creating AFF4Container: file://%s <%s>"
                  % (container_name, volume.urn))
        else:
            volume = container.Container.openURNtoContainer(
                container_urn, mode="+", resolver=resolver)
            print("Appending to AFF4Container: file://%s <%s>"
                  % (container_name, volume.urn))

        with volume as volume:
            for pathname in pathnames:
                if not os.path.exists(pathname):
                    print("Path %s not found. Skipping." % pathname)
                    continue
                pathname = utils.SmartUnicode(pathname)
                print("\tAdding: %s" % pathname)
                fsmeta = logical.FSMetadata.create(pathname)
                if os.path.isdir(pathname):
                    image_urn = None
                    if volume.isAFF4Collision(pathname):
                        image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
                    else:
                        image_urn = volume.urn.Append(
                            escaping.arnPathFragment_from_path(pathname),
                            quote=False)

                    fsmeta.urn = image_urn
                    fsmeta.store(resolver)
                    resolver.Set(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.standard11.pathName),
                                 rdfvalue.XSDString(pathname))
                    resolver.Add(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.AFF4_TYPE),
                                 rdfvalue.URN(lexicon.standard11.FolderImage))
                    resolver.Add(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.AFF4_TYPE),
                                 rdfvalue.URN(lexicon.standard.Image))
                    if recursive:
                        for child in os.listdir(pathname):
                            pathnames.append(os.path.join(pathname, child))
                else:
                    with open(pathname, "rb") as src:
                        hasher = linear_hasher.StreamHasher(
                            src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                        if not hashbased:
                            urn = volume.writeLogicalStream(
                                pathname, hasher, fsmeta.length)
                        else:
                            urn = volume.writeLogicalStreamRabinHashBased(
                                pathname, hasher, fsmeta.length)
                        fsmeta.urn = urn
                        fsmeta.store(resolver)
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(
                                h.hexdigest(), hasher.hashToType[h])
                            resolver.Add(urn, urn,
                                         rdfvalue.URN(lexicon.standard.hash),
                                         hh)
            return urn
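
# Usage sketch (hypothetical arguments): create a fresh container holding a
# single file, using conventional (non-deduplicated) logical imaging:
#
#   addPathNames("/tmp/logical.aff4", ["/tmp/report.pdf"],
#                recursive=False, append=False, hashbased=False)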
def writeLogical(self, filename, readstream, length):
    image_urn = None
    if self.isAFF4Collision(filename):
        image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
    else:
        image_urn = self.urn.Append(
            escaping.arnPathFragment_from_path(filename), quote=False)

    if length > self.maxSegmentResidentSize:
        self.writeCompressedBlockStream(image_urn, filename, readstream)
    else:
        self.writeZipStream(image_urn, filename, readstream)
        #self.resolver.Set(image_urn, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.AFF4_ZIP_SEGMENT_IMAGE_TYPE))

    self.resolver.Add(self.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard11.FileImage))
    self.resolver.Add(self.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard.Image))
    self.resolver.Add(self.urn, image_urn,
                      rdfvalue.URN(lexicon.standard11.pathName),
                      rdfvalue.XSDString(filename))
    return image_urn
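
# Usage sketch, assuming "volume" is an open logical container and the file
# name below is hypothetical. Streams larger than maxSegmentResidentSize go
# to a compressed block stream; smaller ones are stored as zip segments.
#
#   with open("report.pdf", "rb") as src:
#       image_urn = volume.writeLogical(u"report.pdf", src,
#                                       os.stat("report.pdf").st_size)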
def newLogicalStream(self, filename, length):
    image_urn = None
    if self.isAFF4Collision(filename):
        image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
    else:
        image_urn = self.urn.Append(
            escaping.arnPathFragment_from_path(filename), quote=False)

    writer = None
    if length > self.maxSegmentResidentSize:
        writer = self.newCompressedBlockStream(image_urn, filename)
    else:
        writer = self.newZipStream(image_urn, filename)

    self.resolver.Add(self.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard11.FileImage))
    self.resolver.Add(self.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard.Image))
    self.resolver.Add(self.urn, image_urn,
                      rdfvalue.URN(lexicon.standard11.pathName),
                      rdfvalue.XSDString(filename))
    return writer
def testPathToARNPathFragment(self):
    self.assertEqual(
        u"/my_filename_with_forbidden_chars_like?_and_#",
        arnPathFragment_from_path(
            u"my_filename_with_forbidden_chars_like?_and_#"))
    self.assertEqual(u"/ネコ.txt", arnPathFragment_from_path(u"ネコ.txt"))
    self.assertEqual(u"/ネコ.txt", arnPathFragment_from_path(u"/ネコ.txt"))
    self.assertEqual(u"/ネコ.txt", arnPathFragment_from_path(u"\ネコ.txt"))
    self.assertEqual(u"/a/b/c/ネコ.txt",
                     arnPathFragment_from_path(u"\\a\\b\\c\\ネコ.txt"))
    self.assertEqual(u"/c:/a/b/c/ネコ.txt",
                     arnPathFragment_from_path(u"c:\\a\\b\\c\\ネコ.txt"))
    self.assertEqual(
        u"storage/a/b/c/ネコ.txt",
        arnPathFragment_from_path(u"\\\\storage\\a\\b\\c\\ネコ.txt"))
    self.assertEqual(u"/c:", arnPathFragment_from_path(u"c:"))
    self.assertEqual(u"/c:/", arnPathFragment_from_path(u"c:\\"))
    self.assertEqual(u"/c:/foo", arnPathFragment_from_path(u"c:\\foo"))
    self.assertEqual(u"bar/c$", arnPathFragment_from_path(u"\\\\bar\\c$"))
    self.assertEqual(u"bar/c$/foo.txt",
                     arnPathFragment_from_path(u"\\\\bar\\c$\\foo.txt"))
    self.assertEqual(
        u"bar/c$/foo/ネコ.txt",
        arnPathFragment_from_path(u"\\\\bar\\c$\\foo\\ネコ.txt"))
    self.assertEqual(u"/foo/bar", arnPathFragment_from_path(u"/foo/bar"))
    self.assertEqual(u"/some%20file",
                     arnPathFragment_from_path(u"/some file"))
    self.assertEqual(u"/some%20file",
                     arnPathFragment_from_path(u"./some file"))
    self.assertEqual(u"/some%20file",
                     arnPathFragment_from_path(u"../some file"))

    # examples from
    # https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/
    self.assertEqual(
        u"laptop/My%20Documents/FileSchemeURIs.doc",
        arnPathFragment_from_path(
            u"\\\\laptop\\My Documents\\FileSchemeURIs.doc"))
    self.assertEqual(
        u"/C:/Documents%20and%20Settings/davris/FileSchemeURIs.doc",
        arnPathFragment_from_path(
            u"C:\\Documents and Settings\\davris\\FileSchemeURIs.doc"))
    self.assertEqual(
        u"/D:/Program%20Files/Viewer/startup.htm",
        arnPathFragment_from_path(u"D:\\Program Files\\Viewer\\startup.htm"))
    self.assertEqual(
        u"/C:/Program%20Files/Music/Web%20Sys/main.html?REQUEST=RADIO",
        arnPathFragment_from_path(
            u"C:\\Program Files\\Music\\Web Sys\\main.html?REQUEST=RADIO"))
    self.assertEqual(
        u"applib/products/a-b/abc_9/4148.920a/media/start.swf",
        arnPathFragment_from_path(
            u"\\\\applib\\products/a-b/abc_9/4148.920a/media/start.swf"))
    self.assertEqual(u"/C:/exampleㄓ.txt",
                     arnPathFragment_from_path(u"C:\exampleㄓ.txt"))

    # microsoft device paths
    self.assertEqual(u"./Windows/foo.txt",
                     arnPathFragment_from_path(u"\\\\.\\Windows\\foo.txt"))

    # MacOS seen
    self.assertEqual(u"/", u"/")
def writeLogicalStreamHashBased(self, filename, readstream, length,
                                check_bytes=False):
    logical_file_id = None
    if self.isAFF4Collision(filename):
        logical_file_id = rdfvalue.URN("aff4://%s" % uuid.uuid4())
    else:
        logical_file_id = self.urn.Append(
            escaping.arnPathFragment_from_path(filename), quote=False)

    chunk_size = self.block_store_stream.chunk_size
    with aff4_map.AFF4Map.NewAFF4Map(
            self.resolver, logical_file_id, self.urn) as logical_file_map:
        file_offset = 0

        while file_offset < length:
            toread = min(length - file_offset, chunk_size)
            chunk = readstream.read(toread)

            # Pad the chunk to chunk_size if it is short.
            read_chunk_size = len(chunk)
            if read_chunk_size < chunk_size:
                chunk = chunk + b"\x00" * (chunk_size - read_chunk_size)

            h = hashes.new(lexicon.HASH_SHA512)
            h.update(chunk)

            # The hash ARN uses the RFC 4648 URL-safe base64 alphabet.
            hashid = rdfvalue.URN(
                "aff4:sha512:" +
                base64.urlsafe_b64encode(h.digest()).decode())

            # Check whether this chunk hash is already in the container.
            existing_bytestream_reference_id = self.resolver.GetUnique(
                lexicon.any, hashid,
                rdfvalue.URN(lexicon.standard.dataStream))

            if existing_bytestream_reference_id is None:
                block_stream_address = self.block_store_stream.TellWrite()
                self.block_store_stream.Write(chunk)

                chunk_reference_id = (
                    self.block_store_stream.urn.SerializeToString() +
                    "[0x%x:0x%x]" % (block_stream_address, chunk_size))
                chunk_reference_id = rdfvalue.URN(chunk_reference_id)
                self.resolver.Add(self.urn, hashid,
                                  rdfvalue.URN(lexicon.standard.dataStream),
                                  chunk_reference_id)
                logical_file_map.AddRange(file_offset, 0, toread, hashid)
                #print("[%x, %x] -> %s -> %s" % (file_offset, toread, hashid, chunk_reference_id))
            else:
                if check_bytes:
                    with self.resolver.AFF4FactoryOpen(
                            existing_bytestream_reference_id) as existing_chunk_stream:
                        existing_chunk_length = existing_chunk_stream.length
                        existing_chunk = existing_chunk_stream.Read(
                            existing_chunk_length)
                        if chunk != existing_chunk:
                            # We hit the jackpot and found a hash collision.
                            # In this highly unlikely event, we store the new
                            # bytes using regular logical imaging. To record
                            # the collision, we add the colliding stream as a
                            # property.
                            print("!!!Collision found for hash %s" % hashid)
                            block_stream_address = self.block_store_stream.TellWrite()
                            self.block_store_stream.Write(chunk)

                            chunk_reference_id = (
                                self.block_store_stream.urn.SerializeToString() +
                                "[0x%x:0x%x]" % (block_stream_address, chunk_size))
                            chunk_reference_id = rdfvalue.URN(chunk_reference_id)
                            logical_file_map.AddRange(
                                file_offset, block_stream_address,
                                chunk_size, self.block_store_stream.urn)
                            self.resolver.Add(
                                self.urn, hashid,
                                rdfvalue.URN(lexicon.standard11.collidingDataStream),
                                chunk_reference_id)
                        else:
                            logical_file_map.AddRange(file_offset, 0,
                                                      toread, hashid)
                else:
                    logical_file_map.AddRange(file_offset, 0, toread, hashid)
                    #print("[%x, %x] -> %s -> %s" % (file_offset, toread, hashid, existing_bytestream_reference_id))

            file_offset += toread

        logical_file_map.Close()

    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard11.FileImage))
    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard.Image))
    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.standard11.pathName),
                      rdfvalue.XSDString(filename))
    return logical_file_id
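
# Self-contained sketch of the chunk-hash ARN scheme used above: SHA-512
# over the zero-padded chunk, URL-safe base64 (RFC 4648) encoded, prefixed
# with "aff4:sha512:". The helper name is illustrative, not pyaff4 API.
def _example_chunk_hash_arn(chunk, chunk_size):
    import base64
    import hashlib

    if len(chunk) < chunk_size:
        # Short tail chunks are padded with zero bytes before hashing.
        chunk = chunk + b"\x00" * (chunk_size - len(chunk))
    digest = hashlib.sha512(chunk).digest()
    return "aff4:sha512:" + base64.urlsafe_b64encode(digest).decode()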
def writeLogicalStreamRabinHashBased(self, filename, readstream, length,
                                     check_bytes=False):
    logical_file_id = None
    if self.isAFF4Collision(filename):
        logical_file_id = rdfvalue.URN("aff4://%s" % uuid.uuid4())
    else:
        logical_file_id = self.urn.Append(
            escaping.arnPathFragment_from_path(filename), quote=False)

    chunk_size = 32 * 1024
    cdc = fastchunking.RabinKarpCDC(window_size=48, seed=0)
    chunker = cdc.create_chunker(chunk_size=4096)

    with aff4_map.AFF4Map.NewAFF4Map(
            self.resolver, logical_file_id, self.urn) as logical_file_map:
        file_offset = 0
        lastbuffer = None
        lastoffset = 0
        chunk_offset = 0

        while file_offset < length:
            toread = min(length - file_offset, chunk_size)
            buffer = readstream.read(toread)

            foundBoundaries = False
            for boundary in chunker.next_chunk_boundaries(buffer):
                foundBoundaries = True

                if lastbuffer is not None:
                    # The chunk straddles the previous read buffer.
                    chunk = lastbuffer[lastoffset:]
                    chunk_offset = file_offset - len(chunk)
                    chunk = chunk + buffer[:boundary]
                    lastbuffer = None
                else:
                    chunk = buffer[lastoffset:boundary]
                    chunk_offset = file_offset + lastoffset

                h = hashes.new(lexicon.HASH_SHA512)
                h.update(chunk)
                self.preserveChunk(logical_file_map, chunk, chunk_offset, h,
                                   check_bytes)
                lastoffset = boundary

            if not foundBoundaries:
                if lastbuffer is not None:
                    lastbuffer = lastbuffer + buffer
                else:
                    lastbuffer = buffer
            else:
                lastbuffer = buffer
            file_offset += toread

        # Preserve any trailing bytes after the last boundary.
        if lastbuffer is not None and lastoffset < len(lastbuffer):
            chunk = lastbuffer[lastoffset:]
            chunk_offset = file_offset - len(chunk)
            h = hashes.new(lexicon.HASH_SHA512)
            h.update(chunk)
            self.preserveChunk(logical_file_map, chunk, chunk_offset, h,
                               check_bytes)

        logical_file_map.Close()

    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard11.FileImage))
    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.AFF4_TYPE),
                      rdfvalue.URN(lexicon.standard.Image))
    self.resolver.Add(self.urn, logical_file_id,
                      rdfvalue.URN(lexicon.standard11.pathName),
                      rdfvalue.XSDString(filename))
    return logical_file_id
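
# Sketch of the content-defined chunking that drives the method above, using
# the same fastchunking calls it relies on: a Rabin-Karp rolling hash
# proposes boundaries at an average spacing of chunk_size bytes, so
# unchanged data re-chunks identically even after insertions elsewhere.
# The helper name is illustrative, not pyaff4 API.
def _example_cdc_boundaries(data, avg_chunk_size=4096):
    import fastchunking

    cdc = fastchunking.RabinKarpCDC(window_size=48, seed=0)
    chunker = cdc.create_chunker(chunk_size=avg_chunk_size)
    # Boundary positions are byte offsets into `data`.
    return list(chunker.next_chunk_boundaries(data))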
def testRemoveDoesRewind(self):
    try:
        os.unlink(self.filename)
    except (IOError, OSError):
        pass

    with data_store.MemoryDataStore() as resolver:
        resolver.Set(lexicon.transient_graph, self.filename_urn,
                     lexicon.AFF4_STREAM_WRITE_MODE,
                     rdfvalue.XSDString("truncate"))

        with zip.ZipFile.NewZipFile(resolver, Version(1, 1, "pyaff4"),
                                    self.filename_urn) as zip_container:
            self.volume_urn = zip_container.urn

            with zip_container.CreateZipSegment("foo") as segment:
                segment.Write(self.data1)
                segment.Flush()

            with zip_container.CreateZipSegment("bar") as segment:
                segment.Write(self.data1)
                segment.Flush()

            backing_store_urn = resolver.GetUnique(lexicon.transient_graph,
                                                   self.volume_urn,
                                                   lexicon.AFF4_STORED)
            with resolver.AFF4FactoryOpen(backing_store_urn) as backing_store:
                self.assertEquals(93, backing_store.writeptr)

            zip_container.RemoveSegment("bar")

            with zip_container.CreateZipSegment("nar") as segment:
                segment.Write(self.data2)
                segment.Flush()

    with data_store.MemoryDataStore() as resolver:
        resolver.Set(lexicon.transient_graph, self.filename_urn,
                     lexicon.AFF4_STREAM_WRITE_MODE,
                     rdfvalue.XSDString("append"))

        with zip.ZipFile.NewZipFile(resolver, Version(1, 1, "pyaff4"),
                                    self.filename_urn) as zip_file:
            self.volume_urn = zip_file.urn
            segment_urn = self.volume_urn.Append(
                escaping.arnPathFragment_from_path(self.segment_name),
                quote=False)

            self.assertFalse(zip_file.ContainsSegment("bar"))
            self.assertTrue(zip_file.ContainsSegment("foo"))
            self.assertTrue(zip_file.ContainsSegment("nar"))

            with zip_file.OpenZipSegment("foo") as segment:
                self.assertEquals(self.data1, segment.Read(len(self.data1)))
            with zip_file.OpenZipSegment("nar") as segment:
                self.assertEquals(self.data2, segment.Read(len(self.data2)))

    self.assertEquals(736, os.stat(self.filename).st_size)