def testCreateAndAppendSinglePathImage(self):
    """Write one hash-based logical stream, append a second fragment in
    append ("+") mode, then re-open read-only and verify both members.

    Fix over original: the bare ``except:`` clauses are narrowed so that
    SystemExit/KeyboardInterrupt are no longer swallowed.
    """
    try:
        # Best-effort removal of a stale container from a previous run.
        try:
            os.unlink(self.containerName)
        except OSError:
            pass

        container_urn = rdfvalue.URN.FromFileName(self.containerName)
        resolver = data_store.MemoryDataStore()
        urn = None

        frag1path = os.path.join(
            self.testImagesPath,
            "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1")

        with container.Container.createURN(resolver, container_urn) as volume:
            with open(frag1path, "rb") as src:
                stream = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1])
                urn = volume.writeLogicalStreamHashBased(
                    frag1path, stream, 32768, False)
                for h in stream.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), stream.hashToType[h])
                    # Known-good SHA1 of fragment 1.
                    self.assertEqual(
                        "deb3fa3b60c6107aceb97f684899387c78587eae", hh.value)
                    resolver.Add(volume.urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)

        frag2path = os.path.join(
            self.testImagesPath,
            "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2")

        # Re-open the existing container in append mode and add fragment 2.
        with container.Container.openURNtoContainer(
                container_urn, mode="+") as volume:
            with open(frag2path, "rb") as src:
                stream = linear_hasher.StreamHasher(
                    src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                urn = volume.writeLogicalStreamHashBased(
                    frag2path, stream, 2 * 32768, False)
                for h in stream.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), stream.hashToType[h])
                    resolver.Add(volume.urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)

        # Verify: both images present (sorted by path) and hashes check out.
        with container.Container.openURNtoContainer(container_urn) as volume:
            images = list(volume.images())
            images = sorted(images,
                            key=lambda x: utils.SmartUnicode(x.pathName),
                            reverse=False)
            self.assertEqual(2, len(images), "Only two logical images")

            fragmentA = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            fragmentB = escaping.member_name_for_urn(
                images[1].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertTrue(fragmentA.endswith(
                "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1"))
            self.assertTrue(fragmentB.endswith(
                "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2"))

            hasher = linear_hasher.LinearHasher2(volume.resolver, self)
            for image in volume.images():
                print("\t%s <%s>" % (image.name(), image.urn))
                hasher.hash(image)
    except Exception:
        # Narrowed from a bare "except:" so interpreter-exit exceptions
        # propagate; real test errors still report and fail.
        traceback.print_exc()
        self.fail()
    finally:
        # os.unlink(containerName)
        pass
def CreateMember(self, child_urn):
    """Create a new file member of this directory volume.

    Args:
      child_urn: URN of the member to create; must lie under this
        volume's URN.

    Returns:
      The opened AFF4 stream object for the new member.

    Raises:
      IOError: if child_urn is not contained within the volume URN.
    """
    # A valid child must be expressible relative to our own URN;
    # RelativePath returns the serialized input unchanged when it is not.
    if self.urn.RelativePath(child_urn) == child_urn.SerializeToString():
        raise IOError("Child URN is not within container URN.")

    # Filesystems cannot typically represent a file and a directory as the
    # same path component, so slashes must be escaped out of the member
    # name (otherwise e.g. stream/0000000 and stream/0000000/index clash).
    member_name = escaping.member_name_for_urn(child_urn, self.version,
                                               self.urn)

    graph = lexicon.transient_graph
    # Any file may be created inside the directory volume.
    self.resolver.Set(graph, child_urn, lexicon.AFF4_TYPE,
                      rdfvalue.URN(lexicon.AFF4_FILE_TYPE))
    self.resolver.Set(graph, child_urn, lexicon.AFF4_STREAM_WRITE_MODE,
                      rdfvalue.XSDString("truncate"))
    self.resolver.Set(graph, child_urn, lexicon.AFF4_DIRECTORY_CHILD_FILENAME,
                      rdfvalue.XSDString(member_name))
    # Physical storage location of the member inside our root directory.
    self.resolver.Set(graph, child_urn, lexicon.AFF4_FILE_NAME,
                      rdfvalue.XSDString(self.root_path + os.sep + member_name))

    handle = self.resolver.AFF4FactoryOpen(child_urn)
    self.MarkDirty()
    self.children.add(child_urn)
    return handle
def createAndReadSinglePathImagePush(self, containerName, pathName, arnPathFragment, minImageStreamSize):
    """Round-trip a logical image written through the push (Write) API.

    Writes "helloworld" twice while hashing incrementally, records the
    hashes before the volume auto-closes, then re-opens the container and
    verifies the image count, preserved path name, ARN fragment and bytes.
    """
    try:
        hasher = linear_hasher.PushHasher(
            [lexicon.HASH_SHA1, lexicon.HASH_MD5])
        container_urn = rdfvalue.URN.FromFileName(containerName)
        with data_store.MemoryDataStore() as resolver:
            with container.Container.createURN(resolver,
                                               container_urn) as volume:
                volume.maxSegmentResidentSize = minImageStreamSize
                with volume.newLogicalStream(pathName, 20) as writer:
                    writer_arn = writer.urn
                    # Push the payload twice via the Push API, hashing as
                    # we go.
                    payload = u"helloworld".encode("utf-8")
                    for _ in range(2):
                        writer.Write(payload)
                        hasher.update(payload)
                # Record the hashes before the volume auto-closes.
                for h in hasher.hashes:
                    digest = hashes.newImmutableHash(h.hexdigest(),
                                                     hasher.hashToType[h])
                    volume.resolver.Add(volume.urn, writer_arn,
                                        rdfvalue.URN(lexicon.standard.hash),
                                        digest)

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = list(volume.images())
            self.assertEqual(1, len(images), "Only one logical image")
            self.assertEqual(pathName, images[0].name(),
                             "unicode filename should be preserved")
            fragment = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertEqual(arnPathFragment, fragment)
            try:
                with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                    self.assertEqual(b"helloworldhelloworld", fd.ReadAll(),
                                     "content should be same")
            except Exception:
                traceback.print_exc()
                self.fail()
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        os.unlink(containerName)
def createAndReadSinglePathImage(self, containerName, pathName, arnPathFragment):
    """Write a single "hello" logical image and read it back.

    Verifies the image count, the preserved (possibly unicode) path name,
    the escaped ARN path fragment, and the stored content.
    """
    try:
        container_urn = rdfvalue.URN.FromFileName(containerName)
        with data_store.MemoryDataStore() as resolver:
            urn = None
            with container.Container.createURN(resolver,
                                               container_urn) as volume:
                payload = io.BytesIO("hello".encode('utf-8'))
                urn = volume.writeLogical(pathName, payload, 10)

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = list(volume.images())
            self.assertEqual(1, len(images), "Only one logical image")
            self.assertEqual(pathName, images[0].name(),
                             "unicode filename should be preserved")
            fragment = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertEqual(arnPathFragment, fragment)
            try:
                with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                    self.assertEqual(b"hello", fd.ReadAll(),
                                     "content should be same")
            except Exception:
                traceback.print_exc()
                self.fail()
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        os.unlink(containerName)
def testARNtoZipSegment(self):
    """member_name_for_urn should strip the container base URN and
    percent-decode the remainder into a zip segment name.

    Exercises drive letters, '$' shares, unicode, embedded spaces and
    %20-escapes, and query strings.
    """
    version = container.Version(1, 1, "pyaff4")
    base = u"aff4://e6bae91b-0be3-4770-8a36-14d231833e18"
    base_urn = rdfvalue.URN(base)

    # (expected segment name, full member ARN) pairs.
    cases = [
        (u"/c:/foo", base + u"//c:/foo"),
        (u"bar/c$", base + u"/bar/c$"),
        (u"bar/c$/foo.txt", base + u"/bar/c$/foo.txt"),
        (u"/foo/bar", base + u"//foo/bar"),
        (u"/foo/some file", base + u"//foo/some%20file"),
        (u"/foo/some file", base + u"//foo/some%20%20file"),
        (u"bar/c$/foo/ネコ.txt", base + u"/bar/c$/foo/ネコ.txt"),
        # examples from
        # https://blogs.msdn.microsoft.com/ie/2006/12/06/file-uris-in-windows/
        (u"laptop/My Documents/FileSchemeURIs.doc",
         base + u"/laptop/My%20Documents/FileSchemeURIs.doc"),
        (u"/C:/Documents and Settings/davris/FileSchemeURIs.doc",
         base + u"//C:/Documents and Settings/davris/FileSchemeURIs.doc"),
        (u"/D:/Program Files/Viewer/startup.htm",
         base + u"//D:/Program Files/Viewer/startup.htm"),
        (u"/C:/Program Files/Music/Web Sys/main.html?REQUEST=RADIO",
         base + u"//C:/Program Files/Music/Web%20Sys/main.html?REQUEST=RADIO"),
        (u"applib/products/a-b/abc_9/4148.920a/media/start.swf",
         base + u"/applib/products/a-b/abc_9/4148.920a/media/start.swf"),
        (u"/C:/exampleㄓ.txt", base + u"//C:/exampleㄓ.txt"),
    ]

    for expected, arn in cases:
        self.assertEqual(expected,
                         member_name_for_urn(arn, version,
                                             base_urn=base_urn,
                                             use_unicode=True))
def StreamAddMember(self, member_urn, stream, compression_method=ZIP_STORED, progress=None):
    """An efficient interface to add a new archive member.

    Args:
      member_urn: The new member URN to be added.
      stream: A file-like object (with read() method) that generates data
        to be written as the member.
      compression_method: How to compress the member (ZIP_STORED or
        ZIP_DEFLATE).

    Raises:
      RuntimeError: on an unsupported compression method.
    """
    if progress is None:
        progress = aff4.EMPTY_PROGRESS

    backing_store_urn = self.resolver.GetUnique(lexicon.transient_graph,
                                                self.urn, lexicon.AFF4_STORED)
    with self.resolver.AFF4FactoryOpen(backing_store_urn) as backing_store:
        LOGGER.info("Writing member %s", member_urn)

        # New members always go at the end of the backing file.
        backing_store.SeekWrite(0, aff4.SEEK_END)

        # Entry offsets are relative to the start of the zip file, so the
        # global_offset must be subtracted out.
        entry = ZipInfo(
            local_header_offset=backing_store.TellWrite() - self.global_offset,
            filename=escaping.member_name_for_urn(
                member_urn, self.version, self.urn, use_unicode=USE_UNICODE),
            file_size=0,
            crc32=0,
            compression_method=compression_method)

        # Streamed writing is not supported: emit a provisional local
        # header now, then rewrite it once sizes and CRC32 are known.
        entry.WriteFileHeader(backing_store)

        if compression_method == ZIP_DEFLATE:
            entry.compression_method = ZIP_DEFLATE
            deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                        zlib.DEFLATED, -15)
            while True:
                try:
                    chunk = stream.read(BUFF_SIZE)
                    if not chunk:
                        break
                except IOError:
                    # Best effort: a read error terminates the member as if
                    # the stream had ended.
                    break

                compressed = deflater.compress(chunk)
                entry.compress_size += len(compressed)
                entry.file_size += len(chunk)
                # Python 2 erroneously returns a signed int here.
                entry.crc32 = zlib.crc32(chunk, entry.crc32) & 0xffffffff
                backing_store.Write(compressed)
                progress.Report(entry.file_size)

            # Flush whatever the compressor still holds.
            tail = deflater.flush()
            entry.compress_size += len(tail)
            backing_store.Write(tail)

        elif compression_method == ZIP_STORED:
            # Just write the data directly.
            entry.compression_method = ZIP_STORED
            while True:
                chunk = stream.read(BUFF_SIZE)
                if not chunk:
                    break
                entry.compress_size += len(chunk)
                entry.file_size += len(chunk)
                # Python 2 erroneously returns a signed int here.
                entry.crc32 = zlib.crc32(chunk, entry.crc32) & 0xffffffff
                progress.Report(entry.file_size)
                backing_store.Write(chunk)
        else:
            raise RuntimeError("Unsupported compression method")

        # Rewrite the local file header now that CRC32 is calculated.
        entry.WriteFileHeader(backing_store)
        self.members[member_urn] = entry
def CreateMember(self, child_urn):
    """Create a new zip segment member for child_urn and return its stream."""
    # Members are stored under their escaped, container-relative name.
    segment_name = escaping.member_name_for_urn(
        child_urn, self.version, self.urn, use_unicode=USE_UNICODE)
    return self.CreateZipSegment(segment_name, arn=child_urn)
def testCreateAndAppendSinglePathImageLarge2(self):
    """Append a large logical image (110000 bytes of random data) to an
    existing container and verify both members read back byte-for-byte.

    Fixes over original: bare ``except:`` narrowed to ``except Exception``;
    the container file is now removed in ``finally`` (the original leaked
    it into the temp directory via ``finally: pass``); containerName is
    assigned before the ``try`` so cleanup cannot hit a NameError.
    """
    containerName = tempfile.gettempdir() + u"/test-append-large2.aff4"
    try:
        pathA = u"/a.txt"
        pathB = u"/b.txt"
        largedata = io.BytesIO(os.urandom(1100000))
        container_urn = rdfvalue.URN.FromFileName(containerName)
        resolver = data_store.MemoryDataStore()

        with container.Container.createURN(resolver, container_urn) as volume:
            src = io.BytesIO("hello".encode('utf-8'))
            volume.writeLogical(pathA, src, 10)

        # Re-open in append mode and add the large stream.
        with container.Container.openURNtoContainer(
                container_urn, mode="+") as volume:
            volume.writeLogical(pathB, largedata, 110000)

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = sorted(volume.images(),
                            key=lambda x: utils.SmartUnicode(x.pathName))
            self.assertEqual(2, len(images), "Only two logical images")

            fragmentA = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            fragmentB = escaping.member_name_for_urn(
                images[1].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertEqual(pathA, fragmentA)
            self.assertEqual(pathB, fragmentB)

            with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                self.assertEqual(b"hello", fd.ReadAll(),
                                 "content should be same")
            with volume.resolver.AFF4FactoryOpen(images[1].urn) as fd:
                # Compare stored stream and source in 1000-byte windows.
                index = 0
                while index < 110000:
                    fd.SeekRead(index)
                    bufa = fd.Read(1000)
                    largedata.seek(index)
                    bufb = largedata.read(1000)
                    index += 1000
                    self.assertEqual(bufa, bufb, "content should be same")
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        # Remove the container; guard against it never having been created.
        try:
            os.unlink(containerName)
        except OSError:
            pass
def testCreateAndAppendSinglePathImage(self):
    """Create a container with one small image, append a second in "+"
    mode, then verify both member names and contents.

    Fixes over original: bare ``except:`` clauses narrowed; hard-coded
    "/tmp" replaced with tempfile.gettempdir() for portability (matching
    the other tests in this file); containerName assigned before the
    ``try`` so the ``finally`` unlink cannot hit a NameError.
    """
    containerName = tempfile.gettempdir() + u"/test-append.aff4"
    try:
        pathA = u"/a.txt"
        pathB = u"/b.txt"
        # Best-effort removal of a stale container from a previous run.
        try:
            os.unlink(containerName)
        except OSError:
            pass

        container_urn = rdfvalue.URN.FromFileName(containerName)
        resolver = data_store.MemoryDataStore()

        with container.Container.createURN(resolver, container_urn) as volume:
            src = io.BytesIO("hello".encode('utf-8'))
            volume.writeLogical(pathA, src, 10)

        with container.Container.openURNtoContainer(
                container_urn, mode="+") as volume:
            src = io.BytesIO("hello2".encode('utf-8'))
            volume.writeLogical(pathB, src, 12)

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = sorted(volume.images(),
                            key=lambda x: utils.SmartUnicode(x.pathName))
            self.assertEqual(2, len(images), "Only two logical images")

            fragmentA = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            fragmentB = escaping.member_name_for_urn(
                images[1].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertEqual(pathA, fragmentA)
            self.assertEqual(pathB, fragmentB)

            with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                self.assertEqual(b"hello", fd.ReadAll(),
                                 "content should be same")
            with volume.resolver.AFF4FactoryOpen(images[1].urn) as fd:
                self.assertEqual(b"hello2", fd.ReadAll(),
                                 "content should be same")
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        os.unlink(containerName)
def testCreateAndAppendSinglePathImageLarge(self):
    """Append a chunked ImageStream (tiny chunk geometry to force many
    segments) to an existing container and verify names and length.

    Fixes over original: bare ``except:`` narrowed; debug leftovers
    removed (``foo = 11``, ``ffffff = 1`` and its dead ``if`` guard);
    hard-coded "/tmp" replaced with tempfile.gettempdir(); the container
    file is removed in ``finally`` instead of being leaked.
    """
    containerName = tempfile.gettempdir() + u"/test-append-large.aff4"
    try:
        length = 10000
        pathA = u"/a.txt"
        pathB = u"/b.txt"
        largedata = io.BytesIO(os.urandom(1100000))
        container_urn = rdfvalue.URN.FromFileName(containerName)
        resolver = data_store.MemoryDataStore()

        with container.Container.createURN(resolver, container_urn) as volume:
            src = io.BytesIO("hello".encode('utf-8'))
            volume.writeLogical(pathA, src, 10)

        with container.Container.openURNtoContainer(
                container_urn, mode="+") as volume:
            src = largedata
            with volume.newLogicalStream(pathB, length) as image:
                # Tiny chunk geometry forces many bevy segments.
                image.chunk_size = 10
                image.chunks_per_segment = 3
                index = 0
                while index < length:
                    src.seek(index)
                    image.Write(src.read(1000))
                    index += 1000
                # NOTE(review): explicit Close() inside the with-block kept
                # from the original; the context manager closes again on
                # exit — presumably idempotent, confirm before removing.
                image.Close()

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = sorted(volume.images(),
                            key=lambda x: utils.SmartUnicode(x.pathName))
            self.assertEqual(2, len(images), "Only two logical images")

            fragmentA = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            fragmentB = escaping.member_name_for_urn(
                images[1].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            self.assertEqual(pathA, fragmentA)
            self.assertEqual(pathB, fragmentB)

            with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                self.assertEqual(b"hello", fd.ReadAll(),
                                 "content should be same")
            with volume.resolver.AFF4FactoryOpen(images[1].urn) as fd:
                self.assertEqual(length, fd.Length())
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        # Remove the container; guard against it never having been created.
        try:
            os.unlink(containerName)
        except OSError:
            pass
def testFuzz(self):
    """Fuzz logical-stream writing across chunk-boundary lengths and
    maxSegmentResidentSize settings, verifying stored hashes afterwards.

    Fixes over original: random data now comes from os.urandom() instead
    of reading /dev/random — the latter is Linux-only, can block when the
    entropy pool is low, and may return fewer bytes than requested while
    the loop still advanced ``pos`` by the full amount (hash/data
    mismatch). Also removed: an unreachable ``continue`` after
    ``self.fail()`` and an unused local.
    """
    chunksize = 512
    for length in [chunksize - 1, chunksize, chunksize + 1,
                   chunksize * 2 - 1, chunksize * 2, chunksize * 2 + 1,
                   chunksize * 1000, 0]:
        for maxSegmentResidentSize in [0, 1, chunksize - 1, chunksize,
                                       chunksize + 1]:
            try:
                containerName = tempfile.gettempdir() + \
                    "/testfuzz-length-%d-maxresident%d.aff4" % (
                        length, maxSegmentResidentSize)
                print(containerName)
                hasher = linear_hasher.PushHasher(
                    [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                container_urn = rdfvalue.URN.FromFileName(containerName)

                with data_store.MemoryDataStore() as resolver:
                    with container.Container.createURN(
                            resolver, container_urn) as volume:
                        volume.maxSegmentResidentSize = maxSegmentResidentSize
                        with volume.newLogicalStream("/foo",
                                                     length) as writer:
                            writer.chunk_size = chunksize
                            writer_arn = writer.urn
                            pos = 0
                            while pos < length:
                                # Random write sizes up to 1KB exercise
                                # chunk/segment boundary handling.
                                toread = int(min(
                                    math.ceil(1024 * random.random()),
                                    length - pos))
                                data = os.urandom(toread)
                                writer.Write(data)
                                hasher.update(data)
                                pos += toread

                        # write in the hashes before auto-close
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(
                                h.hexdigest(), hasher.hashToType[h])
                            volume.resolver.Add(
                                volume.urn, writer_arn,
                                rdfvalue.URN(lexicon.standard.hash), hh)
                print()

                with container.Container.openURNtoContainer(
                        container_urn) as volume:
                    images = list(volume.images())
                    self.assertEqual(1, len(images),
                                     "Only one logical image")
                    self.assertEqual("/foo", images[0].name(),
                                     "unicode filename should be preserved")
                    verifier = linear_hasher.LinearHasher2(
                        volume.resolver, self)
                    for image in volume.images():
                        verifier.hash(image)

                os.unlink(containerName)
            except Exception:
                traceback.print_exc()
                self.fail()