Example #1
    def testCreateAndAppendSinglePathImage(self):
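        # Round-trip test: write the first PDF fragment into a fresh AFF4
        # container as a hash-based logical stream, re-open the container with
        # mode="+" to append the second fragment, then re-open it read-only
        # and verify both logical images and their stored hashes.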
        try:
            try:
                os.unlink(self.containerName)
            except OSError:
                # the container may not exist yet
                pass

            container_urn = rdfvalue.URN.FromFileName(self.containerName)
            resolver = data_store.MemoryDataStore()
            urn = None

            frag1path = os.path.join(self.testImagesPath, "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1")

            with container.Container.createURN(resolver, container_urn) as volume:
                with open(frag1path, "rb") as src:
                    stream = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1])
                    urn = volume.writeLogicalStreamHashBased(frag1path, stream, 32768, False)
                    for h in stream.hashes:
                        hh = hashes.newImmutableHash(h.hexdigest(), stream.hashToType[h])
                        self.assertEqual("deb3fa3b60c6107aceb97f684899387c78587eae", hh.value)
                        resolver.Add(volume.urn, urn, rdfvalue.URN(lexicon.standard.hash), hh)

            frag2path = os.path.join(self.testImagesPath, "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2")

            with container.Container.openURNtoContainer(container_urn, mode="+") as volume:
                with open(frag2path, "rb") as src:
                    stream = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1, lexicon.HASH_MD5 ])
                    urn = volume.writeLogicalStreamHashBased(frag2path, stream, 2*32768, False)
                    for h in stream.hashes:
                        hh = hashes.newImmutableHash(h.hexdigest(), stream.hashToType[h])
                        resolver.Add(volume.urn, urn, rdfvalue.URN(lexicon.standard.hash), hh)

            with container.Container.openURNtoContainer(container_urn) as volume:
                images = list(volume.images())
                images = sorted(images, key=lambda x: utils.SmartUnicode(x.pathName), reverse=False)
                self.assertEqual(2, len(images), "Only two logical images")

                fragmentA = escaping.member_name_for_urn(images[0].urn.value, volume.version, base_urn=volume.urn, use_unicode=True)
                fragmentB = escaping.member_name_for_urn(images[1].urn.value, volume.version, base_urn=volume.urn, use_unicode=True)

                self.assertTrue(fragmentA.endswith("paper-hash_based_disk_imaging_using_aff4.pdf.frag.1"))
                self.assertTrue(fragmentB.endswith("paper-hash_based_disk_imaging_using_aff4.pdf.frag.2"))

                hasher = linear_hasher.LinearHasher2(volume.resolver, self)
                for image in volume.images():
                    print("\t%s <%s>" % (image.name(), image.urn))
                    hasher.hash(image)

        except Exception:
            traceback.print_exc()
            self.fail()

        finally:
            # os.unlink(self.containerName)
            pass
Example #2
    def calculateBlockMapHash(self, mapStreamURI, imageStreamURI,
                              storedHashDataType):
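        # Combine the stored per-block hashes for the image stream (ordered by
        # hash length) with the map's point, idx and optional pathmap hashes
        # into a single digest of the stored hash type. This is the older
        # Python 2 style of the API (sorted(cmp=...), iterator .next());
        # compare Example #3 for the updated form.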

        storedBlockHashesHash = self.getStoredBlockHashes(str(imageStreamURI))

        storedBlockHashesHash = sorted(storedBlockHashesHash,
                                       cmp=hashLengthComparator)

        calculatedHash = hashes.new(storedHashDataType)
        for hash in storedBlockHashesHash:
            bytes = hash.digest()
            calculatedHash.update(bytes)

        bytes = self.resolver.QuerySubjectPredicate(
            mapStreamURI, self.lexicon.mapPointHash).next().digest()
        calculatedHash.update(bytes)

        bytes = self.resolver.QuerySubjectPredicate(
            mapStreamURI, self.lexicon.mapIdxHash).next().digest()
        # bytes = self.calculateMapIdxHash(mapStreamURI).digest()
        calculatedHash.update(bytes)

        try:
            bytes = self.resolver.QuerySubjectPredicate(
                mapStreamURI, self.lexicon.mapPathHash).next().digest()
            # bytes = self.calculateMapPathHash(mapStreamURI).digest()
            calculatedHash.update(bytes)
        except Exception:
            # the mapPath hash may not be stored for this map
            pass

        return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                       storedHashDataType)
Example #3
    def calculateBlockMapHash(self, mapStreamURI, imageStreamURI,
                              storedHashDataType):
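        # Updated form of calculateBlockMapHash: block hashes are ordered via
        # hashOrderingMap, and the point/idx/path hashes are folded in by
        # iterating the resolver query (now scoped to self.volume_arn) instead
        # of calling .next() on it.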
        storedBlockHashesHash = sorted(
            self.getStoredBlockHashes(str(imageStreamURI)),
            key=lambda x: hashOrderingMap[x.blockHashAlgo])

        calculatedHash = hashes.new(storedHashDataType)
        for hash in storedBlockHashesHash:
            bytes = hash.digest()
            calculatedHash.update(bytes)

        for hash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, mapStreamURI, self.lexicon.mapPointHash):
            calculatedHash.update(hash.digest())

        for hash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, mapStreamURI, self.lexicon.mapIdxHash):
            calculatedHash.update(hash.digest())

        for hash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, mapStreamURI, self.lexicon.mapPathHash):
            calculatedHash.update(hash.digest())

        return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                       storedHashDataType)
Example #4
    def createAndReadSinglePathImagePush(self, containerName, pathName,
                                         arnPathFragment, minImageStreamSize):
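        # Write a small logical stream using the push-style Write() API while
        # feeding the same bytes to a PushHasher, record the resulting hashes
        # on the image, then re-open the container and verify the image name,
        # its escaped ARN path fragment and its content.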
        try:
            hasher = linear_hasher.PushHasher(
                [lexicon.HASH_SHA1, lexicon.HASH_MD5])

            container_urn = rdfvalue.URN.FromFileName(containerName)
            with data_store.MemoryDataStore() as resolver:
                with container.Container.createURN(resolver,
                                                   container_urn) as volume:
                    volume.maxSegmentResidentSize = minImageStreamSize
                    with volume.newLogicalStream(pathName, 20) as writer:
                        writer_arn = writer.urn

                        # add in some data using the Push API, hashing while we go
                        data = u"helloworld"
                        data_bytes = data.encode("utf-8")
                        writer.Write(data_bytes)
                        hasher.update(data_bytes)
                        writer.Write(data_bytes)
                        hasher.update(data_bytes)

                        # write in the hashes before auto-close
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(
                                h.hexdigest(), hasher.hashToType[h])
                            volume.resolver.Add(
                                volume.urn, writer_arn,
                                rdfvalue.URN(lexicon.standard.hash), hh)

            with container.Container.openURNtoContainer(
                    container_urn) as volume:
                images = list(volume.images())
                self.assertEqual(1, len(images), "Only one logical image")
                self.assertEqual(pathName, images[0].name(),
                                 "unicode filename should be preserved")

                fragment = escaping.member_name_for_urn(images[0].urn.value,
                                                        volume.version,
                                                        base_urn=volume.urn,
                                                        use_unicode=True)

                self.assertEqual(arnPathFragment, fragment)
                try:
                    with volume.resolver.AFF4FactoryOpen(images[0].urn) as fd:
                        txt = fd.ReadAll()
                        self.assertEqual(b"helloworldhelloworld", txt,
                                         "content should be same")
                except Exception:
                    traceback.print_exc()
                    self.fail()
        except Exception:
            traceback.print_exc()
            self.fail()

        finally:
            os.unlink(containerName)
Example #5
    def calculateSegmentHash(self, parentURI, subSegment, hashDataType):
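        # Hash the raw contents of a single zip segment beneath parentURI and
        # return it as an immutable hash of the requested type.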
        calculatedHash = hashes.new(hashDataType)

        data = self.readSegment(parentURI, subSegment)
        if data is not None:
            calculatedHash.update(data)
            return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                           hashDataType)
        else:
            raise Exception("Unable to read segment %s" % subSegment)
Example #6
def ingestZipfile(container_name, zipfiles, append, check_bytes):
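    # Create (or append to) an AFF4 container and re-ingest every member of
    # each input zip as a Rabin hash-based logical stream, recording SHA1 and
    # MD5 hashes. Members whose pathname already exists in the volume are
    # skipped.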
    # TODO: check that the input paths exist
    start = time.time()
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None

        if not os.path.exists(container_name):
            volume = container.Container.createURN(resolver, container_urn)
            print("Creating AFF4Container: file://%s <%s>" % (container_name, volume.urn))
        else:
            volume = container.Container.openURNtoContainer(container_urn, mode="+")
            print("Appending to AFF4Container: file://%s <%s>" % (container_name, volume.urn))

        resolver = volume.resolver

        with volume as volume:
            for zipfile in zipfiles:
                basefilename = os.path.basename(zipfile)
                if basefilename.endswith(".bag.zip"):
                    basefilename = basefilename[:-len(".bag.zip")]

                filename_arn = rdfvalue.URN.FromFileName(zipfile)

                # the following coaxes our ZIP implementation to treat this file
                # as a regular old zip
                result = zip.BasicZipFile(resolver, urn=None, version=version.basic_zip)
                resolver.Set(lexicon.transient_graph, result.urn, lexicon.AFF4_TYPE, rdfvalue.URN("StandardZip"))
                resolver.Set(lexicon.transient_graph, result.urn, lexicon.AFF4_STORED, rdfvalue.URN(filename_arn))

                with resolver.AFF4FactoryOpen(result.urn, version=version.basic_zip) as zip_file:
                    for member in zip_file.members:
                        info = zip_file.members[member]
                        pathname = basefilename + member.SerializeToString()[len(result.urn.SerializeToString()):]
                        print(pathname)

                        with resolver.AFF4FactoryOpen(member, version=version.aff4v10) as src:

                            hasher = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                            if volume.containsLogicalImage(pathname):
                                print("\tCollision: this ARN is already present in this volume.")
                                continue

                            urn = volume.writeLogicalStreamRabinHashBased(pathname, hasher, info.file_size, check_bytes)
                            #fsmeta.urn = urn
                            #fsmeta.store(resolver)
                            for h in hasher.hashes:
                                hh = hashes.newImmutableHash(h.hexdigest(), hasher.hashToType[h])
                                resolver.Add(container_urn, urn, rdfvalue.URN(lexicon.standard.hash), hh)

        print("Finished in %d (s)" % int(time.time() - start))
        return urn
Example #7
def addPathNamesToVolume(resolver, volume, pathnames, recursive, hashbased):
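    # Add files and directories to an open volume: directories are recorded as
    # FolderImage/Image nodes (recursing by appending children to pathnames),
    # files are written as logical streams (plain or Rabin hash-based) with
    # SHA1/MD5/SHA256 hashes, which are also handed to an optional blockchain
    # writer.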
    for pathname in pathnames:
        if not os.path.exists(pathname):
            print("Path %s not found. Skipping.")
            continue
        pathname = utils.SmartUnicode(pathname)
        print("\tAdding: %s" % pathname)
        fsmeta = logical.FSMetadata.create(pathname)
        if os.path.isdir(pathname):
            image_urn = None
            if volume.isAFF4Collision(pathname):
                image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
            else:
                image_urn = volume.urn.Append(
                    escaping.arnPathFragment_from_path(pathname), quote=False)

            fsmeta.urn = image_urn
            fsmeta.store(resolver)
            resolver.Set(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.standard11.pathName),
                         rdfvalue.XSDString(pathname))
            resolver.Add(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard11.FolderImage))
            resolver.Add(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard.Image))
            if recursive:
                for child in os.listdir(pathname):
                    pathnames.append(os.path.join(pathname, child))
        else:
            with open(pathname, "rb") as src:
                hasher = linear_hasher.StreamHasher(
                    src,
                    [lexicon.HASH_SHA1, lexicon.HASH_MD5, lexicon.HASH_SHA256])
                if not hashbased:
                    urn = volume.writeLogicalStream(pathname, hasher,
                                                    fsmeta.length)
                else:
                    urn = volume.writeLogicalStreamRabinHashBased(
                        pathname, hasher, fsmeta.length)
                fsmeta.urn = urn
                fsmeta.store(resolver)
                bc_writer = blockchain.BlockChainWriter.getBlockchainWriter()
                hash_dict = {}
                for h in hasher.hashes:
                    hh = hashes.newImmutableHash(h.hexdigest(),
                                                 hasher.hashToType[h])
                    resolver.Add(urn, urn, rdfvalue.URN(lexicon.standard.hash),
                                 hh)
                    hash_dict[h.name] = hh

                if bc_writer:
                    bc_writer.Set_hash(hash_dict["md5"], hash_dict["sha1"],
                                       hash_dict["sha256"])
Example #8
    def calculateMapHash(self, mapURI, storedHashDataType):
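        # Hash the map's "map" and "idx" segments, plus the optional "mapPath"
        # segment when it exists, into a single stored-type digest.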
        calculatedHash = hashes.new(storedHashDataType)

        calculatedHash.update(self.readSegment(mapURI, "map"))
        calculatedHash.update(self.readSegment(mapURI, "idx"))

        try:
            calculatedHash.update(self.readSegment(mapURI, "mapPath"))
        except Exception:
            # the mapPath segment may not be present
            pass

        return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                       storedHashDataType)
Example #9
    def readBlockHash(self, chunk_id, hash_datatype):
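        # Locate the bevy holding chunk_id, seek to that chunk's entry in the
        # bevy's block-hash segment and return it as an immutable hash.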
        bevy_id = old_div(chunk_id, self.chunks_per_segment)
        bevy_blockHash_urn = self._get_block_hash_urn(bevy_id, hash_datatype)
        blockLength = hashes.length(hash_datatype)

        with self.resolver.AFF4FactoryOpen(
                bevy_blockHash_urn) as bevy_blockHashes:
            idx = chunk_id * blockLength

            bevy_blockHashes.SeekRead(idx)
            hash_value = bevy_blockHashes.Read(blockLength)

            return hashes.newImmutableHash(binascii.hexlify(hash_value),
                                           hash_datatype)
Example #10
def addPathNames(container_name, pathnames, recursive, append, hashbased):
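    # Create or append to a container and add the given paths: directories
    # become FolderImage/Image nodes (with optional recursion), files become
    # logical streams (plain or Rabin hash-based) with SHA1/MD5 hashes.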
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None

        if not append:
            volume = container.Container.createURN(resolver, container_urn)
            print("Creating AFF4Container: file://%s <%s>" % (container_name, volume.urn))
        else:
            volume = container.Container.openURNtoContainer(container_urn, mode="+", resolver=resolver)
            print("Appending to AFF4Container: file://%s <%s>" % (container_name, volume.urn))

        with volume as volume:
            for pathname in pathnames:
                if not os.path.exists(pathname):
                    print("Path %s not found. Skipping.")
                    continue
                pathname = utils.SmartUnicode(pathname)
                print("\tAdding: %s" % pathname)
                fsmeta = logical.FSMetadata.create(pathname)
                if os.path.isdir(pathname):
                    image_urn = None
                    if volume.isAFF4Collision(pathname):
                        image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
                    else:
                        image_urn = volume.urn.Append(escaping.arnPathFragment_from_path(pathname), quote=False)

                    fsmeta.urn = image_urn
                    fsmeta.store(resolver)
                    resolver.Set(volume.urn, image_urn, rdfvalue.URN(lexicon.standard11.pathName), rdfvalue.XSDString(pathname))
                    resolver.Add(volume.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.standard11.FolderImage))
                    resolver.Add(volume.urn, image_urn, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.standard.Image))
                    if recursive:
                        for child in os.listdir(pathname):
                            pathnames.append(os.path.join(pathname, child))
                else:
                    with open(pathname, "rb") as src:
                        hasher = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                        if not hashbased:
                            urn = volume.writeLogicalStream(pathname, hasher, fsmeta.length)
                        else:
                            urn = volume.writeLogicalStreamRabinHashBased(pathname, hasher, fsmeta.length)
                        fsmeta.urn = urn
                        fsmeta.store(resolver)
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(h.hexdigest(), hasher.hashToType[h])
                            resolver.Add(urn, urn, rdfvalue.URN(lexicon.standard.hash), hh)
        return urn
Example #11
    def readBlockHash(self, chunk_id, hash_datatype):
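        # Older Python 2 variant of readBlockHash (compare Example #9): the
        # block-hash segment URN is built inline, Seek() is used instead of
        # SeekRead(), and the digest is hex-encoded with str.encode('hex').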
        bevy_id = old_div(chunk_id, self.chunks_per_segment)
        bevy_blockHash_urn = self.urn.Append(
            "%08d.blockHash.%s" %
            (bevy_id, hashes.toShortAlgoName(hash_datatype)))
        blockLength = hashes.length(hash_datatype)

        with self.resolver.AFF4FactoryOpen(
                bevy_blockHash_urn) as bevy_blockHashes:
            idx = chunk_id * blockLength

            bevy_blockHashes.Seek(idx)
            hash_value = bevy_blockHashes.Read(blockLength)

            return hashes.newImmutableHash(hash_value.encode('hex'),
                                           hash_datatype)
Example #12
    def doHash(self, mapURI, hashDataType):
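        # Hash a map stream by reading it in 32 KiB chunks; raises if mapURI
        # does not refer to a map.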
        hash = hashes.new(hashDataType)
        if self.isMap(mapURI):
            with self.resolver.AFF4FactoryOpen(mapURI) as mapStream:
                remaining = mapStream.Size()
                count = 0
                while remaining > 0:
                    toRead = min(32 * 1024, remaining)
                    data = mapStream.Read(toRead)
                    assert len(data) == toRead
                    remaining -= len(data)
                    hash.update(data)
                    count = count + 1

                b = hash.hexdigest()
                return hashes.newImmutableHash(b, hashDataType)
        raise Exception("IllegalState")
Example #13
    def testFuzz(self):
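        # Fuzz the logical-stream writer: for a range of stream lengths and
        # maxSegmentResidentSize values, write random data in randomly sized
        # chunks while push-hashing, store the hashes, then re-open the
        # container and re-hash every image with LinearHasher2.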
        chunksize = 512
        for length in [
                chunksize - 1, chunksize, chunksize + 1, chunksize * 2 - 1,
                chunksize * 2, chunksize * 2 + 1, chunksize * 1000, 0
        ]:
            for maxSegmentResidentSize in [
                    0, 1, chunksize - 1, chunksize, chunksize + 1
            ]:
                try:
                    containerName = tempfile.gettempdir(
                    ) + "/testfuzz-length-%d-maxresident%d.aff4" % (
                        length, maxSegmentResidentSize)
                    print(containerName)
                    hasher = linear_hasher.PushHasher(
                        [lexicon.HASH_SHA1, lexicon.HASH_MD5])

                    container_urn = rdfvalue.URN.FromFileName(containerName)
                    with data_store.MemoryDataStore() as resolver:
                        with container.Container.createURN(
                                resolver, container_urn) as volume:
                            volume.maxSegmentResidentSize = maxSegmentResidentSize
                            with volume.newLogicalStream("/foo",
                                                         length) as writer:
                                with open("/dev/random", "rb") as randomStream:
                                    writer.chunk_size = chunksize
                                    writer_arn = writer.urn

                                    pos = 0
                                    while pos < length:
                                        toread = int(
                                            min(
                                                math.ceil(1024 *
                                                          random.random()),
                                                length - pos))
                                        data = randomStream.read(toread)
                                        writer.Write(data)
                                        hasher.update(data)
                                        pos += toread

                            # write in the hashes before auto-close
                            for h in hasher.hashes:
                                hh = hashes.newImmutableHash(
                                    h.hexdigest(), hasher.hashToType[h])
                                volume.resolver.Add(
                                    volume.urn, writer_arn,
                                    rdfvalue.URN(lexicon.standard.hash), hh)
                            print()

                    with container.Container.openURNtoContainer(
                            container_urn) as volume:
                        images = list(volume.images())
                        self.assertEqual(1, len(images),
                                         "Only one logical image")
                        self.assertEqual(
                            "/foo", images[0].name(),
                            "unicode filename should be preserved")

                        fragment = escaping.member_name_for_urn(
                            images[0].urn.value,
                            volume.version,
                            base_urn=volume.urn,
                            use_unicode=True)

                        hasher = linear_hasher.LinearHasher2(
                            volume.resolver, self)
                        for image in volume.images():
                            hasher.hash(image)

                    os.unlink(containerName)
                except Exception:
                    traceback.print_exc()
                    self.fail()
                    continue