Example #1
    def calculateBlockMapHash(self, mapStreamURI, imageStreamURI,
                              storedHashDataType):

        storedBlockHashesHash = self.getStoredBlockHashes(str(imageStreamURI))

        # Python 3 sorted() no longer accepts cmp=; wrap the comparator
        # (requires "import functools" at module level)
        storedBlockHashesHash = sorted(
            storedBlockHashesHash,
            key=functools.cmp_to_key(hashLengthComparator))

        calculatedHash = hashes.new(storedHashDataType)
        for storedHash in storedBlockHashesHash:
            calculatedHash.update(storedHash.digest())

        # fold in the map point and map index hashes (next() instead of
        # the removed Python 2 .next() method)
        calculatedHash.update(next(self.resolver.QuerySubjectPredicate(
            mapStreamURI, self.lexicon.mapPointHash)).digest())

        calculatedHash.update(next(self.resolver.QuerySubjectPredicate(
            mapStreamURI, self.lexicon.mapIdxHash)).digest())

        # the mapPath hash is optional, so a missing entry is not an error
        try:
            calculatedHash.update(next(self.resolver.QuerySubjectPredicate(
                mapStreamURI, self.lexicon.mapPathHash)).digest())
        except StopIteration:
            pass

        return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                       storedHashDataType)
Example #2
    def calculateBlockMapHash(self, mapStreamURI, imageStreamURI,
                              storedHashDataType):
        storedBlockHashesHash = sorted(
            self.getStoredBlockHashes(str(imageStreamURI)),
            key=lambda x: hashOrderingMap[x.blockHashAlgo])

        calculatedHash = hashes.new(storedHashDataType)
        # avoid shadowing the built-in names "hash" and "bytes"
        for storedHash in storedBlockHashesHash:
            calculatedHash.update(storedHash.digest())

        for storedHash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, mapStreamURI, self.lexicon.mapPointHash):
            calculatedHash.update(storedHash.digest())

        for storedHash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, mapStreamURI, self.lexicon.mapIdxHash):
            calculatedHash.update(storedHash.digest())

        for storedHash in self.resolver.QuerySubjectPredicate(
                self.volume_arn, mapStreamURI, self.lexicon.mapPathHash):
            calculatedHash.update(storedHash.digest())

        return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                       storedHashDataType)
Example #3
    def testReadMapOfImage(self):
        fileSize = 629087

        # take the lexicon from our new container
        (version, lex) = Container.identify(self.fileName)

        # setup a resolver
        resolver = data_store.MemoryDataStore(lex)

        # open the two containers within the same resolver (needed so the transitive links work)
        with zip.ZipFile.NewZipFile(resolver, version,
                                    rdfvalue.URN.FromFileName(
                                        self.stdLinear)) as targetContainer:
            with zip.ZipFile.NewZipFile(
                    resolver, version, rdfvalue.URN.FromFileName(
                        self.fileName)) as sourceContainer:

                # open the virtual file and read
                image_urn = sourceContainer.urn.Append("pdf1")
                with resolver.AFF4FactoryOpen(image_urn) as image:
                    # check the size is right
                    self.assertEqual(fileSize, image.Size())

                    # read the header of the virtual file
                    image.SeekRead(0, 0)
                    self.assertEqual(b"%PDF", image.Read(4))

                    # read the whole virtual file and compare with a known hash of it
                    image.SeekRead(0, 0)
                    buf = image.Read(fileSize)
                    h = hashes.new(lexicon.HASH_SHA1)
                    h.update(buf)
                    self.assertEqual(
                        "5A2FEE16139C7B017B7F1961D842D355A860C7AC".lower(),
                        h.hexdigest())
Example #4
    def calculateBlockHashesHash(self, imageStreamURI):
        storedHashes = self.getStoredBlockHashes(imageStreamURI)

        with self.resolver.AFF4FactoryOpen(imageStreamURI) as imageStream:

            # one running hash-of-hashes per stored block-hash algorithm
            calculatedBlockHashes = []
            for storedHash in storedHashes:
                calculatedBlockHashes.append(hashes.new(storedHash.hashDataType))

            offset = 0
            while offset < imageStream.size:
                imageStream.seek(offset)
                block = imageStream.Read(imageStream.chunk_size)

                for i in range(len(storedHashes)):
                    calculatedBlockHashesHash = calculatedBlockHashes[i]
                    hashDataType = storedHashes[i].blockHashAlgo

                    # verify the block hash
                    h = hashes.new(hashDataType)
                    h.update(block)
                    calculatedBlockHash = h.hexdigest()

                    chunkIdx = offset // imageStream.chunk_size
                    storedBlockHash = imageStream.readBlockHash(
                        chunkIdx, hashDataType)
                    if calculatedBlockHash != storedBlockHash.value:
                        self.listener.onInvalidBlockHash(
                            calculatedBlockHash, storedBlockHash.value,
                            imageStreamURI, offset)
                    else:
                        self.listener.onValidBlockHash(calculatedBlockHash)

                    # fold this block's digest into the hash-of-hashes
                    calculatedBlockHashesHash.update(h.digest())

                offset = offset + imageStream.chunk_size

        # we now have the block hashes hash calculated
        res = []
        for i in range(len(storedHashes)):
            res.append(BlockHashesHash(storedHashes[i].blockHashAlgo,
                                       calculatedBlockHashes[i].hexdigest(),
                                       storedHashes[i].hashDataType))

        return res
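The function above hashes every chunk of the image stream and folds each chunk's digest into a running "hash of block hashes" per algorithm. A minimal standalone sketch of the same idea using only hashlib (the 32 KiB chunk size and SHA-256 are illustrative assumptions, not AFF4 defaults):

    import hashlib

    def block_hashes_hash(stream, chunk_size=32 * 1024, algo="sha256"):
        # hash each chunk, then hash the concatenation of the chunk digests
        outer = hashlib.new(algo)
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:
                break
            outer.update(hashlib.new(algo, chunk).digest())
        return outer.hexdigest()

    # e.g. block_hashes_hash(open("image.raw", "rb"))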
Example #5
    def calculateSegmentHash(self, parentURI, subSegment, hashDataType):
        calculatedHash = hashes.new(hashDataType)

        data = self.readSegment(parentURI, subSegment)
        if data is None:
            raise Exception("Failed to read segment %s" % subSegment)

        calculatedHash.update(data)
        return hashes.newImmutableHash(calculatedHash.hexdigest(), hashDataType)
Example #6
    def calculateMapHash(self, mapURI, storedHashDataType):
        calculatedHash = hashes.new(storedHashDataType)

        calculatedHash.update(self.readSegment(mapURI, "map"))
        calculatedHash.update(self.readSegment(mapURI, "idx"))

        # the mapPath segment is optional, so ignore a missing one
        try:
            calculatedHash.update(self.readSegment(mapURI, "mapPath"))
        except Exception:
            pass

        return hashes.newImmutableHash(calculatedHash.hexdigest(),
                                       storedHashDataType)
Example #7
    def doHash(self, mapURI, hashDataType):
        h = hashes.new(hashDataType)
        if self.isMap(mapURI):
            with self.resolver.AFF4FactoryOpen(mapURI) as mapStream:
                # stream the map contents through the hash in 32 KiB reads
                remaining = mapStream.Size()
                while remaining > 0:
                    toRead = min(32 * 1024, remaining)
                    data = mapStream.Read(toRead)
                    assert len(data) == toRead
                    remaining -= len(data)
                    h.update(data)

                return hashes.newImmutableHash(h.hexdigest(), hashDataType)
        raise Exception("IllegalState")
Example #8
    def doValidateContainer(self):
        image = next(
            self.resolver.QueryPredicateObject(lexicon.AFF4_TYPE,
                                               self.lexicon.Image))
        datastreams = list(
            self.resolver.QuerySubjectPredicate(image,
                                                self.lexicon.dataStream))

        calculatedHashes = {}

        for stream in datastreams:
            if self.isMap(stream):
                for imageStreamURI in self.resolver.QuerySubjectPredicate(
                        stream, self.lexicon.dependentStream):
                    parentMap = self.getParentMap(imageStreamURI)
                    if parentMap == stream:
                        # only validate the map and stream pair in the same container
                        self.validateBlockHashesHash(imageStreamURI)
                        self.validateMapIdxHash(parentMap)
                        self.validateMapPointHash(parentMap)
                        self.validateMapPathHash(parentMap)
                        self.validateMapHash(parentMap)

                        calculatedHash = self.validateBlockMapHash(
                            parentMap, imageStreamURI)
                        calculatedHashes[parentMap] = calculatedHash

        storedHash = next(
            self.resolver.QuerySubjectPredicate(image, self.lexicon.hash))

        hasha = ""
        hashb = ""
        parentmap = None

        # TODO: handle more cleanlythe sematic difference between datatypes
        if len(list(calculatedHashes.keys())) == 1:
            # This is a single part image
            # The single AFF4 hash is just the blockMapHash

            parentMap = list(calculatedHashes.keys())[0]
            calculatedHash = calculatedHashes[parentMap]

            hasha = storedHash
            hashb = calculatedHash

        else:
            # This is a multiple part image.
            # The single AFF4 hash is one layer up in the Merkle tree again, with the
            # subordinate nodes being the blockMapHashes for the map stored in each
            # container volume.

            # The hash algorithm we use for the single AFF4 hash is the same
            # algorithm we use for all of the Merkle tree inner nodes.
            firstCalculatedHash = calculatedHashes[list(
                calculatedHashes.keys())[0]]
            currentHash = hashes.new(firstCalculatedHash.datatype)

            # We rely on the natural ordering of the map URNs, as they are stored
            # in the map, to order the blockMapHashes in the Merkle tree.
            for parentMap in list(calculatedHashes.keys()):
                calculatedHash = calculatedHashes[parentMap]
                currentHash.update(calculatedHash.digest())

            hasha = storedHash.value
            hashb = currentHash.hexdigest()

        if hasha != hashb:
            self.listener.onInvalidHash("AFF4Hash", hasha, hashb, parentMap)
        else:
            self.listener.onValidHash("AFF4Hash", hasha, parentMap)
Example #9
    def doValidateContainer(self):
        # FIXME: This should further restrict by container URN since
        # the same data store may be used for multiple containers with
        # many images.
        for image in self.resolver.QueryPredicateObject(
                self.volume_arn, lexicon.AFF4_TYPE, self.lexicon.Image):

            datastreams = list(
                self.resolver.QuerySubjectPredicate(self.volume_arn, image,
                                                    self.lexicon.dataStream))

            calculated_hashes = collections.OrderedDict()
            hash_datatype = None

            for stream in datastreams:
                if self.isMap(stream):
                    for image_stream_uri in self.resolver.QuerySubjectPredicate(
                            self.volume_arn, stream,
                            self.lexicon.dependentStream):
                        parent_map = self.getParentMap(image_stream_uri)
                        if parent_map == stream:
                            # only validate the map and stream pair in the same container
                            self.validateBlockHashesHash(image_stream_uri)
                            self.validateMapIdxHash(parent_map)
                            self.validateMapPointHash(parent_map)
                            self.validateMapPathHash(parent_map)
                            self.validateMapHash(parent_map)

                            calculated_hash = self.validateBlockMapHash(
                                parent_map, image_stream_uri)
                            calculated_hashes[parent_map] = calculated_hash

                            # Assume all block hashes are the same type.
                            if (hash_datatype is not None and
                                    hash_datatype != calculated_hash.datatype):
                                raise AttributeError(
                                    "Block hashes are not all the same type.")
                            else:
                                hash_datatype = calculated_hash.datatype

            for stored_hash in self.resolver.QuerySubjectPredicate(
                    self.volume_arn, image, self.lexicon.hash):
                hasha = ""
                hashb = ""
                parent_map = None

                # TODO: handle more cleanly the semantic difference between datatypes
                if len(calculated_hashes) == 1:
                    # This is a single part image
                    # The single AFF4 hash is just the blockMapHash
                    parent_map, calculated_hash = calculated_hashes.popitem()
                    hasha = stored_hash
                    hashb = calculated_hash

                else:
                    # This is a multiple part image. The single AFF4
                    # hash is one layer up in the Merkle tree again,
                    # with the subordinate nodes being the
                    # blockMapHashes for the map stored in each
                    # container volume.

                    # The hash algorithm we use for the single AFF4
                    # hash is the same algorithm we use for all of the
                    # Merkle tree inner nodes.
                    current_hash = hashes.new(hash_datatype)

                    # The canonical striped images and Evimetry rely on
                    # the natural ordering (string comparison) of the
                    # map URNs, as they are stored in the map, to order
                    # the blockMapHashes in the Merkle tree.
                    #
                    # For example, a striped image composed of two
                    # containers has one map per container:
                    #   c1 --> aff4://363ac10c-8d8d-4905-ac25-a14aaddd8a41
                    #   c2 --> aff4://2dd04819-73c8-40e3-a32b-fdddb0317eac
                    # At this level of the Merkle tree we order the
                    # concatenated hashes by map URI, so we calculate
                    # the hash from c2 then c1.
                    # TODO: update the specification to reflect this rule

                    for parent_map, calculated_hash in sorted(
                            calculated_hashes.items()):
                        current_hash.update(calculated_hash.digest())

                    hasha = stored_hash.value
                    hashb = current_hash.hexdigest()

                if hasha != hashb:
                    self.listener.onInvalidHash("AFF4Hash", hasha, hashb,
                                                parent_map)
                else:
                    self.listener.onValidHash("AFF4Hash", hasha, parent_map)
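A minimal illustration of the ordering rule described in the comments above, using plain hashlib (the URNs are the ones from the comment; the digest values and SHA-256 stand in for the container's configured algorithm):

    import hashlib

    # hypothetical blockMapHash digests, keyed by map URN
    block_map_hashes = {
        "aff4://363ac10c-8d8d-4905-ac25-a14aaddd8a41": b"\x01" * 32,  # c1
        "aff4://2dd04819-73c8-40e3-a32b-fdddb0317eac": b"\x02" * 32,  # c2
    }

    # sort by map URN (string comparison), then hash the concatenated
    # digests; c2's URN sorts before c1's, so its digest is folded in first
    top = hashlib.sha256()
    for urn in sorted(block_map_hashes):
        top.update(block_map_hashes[urn])
    print(top.hexdigest())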
Example #10
    def writeLogicalStreamHashBased(self, filename, readstream, length, check_bytes=False):
        logical_file_id = None
        if self.isAFF4Collision(filename):
            logical_file_id = rdfvalue.URN("aff4://%s" % uuid.uuid4())
        else:
            logical_file_id = self.urn.Append(escaping.arnPathFragment_from_path(filename), quote=False)

        chunk_size = self.block_store_stream.chunk_size

        with aff4_map.AFF4Map.NewAFF4Map(
                self.resolver, logical_file_id, self.urn) as logical_file_map:
            file_offset = 0
            while file_offset < length:
                toread = min(length-file_offset, chunk_size)
                chunk = readstream.read(toread)

                # pad the chunk to chunksize if it is small
                read_chunk_size = len(chunk)
                if read_chunk_size < chunk_size:
                    chunk = chunk + b"\x00" * (chunk_size - read_chunk_size)

                h = hashes.new(lexicon.HASH_SHA512)
                h.update(chunk)
                # the chunk's identity is its SHA-512, encoded with the
                # URL-safe Base64 alphabet from RFC 4648
                hashid = rdfvalue.URN("aff4:sha512:" + base64.urlsafe_b64encode(h.digest()).decode())

                # check if this hash is in the container already
                existing_bytestream_reference_id = self.resolver.GetUnique(
                    lexicon.any, hashid, rdfvalue.URN(lexicon.standard.dataStream))

                if existing_bytestream_reference_id is None:
                    block_stream_address = self.block_store_stream.TellWrite()
                    self.block_store_stream.Write(chunk)

                    chunk_reference_id = rdfvalue.URN(
                        self.block_store_stream.urn.SerializeToString() +
                        "[0x%x:0x%x]" % (block_stream_address, chunk_size))
                    self.resolver.Add(self.urn, hashid, rdfvalue.URN(lexicon.standard.dataStream), chunk_reference_id)

                    logical_file_map.AddRange(file_offset, 0, toread, hashid)
                    #print("[%x, %x] -> %s -> %s" % (file_offset, toread, hashid, chunk_reference_id))
                else:
                    if check_bytes:
                        with self.resolver.AFF4FactoryOpen(existing_bytestream_reference_id) as existing_chunk_stream:
                            existing_chunk_length = existing_chunk_stream.length
                            existing_chunk = existing_chunk_stream.Read(existing_chunk_length)

                            if chunk != existing_chunk:
                                # we hit the jackpot and found a hash collision.
                                # In this highly unlikely event, we store the new
                                # bytes using regular logical imaging; to record
                                # the collision, we add the colliding stream as a
                                # property
                                print("!!!Collision found for hash %s" % hashid)
                                block_stream_address = self.block_store_stream.TellWrite()
                                self.block_store_stream.Write(chunk)

                                chunk_reference_id = rdfvalue.URN(
                                    self.block_store_stream.urn.SerializeToString() +
                                    "[0x%x:0x%x]" % (block_stream_address, chunk_size))
                                logical_file_map.AddRange(file_offset, block_stream_address, chunk_size, self.block_store_stream.urn)

                                self.resolver.Add(self.urn, hashid,
                                                  rdfvalue.URN(lexicon.standard11.collidingDataStream),
                                                  chunk_reference_id)
                            else:
                                logical_file_map.AddRange(file_offset, 0, toread, hashid)
                    else:
                        logical_file_map.AddRange(file_offset, 0, toread, hashid)
                    #print("[%x, %x] -> %s -> %s" % (file_offset, toread, hashid, existing_bytestream_reference_id))

                file_offset += toread

        logical_file_map.Close()

        self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.standard11.FileImage))
        self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.standard.Image))
        self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.standard11.pathName), rdfvalue.XSDString(filename))
        return logical_file_id
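The hash-based imaging above keys every chunk by a content address. A standalone sketch of how such an identifier is derived, using only hashlib and base64 (the "aff4:sha512:" prefix follows the code above; the all-zero chunk is illustrative):

    import base64
    import hashlib

    def chunk_hash_id(chunk):
        # SHA-512 of the (padded) chunk, URL-safe Base64 per RFC 4648
        digest = hashlib.sha512(chunk).digest()
        return "aff4:sha512:" + base64.urlsafe_b64encode(digest).decode()

    print(chunk_hash_id(b"\x00" * 32 * 1024))  # id of an all-zero 32 KiB chunk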
Example #11
    def writeLogicalStreamRabinHashBased(self, filename, readstream, length, check_bytes=False):
        logical_file_id = None
        if self.isAFF4Collision(filename):
            logical_file_id = rdfvalue.URN("aff4://%s" % uuid.uuid4())
        else:
            logical_file_id = self.urn.Append(escaping.arnPathFragment_from_path(filename), quote=False)

        chunk_size = 32*1024
        cdc = fastchunking.RabinKarpCDC(window_size=48, seed=0)
        chunker = cdc.create_chunker(chunk_size=4096)


        with aff4_map.AFF4Map.NewAFF4Map(
                self.resolver, logical_file_id, self.urn) as logical_file_map:
            file_offset = 0
            lastbuffer = None
            lastoffset = 0
            chunk_offset = 0
            while file_offset < length:
                toread = min(length-file_offset, chunk_size)
                buffer = readstream.read(toread)

                foundBoundaries = False
                for boundary in chunker.next_chunk_boundaries(buffer):
                    foundBoundaries = True

                    if lastbuffer is not None:
                        # this chunk straddles the previous read buffer
                        chunk = lastbuffer[lastoffset:]
                        chunk_offset = file_offset - len(chunk)
                        chunk = chunk + buffer[:boundary]
                        lastbuffer = None
                    else:
                        chunk = buffer[lastoffset:boundary]
                        chunk_offset = file_offset + lastoffset

                    h = hashes.new(lexicon.HASH_SHA512)
                    h.update(chunk)

                    self.preserveChunk(logical_file_map, chunk, chunk_offset, h, check_bytes)

                    lastoffset = boundary

                if not foundBoundaries:
                    # no boundary in this read; carry the bytes forward
                    if lastbuffer is not None:
                        lastbuffer = lastbuffer + buffer
                    else:
                        lastbuffer = buffer
                else:
                    lastbuffer = buffer
                file_offset += toread


            # flush any trailing bytes after the final boundary
            if lastbuffer is not None and lastoffset < len(lastbuffer):
                chunk = lastbuffer[lastoffset:]
                chunk_offset = file_offset - len(chunk)
                h = hashes.new(lexicon.HASH_SHA512)
                h.update(chunk)
                self.preserveChunk(logical_file_map, chunk, chunk_offset, h, check_bytes)

        logical_file_map.Close()

        self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.standard11.FileImage))
        self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.AFF4_TYPE), rdfvalue.URN(lexicon.standard.Image))
        self.resolver.Add(self.urn, logical_file_id, rdfvalue.URN(lexicon.standard11.pathName), rdfvalue.XSDString(filename))
        return logical_file_id
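For reference, a minimal standalone sketch of the content-defined chunking used above, with the same fastchunking calls as the code (the input file name is illustrative):

    import fastchunking

    # Rabin-Karp content-defined chunking, configured as above
    cdc = fastchunking.RabinKarpCDC(window_size=48, seed=0)
    chunker = cdc.create_chunker(chunk_size=4096)

    data = open("somefile.bin", "rb").read()  # hypothetical input
    last = 0
    for boundary in chunker.next_chunk_boundaries(data):
        print("chunk [%d:%d]" % (last, boundary))
        last = boundary
    # bytes after the final boundary form the trailing chunk
    print("chunk [%d:%d]" % (last, len(data)))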
Example #12
    def doValidateContainer(self):
        # FIXME: This should further restrict by container URN since
        # the same data store may be used for multiple containers with
        # many images.
        for image in self.resolver.QueryPredicateObject(
                lexicon.AFF4_TYPE, self.lexicon.Image):

            datastreams = list(
                self.resolver.QuerySubjectPredicate(image,
                                                    self.lexicon.dataStream))

            calculated_hashes = collections.OrderedDict()
            hash_datatype = None

            for stream in datastreams:
                if self.isMap(stream):
                    for image_stream_uri in self.resolver.QuerySubjectPredicate(
                            stream, self.lexicon.dependentStream):
                        parent_map = self.getParentMap(image_stream_uri)
                        if parent_map == stream:
                            # only validate the map and stream pair in the same container
                            self.validateBlockHashesHash(image_stream_uri)
                            self.validateMapIdxHash(parent_map)
                            self.validateMapPointHash(parent_map)
                            self.validateMapPathHash(parent_map)
                            self.validateMapHash(parent_map)

                            calculated_hash = self.validateBlockMapHash(
                                parent_map, image_stream_uri)
                            calculated_hashes[parent_map] = calculated_hash

                            # Assume all block hashes are the same type.
                            if (hash_datatype is not None and
                                    hash_datatype != calculated_hash.datatype):
                                raise AttributeError(
                                    "Block hashes are not all the same type.")
                            else:
                                hash_datatype = calculated_hash.datatype

            for stored_hash in self.resolver.QuerySubjectPredicate(
                    image, self.lexicon.hash):
                hasha = ""
                hashb = ""
                parent_map = None

                # TODO: handle more cleanly the semantic difference between datatypes
                if len(calculated_hashes) == 1:
                    # This is a single part image
                    # The single AFF4 hash is just the blockMapHash
                    parent_map, calculated_hash = calculated_hashes.popitem()
                    hasha = stored_hash
                    hashb = calculated_hash

                else:
                    # This is a multiple part image. The single AFF4
                    # hash is one layer up in the Merkle tree again,
                    # with the subordinate nodes being the
                    # blockMapHashes for the map stored in each
                    # container volume.

                    # The hash algorithm we use for the single AFF4
                    # hash is the same algorithm we use for all of the
                    # Merkle tree inner nodes.
                    current_hash = hashes.new(hash_datatype)

                    # FIXME: This is a flaw in the scheme since there
                    # is no reasonable order specified. We temporarily
                    # sort the results to get the test to pass but
                    # this needs to be properly addressed.

                    # We rely on the natural ordering of the map URNs,
                    # as they are stored in the map, to order the
                    # blockMapHashes in the Merkle tree.
                    for parent_map, calculated_hash in sorted(
                            calculated_hashes.items()):
                        current_hash.update(calculated_hash.digest())

                    hasha = stored_hash.value
                    hashb = current_hash.hexdigest()

                if hasha != hashb:
                    self.listener.onInvalidHash("AFF4Hash", hasha, hashb,
                                                parent_map)
                else:
                    self.listener.onValidHash("AFF4Hash", hasha, parent_map)