def LoadFromTurtle(self, stream, volume_arn):
    data = streams.ReadAll(stream)
    g = rdflib.Graph()
    g.parse(data=data, format="turtle")

    for urn, attr, value in g:
        urn = utils.SmartUnicode(urn)
        attr = utils.SmartUnicode(attr)
        serialized_value = value

        if isinstance(value, rdflib.URIRef):
            value = rdfvalue.URN(utils.SmartUnicode(serialized_value))
        elif value.datatype in registry.RDF_TYPE_MAP:
            value = registry.RDF_TYPE_MAP[value.datatype](serialized_value)
        else:
            # Default to a string literal.
            value = rdfvalue.XSDString(value)

        # Images get an extra transient fact recording which volume they
        # are stored in.
        if (attr == rdfvalue.URN(lexicon.AFF4_TYPE) and
                value == rdfvalue.URN(lexicon.AFF4_IMAGE_TYPE)):
            self.Add(lexicon.transient_graph, urn, lexicon.AFF4_STORED,
                     volume_arn)

        self.Add(volume_arn, urn, attr, value)

    # Look for the AFF4 namespace defined in the turtle.
    for (_, b) in g.namespace_manager.namespaces():
        if (str(b) == lexicon.AFF4_NAMESPACE or
                str(b) == lexicon.AFF4_LEGACY_NAMESPACE):
            self.aff4NS = b
def DumpToTurtle(self, stream=None, verbose=False):
    g = rdflib.Graph()

    for urn, items in self.store.items():
        urn = rdflib.URIRef(utils.SmartUnicode(urn))
        obj_type = items.get(utils.SmartStr(lexicon.AFF4_TYPE))
        if obj_type is None:
            continue

        for attr, value in list(items.items()):
            attr = utils.SmartUnicode(attr)

            # We suppress certain facts which can be deduced from the file
            # format itself. This ensures that we do not have conflicting
            # data in the data store. The data in the data store is a
            # combination of explicit facts and implied facts.
            if not verbose:
                if attr.startswith(lexicon.AFF4_VOLATILE_NAMESPACE):
                    continue

                if attr in self.suppressed_rdftypes.get(obj_type, ()):
                    continue

            attr = rdflib.URIRef(attr)
            if not isinstance(value, list):
                value = [value]

            for item in value:
                g.add((urn, attr, item.GetRaptorTerm()))

    result = g.serialize(format='turtle')
    if stream:
        stream.write(result)
    return result
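# A minimal usage sketch (assuming this method lives on a populated
# MemoryDataStore called "resolver", and noting that whether serialize()
# yields str or bytes depends on the installed rdflib version):
#
#   turtle = resolver.DumpToTurtle()        # just return the serialization
#   resolver.DumpToTurtle(stream=out_fd)    # or also write it to out_fd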
def Put(self, aff4_obj, in_use_state=False):
    if type(aff4_obj) is aff4_map.ByteRangeARN:
        return

    key = aff4_obj.urn.SerializeToString()
    #LOGGER.debug("Putting %s in cache" % key)
    CHECK(key not in self.in_use,
          u"Object %s Put in cache while already in use." %
          utils.SmartUnicode(key))

    CHECK(key not in self.lru_map,
          u"Object %s Put in cache while already in cache." %
          utils.SmartUnicode(key))

    entry = AFF4ObjectCacheEntry(key, aff4_obj)
    if in_use_state:
        entry.use_count = 1
        self.in_use[key] = entry
        return

    self.lru_list.append(entry)
    self.lru_map[key] = entry

    self._Trim()
def QuerySubjectPredicate(self, graph, subject, predicate):
    if isinstance(subject, rdfvalue.URN):
        subject = subject.SerializeToString()
    else:
        subject = utils.SmartUnicode(subject)

    if isinstance(predicate, rdfvalue.URN):
        predicate = predicate.SerializeToString()
    else:
        predicate = utils.SmartUnicode(predicate)

    if graph == lexicon.any or graph is None:
        for val in self.QuerySubjectPredicateInternal(
                self.transient_store, subject, predicate):
            yield val
        for val in self.QuerySubjectPredicateInternal(
                self.store, subject, predicate):
            yield val
    elif graph == transient_graph:
        for val in self.QuerySubjectPredicateInternal(
                self.transient_store, subject, predicate):
            yield val
    else:
        for val in self.QuerySubjectPredicateInternal(
                self.store, subject, predicate):
            yield val
def LoadFromURN(self):
    self.storage = self.resolver.Get(self.urn, lexicon.AFF4_STORED)
    if not self.storage:
        LOGGER.error("Unable to find storage for AFF4Directory %s", self.urn)
        raise IOError("NOT_FOUND")

    # The actual filename for the root directory.
    self.root_path = self.storage.ToFilename()

    try:
        # We need to get the URN of the container before we can process
        # anything.
        with self.resolver.AFF4FactoryOpen(
                self.storage.Append(
                    lexicon.AFF4_CONTAINER_DESCRIPTION)) as desc:
            if desc:
                urn_string = utils.SmartUnicode(desc.Read(1000))

                if (urn_string and
                        self.urn.SerializeToString() != urn_string):
                    self.resolver.DeleteSubject(self.urn)
                    self.urn.Set(urn_string)

                # Set these triples with the new URN so we know how to open
                # it.
                self.resolver.Set(self.urn, lexicon.AFF4_TYPE,
                                  rdfvalue.URN(lexicon.AFF4_DIRECTORY_TYPE))

                self.resolver.Set(self.urn, lexicon.AFF4_STORED,
                                  rdfvalue.URN(self.storage))

                LOGGER.info("AFF4Directory volume found: %s", self.urn)

        # Try to load the RDF metadata file from the storage.
        with self.resolver.AFF4FactoryOpen(
                self.storage.Append(
                    lexicon.AFF4_CONTAINER_INFO_TURTLE)) as turtle_stream:
            if turtle_stream:
                self.resolver.LoadFromTurtle(turtle_stream)

                # Find all the contained objects and adjust their
                # filenames.
                for subject in self.resolver.SelectSubjectsByPrefix(
                        utils.SmartUnicode(self.urn)):
                    child_filename = self.resolver.Get(
                        subject, lexicon.AFF4_DIRECTORY_CHILD_FILENAME)
                    if child_filename:
                        self.resolver.Set(
                            subject, lexicon.AFF4_FILE_NAME,
                            rdfvalue.XSDString("%s%s%s" % (
                                self.root_path, os.sep, child_filename)))
    except IOError:
        pass
def Dump(self):
    # Now dump the objects in use.
    print("Objects in use:")
    for key, entry in list(self.in_use.items()):
        print(u"%s - %s" % (utils.SmartUnicode(key), entry.use_count))

    print("Objects in cache:")
    for entry in self.lru_list:
        print(u"%s - %s" % (utils.SmartUnicode(entry.key), entry.use_count))
def trimVolume(volume, image):
    global TERSE
    if TERSE:
        volstring = utils.SmartUnicode(volume)
        imagestring = utils.SmartUnicode(image)
        if imagestring.startswith(volstring):
            imagestring = imagestring[len(volstring):]
        return imagestring
    else:
        return image
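# A quick illustration (hypothetical URNs), assuming TERSE is enabled:
#
#   trimVolume("aff4://volume-uuid", "aff4://volume-uuid/disk.img")
#   -> "/disk.img"
#
# With TERSE disabled the image URN is returned unchanged.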
def LoadFromURN(self):
    map_urn = self.urn.Append("map")
    map_idx_urn = self.urn.Append("idx")

    # Parse the map out of the map stream. If the stream does not exist yet
    # we just start with an empty map.
    try:
        with self.resolver.AFF4FactoryOpen(map_idx_urn) as map_idx:
            self.targets = [
                rdfvalue.URN(utils.SmartUnicode(x))
                for x in map_idx.Read(map_idx.Size()).splitlines()]

        with self.resolver.AFF4FactoryOpen(map_urn) as map_stream:
            read_length = struct.calcsize(Range.format_str)
            while True:
                data = map_stream.Read(read_length)
                if not data:
                    break
                map_range = self.deserializeMapPoint(data)
                if map_range.length > 0:
                    self.tree.addi(map_range.map_offset,
                                   map_range.map_end, map_range)
    except IOError:
        pass
def extractAll(container_name, destFolder):
    container_urn = rdfvalue.URN.FromFileName(container_name)
    urn = None

    with container.Container.openURNtoContainer(container_urn) as volume:
        printVolumeInfo(container_name, volume)
        resolver = volume.resolver

        for imageUrn in resolver.QueryPredicateObject(
                volume.urn, lexicon.AFF4_TYPE, lexicon.standard11.FileImage):
            imageUrn = utils.SmartUnicode(imageUrn)

            pathName = next(resolver.QuerySubjectPredicate(
                volume.urn, imageUrn, lexicon.standard11.pathName)).value
            if pathName.startswith("/"):
                pathName = "." + pathName

            with resolver.AFF4FactoryOpen(imageUrn) as srcStream:
                if destFolder != "-":
                    destFile = os.path.join(destFolder, pathName)
                    if not os.path.exists(os.path.dirname(destFile)):
                        try:
                            os.makedirs(os.path.dirname(destFile))
                        except OSError as exc:
                            # Guard against a race condition.
                            if exc.errno != errno.EEXIST:
                                raise

                    # Open in binary mode - the source stream yields bytes.
                    with open(destFile, "wb") as destStream:
                        shutil.copyfileobj(srcStream, destStream)
                    print("\tExtracted %s to %s" % (pathName, destFile))

                    lastWritten = nextOrNone(resolver.QuerySubjectPredicate(
                        volume.urn, imageUrn, lexicon.standard11.lastWritten))
                    lastAccessed = nextOrNone(resolver.QuerySubjectPredicate(
                        volume.urn, imageUrn, lexicon.standard11.lastAccessed))
                    recordChanged = nextOrNone(resolver.QuerySubjectPredicate(
                        volume.urn, imageUrn, lexicon.standard11.recordChanged))
                    birthTime = nextOrNone(resolver.QuerySubjectPredicate(
                        volume.urn, imageUrn, lexicon.standard11.birthTime))
                    logical.resetTimestamps(destFile, lastWritten, lastAccessed,
                                            recordChanged, birthTime)
                else:
                    shutil.copyfileobj(srcStream, sys.stdout)
def extract(container_name, imageURNs, destFolder):
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None

        with container.Container.openURNtoContainer(container_urn) as volume:
            printVolumeInfo(container_name, volume)
            resolver = volume.resolver

            for imageUrn in imageURNs:
                imageUrn = utils.SmartUnicode(imageUrn)

                pathName = next(resolver.QuerySubjectPredicate(
                    volume.urn, imageUrn, volume.lexicon.pathName))

                with resolver.AFF4FactoryOpen(imageUrn) as srcStream:
                    if destFolder != "-":
                        pathName = escaping.arnPathFragment_from_path(
                            pathName.value)
                        while pathName.startswith("/"):
                            pathName = pathName[1:]

                        destFile = os.path.join(destFolder, pathName)
                        if not os.path.exists(os.path.dirname(destFile)):
                            try:
                                os.makedirs(os.path.dirname(destFile))
                            except OSError as exc:
                                # Guard against a race condition.
                                if exc.errno != errno.EEXIST:
                                    raise

                        # Open in binary mode and copy in 32 KiB chunks.
                        with open(destFile, "wb") as destStream:
                            shutil.copyfileobj(srcStream, destStream,
                                               length=32 * 1024)
                        print("\tExtracted %s to %s" % (pathName, destFile))
                    else:
                        shutil.copyfileobj(srcStream, sys.stdout)
def printTurtle(resolver, volume):
    metadataURN = volume.urn.Append("information.turtle")
    try:
        with resolver.AFF4FactoryOpen(metadataURN) as fd:
            txt = fd.ReadAll()
            print(utils.SmartUnicode(txt))
    except Exception:
        pass
def QuerySubjectPredicate(self, graph, subject, predicate):
    for o in super(HDTAssistedDataStore, self).QuerySubjectPredicate(
            graph, subject, predicate):
        yield o

    if self.hdt is None:
        return

    if graph == transient_graph:
        return

    if isinstance(subject, rdfvalue.URN):
        subject = subject.SerializeToString()
    else:
        subject = utils.SmartUnicode(subject)

    if isinstance(predicate, rdfvalue.URN):
        predicate = predicate.SerializeToString()
    else:
        predicate = utils.SmartUnicode(predicate)

    (triples, cardinality) = self.hdt.search_triples(subject, predicate, "")

    for (s, p, o) in triples:
        if o.startswith("\""):
            # It is a literal of the form "value"^^<datatype>.
            (v, t) = o.split("^^")
            v = v.replace("\"", "")
            t = t[1:len(t) - 1]

            datatype = rdflib.URIRef(t)
            if datatype in registry.RDF_TYPE_MAP:
                o = registry.RDF_TYPE_MAP[datatype](v)
            else:
                # Default to a string literal.
                o = rdfvalue.XSDString(v)
        else:
            # Everything else (<urn>, aff4://... or a bare value) is
            # treated as a URN.
            o = rdfvalue.URN(utils.SmartUnicode(o))

        yield o
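# For reference, a raw literal object term coming back from the HDT index
# looks roughly like (hypothetical example):
#
#   "1024"^^<http://www.w3.org/2001/XMLSchema#long>
#
# The split("^^") above separates the quoted lexical form from the
# <datatype> IRI, which is then mapped through registry.RDF_TYPE_MAP.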
def _DumpToTurtle(self, volumeurn, verbose=False):
    g = rdflib.Graph()
    g.bind("aff4", rdflib.Namespace(self.lexicon.base))

    # It looks like rdflib has some problems re-constituting subjects using
    # @base, so that is commented out for now.
    #volumeNamespace = rdflib.Namespace(volumeurn.value + "/")
    #volumeBase = volumeurn.value + "/"

    for urn, items in self.store.items():
        urn = utils.SmartUnicode(urn)
        obj_type = items.get(utils.SmartUnicode(lexicon.AFF4_TYPE))

        # Only dump objects and pseudo map entries.
        if obj_type is None:
            if not urn.startswith(u"aff4:sha512:"):
                continue

        for attr, value in list(items.items()):
            attr = utils.SmartUnicode(attr)

            # We suppress certain facts which can be deduced from the file
            # format itself. This ensures that we do not have conflicting
            # data in the data store. The data in the data store is a
            # combination of explicit facts and implied facts.
            if not verbose:
                if attr.startswith(lexicon.AFF4_VOLATILE_NAMESPACE):
                    continue

            if not isinstance(value, list):
                value = [value]

            for item in value:
                if self._should_ignore(urn, attr, item):
                    continue
                g.add((rdflib.URIRef(urn), rdflib.URIRef(attr),
                       item.GetRaptorTerm()))

    #result = g.serialize(format='turtle', base=volumeNamespace)
    result = g.serialize(format='turtle')
    result = utils.SmartUnicode(result)
    #basestart = "@base <%s> .\r\n" % (volumeBase)
    #result = basestart + result
    return result
def QueryPredicatesBySubject(self, graph, subject):
    subject = utils.SmartUnicode(subject)
    if graph == transient_graph:
        store = self.transient_store
    else:
        store = self.store

    for pred, value in list(store.get(subject, {}).items()):
        yield (rdfvalue.URN().UnSerializeFromString(pred), value)
def testCreateAndAppendSinglePathImage(self):
    try:
        try:
            os.unlink(self.containerName)
        except OSError:
            pass

        container_urn = rdfvalue.URN.FromFileName(self.containerName)
        resolver = data_store.MemoryDataStore()
        urn = None

        frag1path = os.path.join(
            self.testImagesPath,
            "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1")

        with container.Container.createURN(resolver, container_urn) as volume:
            with open(frag1path, "rb") as src:
                stream = linear_hasher.StreamHasher(src, [lexicon.HASH_SHA1])
                urn = volume.writeLogicalStreamHashBased(
                    frag1path, stream, 32768, False)
                for h in stream.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), stream.hashToType[h])
                    self.assertEqual(
                        "deb3fa3b60c6107aceb97f684899387c78587eae", hh.value)
                    resolver.Add(volume.urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)

        frag2path = os.path.join(
            self.testImagesPath,
            "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2")

        with container.Container.openURNtoContainer(
                container_urn, mode="+") as volume:
            with open(frag2path, "rb") as src:
                stream = linear_hasher.StreamHasher(
                    src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                urn = volume.writeLogicalStreamHashBased(
                    frag2path, stream, 2 * 32768, False)
                for h in stream.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), stream.hashToType[h])
                    resolver.Add(volume.urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)

        with container.Container.openURNtoContainer(container_urn) as volume:
            images = list(volume.images())
            images = sorted(images,
                            key=lambda x: utils.SmartUnicode(x.pathName),
                            reverse=False)
            self.assertEqual(2, len(images), "Only two logical images")

            fragmentA = escaping.member_name_for_urn(
                images[0].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)
            fragmentB = escaping.member_name_for_urn(
                images[1].urn.value, volume.version,
                base_urn=volume.urn, use_unicode=True)

            self.assertTrue(fragmentA.endswith(
                "paper-hash_based_disk_imaging_using_aff4.pdf.frag.1"))
            self.assertTrue(fragmentB.endswith(
                "paper-hash_based_disk_imaging_using_aff4.pdf.frag.2"))

            hasher = linear_hasher.LinearHasher2(volume.resolver, self)
            for image in volume.images():
                print("\t%s <%s>" % (image.name(), image.urn))
                hasher.hash(image)
    except Exception:
        traceback.print_exc()
        self.fail()
    finally:
        #os.unlink(containerName)
        pass
def addPathNamesToVolume(resolver, volume, pathnames, recursive, hashbased):
    for pathname in pathnames:
        if not os.path.exists(pathname):
            print("Path %s not found. Skipping." % pathname)
            continue
        pathname = utils.SmartUnicode(pathname)
        print("\tAdding: %s" % pathname)
        fsmeta = logical.FSMetadata.create(pathname)

        if os.path.isdir(pathname):
            image_urn = None
            if volume.isAFF4Collision(pathname):
                image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
            else:
                image_urn = volume.urn.Append(
                    escaping.arnPathFragment_from_path(pathname), quote=False)

            fsmeta.urn = image_urn
            fsmeta.store(resolver)
            resolver.Set(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.standard11.pathName),
                         rdfvalue.XSDString(pathname))
            resolver.Add(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard11.FolderImage))
            resolver.Add(volume.urn, image_urn,
                         rdfvalue.URN(lexicon.AFF4_TYPE),
                         rdfvalue.URN(lexicon.standard.Image))

            if recursive:
                for child in os.listdir(pathname):
                    pathnames.append(os.path.join(pathname, child))
        else:
            with open(pathname, "rb") as src:
                hasher = linear_hasher.StreamHasher(
                    src, [lexicon.HASH_SHA1, lexicon.HASH_MD5,
                          lexicon.HASH_SHA256])
                if not hashbased:
                    urn = volume.writeLogicalStream(
                        pathname, hasher, fsmeta.length)
                else:
                    urn = volume.writeLogicalStreamRabinHashBased(
                        pathname, hasher, fsmeta.length)
                fsmeta.urn = urn
                fsmeta.store(resolver)

                bc_writer = blockchain.BlockChainWriter.getBlockchainWriter()
                hash_dict = {}
                for h in hasher.hashes:
                    hh = hashes.newImmutableHash(
                        h.hexdigest(), hasher.hashToType[h])
                    resolver.Add(urn, urn,
                                 rdfvalue.URN(lexicon.standard.hash), hh)
                    hash_dict[h.name] = hh

                if bc_writer:
                    bc_writer.Set_hash(hash_dict["md5"],
                                       hash_dict["sha1"],
                                       hash_dict["sha256"])
def SelectSubjectsByPrefix(self, graph, prefix):
    prefix = utils.SmartUnicode(prefix)

    if graph == lexicon.any or graph is None:
        storeitems = chain(six.iteritems(self.store),
                           six.iteritems(self.transient_store))
    elif graph == transient_graph:
        storeitems = six.iteritems(self.transient_store)
    else:
        storeitems = six.iteritems(self.store)

    for subject, predicateDict in storeitems:
        if subject.startswith(prefix):
            yield rdfvalue.URN(subject)
def urn_from_member_name(member, base_urn):
    """Returns a URN object from a zip file's member name."""
    member = utils.SmartUnicode(member)

    # Remove %xx escapes.
    member = re.sub(
        "%(..)", lambda x: chr(int("0x" + x.group(1), 0)),
        member)

    # This is an absolute URN.
    if urllib.parse.urlparse(member).scheme == "aff4":
        result = member
    else:
        # A relative member becomes relative to the volume's URN.
        result = base_urn.Append(member, quote=False)

    return rdfvalue.URN(result)
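# A small worked example (hypothetical names): a relative member such as
# "information%2Eturtle" first has its %xx escape decoded, giving
# "information.turtle", and is then appended to the volume URN:
#
#   urn_from_member_name("information.turtle", base_urn)
#   -> aff4://<volume-uuid>/information.turtle
#
# whereas a member that is already an absolute "aff4://..." URN is
# returned as-is.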
def addPathNames(container_name, pathnames, recursive, append, hashbased):
    with data_store.MemoryDataStore() as resolver:
        container_urn = rdfvalue.URN.FromFileName(container_name)
        urn = None

        if not append:
            volume = container.Container.createURN(resolver, container_urn)
            print("Creating AFF4Container: file://%s <%s>" % (
                container_name, volume.urn))
        else:
            volume = container.Container.openURNtoContainer(
                container_urn, mode="+", resolver=resolver)
            print("Appending to AFF4Container: file://%s <%s>" % (
                container_name, volume.urn))

        with volume as volume:
            for pathname in pathnames:
                if not os.path.exists(pathname):
                    print("Path %s not found. Skipping." % pathname)
                    continue
                pathname = utils.SmartUnicode(pathname)
                print("\tAdding: %s" % pathname)
                fsmeta = logical.FSMetadata.create(pathname)

                if os.path.isdir(pathname):
                    image_urn = None
                    if volume.isAFF4Collision(pathname):
                        image_urn = rdfvalue.URN("aff4://%s" % uuid.uuid4())
                    else:
                        image_urn = volume.urn.Append(
                            escaping.arnPathFragment_from_path(pathname),
                            quote=False)

                    fsmeta.urn = image_urn
                    fsmeta.store(resolver)
                    resolver.Set(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.standard11.pathName),
                                 rdfvalue.XSDString(pathname))
                    resolver.Add(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.AFF4_TYPE),
                                 rdfvalue.URN(lexicon.standard11.FolderImage))
                    resolver.Add(volume.urn, image_urn,
                                 rdfvalue.URN(lexicon.AFF4_TYPE),
                                 rdfvalue.URN(lexicon.standard.Image))

                    if recursive:
                        for child in os.listdir(pathname):
                            pathnames.append(os.path.join(pathname, child))
                else:
                    with open(pathname, "rb") as src:
                        hasher = linear_hasher.StreamHasher(
                            src, [lexicon.HASH_SHA1, lexicon.HASH_MD5])
                        if not hashbased:
                            urn = volume.writeLogicalStream(
                                pathname, hasher, fsmeta.length)
                        else:
                            urn = volume.writeLogicalStreamRabinHashBased(
                                pathname, hasher, fsmeta.length)
                        fsmeta.urn = urn
                        fsmeta.store(resolver)
                        for h in hasher.hashes:
                            hh = hashes.newImmutableHash(
                                h.hexdigest(), hasher.hashToType[h])
                            resolver.Add(urn, urn,
                                         rdfvalue.URN(lexicon.standard.hash),
                                         hh)
        return urn
def CreateStruct(struct_name, definition):
    fields = []
    format_string = ["<"]
    defaults = []

    for line in definition.splitlines():
        line = line.strip(" ;")
        components = line.split()
        if len(components) >= 2:
            type_format_char = format_string_map.get(components[0])
            name = components[1]

            if type_format_char is None:
                raise RuntimeError("Invalid definition %r" % line)

            try:
                if components[2] != "=":
                    raise RuntimeError("Invalid definition %r" % line)
                defaults.append(int(components[3], 0))
            except IndexError:
                defaults.append(0)

            format_string.append(type_format_char)
            fields.append(name)

    properties = dict(
        _format_string="".join(format_string),
        _fields=fields,
        _defaults=defaults,
        _name=struct_name)

    # Make accessors for all fields.
    for i, field in enumerate(fields):
        def setx(self, value, i=i):
            self._data[i] = value

        def getx(self, i=i):
            return self._data[i]

        properties[field] = property(getx, setx)

    if six.PY2:
        return type(utils.SmartStr(struct_name), (BaseParser,), properties)
    else:
        return type(utils.SmartUnicode(struct_name), (BaseParser,), properties)
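# A minimal usage sketch (hypothetical struct layout; the field types must
# be keys of format_string_map). Each definition line has the form
# "<type> <name> [= default];" and each field becomes a property on the
# generated class:
#
#   DemoHeader = CreateStruct("DemoHeader", """
#       uint32_t magic = 0x4141;
#       uint16_t version;
#   """)
#   hdr = DemoHeader()   # fields start at their declared defaults
#   hdr.version = 2      # generated property setter writes into _data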
def QueryPredicateObject(self, graph, predicate, object):
    predicate = utils.SmartUnicode(predicate)

    if graph == lexicon.any or graph is None:
        storeitems = chain(six.iteritems(self.store),
                           six.iteritems(self.transient_store))
    elif graph == transient_graph:
        storeitems = six.iteritems(self.transient_store)
    else:
        storeitems = six.iteritems(self.store)

    for subject, data in list(storeitems):
        for pred, value in list(data.items()):
            if pred == predicate:
                if not isinstance(value, list):
                    value = [value]

                if object in value:
                    yield rdfvalue.URN(subject)
def urn_from_member_name(member, base_urn, version):
    """Returns a URN object from a zip file's member name."""
    member = utils.SmartUnicode(member)

    if version != pyaff4.version.basic_zip:
        if version.isLessThanOrEqual(1, 0):
            # Remove %xx escapes.
            member = re.sub(
                "%(..)", lambda x: chr(int("0x" + x.group(1), 0)),
                member)
        elif version.equals(1, 1):
            member = member.replace(" ", "%20")

    # This is an absolute URN.
    if urllib.parse.urlparse(member).scheme == "aff4":
        result = member
    else:
        # A relative member becomes relative to the volume's URN.
        result = base_urn.Append(member, quote=False)

    return rdfvalue.URN(result)
def meta(file):
    volume = container.Container.openURNtoContainer(
        rdfvalue.URN.FromFileName(file))
    resolver = volume.resolver

    metadataURN = volume.urn.Append("information.turtle")
    try:
        with resolver.AFF4FactoryOpen(metadataURN) as fd:
            txt = fd.ReadAll()
            print(utils.SmartUnicode(txt))
    except Exception:
        pass

    for image in volume.images():
        print("\t%s <%s>" % (image.name(), trimVolume(volume.urn, image.urn)))
        with resolver.AFF4FactoryOpen(image.urn) as srcStream:
            if type(srcStream) is aff4_map.AFF4Map2:
                source_ranges = sorted(srcStream.tree)
                for n in source_ranges:
                    d = n.data
                    print("\t\t[%x,%x] -> %s[%x,%x]" % (
                        d.map_offset, d.length,
                        srcStream.targets[d.target_id],
                        d.target_offset, d.length))
def parse_cd(self, backing_store_urn):
    with self.resolver.AFF4FactoryOpen(backing_store_urn) as backing_store:
        # Find the End of Central Directory Record - we read about 4k of
        # data and scan for the header from the end, just in case there is
        # an archive comment appended to the end.
        backing_store.Seek(-BUFF_SIZE, 2)

        ecd_real_offset = backing_store.Tell()
        buffer = backing_store.Read(BUFF_SIZE)

        end_cd, buffer_offset = EndCentralDirectory.FromBuffer(buffer)

        urn_string = None
        ecd_real_offset += buffer_offset

        # Fetch the volume comment.
        if end_cd.comment_len > 0:
            backing_store.Seek(ecd_real_offset + end_cd.sizeof())
            urn_string = backing_store.Read(end_cd.comment_len)
            LOGGER.info("Loaded AFF4 volume URN %s from zip file.",
                        urn_string)

        #if end_cd.size_of_cd == 0xFFFFFFFF:
        #    end_cd, buffer_offset = Zip64EndCD.FromBuffer(buffer)

        #LOGGER.info("Found ECD at %#x", ecd_real_offset)

        # There is a catch 22 here - before we parse the ZipFile we don't
        # know the Volume's URN, but we need to know the URN so the
        # AFF4FactoryOpen() can open it. Therefore we start with a random
        # URN and then create a new ZipFile volume. After parsing the
        # central directory we discover our URN and therefore we can delete
        # the old, randomly selected URN.
        if urn_string and self.urn != urn_string:
            self.resolver.DeleteSubject(self.urn)
            self.urn.Set(utils.SmartUnicode(urn_string))

            # Set these triples so we know how to open the zip file again.
            self.resolver.Set(self.urn, lexicon.AFF4_TYPE,
                              rdfvalue.URN(lexicon.AFF4_ZIP_TYPE))
            self.resolver.Set(self.urn, lexicon.AFF4_STORED,
                              rdfvalue.URN(backing_store_urn))
            self.resolver.Set(backing_store_urn, lexicon.AFF4_CONTAINS,
                              self.urn)

        directory_offset = end_cd.offset_of_cd
        directory_number_of_entries = end_cd.total_entries_in_cd

        # Traditional zip file - non 64 bit.
        if directory_offset > 0 and directory_offset != 0xffffffff:
            # The global difference between the zip file offsets and real
            # file offsets. This is non zero when the zip file was appended
            # to another file.
            self.global_offset = (
                # Real ECD offset.
                ecd_real_offset - end_cd.size_of_cd -

                # Claimed CD offset.
                directory_offset)

            LOGGER.info("Global offset: %#x", self.global_offset)

        # This is a 64 bit archive, find the Zip64EndCD.
        else:
            locator_real_offset = ecd_real_offset - Zip64CDLocator.sizeof()
            backing_store.Seek(locator_real_offset, 0)
            locator = Zip64CDLocator(
                backing_store.Read(Zip64CDLocator.sizeof()))

            if not locator.IsValid():
                raise IOError("Zip64CDLocator invalid or not supported.")

            # Although it may appear that we can use the Zip64CDLocator to
            # locate the Zip64EndCD record via its offset_of_cd record this
            # is not quite so. If the zip file was appended to another file,
            # the offset_of_cd field will not be valid, as it still points
            # to the old offset. In this case we also need to know the
            # global shift.
            backing_store.Seek(
                locator_real_offset - Zip64EndCD.sizeof(), 0)

            end_cd = Zip64EndCD(
                backing_store.Read(Zip64EndCD.sizeof()))

            if not end_cd.IsValid():
                LOGGER.error("Zip64EndCD magic not correct @%#x",
                             locator_real_offset - Zip64EndCD.sizeof())
                raise RuntimeError("Zip64EndCD magic not correct")

            directory_offset = end_cd.offset_of_cd
            directory_number_of_entries = end_cd.number_of_entries_in_volume

            # The global offset is now known:
            self.global_offset = (
                # Real offset of the central directory.
                locator_real_offset - Zip64EndCD.sizeof() -
                end_cd.size_of_cd -

                # The directory offset in zip file offsets.
                directory_offset)

            LOGGER.info("Global offset: %#x", self.global_offset)

        # Now iterate over the directory and read all the ZipInfo structs.
        entry_offset = directory_offset
        for _ in range(directory_number_of_entries):
            backing_store.Seek(entry_offset + self.global_offset, 0)
            entry = CDFileHeader(backing_store.Read(CDFileHeader.sizeof()))

            if not entry.IsValid():
                LOGGER.info("CDFileHeader at offset %#x invalid",
                            entry_offset)
                raise RuntimeError()

            zip_info = ZipInfo(
                filename=backing_store.Read(entry.file_name_length),
                local_header_offset=entry.relative_offset_local_header,
                compression_method=entry.compression_method,
                compress_size=entry.compress_size,
                file_size=entry.file_size,
                crc32=entry.crc32,
                lastmoddate=entry.dosdate,
                lastmodtime=entry.dostime)

            # Zip64 local header - parse the Zip64 extended information
            # extra field. This field isn't a struct, it's a serialization.
            #if zip_info.local_header_offset < 0 or zip_info.local_header_offset == 0xffffffff:
            if entry.extra_field_len > 0:
                extrabuf = backing_store.Read(entry.extra_field_len)

                extra, readbytes = Zip64FileHeaderExtensibleField.FromBuffer(
                    entry, extrabuf)
                extrabuf = extrabuf[readbytes:]

                if extra.header_id == 1:
                    if extra.Get("relative_offset_local_header") is not None:
                        zip_info.local_header_offset = (
                            extra.Get("relative_offset_local_header"))
                    if extra.Get("file_size") is not None:
                        zip_info.file_size = extra.Get("file_size")
                    if extra.Get("compress_size") is not None:
                        zip_info.compress_size = extra.Get("compress_size")
                    #break

            LOGGER.info("Found file %s @ %#x", zip_info.filename,
                        zip_info.local_header_offset)

            # Store this information in the resolver. This allows
            # segments to be directly opened by URN.
            member_urn = aff4_utils.urn_from_member_name(
                zip_info.filename, self.urn)

            self.resolver.Set(member_urn, lexicon.AFF4_TYPE,
                              rdfvalue.URN(lexicon.AFF4_ZIP_SEGMENT_TYPE))
            self.resolver.Set(member_urn, lexicon.AFF4_STORED, self.urn)
            self.resolver.Set(member_urn, lexicon.AFF4_STREAM_SIZE,
                              rdfvalue.XSDInteger(zip_info.file_size))
            self.members[member_urn] = zip_info

            # Go to the next entry.
            entry_offset += (entry.sizeof() +
                             entry.file_name_length +
                             entry.extra_field_len +
                             entry.file_comment_length)
def Dump(self, verbose=False):
    print(utils.SmartUnicode(self.DumpToTurtle(verbose=verbose)))
    self.ObjectCache.Dump()
def UnSerializeFromString(self, string):
    self.Set(utils.SmartUnicode(string))
def LoadFromURN(self):
    map_urn = self.urn.Append("map")
    map_idx_urn = self.urn.Append("idx")

    # Parse the map out of the map stream. If the stream does not exist yet
    # we just start with an empty map.
    try:
        with self.resolver.AFF4FactoryOpen(map_idx_urn) as map_idx:
            self.targets = [
                rdfvalue.URN(utils.SmartUnicode(x))
                for x in map_idx.Read(map_idx.Size()).splitlines()]

        with self.resolver.AFF4FactoryOpen(map_urn) as map_stream:
            format_str = "<QQQI"
            bufsize = map_stream.Size()
            buf = map_stream.Read(bufsize)
            read_length = struct.calcsize(Range.format_str)

            lastUpperOffset = -1
            lastLowerOffset = -1
            lastLength = -1
            lastTarget = -1

            offset = 0
            while offset < bufsize:
                (upperOffset, length, lowerOffset, target) = \
                    struct.unpack_from(format_str, buf, offset)
                offset += read_length

                if lastUpperOffset == -1:
                    # First range seen - just remember it.
                    lastUpperOffset = upperOffset
                    lastLowerOffset = lowerOffset
                    lastLength = length
                    lastTarget = target
                    continue

                if (lastUpperOffset + lastLength == upperOffset and
                        lastLowerOffset + lastLength == lowerOffset and
                        lastTarget == target):
                    # These are adjoining ranges - coalesce them.
                    lastLength = lastLength + length
                    continue
                else:
                    map_range = Range.FromList([
                        lastUpperOffset, lastLength, lastLowerOffset,
                        lastTarget])
                    if map_range.length > 0:
                        self.tree.addi(map_range.map_offset,
                                       map_range.map_end, map_range)
                    lastUpperOffset = upperOffset
                    lastLowerOffset = lowerOffset
                    lastLength = length
                    lastTarget = target

            # Flush the final pending range.
            map_range = Range.FromList(
                [lastUpperOffset, lastLength, lastLowerOffset, lastTarget])
            if map_range.length > 0:
                self.tree.addi(map_range.map_offset,
                               map_range.map_end, map_range)
    except IOError:
        pass
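# Illustration of the coalescing above (hypothetical values): two serialized
# ranges
#   [map_offset 0,  length 10, target_offset 0,  target 1]
#   [map_offset 10, length 5,  target_offset 10, target 1]
# are contiguous in both the map space and the target space, so they are
# merged into a single
#   [map_offset 0,  length 15, target_offset 0,  target 1]
# interval before insertion into the tree.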
def Set(self, data):
    self.value = utils.SmartUnicode(data)
def __lt__(self, other):
    return self.value < utils.SmartUnicode(other)
def UnSerializeFromString(self, string):
    utils.AssertStr(string)
    self.Set(utils.SmartUnicode(string))
    return self