def find_by_value(self, agent_id, desc_domain, selector_prefix, value_regex):
    """
    Ask the bus for descriptors whose value matches value_regex, limited
    to selectors starting with selector_prefix, and return them as
    Descriptor objects bound to this bus.
    """
    serialized_list = self.iface.find_by_value(
        str(agent_id), desc_domain, selector_prefix, value_regex)
    descriptors = []
    for serialized in serialized_list:
        descriptors.append(
            Descriptor.unserialize(serializer, str(serialized), bus=self))
    return descriptors
def push(self, agent_id, serialized_descriptor):
    """
    Receive a descriptor pushed by an agent and add it to the store.

    :param agent_id: identifier of the sending agent
    :param serialized_descriptor: serialized form of the descriptor
    :return: True if the descriptor was accepted and stored; False if it
        was refused (processing loop / too many identical ancestors) or
        had already been seen.
    """
    descriptor = Descriptor.unserialize(serializer,
                                        str(serialized_descriptor))
    desc_domain = str(descriptor.domain)
    uuid = str(descriptor.uuid)
    selector = str(descriptor.selector)
    # ensure processing terminates
    if not format_check.processing_depth(self.store, descriptor):
        # Fix: argument order now matches the format string -
        # "descriptor <domain>:<selector> received from <agent>";
        # previously (agent_id, desc_domain, selector) were logged in
        # the wrong positions.
        log.warning(
            "Refusing descriptor %s:%s received from %s: loop or "
            ">2 ancestors having the same descriptor",
            desc_domain, selector, agent_id)
        return False
    if self.store.add(descriptor):
        self.descriptor_count += 1
        log.debug("PUSH: %s => %s:%s", agent_id, desc_domain, selector)
        if not self.exiting:
            self.new_descriptor(agent_id, desc_domain, uuid, selector)
            # useful in case all agents are in idle/interactive mode
            self._check_idle()
        return True
    else:
        log.debug("PUSH: %s already seen => %s:%s", agent_id,
                  desc_domain, selector)
        return False
def find_by_selector(self, agent_id, desc_domain, selector_prefix, limit=0,
                     offset=0):
    """
    Ask the bus for descriptors whose selector starts with
    selector_prefix, with optional paging (limit/offset), and return
    them as Descriptor objects bound to this bus.
    """
    serialized_list = self.iface.find_by_selector(
        str(agent_id), desc_domain, selector_prefix, limit, offset)
    results = []
    for serialized in serialized_list:
        results.append(
            Descriptor.unserialize(serializer, str(serialized), bus=self))
    return results
def get_descriptor(self, domain, selector):
    """
    Returns descriptor metadata, None if descriptor was not found.
    """
    # Resolve a possibly version-suffixed selector to a concrete one
    selector = self._version_lookup(domain, selector)
    if not selector:
        return None
    fullpath = self.pathFromSelector(domain, selector) + ".meta"
    if not os.path.isfile(fullpath):
        return None
    # Fix: close the metadata file deterministically instead of leaking
    # the handle (original used open(...).read() without closing).
    with open(fullpath, "rb") as fp:
        return Descriptor.unserialize(store_serializer, fp.read())
def get_value(self, domain, selector):
    """
    Returns descriptor value, None if descriptor was not found.
    """
    # Resolve a possibly version-suffixed selector to a concrete one
    selector = self._version_lookup(domain, selector)
    if not selector:
        return None
    fullpath = self.pathFromSelector(domain, selector) + ".value"
    if not os.path.isfile(fullpath):
        return None
    # Fix: close the value file deterministically instead of leaking
    # the handle (original used open(...).read() without closing).
    with open(fullpath, "rb") as fp:
        raw = fp.read()
    try:
        value = Descriptor.unserialize_value(store_serializer, raw)
    except Exception:
        # Narrowed from a bare except; still logs then re-raises so the
        # caller sees the original error.
        log.error("Could not unserialize value from file %s", fullpath)
        raise
    return value
def push(self, agent_id, serialized_descriptor):
    """
    Deserialize a pushed descriptor and add it to the store.

    :return: True when the descriptor was new and stored, False when it
        had already been seen.
    """
    descriptor = Descriptor.unserialize(serializer,
                                        str(serialized_descriptor))
    desc_domain = str(descriptor.domain)
    uuid = str(descriptor.uuid)
    selector = str(descriptor.selector)
    # Guard clause: a duplicate descriptor is ignored
    if not self.store.add(descriptor):
        log.debug("PUSH: %s already seen => %s:%s", agent_id,
                  desc_domain, selector)
        return False
    self.descriptor_count += 1
    log.debug("PUSH: %s => %s:%s", agent_id, desc_domain, selector)
    if not self.exiting:
        self.new_descriptor(agent_id, desc_domain, uuid, selector)
        # useful in case all agents are in idle/interactive mode
        self.check_idle()
    return True
def find_by_value(self, domain, selector_prefix, value_regex):
    """
    Return descriptors from `domain` whose selector begins with
    selector_prefix and whose unserialized value matches value_regex.
    """
    result = []
    # File paths to explore
    pathprefix = self.basepath + '/' + domain + selector_prefix
    paths = [path for path in self.existing_paths
             if path.startswith(pathprefix)]
    # Hoist regex compilation out of the per-file loop
    regex = re.compile(value_regex)
    for path in paths:
        # open and run re.match() on every file matching *.value
        for name in os.listdir(path):
            if os.path.isfile(path + name) and name.endswith('.value'):
                # Fix: close each value file deterministically instead
                # of leaking one handle per matched file.
                with open(path + name, 'rb') as fp:
                    contents = Descriptor.unserialize_value(
                        store_serializer, fp.read())
                if regex.match(contents):
                    selector = path[len(self.basepath)+len(domain)+1:] +\
                        name.split('.')[0]
                    desc = self.get_descriptor(domain, selector)
                    result.append(desc)
    return result
def process(self, descriptor, sender_id):
    """
    Unpack compressed / archived descriptor values and push each
    extracted file as a new, linked descriptor.

    Supported, chosen by substrings of the selector: bzip2 and gzip
    (possibly tar-wrapped), plain tar, zip (with password list), and
    cab (via the external cabextract tool, when configured).

    :param descriptor: descriptor whose value is the archive contents
    :param sender_id: identifier of the agent that pushed the descriptor
    """
    import tarfile
    data = descriptor.value
    selector = descriptor.selector
    #: List of (unarchived file name, descriptor label, unarchived file
    #: contents)
    unarchived = []

    def do_untar(archive, mode, archive_label=descriptor.label,
                 unarchived=unarchived):
        # Collect every non-empty regular member of the tar archive
        tar = tarfile.open(fileobj=StringIO(archive), mode=mode)
        for finfo in tar.getmembers():
            if finfo.isfile() and finfo.size > 0:
                fname = os.path.basename(finfo.name)
                unarchived.append((fname, archive_label + ":" + fname,
                                   tar.extractfile(finfo).read()))

    # Compressed files
    if "/compressed/bzip2" in selector:
        # Try and extract - might be a .tar.bz2
        try:
            do_untar(descriptor.value, "r:bz2")
        except tarfile.TarError:
            # Probably not a compressed tar file
            import bz2
            data = bz2.decompress(descriptor.value)
            fname = descriptor.label
            if fname.endswith('.bz2'):
                # Strip the ".bz2" suffix from the label
                fname = fname[:-4]
            else:
                fname = "bunzipped %s" % fname
            unarchived.append((fname, fname, data))
    if "/compressed/gzip" in selector:
        # Try and extract - might be a .tar.gz
        try:
            do_untar(descriptor.value, "r:gz")
        except tarfile.TarError:
            # Probably not a compressed tar file
            from gzip import GzipFile
            data = GzipFile(fileobj=StringIO(descriptor.value),
                            mode='rb').read()
            fname = descriptor.label
            if fname.endswith('.gz'):
                # Strip the ".gz" suffix from the label
                fname = fname[:-3]
            else:
                fname = "gunzipped %s" % fname
            unarchived.append((fname, fname, data))
    # Archive files
    if "/archive/tar" in selector:
        # mode=None lets tarfile auto-detect the compression
        do_untar(descriptor.value, mode=None)
    if "/archive/zip" in selector:
        from zipfile import ZipFile
        fzip = ZipFile(file=StringIO(descriptor.value))
        try:
            for zfileinfo in fzip.filelist:
                fname = zfileinfo.filename
                zfile = None
                # Try each configured password in turn; the first that
                # opens the member wins
                for pwd in self.passwords:
                    try:
                        zfile = fzip.open(fname, pwd=pwd)
                        break
                    except RuntimeError:
                        # incorrect password
                        continue
                if zfile:
                    unarchived.append(
                        (fname, descriptor.label + ':' + fname,
                         zfile.read()))
                else:
                    self.log.warning(
                        "Could not extract %s from %s "
                        "(incorrect password)", fname, descriptor.label)
        except RuntimeError as e:
            self.log.error(e)
    if "/archive/cab" in selector and self.cabextract:
        try:
            tmpdir = mkdtemp("rebus-cabextract")
            # NOTE(review): if mkdtemp() itself raises, the finally
            # clause references an unbound tmpdir - confirm whether
            # mkdtemp failure is a realistic concern here.
            with NamedTemporaryFile(prefix="rebus-cab") as cabfile:
                cabfile.write(descriptor.value)
                cabfile.flush()
                try:
                    subprocess.check_output(
                        [self.cabextract, '-d', tmpdir, cabfile.name],
                        stderr=subprocess.STDOUT)
                except subprocess.CalledProcessError as e:
                    self.log.error("cabextract exited with status %d"
                                   % e.returncode)
                    self.log.error(e.output)
                for fname in os.listdir(tmpdir):
                    filepathname = os.path.join(tmpdir, fname)
                    # NOTE(review): file handle is not explicitly
                    # closed here (open(...).read()).
                    unarchived.append((fname,
                                       descriptor.label + ':' + fname,
                                       open(filepathname, 'rb').read()))
        finally:
            shutil.rmtree(tmpdir)
    # Push every extracted file as a new descriptor, linked back to the
    # archive it came from
    for fname, desclabel, fcontents in unarchived:
        selector = guess_selector(buf=fcontents, label=desclabel)
        desc = Descriptor(desclabel, selector, fcontents,
                          descriptor.domain, agent=self._name_)
        self.push(desc)
        self.declare_link(
            descriptor, desc, "unarchived", "\"%s\" has been unarchived "
            "from \"%s\"" % (fname, descriptor.label))
def discover(self, relpath):
    """
    Recursively add existing files to storage.
    self.processedlock must be acquired prior to calling this function

    :param relpath: starts and ends with a '/', relative to self.basepath

    Raises Exception on orphan .value/.meta files, on metadata that is
    inconsistent with its file name, and on unexpected file types.
    """
    if relpath == '/agent_intstate/':
        # Ignore internal state of agents
        return
    path = self.basepath + relpath
    self.existing_paths.add(path)
    for elem in os.listdir(path):
        name = path + elem
        relname = relpath + elem
        if os.path.isdir(name):
            # Recurse into subdirectories
            self.discover(relname + '/')
        elif os.path.isfile(name):
            basename = name.rsplit('.', 1)[0]
            if name.endswith('.value'):
                # Serialized descriptor value
                if not os.path.isfile(basename + '.meta'):
                    raise Exception(
                        'Missing associated metadata for %s' % relname)
            elif name.endswith('.meta'):
                # Serialized descriptor metadata
                if not os.path.isfile(basename + '.value'):
                    raise Exception(
                        'Missing associated value for %s' % relname)
                with open(name, 'rb') as fp:
                    try:
                        desc = Descriptor.unserialize(store_serializer,
                                                      fp.read())
                    except:
                        log.error(
                            "Could not unserialize metadata from file %s",
                            name)
                        raise
                fname_selector = relname.rsplit('.')[0]
                # check consistency between file name and serialized
                # metadata
                fname_domain = fname_selector.split('/')[1]
                if fname_domain != desc.domain:
                    raise Exception(
                        'Filename domain %s does not match metadata '
                        'domain %s for descriptor %s' %
                        (fname_domain, desc.domain, fname_selector))
                fname_hash = fname_selector.rsplit('%', 1)[1]
                if fname_hash != desc.hash:
                    # NOTE(review): the message interpolates desc.domain
                    # where desc.hash seems intended - confirm.
                    raise Exception(
                        'Filename hash %s does not match metadata hash'
                        ' %s for descriptor %s' %
                        (fname_hash, desc.domain, fname_selector))
                self.register_meta(desc)
            elif name.endswith('.cfg') and relpath == '/':
                # Bus configuration
                # TODO periodically save this file. Use two file, overwrite
                # oldest.
                if elem == '_processed.cfg':
                    with open(name, 'rb') as fp:
                        # copy processed info to self.processed
                        p = store_serializer.load(fp)
                        for dom in p.keys():
                            for sel, val in p[dom].items():
                                self.processed[dom][sel] = val
            else:
                raise Exception(
                    'Invalid file name - %s has an invalid extension '
                    '(must be .value, .meta or .cfg)' % relname)
        else:
            raise Exception(
                'Invalid file type - %s is neither a regular file nor a '
                'directory' % name)
def get_children(self, agent_id, desc_domain, selector, recurse=True):
    """
    Ask the bus for the children of the descriptor identified by
    (desc_domain, selector) and return them as Descriptor objects
    bound to this bus.
    """
    serialized_children = self.iface.get_children(
        str(agent_id), desc_domain, selector, recurse)
    children = []
    for serialized in serialized_children:
        children.append(
            Descriptor.unserialize(serializer, str(serialized), bus=self))
    return children
def find_by_uuid(self, agent_id, desc_domain, uuid):
    """
    Ask the bus for all descriptors carrying the given uuid and return
    them as Descriptor objects bound to this bus.
    """
    serialized_list = self.iface.find_by_uuid(str(agent_id), desc_domain,
                                              uuid)
    descriptors = []
    for serialized in serialized_list:
        descriptors.append(
            Descriptor.unserialize(serializer, str(serialized), bus=self))
    return descriptors
def get_value(self, agent_id, desc_domain, selector):
    """
    Fetch a descriptor value from the bus. Returns None when the bus
    answers with an empty string (descriptor not found).
    """
    serialized = str(self.iface.get_value(str(agent_id), desc_domain,
                                          selector))
    if not serialized:
        return None
    return Descriptor.unserialize_value(serializer, serialized)
def get(self, agent_id, desc_domain, selector):
    """
    Fetch a descriptor from the bus. Returns None when the bus answers
    with an empty string (descriptor not found), otherwise a Descriptor
    object bound to this bus.
    """
    serialized = str(self.rpc_get(str(agent_id), desc_domain, selector))
    if not serialized:
        return None
    return Descriptor.unserialize(serializer, serialized, bus=self)
def run(self):
    """
    Build a graphviz "links" graph from all /link/ descriptors matching
    the configured selectors, and push it as a /graph/dot/linkgraph
    descriptor.

    Nodes are descriptor UUIDs; links of the same type that share a
    UUID are merged into one component node connected to every UUID it
    covers.
    """
    start = time.time()

    def ensure_link(x):
        # Normalize a selector so it is rooted under /link/
        if x.startswith("/link/"):
            return x
        if x.startswith("/"):
            return "/link" + x
        return "/link/" + x

    # All matching link selectors, flattened across configured selectors
    sels = chain(*[
        map(str, self.find(self.domain, ensure_link(s),
                           self.config['limit']))
        for s in self.config['selectors']])

    class Component(object):
        # A connected group of UUIDs sharing the same link type
        def __init__(self, linktype):
            self.linktype = linktype
            self.nodes = set()

        def add(self, v):
            self.nodes.add(v)

    links = {}
    labels = {}

    def nodenamer(fmt="node%i"):
        # Infinite generator of unique node names: node0, node1, ...
        i = 0
        while True:
            yield fmt % i
            i += 1

    for s in sels:
        link = self.get(self.domain, s)
        uu1, uu2 = link.uuid, link.value["otherUUID"]
        linktype = link.value["linktype"]
        labels[uu1] = link.label
        labels[uu2] = link.value["otherlabel"]
        # Reuse an existing component when either endpoint already
        # belongs to one for this link type
        component = links.get((uu1, linktype)) or links.get(
            (uu2, linktype))
        if not component:
            component = Component(linktype)
        component.add(uu1)
        component.add(uu2)
        links[uu1, linktype] = links[uu2, linktype] = component
    ltname = nodenamer()
    dot = ['graph "links" {']
    # One note-shaped node per descriptor UUID, hyperlinked to its
    # analysis page
    for n, l in labels.iteritems():
        dot.append(
            '\t"%s" [ label="%s", fontsize=10, fillcolor="#dddddd", style=filled, shape=note, href="/analysis/%s/%s"];'
            % (n, l, self.domain, n))
    dot.append("")
    # One oval node per component, colored by link type, connected to
    # every UUID in the component
    for comp in set(links.values()):
        compname = ltname.next()
        dot.append(
            '\t"%s" [ label="%s", fontsize=8, fillcolor="#%s", style=filled, shape=oval];'
            % (compname, comp.linktype,
               _color_scheme.get_as_hex(comp.linktype)))
        for elt in comp.nodes:
            dot.append('\t"%s" -- "%s" [ len=2 ];' % (compname, elt))
        dot.append("")
    dot.append("}")
    done = time.time()
    desc = Descriptor(label="linkgraph",
                      selector="/graph/dot/linkgraph",
                      value="\n".join(dot),
                      domain=self.domain,
                      agent=self._name_,
                      processing_time=done - start)
    self.push(desc)
def _discover(self, relpath):
    """
    Recursively add existing files to storage.

    :param relpath: starts and ends with a '/', relative to self.basepath

    Raises Exception on orphan .value/.meta files, on metadata that is
    inconsistent with its file name, and on unexpected file types.
    Also migrates a legacy _processed.cfg file into the sqlite3
    database when found at the storage root.
    """
    if relpath == '/agent_intstate/':
        # Ignore internal state of agents
        return
    path = self.basepath + relpath
    self.existing_paths.add(path)
    for elem in os.listdir(path):
        name = path + elem
        relname = relpath + elem
        if os.path.isdir(name):
            # Recurse into subdirectories
            self._discover(relname + '/')
        elif os.path.isfile(name):
            basename = name.rsplit('.', 1)[0]
            if name.endswith('.value'):
                # Serialized descriptor value
                if not os.path.isfile(basename + '.meta'):
                    raise Exception('Missing associated metadata for %s'
                                    % relname)
            elif name.endswith('.meta'):
                # Serialized descriptor metadata
                if not os.path.isfile(basename + '.value'):
                    raise Exception('Missing associated value for %s'
                                    % relname)
                with open(name, 'rb') as fp:
                    try:
                        desc = Descriptor.unserialize(
                            store_serializer, fp.read())
                    except:
                        log.error(
                            "Could not unserialize metadata from file %s",
                            name)
                        raise
                fname_selector = relname.rsplit('.')[0]
                # check consistency between file name and serialized
                # metadata
                fname_domain = fname_selector.split('/')[1]
                if fname_domain != desc.domain:
                    raise Exception(
                        'Filename domain %s does not match metadata '
                        'domain %s for descriptor %s' %
                        (fname_domain, desc.domain, fname_selector))
                fname_hash = fname_selector.rsplit('%', 1)[1]
                if fname_hash != desc.hash:
                    # NOTE(review): the message interpolates desc.domain
                    # where desc.hash seems intended - confirm.
                    raise Exception(
                        'Filename hash %s does not match metadata hash'
                        ' %s for descriptor %s' %
                        (fname_hash, desc.domain, fname_selector))
                self._register_meta(desc)
            elif relpath == '/' and elem == 'diskstorage.sqlite3':
                # The database file itself is not a descriptor
                continue
            elif relpath == '/' and elem == '_processed.cfg':
                # Former _processed.cfg storage file
                log.info(
                    "Importing data to sqlite3 database from "
                    "_processed.cfg. You should delete this file after it "
                    "has been imported.")
                with open(name, 'rb') as fp:
                    p = store_serializer.load(fp)
                    for dom in p.keys():
                        for sel, valset in p[dom].items():
                            for agent_name, config_txt in valset:
                                self.db.add_processed(
                                    dom, sel, agent_name, config_txt)
            else:
                raise Exception(
                    'Invalid file name - %s has an invalid extension '
                    '(must be .value, .meta or .cfg)' % relname)
        else:
            raise Exception(
                'Invalid file type - %s is neither a regular file nor a '
                'directory' % name)