Beispiel #1
0
 def find_by_value(self, agent_id, desc_domain, selector_prefix,
                   value_regex):
     """
     Query the bus interface for descriptors by value and unserialize them.

     The lookup itself is delegated to ``self.iface.find_by_value``; every
     item it returns is converted to ``str`` and unserialized into a
     descriptor bound to this bus (``bus=self``).

     :param agent_id: identifier of the requesting agent, sent as ``str``
     :param desc_domain: forwarded unchanged to the bus interface
     :param selector_prefix: forwarded unchanged to the bus interface
     :param value_regex: forwarded unchanged to the bus interface
     :return: list of unserialized descriptors, in the order received
     """
     serialized_items = self.iface.find_by_value(
         str(agent_id), desc_domain, selector_prefix, value_regex)
     descriptors = []
     for item in serialized_items:
         descriptors.append(
             Descriptor.unserialize(serializer, str(item), bus=self))
     return descriptors
Beispiel #2
0
    def push(self, agent_id, serialized_descriptor):
        """
        Unserialize a descriptor received from an agent and add it to the
        store.

        :param agent_id: identifier of the pushing agent; used for logging
            and passed on to ``self.new_descriptor``
        :param serialized_descriptor: serialized descriptor, converted to
            ``str`` before being unserialized
        :return: True if ``self.store.add`` accepted the descriptor; False if
            it was refused by the processing depth check or reported as
            already seen by the store
        """
        descriptor = Descriptor.unserialize(serializer,
                                            str(serialized_descriptor))
        desc_domain = str(descriptor.domain)
        uuid = str(descriptor.uuid)
        selector = str(descriptor.selector)
        # ensure processing terminates
        if not format_check.processing_depth(self.store, descriptor):
            # Arguments follow the order of the placeholders: the descriptor
            # (domain:selector) first, then the agent it was received from.
            log.warning(
                "Refusing descriptor %s:%s received from %s: loop or "
                ">2 ancestors having the same descriptor", desc_domain,
                selector, agent_id)
            return False

        if self.store.add(descriptor):
            self.descriptor_count += 1
            log.debug("PUSH: %s => %s:%s", agent_id, desc_domain, selector)
            if not self.exiting:
                # Notifications are skipped once the bus is exiting
                self.new_descriptor(agent_id, desc_domain, uuid, selector)
                # useful in case all agents are in idle/interactive mode
                self._check_idle()
            return True
        else:
            log.debug("PUSH: %s already seen => %s:%s", agent_id, desc_domain,
                      selector)
            return False
Beispiel #3
0
 def find_by_selector(self, agent_id, desc_domain, selector_prefix,
                      limit=0, offset=0):
     """
     Query the bus interface for descriptors by selector prefix and
     unserialize them.

     The lookup is delegated to ``self.iface.find_by_selector``; every item
     it returns is converted to ``str`` and unserialized into a descriptor
     bound to this bus (``bus=self``).

     :param agent_id: identifier of the requesting agent, sent as ``str``
     :param desc_domain: forwarded unchanged to the bus interface
     :param selector_prefix: forwarded unchanged to the bus interface
     :param limit: forwarded unchanged to the bus interface (default 0)
     :param offset: forwarded unchanged to the bus interface (default 0)
     :return: list of unserialized descriptors, in the order received
     """
     serialized_items = self.iface.find_by_selector(
         str(agent_id), desc_domain, selector_prefix, limit, offset)
     descriptors = []
     for item in serialized_items:
         descriptors.append(
             Descriptor.unserialize(serializer, str(item), bus=self))
     return descriptors
Beispiel #4
0
    def get_descriptor(self, domain, selector):
        """
        Returns descriptor metadata, None if descriptor was not found.

        :param domain: domain the descriptor is looked up in
        :param selector: descriptor selector; it is first resolved through
            ``self._version_lookup``
        :return: the descriptor unserialized from the ``.meta`` file, or None
            if the selector cannot be resolved or the file does not exist
        """
        selector = self._version_lookup(domain, selector)
        if not selector:
            return None

        fullpath = self.pathFromSelector(domain, selector) + ".meta"
        if not os.path.isfile(fullpath):
            return None
        # Close the file deterministically rather than leaving the handle to
        # the garbage collector.
        with open(fullpath, "rb") as fp:
            serialized = fp.read()
        return Descriptor.unserialize(store_serializer, serialized)
Beispiel #5
0
    def get_value(self, domain, selector):
        """
        Returns descriptor value, None if descriptor was not found.

        :param domain: domain the descriptor is looked up in
        :param selector: descriptor selector; it is first resolved through
            ``self._version_lookup``
        :return: the value unserialized from the ``.value`` file, or None if
            the selector cannot be resolved or the file does not exist
        :raises Exception: any error raised while reading or unserializing
            the file is logged, then re-raised unchanged
        """
        selector = self._version_lookup(domain, selector)
        if not selector:
            return None

        fullpath = self.pathFromSelector(domain, selector) + ".value"
        if not os.path.isfile(fullpath):
            return None
        try:
            # The context manager guarantees the handle is released even if
            # unserialization fails.
            with open(fullpath, "rb") as fp:
                value = Descriptor.unserialize_value(store_serializer,
                                                     fp.read())
        except Exception:
            log.error("Could not unserialize value from file %s", fullpath)
            raise
        return value
Beispiel #6
0
 def push(self, agent_id, serialized_descriptor):
     """
     Unserialize a descriptor received from an agent and add it to the
     store.

     :param agent_id: identifier of the pushing agent; used for logging and
         passed on to ``self.new_descriptor``
     :param serialized_descriptor: serialized descriptor, converted to
         ``str`` before being unserialized
     :return: True if ``self.store.add`` accepted the descriptor, False if
         the store reported it as already seen
     """
     serialized = str(serialized_descriptor)
     descriptor = Descriptor.unserialize(serializer, serialized)
     desc_domain = str(descriptor.domain)
     uuid = str(descriptor.uuid)
     selector = str(descriptor.selector)

     if not self.store.add(descriptor):
         log.debug("PUSH: %s already seen => %s:%s", agent_id, desc_domain,
                   selector)
         return False

     self.descriptor_count += 1
     log.debug("PUSH: %s => %s:%s", agent_id, desc_domain, selector)
     if self.exiting:
         # No notification is sent once the bus is exiting
         return True
     self.new_descriptor(agent_id, desc_domain, uuid, selector)
     # useful in case all agents are in idle/interactive mode
     self.check_idle()
     return True
Beispiel #7
0
 def find_by_value(self, domain, selector_prefix, value_regex):
     """
     Returns the descriptors of *domain* stored under *selector_prefix*
     whose unserialized value matches *value_regex*.

     Every known directory (``self.existing_paths``) starting with
     ``basepath/domain + selector_prefix`` is scanned for ``*.value`` files;
     each value is unserialized and tested with ``re.match``.

     :param domain: domain to search
     :param selector_prefix: selector prefix; concatenated directly after
         the domain, so presumably expected to start with '/' - TODO confirm
     :param value_regex: pattern handed to ``re.match``
     :return: list of results of ``self.get_descriptor`` for each match
     """
     result = []
     suffix = '.value'
     # File paths to explore
     pathprefix = self.basepath + '/' + domain + selector_prefix
     paths = [path for path in self.existing_paths
              if path.startswith(pathprefix)]
     for path in paths:
         # Length of "basepath/domain", stripped to recover the selector
         selector_start = len(self.basepath) + len(domain) + 1
         # open and run re.match() on every file matching *.value
         for name in os.listdir(path):
             fullname = path + name
             if not (os.path.isfile(fullname) and name.endswith(suffix)):
                 continue
             # Read through a context manager so the handle is closed
             # before moving on to the next file.
             with open(fullname, 'rb') as fp:
                 serialized = fp.read()
             contents = Descriptor.unserialize_value(store_serializer,
                                                     serialized)
             if re.match(value_regex, contents):
                 # Strip only the ".value" extension; splitting on the
                 # first dot would truncate names containing other dots.
                 selector = path[selector_start:] + name[:-len(suffix)]
                 result.append(self.get_descriptor(domain, selector))
     return result
Beispiel #8
0
    def process(self, descriptor, sender_id):
        """
        Unpack a compressed or archived descriptor value and push one new
        descriptor per extracted file.

        Handlers are selected from substrings of ``descriptor.selector``:
        ``/compressed/bzip2``, ``/compressed/gzip``, ``/archive/tar``,
        ``/archive/zip`` and ``/archive/cab`` (the latter only when
        ``self.cabextract`` is set). Each extracted file is pushed and linked
        to the original descriptor with link type "unarchived".

        :param descriptor: descriptor whose ``value`` holds the raw archive
        :param sender_id: not used by this method
        """
        import tarfile
        selector = descriptor.selector

        #: List of (unarchived file name, descriptor label, unarchived file
        #: contents)
        unarchived = []

        def do_untar(archive, mode, archive_label=descriptor.label,
                     unarchived=unarchived):
            # Collect every non-empty regular file of the tar archive
            tar = tarfile.open(fileobj=StringIO(archive), mode=mode)
            try:
                for finfo in tar.getmembers():
                    if finfo.isfile() and finfo.size > 0:
                        fname = os.path.basename(finfo.name)
                        unarchived.append((fname, archive_label + ":" + fname,
                                           tar.extractfile(finfo).read()))
            finally:
                tar.close()

        # Compressed files
        if "/compressed/bzip2" in selector:
            # Try and extract - might be a .tar.bz2
            try:
                do_untar(descriptor.value, "r:bz2")
            except tarfile.TarError:
                # Probably not a compressed tar file
                import bz2
                data = bz2.decompress(descriptor.value)
                fname = descriptor.label
                if fname.endswith('.bz2'):
                    fname = fname[:-4]
                else:
                    fname = "bunzipped %s" % fname
                unarchived.append((fname, fname, data))
        if "/compressed/gzip" in selector:
            # Try and extract - might be a .tar.gz
            try:
                do_untar(descriptor.value, "r:gz")
            except tarfile.TarError:
                # Probably not a compressed tar file
                from gzip import GzipFile
                data = GzipFile(fileobj=StringIO(descriptor.value),
                                mode='rb').read()
                fname = descriptor.label
                if fname.endswith('.gz'):
                    fname = fname[:-3]
                else:
                    fname = "gunzipped %s" % fname
                unarchived.append((fname, fname, data))

        # Archive files
        if "/archive/tar" in selector:
            # tarfile.open() rejects mode=None (it tests '":" in mode'), so
            # plain read mode is requested explicitly.
            do_untar(descriptor.value, mode="r")
        if "/archive/zip" in selector:
            from zipfile import ZipFile
            fzip = ZipFile(file=StringIO(descriptor.value))
            try:
                for zfileinfo in fzip.filelist:
                    fname = zfileinfo.filename
                    zfile = None
                    # Try every known password until one opens the member
                    for pwd in self.passwords:
                        try:
                            zfile = fzip.open(fname, pwd=pwd)
                            break
                        except RuntimeError:
                            # incorrect password
                            continue
                    if zfile:
                        unarchived.append(
                            (fname, descriptor.label + ':' + fname,
                             zfile.read()))
                    else:
                        self.log.warning(
                            "Could not extract %s from %s "
                            "(incorrect password)", fname, descriptor.label)
            except RuntimeError as e:
                self.log.error(e)
            finally:
                fzip.close()

        if "/archive/cab" in selector and self.cabextract:
            # Create the directory before entering the try block so that the
            # cleanup in "finally" never refers to an unbound name.
            tmpdir = mkdtemp("rebus-cabextract")
            try:
                with NamedTemporaryFile(prefix="rebus-cab") as cabfile:
                    cabfile.write(descriptor.value)
                    cabfile.flush()
                    try:
                        subprocess.check_output([self.cabextract, '-d', tmpdir,
                                                 cabfile.name],
                                                stderr=subprocess.STDOUT)
                    except subprocess.CalledProcessError as e:
                        self.log.error("cabextract exited with status %d",
                                       e.returncode)
                        self.log.error(e.output)
                # Walk the tree: extracted files may sit in sub-directories,
                # which a flat listdir() + open() cannot read.
                for dirpath, _dirnames, filenames in os.walk(tmpdir):
                    for fname in filenames:
                        filepathname = os.path.join(dirpath, fname)
                        with open(filepathname, 'rb') as extracted:
                            contents = extracted.read()
                        unarchived.append(
                            (fname, descriptor.label + ':' + fname, contents))
            finally:
                shutil.rmtree(tmpdir)

        for fname, desclabel, fcontents in unarchived:
            new_selector = guess_selector(buf=fcontents, label=desclabel)
            desc = Descriptor(desclabel, new_selector, fcontents,
                              descriptor.domain, agent=self._name_)
            self.push(desc)
            self.declare_link(
                descriptor, desc, "unarchived", "\"%s\" has been unarchived "
                "from \"%s\"" % (fname, descriptor.label))
0
    def discover(self, relpath):
        """
        Recursively add existing files to storage.

        self.processedlock must be acquired prior to calling this function

        Each ``.meta`` file must have a sibling ``.value`` file and vice
        versa. Metadata is unserialized, checked against the domain and hash
        encoded in the file name, then handed to ``self.register_meta``.
        ``/_processed.cfg`` is loaded into ``self.processed``.

        :param relpath: starts and ends with a '/', relative to self.basepath
        :raises Exception: on a file without its counterpart, on a mismatch
            between file name and metadata, on an unexpected file extension,
            or on an entry that is neither a regular file nor a directory
        """
        if relpath == '/agent_intstate/':
            # Ignore internal state of agents
            return

        path = self.basepath + relpath
        self.existing_paths.add(path)

        for elem in os.listdir(path):
            name = path + elem
            relname = relpath + elem
            if os.path.isdir(name):
                self.discover(relname + '/')
            elif os.path.isfile(name):
                basename = name.rsplit('.', 1)[0]
                if name.endswith('.value'):
                    # Serialized descriptor value
                    if not os.path.isfile(basename + '.meta'):
                        raise Exception(
                            'Missing associated metadata for %s' % relname)
                elif name.endswith('.meta'):
                    # Serialized descriptor metadata
                    if not os.path.isfile(basename + '.value'):
                        raise Exception(
                            'Missing associated value for %s' % relname)
                    with open(name, 'rb') as fp:
                        try:
                            desc = Descriptor.unserialize(store_serializer,
                                                          fp.read())
                        except Exception:
                            log.error(
                                "Could not unserialize metadata from file %s",
                                name)
                            raise
                        # Strip only the extension: splitting on every dot
                        # would truncate paths containing other dots.
                        fname_selector = relname.rsplit('.', 1)[0]
                        # check consistency between file name and serialized
                        # metadata
                        fname_domain = fname_selector.split('/')[1]
                        if fname_domain != desc.domain:
                            raise Exception(
                                'Filename domain %s does not match metadata '
                                'domain %s for descriptor %s' %
                                (fname_domain, desc.domain, fname_selector))
                        # The hash is whatever follows the last '%'
                        fname_hash = fname_selector.rsplit('%', 1)[1]
                        if fname_hash != desc.hash:
                            # Report the metadata hash (not the domain) so
                            # the message shows the two compared values.
                            raise Exception(
                                'Filename hash %s does not match metadata hash'
                                ' %s for descriptor %s' %
                                (fname_hash, desc.hash, fname_selector))

                        self.register_meta(desc)
                elif name.endswith('.cfg') and relpath == '/':
                    # Bus configuration
                    # TODO periodically save this file. Use two file, overwrite
                    # oldest.
                    if elem == '_processed.cfg':
                        with open(name, 'rb') as fp:
                            # copy processed info to self.processed
                            p = store_serializer.load(fp)
                            for dom in p.keys():
                                for sel, val in p[dom].items():
                                    self.processed[dom][sel] = val
                else:
                    raise Exception(
                        'Invalid file name - %s has an invalid extension '
                        '(must be .value, .meta or .cfg)' % relname)
            else:
                raise Exception(
                    'Invalid file type - %s is neither a regular file nor a '
                    'directory' % name)
Beispiel #10
0
 def get_children(self, agent_id, desc_domain, selector, recurse=True):
     """
     Ask the bus interface for the children of a descriptor and unserialize
     them.

     The lookup is delegated to ``self.iface.get_children``; every item it
     returns is converted to ``str`` and unserialized into a descriptor
     bound to this bus (``bus=self``).

     :param agent_id: identifier of the requesting agent, sent as ``str``
     :param desc_domain: forwarded unchanged to the bus interface
     :param selector: forwarded unchanged to the bus interface
     :param recurse: forwarded unchanged to the bus interface (default True)
     :return: list of unserialized descriptors, in the order received
     """
     serialized_children = self.iface.get_children(
         str(agent_id), desc_domain, selector, recurse)
     children = []
     for child in serialized_children:
         children.append(
             Descriptor.unserialize(serializer, str(child), bus=self))
     return children
Beispiel #11
0
 def find_by_uuid(self, agent_id, desc_domain, uuid):
     """
     Query the bus interface for descriptors by uuid and unserialize them.

     The lookup is delegated to ``self.iface.find_by_uuid``; every item it
     returns is converted to ``str`` and unserialized into a descriptor
     bound to this bus (``bus=self``).

     :param agent_id: identifier of the requesting agent, sent as ``str``
     :param desc_domain: forwarded unchanged to the bus interface
     :param uuid: forwarded unchanged to the bus interface
     :return: list of unserialized descriptors, in the order received
     """
     serialized_items = self.iface.find_by_uuid(
         str(agent_id), desc_domain, uuid)
     descriptors = []
     for item in serialized_items:
         descriptors.append(
             Descriptor.unserialize(serializer, str(item), bus=self))
     return descriptors
Beispiel #12
0
 def get_value(self, agent_id, desc_domain, selector):
     """
     Fetch a descriptor value through the bus interface.

     :param agent_id: identifier of the requesting agent, sent as ``str``
     :param desc_domain: forwarded unchanged to the bus interface
     :param selector: forwarded unchanged to the bus interface
     :return: the unserialized value, or None when the interface answers
         with an empty string
     """
     raw_reply = self.iface.get_value(str(agent_id), desc_domain, selector)
     serialized_value = str(raw_reply)
     if serialized_value != "":
         return Descriptor.unserialize_value(serializer, serialized_value)
     # An empty reply is reported as None
     return None
Beispiel #13
0
 def get(self, agent_id, desc_domain, selector):
     """
     Fetch a descriptor through ``self.rpc_get`` and unserialize it.

     :param agent_id: identifier of the requesting agent, sent as ``str``
     :param desc_domain: forwarded unchanged to ``self.rpc_get``
     :param selector: forwarded unchanged to ``self.rpc_get``
     :return: the unserialized descriptor bound to this bus (``bus=self``),
         or None when the reply is an empty string
     """
     raw_reply = self.rpc_get(str(agent_id), desc_domain, selector)
     serialized_descriptor = str(raw_reply)
     if serialized_descriptor != "":
         return Descriptor.unserialize(serializer, serialized_descriptor,
                                       bus=self)
     # An empty reply is reported as None
     return None
Beispiel #14
0
    def run(self):
        """
        Build a Graphviz "dot" graph of the link descriptors found in
        ``self.domain`` and push it as a ``/graph/dot/linkgraph`` descriptor.

        Selectors from ``self.config['selectors']`` are prefixed with
        ``/link`` when needed and looked up with ``self.find`` (limited to
        ``self.config['limit']`` results each). Linked descriptors are
        grouped per link type into components; each component becomes one
        oval hub node connected to all of its members.
        """

        start = time.time()

        def ensure_link(x):
            # Normalise a selector so that it lives under "/link/"
            if x.startswith("/link/"):
                return x
            if x.startswith("/"):
                return "/link" + x
            return "/link/" + x

        def dot_escape(text):
            # Labels are written inside double-quoted DOT strings: escape
            # backslashes and quotes so that a label cannot end the string
            # early. Non-string values are left for "%s" to format.
            if not hasattr(text, "replace"):
                return text
            return text.replace('\\', '\\\\').replace('"', '\\"')

        sels = chain(*[
            map(str,
                self.find(self.domain, ensure_link(s), self.config['limit']))
            for s in self.config['selectors']
        ])

        class Component(object):
            # Set of descriptor UUIDs connected by links of a single type
            def __init__(self, linktype):
                self.linktype = linktype
                self.nodes = set()

            def add(self, v):
                self.nodes.add(v)

        #: (uuid, linktype) -> Component the uuid belongs to
        links = {}
        #: uuid -> label displayed for that node
        labels = {}

        def nodenamer(fmt="node%i"):
            # Endless supply of unique names for the component hub nodes
            i = 0
            while True:
                yield fmt % i
                i += 1

        for s in sels:
            link = self.get(self.domain, s)
            uu1, uu2 = link.uuid, link.value["otherUUID"]
            linktype = link.value["linktype"]
            labels[uu1] = link.label
            labels[uu2] = link.value["otherlabel"]

            comp1 = links.get((uu1, linktype))
            comp2 = links.get((uu2, linktype))
            if comp1 is None and comp2 is None:
                component = Component(linktype)
            elif comp1 is None:
                component = comp2
            elif comp2 is None or comp1 is comp2:
                component = comp1
            else:
                # Both ends already belong to distinct components: merge the
                # second into the first and re-point all of its members, so
                # that no stale component keeps being drawn.
                component = comp1
                for member in comp2.nodes:
                    component.add(member)
                    links[member, linktype] = component
            component.add(uu1)
            component.add(uu2)
            links[uu1, linktype] = links[uu2, linktype] = component

        ltname = nodenamer()

        dot = ['graph "links" {']

        # One note-shaped node per descriptor
        for n, l in labels.items():
            dot.append(
                '\t"%s" [ label="%s", fontsize=10, fillcolor="#dddddd", '
                'style=filled, shape=note, href="/analysis/%s/%s"];'
                % (n, dot_escape(l), self.domain, n))

        dot.append("")

        # One oval hub per component, connected to each of its members
        for comp in set(links.values()):
            compname = next(ltname)
            dot.append(
                '\t"%s" [ label="%s", fontsize=8, fillcolor="#%s", '
                'style=filled, shape=oval];'
                % (compname, dot_escape(comp.linktype),
                   _color_scheme.get_as_hex(comp.linktype)))
            for elt in comp.nodes:
                dot.append('\t"%s" -- "%s" [ len=2 ];' % (compname, elt))
            dot.append("")

        dot.append("}")
        done = time.time()

        desc = Descriptor(label="linkgraph",
                          selector="/graph/dot/linkgraph",
                          value="\n".join(dot),
                          domain=self.domain,
                          agent=self._name_,
                          processing_time=done - start)

        self.push(desc)
Beispiel #15
0
    def _discover(self, relpath):
        """
        Recursively add existing files to storage.

        Each ``.meta`` file must have a sibling ``.value`` file and vice
        versa. Metadata is unserialized, checked against the domain and hash
        encoded in the file name, then handed to ``self._register_meta``.
        ``/diskstorage.sqlite3`` is skipped and the contents of a former
        ``/_processed.cfg`` are imported through ``self.db.add_processed``.

        :param relpath: starts and ends with a '/', relative to self.basepath
        :raises Exception: on a file without its counterpart, on a mismatch
            between file name and metadata, on an unexpected file name, or
            on an entry that is neither a regular file nor a directory
        """
        if relpath == '/agent_intstate/':
            # Ignore internal state of agents
            return

        path = self.basepath + relpath
        self.existing_paths.add(path)

        for elem in os.listdir(path):
            name = path + elem
            relname = relpath + elem
            if os.path.isdir(name):
                self._discover(relname + '/')
            elif os.path.isfile(name):
                basename = name.rsplit('.', 1)[0]
                if name.endswith('.value'):
                    # Serialized descriptor value
                    if not os.path.isfile(basename + '.meta'):
                        raise Exception('Missing associated metadata for %s' %
                                        relname)
                elif name.endswith('.meta'):
                    # Serialized descriptor metadata
                    if not os.path.isfile(basename + '.value'):
                        raise Exception('Missing associated value for %s' %
                                        relname)
                    with open(name, 'rb') as fp:
                        try:
                            desc = Descriptor.unserialize(
                                store_serializer, fp.read())
                        except Exception:
                            log.error(
                                "Could not unserialize metadata from file %s",
                                name)
                            raise
                        # Strip only the extension: splitting on every dot
                        # would truncate paths containing other dots.
                        fname_selector = relname.rsplit('.', 1)[0]
                        # check consistency between file name and serialized
                        # metadata
                        fname_domain = fname_selector.split('/')[1]
                        if fname_domain != desc.domain:
                            raise Exception(
                                'Filename domain %s does not match metadata '
                                'domain %s for descriptor %s' %
                                (fname_domain, desc.domain, fname_selector))
                        # The hash is whatever follows the last '%'
                        fname_hash = fname_selector.rsplit('%', 1)[1]
                        if fname_hash != desc.hash:
                            # Report the metadata hash (not the domain) so
                            # the message shows the two compared values.
                            raise Exception(
                                'Filename hash %s does not match metadata hash'
                                ' %s for descriptor %s' %
                                (fname_hash, desc.hash, fname_selector))

                        self._register_meta(desc)
                elif relpath == '/' and elem == 'diskstorage.sqlite3':
                    # The sqlite3 database file is not a descriptor
                    continue
                elif relpath == '/' and elem == '_processed.cfg':
                    # Former _processed.cfg storage file
                    log.info(
                        "Importing data to sqlite3 database from "
                        "_processed.cfg. You should delete this file after it "
                        "has been imported.")
                    with open(name, 'rb') as fp:
                        p = store_serializer.load(fp)
                        for dom in p.keys():
                            for sel, valset in p[dom].items():
                                for agent_name, config_txt in valset:
                                    self.db.add_processed(
                                        dom, sel, agent_name, config_txt)
                else:
                    raise Exception(
                        'Invalid file name - %s has an invalid extension '
                        '(must be .value, .meta or .cfg)' % relname)
            else:
                raise Exception(
                    'Invalid file type - %s is neither a regular file nor a '
                    'directory' % name)