def worker(progname, directory, sensor=None):
    """This function is the main loop, creating the processes when
    needed and feeding them with the data from the files.

    """
    utils.makedirs(os.path.join(directory, "current"))
    procs = {}
    while not WANTDOWN:
        # We get the next file to handle
        fname = getnextfiles(directory, sensor=sensor, count=1)
        # ... if we don't, we sleep for a while
        if not fname:
            utils.LOGGER.debug("Sleeping for %d s", SLEEPTIME)
            time.sleep(SLEEPTIME)
            continue
        fname = fname[0]
        fname_sensor = fname.groupdict()['sensor']
        if fname_sensor in procs:
            proc = procs[fname_sensor]
        else:
            proc = create_process(progname, fname_sensor)
            procs[fname_sensor] = proc
        fname = fname.group()
        # Our "lock system": if we can move the file, it's ours
        try:
            shutil.move(os.path.join(directory, fname),
                        os.path.join(directory, "current"))
        except shutil.Error:
            continue
        if config.DEBUG:
            utils.LOGGER.debug("Handling %s", fname)
        fname = os.path.join(directory, "current", fname)
        fdesc = utils.open_file(fname)
        handled_ok = True
        for line in fdesc:
            try:
                proc.stdin.write(line)
            except ValueError:
                utils.LOGGER.warning(
                    "Error while handling line %r. "
                    "Trying again", line)
                proc = create_process(progname, fname_sensor)
                procs[fname_sensor] = proc
                # Second (and last) try
                try:
                    proc.stdin.write(line)
                    utils.LOGGER.warning(" ... OK")
                except ValueError:
                    handled_ok = False
                    utils.LOGGER.warning(" ... KO")
        fdesc.close()
        if handled_ok:
            os.unlink(fname)
            utils.LOGGER.debug(' ... OK')
        else:
            utils.LOGGER.debug(' ... KO')
    # SHUTDOWN
    for sensor in procs:
        procs[sensor].stdin.close()
        procs[sensor].wait()
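# The worker above uses a "move the file, and it is yours" convention as its
# lock: a file is claimed by moving it into the "current" directory, and a
# failed move means another worker got it first.  The following standalone
# sketch only illustrates that claim-by-move pattern; the names (claim_file,
# workdir) are illustrative and not part of the module above.
import os
import shutil


def claim_file(fname, workdir="current"):
    """Try to claim `fname` by moving it into `workdir`.

    Returns the new path on success, None if another worker claimed it first.
    """
    os.makedirs(workdir, exist_ok=True)
    try:
        shutil.move(fname, workdir)
    except (shutil.Error, FileNotFoundError):
        # The file is already gone (or already present in workdir): it is
        # not ours to process.
        return None
    return os.path.join(workdir, os.path.basename(fname))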
def store_scan_json(self, fname, filehash=None, needports=False,
                    needopenports=False, categories=None, source=None,
                    gettoarchive=None, add_addr_infos=True, force_info=False,
                    merge=False):
    """This method parses a JSON scan result as exported using
    `scancli --json > file`, displays the parsing result, and
    returns True if everything went fine, False otherwise.

    In backend-specific subclasses, this method stores the result
    instead of displaying it, thanks to the `store_host` method.

    """
    if categories is None:
        categories = []
    need_scan_doc = False
    with utils.open_file(fname) as fdesc:
        for line in fdesc:
            host = self.json2dbrec(json.loads(line))
            for fname in ["_id"]:
                if fname in host:
                    del host[fname]
            host["scanid"] = filehash
            if categories:
                host["categories"] = categories
            if source is not None:
                host["source"] = source
            if add_addr_infos and self.globaldb is not None and (
                    force_info or 'infos' not in host or not host['infos']):
                host['infos'] = {}
                for func in [
                        self.globaldb.data.country_byip,
                        self.globaldb.data.as_byip,
                        self.globaldb.data.location_byip
                ]:
                    data = func(host['addr'])
                    if data:
                        host['infos'].update(data)
            if ((not needports or 'ports' in host) and
                    (not needopenports or
                     host.get('openports', {}).get('count'))):
                # We are about to insert data based on this file,
                # so we want to save the scan document
                need_scan_doc = True
                if merge and self.merge_host(host):
                    pass
                else:
                    self.archive_from_func(host, gettoarchive)
                    self.store_host(host)
    if need_scan_doc:
        self.store_scan_doc({'_id': filehash})
    return True
def gunzip(fname: str) -> None:
    if not fname.endswith(".gz"):
        raise Exception('filename should end with ".gz"')
    assert config.GEOIP_PATH is not None
    with utils.open_file(os.path.join(config.GEOIP_PATH, fname)) as inp, open(
            os.path.join(config.GEOIP_PATH, fname[:-3]), "wb") as outp:
        copyfileobj(inp, outp)
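# gunzip() above relies on IVRE's utils.open_file(), which decompresses
# transparently, and on shutil.copyfileobj() to avoid loading the whole file
# in memory.  As a point of comparison, here is a standalone sketch of the
# same step using only the standard library, assuming a plain ".gz" input;
# the function name gunzip_to is illustrative, not an IVRE helper.
import gzip
import shutil


def gunzip_to(src, dst):
    """Decompress the gzip file `src` into `dst` by streaming chunks."""
    with gzip.open(src, "rb") as inp, open(dst, "wb") as outp:
        shutil.copyfileobj(inp, outp)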
def worker(progname, directory, sensor=None):
    """This function is the main loop, creating the processes when
    needed and feeding them with the data from the files.

    """
    utils.makedirs(os.path.join(directory, "current"))
    procs = {}
    while not WANTDOWN:
        # We get the next file to handle
        fname = getnextfiles(directory, sensor=sensor, count=1)
        # ... if we don't, we sleep for a while
        if not fname:
            utils.LOGGER.debug("Sleeping for %d s", SLEEPTIME)
            time.sleep(SLEEPTIME)
            continue
        fname = fname[0]
        fname_sensor = fname.groupdict()['sensor']
        if fname_sensor in procs:
            proc = procs[fname_sensor]
        else:
            proc = create_process(progname, fname_sensor)
            procs[fname_sensor] = proc
        fname = fname.group()
        # Our "lock system": if we can move the file, it's ours
        try:
            shutil.move(os.path.join(directory, fname),
                        os.path.join(directory, "current"))
        except shutil.Error:
            continue
        if config.DEBUG:
            utils.LOGGER.debug("Handling %s", fname)
        fname = os.path.join(directory, "current", fname)
        fdesc = utils.open_file(fname)
        handled_ok = True
        for line in fdesc:
            try:
                proc.stdin.write(line)
            except ValueError:
                utils.LOGGER.warning("Error while handling line %r. "
                                     "Trying again", line)
                proc = create_process(progname, fname_sensor)
                procs[fname_sensor] = proc
                # Second (and last) try
                try:
                    proc.stdin.write(line)
                    utils.LOGGER.warning(" ... OK")
                except ValueError:
                    handled_ok = False
                    utils.LOGGER.warning(" ... KO")
        fdesc.close()
        if handled_ok:
            os.unlink(fname)
            utils.LOGGER.debug(' ... OK')
        else:
            utils.LOGGER.debug(' ... KO')
    # SHUTDOWN
    for sensor in procs:
        procs[sensor].stdin.close()
        procs[sensor].wait()
def gunzip(fname, clean=True):
    if not fname.endswith('.gz'):
        raise Exception('filename should end with ".gz"')
    with utils.open_file(os.path.join(config.GEOIP_PATH, fname)) as inp:
        with open(os.path.join(config.GEOIP_PATH, fname[:-3]), "wb") as outp:
            outp.write(inp.read())
    if clean:
        os.unlink(os.path.join(config.GEOIP_PATH, fname))
def gunzip(fname: str, clean: bool = True) -> None:
    if not fname.endswith(".gz"):
        raise Exception('filename should end with ".gz"')
    assert config.GEOIP_PATH is not None
    with utils.open_file(os.path.join(config.GEOIP_PATH, fname)) as inp:
        with open(os.path.join(config.GEOIP_PATH, fname[:-3]), "wb") as outp:
            outp.write(inp.read())
    if clean:
        os.unlink(os.path.join(config.GEOIP_PATH, fname))
def store_scan_json(
    self,
    fname,
    filehash=None,
    needports=False,
    categories=None,
    source=None,
    gettoarchive=None,
    add_addr_infos=True,
    force_info=False,
    merge=False,
):
    """This method parses a JSON scan result as exported using
    `scancli --json > file`, displays the parsing result, and
    returns True if everything went fine, False otherwise.

    In backend-specific subclasses, this method stores the result
    instead of displaying it, thanks to the `store_host` method.

    """
    if categories is None:
        categories = []
    with utils.open_file(fname) as fdesc:
        for line in fdesc:
            host = self.json2dbrec(json.loads(line))
            for fname in ["_id"]:
                if fname in host:
                    del host[fname]
            host["scanid"] = filehash
            if categories:
                host["categories"] = categories
            if source is not None:
                host["source"] = source
            if (
                add_addr_infos
                and self.globaldb is not None
                and (force_info or "infos" not in host or not host["infos"])
            ):
                host["infos"] = {}
                for func in [
                    self.globaldb.data.country_byip,
                    self.globaldb.data.as_byip,
                    self.globaldb.data.location_byip,
                ]:
                    data = func(host["addr"])
                    if data:
                        host["infos"].update(data)
            if not needports or "ports" in host:
                if merge and self.merge_host(host):
                    return True
                self.archive_from_func(host, gettoarchive)
                self.store_host(host)
    self.store_scan_doc({"_id": filehash})
    return True
def store_scan_json(self, fname, filehash=None, needports=False,
                    needopenports=False, categories=None, source=None,
                    gettoarchive=None, add_addr_infos=True, force_info=False,
                    merge=False):
    """This method parses a JSON scan result as exported using
    `ivre scancli --json > file`, displays the parsing result, and
    returns True if everything went fine, False otherwise.

    In backend-specific subclasses, this method stores the result
    instead of displaying it, thanks to the `store_host` method.

    """
    if categories is None:
        categories = []
    need_scan_doc = False
    with utils.open_file(fname) as fdesc:
        for line in fdesc:
            host = self.json2dbrec(json.loads(line))
            for fname in ["_id"]:
                if fname in host:
                    del host[fname]
            host["scanid"] = filehash
            if categories:
                host["categories"] = categories
            if source is not None:
                host["source"] = source
            if add_addr_infos and self.globaldb is not None and (
                    force_info or 'infos' not in host or not host['infos']
            ):
                host['infos'] = {}
                for func in [self.globaldb.data.country_byip,
                             self.globaldb.data.as_byip,
                             self.globaldb.data.location_byip]:
                    data = func(host['addr'])
                    if data:
                        host['infos'].update(data)
            if ((not needports or 'ports' in host) and
                    (not needopenports or
                     host.get('openports', {}).get('count'))):
                # We are about to insert data based on this file,
                # so we want to save the scan document
                need_scan_doc = True
                if merge and self.merge_host(host):
                    pass
                else:
                    self.archive_from_func(host, gettoarchive)
                    self.store_host(host)
    if need_scan_doc:
        self.store_scan_doc({'_id': filehash})
    return True
def sync(self, agent):
    """This function should only be called from the `agent.sync()`
    method. It stores the results of terminated scans according to
    the target status.

    """
    for remfname in glob.glob(
        os.path.join(
            agent.get_local_path("remoteoutput"), self.visiblecategory + ".*.xml*"
        )
    ):
        locfname = os.path.basename(remfname).split(".", 4)
        locfname[0] = self.category
        status = "unknown"
        with utils.open_file(remfname) as remfdesc:
            remfcontent = remfdesc.read()
            if b'<status state="up"' in remfcontent:
                status = "up"
            elif b'<status state="down"' in remfcontent:
                if not self.storedown:
                    remfdesc.close()
                    os.unlink(remfname)
                    continue
                status = "down"
            del remfcontent
        locfname = os.path.join(
            self.outputpath,
            locfname[0],
            status,
            re.sub("[/@:]", "_", agent.name),
            *locfname[1:],
        )
        utils.makedirs(os.path.dirname(locfname))
        os.rename(remfname, locfname)
    for remfname in glob.glob(
        os.path.join(
            agent.get_local_path("remotedata"), self.visiblecategory + ".*.tar*"
        )
    ):
        locfname = os.path.basename(remfname).split(".", 4)
        locfname[0] = self.category
        locfname = os.path.join(
            self.outputpath,
            locfname[0],
            "data",
            re.sub("[/@:]", "_", agent.name),
            *locfname[1:],
        )
        utils.makedirs(os.path.dirname(locfname))
        os.rename(remfname, locfname)
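# sync() above classifies each remote result by searching the raw Nmap XML
# output for the <status state="..."> marker, then files it under a
# per-status directory.  A stripped-down sketch of that classification step,
# assuming an uncompressed file (the code above uses utils.open_file, which
# also handles compressed outputs); the name scan_status is illustrative:
def scan_status(fname):
    """Return "up", "down" or "unknown" for an Nmap XML output file."""
    with open(fname, "rb") as fdesc:
        content = fdesc.read()
    if b'<status state="up"' in content:
        return "up"
    if b'<status state="down"' in content:
        return "down"
    return "unknown"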
def main():
    """Update the flow database from log files"""
    parser = ArgumentParser(description=__doc__)
    parser.add_argument("files", nargs="*", metavar="FILE",
                        help="Files to import in the flow database")
    parser.add_argument("-v", "--verbose", help="verbose mode",
                        action="store_true")
    parser.add_argument("-t", "--type", help="file type",
                        choices=list(PARSERS_CHOICE))
    parser.add_argument("-f", "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C", "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        config.DEBUG = True
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    utils.LOGGER.warning(
                        "Cannot find the appropriate parser for file %r",
                        fname,
                    )
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                db.flow.flow2flow(bulk, rec)
        db.flow.bulk_commit(bulk)
    if not args.no_cleanup:
        db.flow.cleanup_flows()
def sync(self, agent):
    """This function should only be called from the `agent.sync()`
    method. It stores the results of terminated scans according to
    the target status.

    """
    for remfname in glob.glob(
            os.path.join(agent.get_local_path('remoteoutput'),
                         self.visiblecategory + '.*.xml*')):
        locfname = os.path.basename(remfname).split('.', 4)
        locfname[0] = self.category
        status = 'unknown'
        with utils.open_file(remfname) as remfdesc:
            remfcontent = remfdesc.read()
            if b'<status state="up"' in remfcontent:
                status = 'up'
            elif b'<status state="down"' in remfcontent:
                if not self.storedown:
                    remfdesc.close()
                    os.unlink(remfname)
                    continue
                status = 'down'
            del remfcontent
        locfname = os.path.join(self.outputpath, locfname[0], status,
                                re.sub('[/@:]', '_', agent.name),
                                *locfname[1:])
        utils.makedirs(os.path.dirname(locfname))
        os.rename(remfname, locfname)
    for remfname in glob.glob(
            os.path.join(agent.get_local_path('remotedata'),
                         self.visiblecategory + '.*.tar*')):
        locfname = os.path.basename(remfname).split('.', 4)
        locfname[0] = self.category
        locfname = os.path.join(self.outputpath, locfname[0], 'data',
                                re.sub('[/@:]', '_', agent.name),
                                *locfname[1:])
        utils.makedirs(os.path.dirname(locfname))
        os.rename(remfname, locfname)
def store_scan(self, fname, **kargs):
    """This method opens a scan result, and calls the appropriate
    store_scan_* method to parse (and store) the scan result.

    """
    scanid = utils.hash_file(fname, hashtype="sha256").hexdigest()
    if self.is_scan_present(scanid):
        if config.DEBUG:
            sys.stderr.write("WARNING: Scan already present in Database"
                             " (%r).\n" % fname)
        return False
    with utils.open_file(fname) as fdesc:
        fchar = fdesc.read(1)
    try:
        store_scan_function = {"<": self.store_scan_xml,
                               "{": self.store_scan_json}[fchar]
    except KeyError:
        raise ValueError("Unknown file type %s" % fname)
    return store_scan_function(fname, filehash=scanid, **kargs)
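# store_scan() above dispatches on the first character of the (possibly
# decompressed) file: Nmap XML output starts with "<", JSON exports with
# "{".  A minimal standalone sketch of that sniffing step, assuming an
# uncompressed input read in binary mode; sniff_scan_format is an
# illustrative name, not an IVRE function.
def sniff_scan_format(fname):
    """Return "xml" or "json" based on the first byte of `fname`."""
    with open(fname, "rb") as fdesc:
        fchar = fdesc.read(1)
    try:
        return {b"<": "xml", b"{": "json"}[fchar]
    except KeyError:
        raise ValueError("Unknown file type %s" % fname)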
def __init__(self, fdesc, pcap_filter=None):
    """Creates the NetFlow object.

    fdesc: a file-like object or a filename
    pcap_filter: a PCAP filter to use with nfdump

    """
    cmd = ["nfdump", "-aq", "-o", self.fmt]
    cmdkargs = {}
    if isinstance(fdesc, basestring):
        with open(fdesc) as fde:
            if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
                cmd.extend(["-r", fdesc])
            else:
                cmdkargs["stdin"] = utils.open_file(fdesc)
    else:
        cmdkargs["stdin"] = fdesc
    if pcap_filter is not None:
        cmd.append(pcap_filter)
    super(NetFlow, self).__init__(cmd, cmdkargs)
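# The constructor above checks the first two bytes of the file against the
# magic values known to utils.FileOpener to decide whether nfdump can read
# the file directly ("-r <file>") or must be fed a decompressed stream on
# its stdin.  Here is a standalone sketch of that decision, assuming only
# the gzip (\x1f\x8b) and bzip2 ("BZ") magics; the real helper may recognize
# more formats, and is_compressed is an illustrative name.
GZIP_MAGIC = b"\x1f\x8b"
BZIP2_MAGIC = b"BZ"


def is_compressed(fname):
    """Return True if `fname` looks gzip- or bzip2-compressed."""
    with open(fname, "rb") as fdesc:
        magic = fdesc.read(2)
    return magic in (GZIP_MAGIC, BZIP2_MAGIC)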
def store_scan(self, fname, **kargs):
    """This method opens a scan result, and calls the appropriate
    store_scan_* method to parse (and store) the scan result.

    """
    scanid = utils.hash_file(fname, hashtype="sha256").hexdigest()
    if self.is_scan_present(scanid):
        if config.DEBUG:
            sys.stderr.write("WARNING: Scan already present in Database"
                             " (%r).\n" % fname)
        return False
    with utils.open_file(fname) as fdesc:
        fchar = fdesc.read(1)
    try:
        return {
            '<': self.store_scan_xml,
            '{': self.store_scan_json,
        }[fchar](fname, filehash=scanid, **kargs)
    except KeyError:
        raise ValueError("Unknown file type %s" % fname)
def store_scan_xml(self, fname, **kargs):
    """This method parses an XML scan result, displays a JSON
    version of the result, and returns True if everything went
    fine, False otherwise.

    In backend-specific subclasses, this method stores the result
    instead of displaying it, thanks to the `content_handler`
    attribute.

    """
    parser = xml.sax.make_parser()
    try:
        content_handler = self.content_handler(fname, **kargs)
    except Exception as exc:
        sys.stderr.write(utils.warn_exception(exc, fname=fname))
    else:
        parser.setContentHandler(content_handler)
        parser.setEntityResolver(xmlnmap.NoExtResolver())
        parser.parse(utils.open_file(fname))
        content_handler.outputresults()
        return True
    return False
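# store_scan_xml() above streams the scan result through xml.sax with a
# backend-specific ContentHandler.  A minimal, self-contained sketch of that
# streaming pattern follows; the HostCounter handler is purely illustrative
# (it only counts <host> elements) and is not one of IVRE's handlers.
import xml.sax


class HostCounter(xml.sax.ContentHandler):

    def __init__(self):
        super().__init__()
        self.count = 0

    def startElement(self, name, attrs):
        if name == "host":
            self.count += 1


def count_hosts(fname):
    """Count the <host> elements of an XML file without loading it whole."""
    handler = HostCounter()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    parser.parse(fname)
    return handler.count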
def __init__(self, fdesc: Union[str, BinaryIO],
             pcap_filter: Optional[str] = None) -> None:
    """Creates the NetFlow object.

    fdesc: a file-like object or a filename
    pcap_filter: a PCAP filter to use with nfdump

    """
    cmd = ["nfdump", "-aq", "-o", self.fmt]
    cmdkargs = {}
    if isinstance(fdesc, str):
        with open(fdesc, "rb") as fde:
            if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
                cmd.extend(["-r", fdesc])
            else:
                cmdkargs["stdin"] = cast(BinaryIO, utils.open_file(fdesc))
    else:
        cmdkargs["stdin"] = fdesc
    if pcap_filter is not None:
        cmd.append(pcap_filter)
    super().__init__(cmd, cmdkargs)
def main():
    """Update the flow database from log files"""
    try:
        import argparse
        parser = argparse.ArgumentParser(description=__doc__)
        parser.add_argument('files', nargs='*', metavar='FILE',
                            help='Files to import in the flow database')
    except ImportError:
        import optparse
        parser = optparse.OptionParser(description=__doc__)
        parser.parse_args_orig = parser.parse_args

        def my_parse_args():
            res = parser.parse_args_orig()
            res[0].ensure_value('files', res[1])
            return res[0]
        parser.parse_args = my_parse_args
        parser.add_argument = parser.add_option
    parser.add_argument("-v", "--verbose", help="verbose mode",
                        action="store_true")
    parser.add_argument("-t", "--type", help="file type",
                        choices=PARSERS_CHOICE.keys())
    parser.add_argument("-f", "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C", "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        config.DEBUG = True
    query_cache = {}
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    sys.stderr.write(
                        'WARNING: cannot find the appropriate parser for file'
                        ' %r\n' % fname)
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                linkattrs = ('proto', )
                accumulators = {}
                for (fields, sp_linkattrs, sp_accumulators) in FIELD_REQUEST_EXT:
                    if all(field in rec for field in fields):
                        linkattrs = sp_linkattrs
                        accumulators = sp_accumulators
                        break
                if linkattrs not in query_cache:
                    query_cache[linkattrs] = db.flow.add_flow(
                        ["Flow"], linkattrs, counters=COUNTERS,
                        accumulators=accumulators)
                bulk.append(query_cache[linkattrs], rec)
        bulk.close()
    if not args.no_cleanup:
        db.flow.cleanup_flows()
def main():
    """Update the flow database from log files"""
    parser, use_argparse = utils.create_argparser(__doc__, extraargs='files')
    if use_argparse:
        parser.add_argument('files', nargs='*', metavar='FILE',
                            help='Files to import in the flow database')
    parser.add_argument("-v", "--verbose", help="verbose mode",
                        action="store_true")
    parser.add_argument("-t", "--type", help="file type",
                        choices=list(PARSERS_CHOICE))
    parser.add_argument("-f", "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C", "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        config.DEBUG = True
    query_cache = {}
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    utils.LOGGER.warning(
                        'Cannot find the appropriate parser for file %r',
                        fname,
                    )
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                linkattrs = ('proto', )
                accumulators = {}
                for (fields, sp_linkattrs, sp_accumulators) in FIELD_REQUEST_EXT:
                    if all(field in rec for field in fields):
                        linkattrs = sp_linkattrs
                        accumulators = sp_accumulators
                        break
                if linkattrs not in query_cache:
                    query_cache[linkattrs] = db.flow.add_flow(
                        ["Flow"], linkattrs, counters=COUNTERS,
                        accumulators=accumulators)
                bulk.append(query_cache[linkattrs], rec)
        bulk.close()
    if not args.no_cleanup:
        db.flow.cleanup_flows()
def main():
    """Update the flow database from log files"""
    try:
        import argparse
        parser = argparse.ArgumentParser(description=__doc__)
        parser.add_argument('files', nargs='*', metavar='FILE',
                            help='Files to import in the flow database')
    except ImportError:
        import optparse
        parser = optparse.OptionParser(description=__doc__)
        parser.parse_args_orig = parser.parse_args

        def my_parse_args():
            res = parser.parse_args_orig()
            res[0].ensure_value('files', res[1])
            return res[0]
        parser.parse_args = my_parse_args
        parser.add_argument = parser.add_option
    parser.add_argument("-v", "--verbose", help="verbose mode",
                        action="store_true")
    parser.add_argument("-t", "--type", help="file type",
                        choices=list(PARSERS_CHOICE))
    parser.add_argument("-f", "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C", "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()
    if args.verbose:
        config.DEBUG = True
    query_cache = {}
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    utils.LOGGER.warning(
                        'Cannot find the appropriate parser for file %r',
                        fname,
                    )
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                linkattrs = ('proto',)
                accumulators = {}
                for (fields, sp_linkattrs, sp_accumulators) in FIELD_REQUEST_EXT:
                    if all(field in rec for field in fields):
                        linkattrs = sp_linkattrs
                        accumulators = sp_accumulators
                        break
                if linkattrs not in query_cache:
                    query_cache[linkattrs] = db.flow.add_flow(
                        ["Flow"], linkattrs, counters=COUNTERS,
                        accumulators=accumulators)
                bulk.append(query_cache[linkattrs], rec)
        bulk.close()
    if not args.no_cleanup:
        db.flow.cleanup_flows()