Example #1
def worker(progname, directory, sensor=None):
    """This function is the main loop, creating the processes when
    needed and feeding them with the data from the files.

    """
    utils.makedirs(os.path.join(directory, "current"))
    procs = {}
    while not WANTDOWN:
        # We get the next file to handle
        fname = getnextfiles(directory, sensor=sensor, count=1)
        # ... if we don't, we sleep for a while
        if not fname:
            utils.LOGGER.debug("Sleeping for %d s", SLEEPTIME)
            time.sleep(SLEEPTIME)
            continue
        fname = fname[0]
        fname_sensor = fname.groupdict()['sensor']
        if fname_sensor in procs:
            proc = procs[fname_sensor]
        else:
            proc = create_process(progname, fname_sensor)
            procs[fname_sensor] = proc
        fname = fname.group()
        # Our "lock system": if we can move the file, it's ours
        try:
            shutil.move(os.path.join(directory, fname),
                        os.path.join(directory, "current"))
        except shutil.Error:
            continue
        if config.DEBUG:
            utils.LOGGER.debug("Handling %s", fname)
        fname = os.path.join(directory, "current", fname)
        fdesc = utils.open_file(fname)
        handled_ok = True
        for line in fdesc:
            try:
                proc.stdin.write(line)
            except ValueError:
                utils.LOGGER.warning(
                    "Error while handling line %r. "
                    "Trying again", line)
                proc = create_process(progname, fname_sensor)
                procs[fname_sensor] = proc
                # Second (and last) try
                try:
                    proc.stdin.write(line)
                    utils.LOGGER.warning("  ... OK")
                except ValueError:
                    handled_ok = False
                    utils.LOGGER.warning("  ... KO")
        fdesc.close()
        if handled_ok:
            os.unlink(fname)
            utils.LOGGER.debug('  ... OK')
        else:
            utils.LOGGER.debug('  ... KO')
    # SHUTDOWN
    for sensor in procs:
        procs[sensor].stdin.close()
        procs[sensor].wait()
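The comment in the loop above ("if we can move the file, it's ours") describes a claim-by-rename scheme: a worker owns a file only once it has successfully moved it into its own "current" directory. A minimal standalone sketch of that idea follows; the helper name and directory layout are illustrative and not part of the project code above.

import os
import shutil

def try_claim(directory, fname):
    """Try to claim `fname` by moving it into a private "current" directory.

    Returns the new path on success, or None if the move failed, typically
    because another worker already claimed the file.
    """
    os.makedirs(os.path.join(directory, "current"), exist_ok=True)
    target = os.path.join(directory, "current", fname)
    try:
        # shutil.Error is a subclass of OSError in Python 3, so catching
        # OSError covers both a vanished source and a failed move.
        shutil.move(os.path.join(directory, fname), target)
    except OSError:
        return None
    return target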
Example #2
    def store_scan_json(self,
                        fname,
                        filehash=None,
                        needports=False,
                        needopenports=False,
                        categories=None,
                        source=None,
                        gettoarchive=None,
                        add_addr_infos=True,
                        force_info=False,
                        merge=False):
        """This method parses a JSON scan result as exported using
        `scancli --json > file`, displays the parsing result, and
        return True if everything went fine, False otherwise.

        In backend-specific subclasses, this method stores the result
        instead of displaying it, thanks to the `store_host`
        method.

        """
        if categories is None:
            categories = []
        need_scan_doc = False
        with utils.open_file(fname) as fdesc:
            for line in fdesc:
                host = self.json2dbrec(json.loads(line))
                for fname in ["_id"]:
                    if fname in host:
                        del host[fname]
                host["scanid"] = filehash
                if categories:
                    host["categories"] = categories
                if source is not None:
                    host["source"] = source
                if add_addr_infos and self.globaldb is not None and (
                        force_info or 'infos' not in host
                        or not host['infos']):
                    host['infos'] = {}
                    for func in [
                            self.globaldb.data.country_byip,
                            self.globaldb.data.as_byip,
                            self.globaldb.data.location_byip
                    ]:
                        data = func(host['addr'])
                        if data:
                            host['infos'].update(data)
                if ((not needports or 'ports' in host)
                        and (not needopenports
                             or host.get('openports', {}).get('count'))):
                    # We are about to insert data based on this file,
                    # so we want to save the scan document
                    need_scan_doc = True
                    if merge and self.merge_host(host):
                        pass
                    else:
                        self.archive_from_func(host, gettoarchive)
                        self.store_host(host)
        if need_scan_doc:
            self.store_scan_doc({'_id': filehash})
        return True
Example #3
def gunzip(fname: str) -> None:
    if not fname.endswith(".gz"):
        raise Exception('filename should end with ".gz"')
    assert config.GEOIP_PATH is not None
    with utils.open_file(os.path.join(config.GEOIP_PATH, fname)) as inp, open(
            os.path.join(config.GEOIP_PATH, fname[:-3]), "wb") as outp:
        copyfileobj(inp, outp)
Example #4
def worker(progname, directory, sensor=None):
    """This function is the main loop, creating the processes when
    needed and feeding them with the data from the files.

    """
    utils.makedirs(os.path.join(directory, "current"))
    procs = {}
    while not WANTDOWN:
        # We get the next file to handle
        fname = getnextfiles(directory, sensor=sensor, count=1)
        # ... if we don't, we sleep for a while
        if not fname:
            utils.LOGGER.debug("Sleeping for %d s", SLEEPTIME)
            time.sleep(SLEEPTIME)
            continue
        fname = fname[0]
        fname_sensor = fname.groupdict()['sensor']
        if fname_sensor in procs:
            proc = procs[fname_sensor]
        else:
            proc = create_process(progname, fname_sensor)
            procs[fname_sensor] = proc
        fname = fname.group()
        # Our "lock system": if we can move the file, it's ours
        try:
            shutil.move(os.path.join(directory, fname),
                        os.path.join(directory, "current"))
        except shutil.Error:
            continue
        if config.DEBUG:
            utils.LOGGER.debug("Handling %s", fname)
        fname = os.path.join(directory, "current", fname)
        fdesc = utils.open_file(fname)
        handled_ok = True
        for line in fdesc:
            try:
                proc.stdin.write(line)
            except ValueError:
                utils.LOGGER.warning("Error while handling line %r. "
                                     "Trying again", line)
                proc = create_process(progname, fname_sensor)
                procs[fname_sensor] = proc
                # Second (and last) try
                try:
                    proc.stdin.write(line)
                    utils.LOGGER.warning("  ... OK")
                except ValueError:
                    handled_ok = False
                    utils.LOGGER.warning("  ... KO")
        fdesc.close()
        if handled_ok:
            os.unlink(fname)
            utils.LOGGER.debug('  ... OK')
        else:
            utils.LOGGER.debug('  ... KO')
    # SHUTDOWN
    for sensor in procs:
        procs[sensor].stdin.close()
        procs[sensor].wait()
Example #5
def gunzip(fname, clean=True):
    if not fname.endswith('.gz'):
        raise Exception('filename should end with ".gz"')
    with utils.open_file(os.path.join(config.GEOIP_PATH, fname)) as inp:
        with open(os.path.join(config.GEOIP_PATH, fname[:-3]), "wb") as outp:
            outp.write(inp.read())
    if clean:
        os.unlink(os.path.join(config.GEOIP_PATH, fname))
Example #6
def gunzip(fname: str, clean: bool = True) -> None:
    if not fname.endswith(".gz"):
        raise Exception('filename should end with ".gz"')
    assert config.GEOIP_PATH is not None
    with utils.open_file(os.path.join(config.GEOIP_PATH, fname)) as inp:
        with open(os.path.join(config.GEOIP_PATH, fname[:-3]), "wb") as outp:
            outp.write(inp.read())
    if clean:
        os.unlink(os.path.join(config.GEOIP_PATH, fname))
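For illustration, a call might look like the following; the file name is hypothetical, and config.GEOIP_PATH must point to the directory that contains the compressed file.

# Hypothetical call: decompress <GEOIP_PATH>/GeoLite2-City.mmdb.gz into
# <GEOIP_PATH>/GeoLite2-City.mmdb, keeping the original .gz file.
gunzip("GeoLite2-City.mmdb.gz", clean=False)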
Example #7
    def store_scan_json(
        self,
        fname,
        filehash=None,
        needports=False,
        categories=None,
        source=None,
        gettoarchive=None,
        add_addr_infos=True,
        force_info=False,
        merge=False,
    ):
        """This method parses a JSON scan result as exported using
        `scancli --json > file`, displays the parsing result, and
        return True if everything went fine, False otherwise.

        In backend-specific subclasses, this method stores the result
        instead of displaying it, thanks to the `store_host`
        method.

        """
        if categories is None:
            categories = []
        with utils.open_file(fname) as fdesc:
            for line in fdesc:
                host = self.json2dbrec(json.loads(line))
                for fname in ["_id"]:
                    if fname in host:
                        del host[fname]
                host["scanid"] = filehash
                if categories:
                    host["categories"] = categories
                if source is not None:
                    host["source"] = source
                if (
                    add_addr_infos
                    and self.globaldb is not None
                    and (force_info or "infos" not in host or not host["infos"])
                ):
                    host["infos"] = {}
                    for func in [
                        self.globaldb.data.country_byip,
                        self.globaldb.data.as_byip,
                        self.globaldb.data.location_byip,
                    ]:
                        data = func(host["addr"])
                        if data:
                            host["infos"].update(data)
                if not needports or "ports" in host:
                    if merge and self.merge_host(host):
                        return True
                    self.archive_from_func(host, gettoarchive)
                    self.store_host(host)
            self.store_scan_doc({"_id": filehash})
        return True
Example #8
    def store_scan_json(self, fname, filehash=None,
                        needports=False, needopenports=False,
                        categories=None, source=None,
                        gettoarchive=None, add_addr_infos=True,
                        force_info=False, merge=False):
        """This method parses a JSON scan result as exported using
        `ivre scancli --json > file`, displays the parsing result, and
        return True if everything went fine, False otherwise.

        In backend-specific subclasses, this method stores the result
        instead of displaying it, thanks to the `store_host`
        method.

        """
        if categories is None:
            categories = []
        need_scan_doc = False
        with utils.open_file(fname) as fdesc:
            for line in fdesc:
                host = self.json2dbrec(json.loads(line))
                for fname in ["_id"]:
                    if fname in host:
                        del host[fname]
                host["scanid"] = filehash
                if categories:
                    host["categories"] = categories
                if source is not None:
                    host["source"] = source
                if add_addr_infos and self.globaldb is not None and (
                        force_info or 'infos' not in host or not host['infos']
                ):
                    host['infos'] = {}
                    for func in [self.globaldb.data.country_byip,
                                 self.globaldb.data.as_byip,
                                 self.globaldb.data.location_byip]:
                        data = func(host['addr'])
                        if data:
                            host['infos'].update(data)
                if ((not needports or 'ports' in host) and
                    (not needopenports or
                     host.get('openports', {}).get('count'))):
                    # We are about to insert data based on this file,
                    # so we want to save the scan document
                    need_scan_doc = True
                    if merge and self.merge_host(host):
                        pass
                    else:
                        self.archive_from_func(host, gettoarchive)
                        self.store_host(host)
        if need_scan_doc:
            self.store_scan_doc({'_id': filehash})
        return True
Example #9
    def sync(self, agent):
        """This function should only be called from `agent.sync()`
        method. It stores the results of terminated scans according to
        the target status.

        """
        for remfname in glob.glob(
            os.path.join(
                agent.get_local_path("remoteoutput"), self.visiblecategory + ".*.xml*"
            )
        ):
            locfname = os.path.basename(remfname).split(".", 4)
            locfname[0] = self.category
            status = "unknown"
            with utils.open_file(remfname) as remfdesc:
                remfcontent = remfdesc.read()
                if b'<status state="up"' in remfcontent:
                    status = "up"
                elif b'<status state="down"' in remfcontent:
                    if not self.storedown:
                        remfdesc.close()
                        os.unlink(remfname)
                        continue
                    status = "down"
                del remfcontent
            locfname = os.path.join(
                self.outputpath,
                locfname[0],
                status,
                re.sub("[/@:]", "_", agent.name),
                *locfname[1:],
            )
            utils.makedirs(os.path.dirname(locfname))
            os.rename(remfname, locfname)
        for remfname in glob.glob(
            os.path.join(
                agent.get_local_path("remotedata"), self.visiblecategory + ".*.tar*"
            )
        ):
            locfname = os.path.basename(remfname).split(".", 4)
            locfname[0] = self.category
            locfname = os.path.join(
                self.outputpath,
                locfname[0],
                "data",
                re.sub("[/@:]", "_", agent.name),
                *locfname[1:],
            )
            utils.makedirs(os.path.dirname(locfname))
            os.rename(remfname, locfname)
Example #10
def main():
    """Update the flow database from log files"""
    parser = ArgumentParser(description=__doc__)
    parser.add_argument("files",
                        nargs="*",
                        metavar="FILE",
                        help="Files to import in the flow database")
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose mode",
                        action="store_true")
    parser.add_argument("-t",
                        "--type",
                        help="file type",
                        choices=list(PARSERS_CHOICE))
    parser.add_argument("-f",
                        "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C",
                        "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()

    if args.verbose:
        config.DEBUG = True

    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    utils.LOGGER.warning(
                        "Cannot find the appropriate parser for file %r",
                        fname,
                    )
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                db.flow.flow2flow(bulk, rec)
        db.flow.bulk_commit(bulk)

    if not args.no_cleanup:
        db.flow.cleanup_flows()
Example #11
    def sync(self, agent):
        """This function should only be called from `agent.sync()`
        method. It stores the results of terminated scans according to
        the target status.

        """
        for remfname in glob.glob(
                os.path.join(agent.get_local_path('remoteoutput'),
                             self.visiblecategory + '.*.xml*')
        ):
            locfname = os.path.basename(remfname).split('.', 4)
            locfname[0] = self.category
            status = 'unknown'
            with utils.open_file(remfname) as remfdesc:
                remfcontent = remfdesc.read()
                if b'<status state="up"' in remfcontent:
                    status = 'up'
                elif b'<status state="down"' in remfcontent:
                    if not self.storedown:
                        remfdesc.close()
                        os.unlink(remfname)
                        continue
                    status = 'down'
                del remfcontent
            locfname = os.path.join(
                self.outputpath,
                locfname[0],
                status,
                re.sub('[/@:]', '_', agent.name),
                *locfname[1:]
            )
            utils.makedirs(os.path.dirname(locfname))
            os.rename(remfname, locfname)
        for remfname in glob.glob(
                os.path.join(agent.get_local_path('remotedata'),
                             self.visiblecategory + '.*.tar*')
        ):
            locfname = os.path.basename(remfname).split('.', 4)
            locfname[0] = self.category
            locfname = os.path.join(
                self.outputpath,
                locfname[0],
                'data',
                re.sub('[/@:]', '_', agent.name),
                *locfname[1:]
            )
            utils.makedirs(os.path.dirname(locfname))
            os.rename(remfname, locfname)
Example #12
    def store_scan(self, fname, **kargs):
        """This method opens a scan result, and calls the appropriate
        store_scan_* method to parse (and store) the scan result.

        """
        scanid = utils.hash_file(fname, hashtype="sha256").hexdigest()
        if self.is_scan_present(scanid):
            if config.DEBUG:
                sys.stderr.write("WARNING: Scan already present in Database" " (%r).\n" % fname)
            return False
        with utils.open_file(fname) as fdesc:
            fchar = fdesc.read(1)
            try:
                store_scan_function = {"<": self.store_scan_xml, "{": self.store_scan_json}[fchar]
            except KeyError:
                raise ValueError("Unknown file type %s" % fname)
            return store_scan_function(fname, filehash=scanid, **kargs)
Example #13
    def __init__(self, fdesc, pcap_filter=None):
        """Creates the NetFlow object.

        fdesc: a file-like object or a filename
        pcap_filter: a PCAP filter to use with nfdump

        """
        cmd = ["nfdump", "-aq", "-o", self.fmt]
        cmdkargs = {}
        if isinstance(fdesc, basestring):
            with open(fdesc) as fde:
                if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
                    cmd.extend(["-r", fdesc])
                else:
                    cmdkargs["stdin"] = utils.open_file(fdesc)
        else:
            cmdkargs["stdin"] = fdesc
        if pcap_filter is not None:
            cmd.append(pcap_filter)
        super(NetFlow, self).__init__(cmd, cmdkargs)
Example #14
    def __init__(self, fdesc, pcap_filter=None):
        """Creates the NetFlow object.

        fdesc: a file-like object or a filename
        pcap_filter: a PCAP filter to use with nfdump

        """
        cmd = ["nfdump", "-aq", "-o", self.fmt]
        cmdkargs = {}
        if isinstance(fdesc, basestring):
            with open(fdesc) as fde:
                if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
                    cmd.extend(["-r", fdesc])
                else:
                    cmdkargs["stdin"] = utils.open_file(fdesc)
        else:
            cmdkargs["stdin"] = fdesc
        if pcap_filter is not None:
            cmd.append(pcap_filter)
        super(NetFlow, self).__init__(cmd, cmdkargs)
Example #15
    def store_scan(self, fname, **kargs):
        """This method opens a scan result, and calls the appropriate
        store_scan_* method to parse (and store) the scan result.

        """
        scanid = utils.hash_file(fname, hashtype="sha256").hexdigest()
        if self.is_scan_present(scanid):
            if config.DEBUG:
                sys.stderr.write("WARNING: Scan already present in Database"
                                 " (%r).\n" % fname)
            return False
        with utils.open_file(fname) as fdesc:
            fchar = fdesc.read(1)
            try:
                return {
                    '<': self.store_scan_xml,
                    '{': self.store_scan_json,
                }[fchar](fname, filehash=scanid, **kargs)
            except KeyError:
                raise ValueError("Unknown file type %s" % fname)
Example #16
    def store_scan_xml(self, fname, **kargs):
        """This method parses an XML scan result, displays a JSON
        version of the result, and return True if everything went
        fine, False otherwise.

        In backend-specific subclasses, this method stores the result
        instead of displaying it, thanks to the `content_handler`
        attribute.

        """
        parser = xml.sax.make_parser()
        try:
            content_handler = self.content_handler(fname, **kargs)
        except Exception as exc:
            sys.stderr.write(utils.warn_exception(exc, fname=fname))
        else:
            parser.setContentHandler(content_handler)
            parser.setEntityResolver(xmlnmap.NoExtResolver())
            parser.parse(utils.open_file(fname))
            content_handler.outputresults()
            return True
        return False
Example #17
    def store_scan_xml(self, fname, **kargs):
        """This method parses an XML scan result, displays a JSON
        version of the result, and return True if everything went
        fine, False otherwise.

        In backend-specific subclasses, this method stores the result
        instead of displaying it, thanks to the `content_handler`
        attribute.

        """
        parser = xml.sax.make_parser()
        try:
            content_handler = self.content_handler(fname, **kargs)
        except Exception as exc:
            sys.stderr.write(utils.warn_exception(exc, fname=fname))
        else:
            parser.setContentHandler(content_handler)
            parser.setEntityResolver(xmlnmap.NoExtResolver())
            parser.parse(utils.open_file(fname))
            content_handler.outputresults()
            return True
        return False
Example #18
    def __init__(self,
                 fdesc: Union[str, BinaryIO],
                 pcap_filter: Optional[str] = None) -> None:
        """Creates the NetFlow object.

        fdesc: a file-like object or a filename
        pcap_filter: a PCAP filter to use with nfdump

        """
        cmd = ["nfdump", "-aq", "-o", self.fmt]
        cmdkargs = {}
        if isinstance(fdesc, str):
            with open(fdesc, "rb") as fde:
                if fde.read(2) not in utils.FileOpener.FILE_OPENERS_MAGIC:
                    cmd.extend(["-r", fdesc])
                else:
                    cmdkargs["stdin"] = cast(BinaryIO, utils.open_file(fdesc))
        else:
            cmdkargs["stdin"] = fdesc
        if pcap_filter is not None:
            cmd.append(pcap_filter)
        super().__init__(cmd, cmdkargs)
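The two-byte read above decides how nfdump is fed: plain files are passed with -r, while files whose first bytes match a known compression magic are opened with utils.open_file and piped to nfdump's stdin. As a rough standalone illustration of such a magic-byte check (the exact set of prefixes recognized by utils.FileOpener.FILE_OPENERS_MAGIC is an assumption here; gzip and bzip2 are shown):

# Illustrative sketch only: detect common compressed-file magic prefixes.
GZIP_MAGIC = b"\x1f\x8b"
BZIP2_MAGIC = b"BZ"

def looks_compressed(path):
    """Return True if the file starts with a known compression magic."""
    with open(path, "rb") as fde:
        return fde.read(2) in (GZIP_MAGIC, BZIP2_MAGIC)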
Example #19
def main():
    """Update the flow database from log files"""
    try:
        import argparse
        parser = argparse.ArgumentParser(description=__doc__)
        parser.add_argument('files',
                            nargs='*',
                            metavar='FILE',
                            help='Files to import in the flow database')
    except ImportError:
        import optparse
        parser = optparse.OptionParser(description=__doc__)
        parser.parse_args_orig = parser.parse_args

        def my_parse_args():
            res = parser.parse_args_orig()
            res[0].ensure_value('files', res[1])
            return res[0]

        parser.parse_args = my_parse_args
        parser.add_argument = parser.add_option

    parser.add_argument("-v",
                        "--verbose",
                        help="verbose mode",
                        action="store_true")
    parser.add_argument("-t",
                        "--type",
                        help="file type",
                        choices=PARSERS_CHOICE.keys())
    parser.add_argument("-f",
                        "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C",
                        "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()

    if args.verbose:
        config.DEBUG = True

    query_cache = {}
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    sys.stderr.write(
                        'WARNING: cannot find the appropriate parser for file'
                        ' %r\n' % fname)
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                linkattrs = ('proto', )
                accumulators = {}
                for (fields, sp_linkattrs,
                     sp_accumulators) in FIELD_REQUEST_EXT:
                    if all(field in rec for field in fields):
                        linkattrs = sp_linkattrs
                        accumulators = sp_accumulators
                        break
                if linkattrs not in query_cache:
                    query_cache[linkattrs] = db.flow.add_flow(
                        ["Flow"],
                        linkattrs,
                        counters=COUNTERS,
                        accumulators=accumulators)
                bulk.append(query_cache[linkattrs], rec)
        bulk.close()

    if not args.no_cleanup:
        db.flow.cleanup_flows()
Example #20
def main():
    """Update the flow database from log files"""
    parser, use_argparse = utils.create_argparser(__doc__, extraargs='files')
    if use_argparse:
        parser.add_argument('files',
                            nargs='*',
                            metavar='FILE',
                            help='Files to import in the flow database')
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose mode",
                        action="store_true")
    parser.add_argument("-t",
                        "--type",
                        help="file type",
                        choices=list(PARSERS_CHOICE))
    parser.add_argument("-f",
                        "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C",
                        "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()

    if args.verbose:
        config.DEBUG = True

    query_cache = {}
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    utils.LOGGER.warning(
                        'Cannot find the appropriate parser for file %r',
                        fname,
                    )
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                linkattrs = ('proto', )
                accumulators = {}
                for (fields, sp_linkattrs,
                     sp_accumulators) in FIELD_REQUEST_EXT:
                    if all(field in rec for field in fields):
                        linkattrs = sp_linkattrs
                        accumulators = sp_accumulators
                        break
                if linkattrs not in query_cache:
                    query_cache[linkattrs] = db.flow.add_flow(
                        ["Flow"],
                        linkattrs,
                        counters=COUNTERS,
                        accumulators=accumulators)
                bulk.append(query_cache[linkattrs], rec)
        bulk.close()

    if not args.no_cleanup:
        db.flow.cleanup_flows()
Example #21
def main():
    """Update the flow database from log files"""
    try:
        import argparse
        parser = argparse.ArgumentParser(description=__doc__)
        parser.add_argument('files', nargs='*', metavar='FILE',
                            help='Files to import in the flow database')
    except ImportError:
        import optparse
        parser = optparse.OptionParser(description=__doc__)
        parser.parse_args_orig = parser.parse_args
        def my_parse_args():
            res = parser.parse_args_orig()
            res[0].ensure_value('files', res[1])
            return res[0]
        parser.parse_args = my_parse_args
        parser.add_argument = parser.add_option

    parser.add_argument("-v", "--verbose", help="verbose mode",
                        action="store_true")
    parser.add_argument("-t", "--type", help="file type",
                        choices=list(PARSERS_CHOICE))
    parser.add_argument("-f", "--pcap-filter",
                        help="pcap filter to apply (when supported)")
    parser.add_argument("-C", "--no-cleanup",
                        help="avoid port cleanup heuristics",
                        action="store_true")
    args = parser.parse_args()

    if args.verbose:
        config.DEBUG = True

    query_cache = {}
    for fname in args.files:
        try:
            fileparser = PARSERS_CHOICE[args.type]
        except KeyError:
            with utils.open_file(fname) as fdesc:
                try:
                    fileparser = PARSERS_MAGIC[fdesc.read(4)]
                except KeyError:
                    utils.LOGGER.warning(
                        'Cannot find the appropriate parser for file %r', fname,
                    )
                    continue
        bulk = db.flow.start_bulk_insert()
        with fileparser(fname, args.pcap_filter) as fdesc:
            for rec in fdesc:
                if not rec:
                    continue
                linkattrs = ('proto',)
                accumulators = {}
                for (fields, sp_linkattrs, sp_accumulators) in FIELD_REQUEST_EXT:
                    if all(field in rec for field in fields):
                        linkattrs = sp_linkattrs
                        accumulators = sp_accumulators
                        break
                if linkattrs not in query_cache:
                    query_cache[linkattrs] = db.flow.add_flow(
                        ["Flow"], linkattrs, counters=COUNTERS,
                        accumulators=accumulators)
                bulk.append(query_cache[linkattrs], rec)
        bulk.close()

    if not args.no_cleanup:
        db.flow.cleanup_flows()