Beispiel #1
0
    def load_into_pandas(self, input_file, regen: bool = False):
        """
        Load MPTCP csv data into a pandas DataFrame.

        The given path is user-expanded and resolved, handed to
        get_matching_csv_filename to obtain the csv file to read, and that
        csv is parsed with the delimiter found in the configuration.

        :param input_file: path of the file to load
            NOTE(review): earlier documentation said a file descriptor is
            accepted too, but the value is passed straight to
            os.path.expanduser - confirm before relying on it.
        :param regen: Ignore the cache and regenerate any cached csv file
            from the input pcap (forwarded to get_matching_csv_filename)

        Returns:
            a pandas.DataFrame whose columns were renamed with the mapping
            returned by mp.get_fields("fullname", "name")
        """
        log.debug("Asked to load %s" % input_file)

        resolved_path = os.path.realpath(os.path.expanduser(input_file))
        csv_filename = self.get_matching_csv_filename(resolved_path, regen)

        # Only forward to pandas the fields that actually declare a type.
        typed_columns = {}
        for field, field_type in mp.get_fields("fullname", "type").items():
            if field_type is not None:
                typed_columns[field] = field_type
        log.debug("Loading a csv file %s" % csv_filename)

        def parse_hex(raw):
            # tcp.flags is read as a base-16 string and turned into an int
            return int(raw, 16)

        frame = pd.read_csv(
            csv_filename,
            sep=self.config["DEFAULT"]["delimiter"],
            dtype=typed_columns,
            converters={"tcp.flags": parse_hex},
        )

        # presumably maps full field names to short column names - verify
        # against mp.get_fields
        column_names = mp.get_fields("fullname", "name")
        frame.rename(columns=column_names, inplace=True)
        return frame
Beispiel #2
0
    def get_matching_csv_filename(self, filename, force_regen: bool):
        """
        Return the csv file matching ``filename``, generating it if required.

        Despite its name, this function may create the csv file: an input
        that does not end in .csv is exported through TsharkExporter into the
        configured cache directory, unless a cached csv that looks newer than
        the input is already present there.

        :param filename: real path of either a .csv file or a capture file
            (anything whose extension is not .csv is treated as needing
            conversion)
        :param force_regen: regenerate the cached csv even when the existing
            cache entry looks valid

        Returns:
            path of the resulting csv file; this is ``filename`` itself when
            it already has a .csv extension, otherwise a file inside the
            cache directory

        Raises:
            Exception: carrying the exporter's stderr when the export
                returns a non-zero code
        """
        _, ext = os.path.splitext(filename)
        if ext == ".csv":
            log.debug("Filename already has a .csv extension")
            return filename

        print("%s format is not supported as is. Needs to be converted first" %
            (filename))

        cache_dir = self.config["DEFAULT"]["cache"]

        # Flatten the absolute path into one file name by joining its
        # components with '%', so every input gets its own entry directly
        # inside the cache directory.
        path_parts = os.path.realpath(filename).split(os.path.sep)
        csv_filename = os.path.join(cache_dir, '%'.join(path_parts)) + ".csv"

        cache_is_invalid = True
        log.debug("Checking for %s", csv_filename)
        if os.path.isfile(csv_filename):
            log.info("A cache %s was found", csv_filename)
            # NOTE(review): getctime is the metadata-change time on Unix and
            # the creation time on Windows; kept as-is to preserve the
            # existing invalidation behaviour.
            ctime_cached = os.path.getctime(csv_filename)
            ctime_pcap = os.path.getctime(filename)

            if ctime_cached > ctime_pcap:
                log.debug("Cache seems valid")
                cache_is_invalid = False
            else:
                log.debug("Cache seems outdated")

        # regenerate when the matching csv is missing/outdated or when forced
        if force_regen or cache_is_invalid:
            # recursively create the directories
            log.debug("Creating cache directory [%s]", cache_dir)
            os.makedirs(cache_dir, exist_ok=True)

            log.info("Preparing to convert %s into %s", filename, csv_filename)

            exporter = TsharkExporter(
                self.config["DEFAULT"]["tshark_binary"],
                self.config["DEFAULT"]["delimiter"],
                self.config["DEFAULT"]["wireshark_profile"],
            )

            retcode, stderr = exporter.export_to_csv(
                filename,
                csv_filename,
                mp.get_fields("fullname", "name"),
                tshark_filter="mptcp and not icmp"
            )
            # The message needs a placeholder: passing retcode as an extra
            # argument without one makes the logging module fail to format
            # the record instead of logging the code.
            log.info("exporter exited with code=%s", retcode)
            if retcode:
                raise Exception(stderr)

        return csv_filename