def __init__(self, loader: TsharkConfig = None, **kwargs) -> None:
    super().__init__(df_name=kwargs.get("dest"), **kwargs)
    # if loader == None:
    self.loader = loader or TsharkConfig()
    completer_method = functools.partial(
        cmd2.Cmd.path_complete,
        path_filter=lambda path: os.path.isfile(path))
    setattr(self, ATTR_CHOICES_CALLABLE,
            ChoicesCallable(is_method=True, is_completer=True, to_call=completer_method,))
def __init__(self, name: str, protocol: Protocol, loader=None, **kwargs) -> None:
    """ """
    self.loader = loader or TsharkConfig()
    self.protocol = protocol
    DataframeAction.__init__(self, df_name=name, **kwargs)
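
# Hedged sketch (illustration only, not part of the original module): Action
# subclasses with __init__ signatures like the ones above are meant to be
# handed to argparse through `action=`, so that parsing the argument also
# prepares the matching dataframe. Parser and argument names are illustrative,
# mirroring how LoadSinglePcap is attached to `input_file` later in this module.
example_parser = MpTcpAnalyzerParser(description="standalone example")
example_parser.add_argument(
    "input_file", action=LoadSinglePcap,
    help="pcap loaded into args._dataframes['input_file'] by the action")
# After parsing, commands read the dataframe back from args._dataframes["input_file"],
# exactly as do_load_pcap does.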
def main():
    # https://docs.python.org/3/library/argparse.html#module-argparse
    # http://tricksntweaks.blogspot.be/2013/05/advance-argument-parsing-in-python.html
    parser = argparse.ArgumentParser(
        description='Generate MPTCP stats & plots',
        fromfile_prefix_chars='@',
    )
    parser.add_argument('--relative', action="store_true",
                        help="set to export relative TCP seq number")
    parser.add_argument('--tshark', dest="tshark_exe", action="store", default="tshark",
                        help="Path to the tshark binary")
    parser.add_argument('--profile', dest="profile", action="store", default=None,
                        help="Wireshark profile which contains many options to customize output")

    # TODO tshark.py should rather accept streams
    # argparse.FileType('r')
    # parser.add_argument('xpconfig', default="tests.ini", action="store", type=str,
    #                     help="Config filename. Describe experiment settings")
    # parser.add_argument('inputPcap', action="store", help="src IP")

    pcap_parser = argparse.ArgumentParser(
        description='Expecting pcap file as input',
        add_help=False,
    )
    pcap_parser.add_argument('inputPcap', action="store", help="Input pcap")

    subparsers = parser.add_subparsers(dest="subparser_name", title="Subparsers",
                                       help='sub-command help')

    subparser_csv = subparsers.add_parser('pcap2csv', parents=[pcap_parser],
                                          help='Converts pcap to a csv file')
    # subparser_csv.add_argument('inputPcap', action="store", help="Input pcap")
    subparser_csv.add_argument('--output', "-o", action="store", help="csv filename")
    subparser_csv.add_argument('--filter', "-f", action="store", help="Filter", default="")
    subparser_csv.add_argument('fields_filename', type=argparse.FileType('r'), action="store",
                               help="json file mapping field names to their wireshark name")

    # List MPTCP connections and subflows
    sp_csv2sql = subparsers.add_parser('csv2sql',
                                       help='Imports csv file to an sqlite database')
    sp_csv2sql.add_argument('inputCsv', action="store", help="Input Csv")
    sp_csv2sql.add_argument('output', nargs="?", action="store", help="db filename")

    sp_pcap2sql = subparsers.add_parser('pcap2sql',
                                        help='Converts pcap to an sqlite database')
    sp_pcap2sql.add_argument('inputPcap', action="store", help="Input pcap")
    sp_pcap2sql.add_argument('output', nargs="?", action="store", help="db filename")

    args = parser.parse_args(sys.argv[1:])

    exporter = TsharkConfig(args.tshark_exe, profile=args.profile)
    # exporter.tcp_relative_seq = args.relative if args.relative else True
    exporter.tcp_relative_seq = args.relative
    # exporter.fields_to_export = fields_to_export
    log.debug("Relative #seq = %s" % exporter.tcp_relative_seq)

    if args.subparser_name == "pcap2csv":
        inputFilename = args.inputPcap
        outputFilename = args.output if args.output else get_basename(inputFilename, "csv")
        fields_to_export = load_fields_to_export_from_file(args.fields_filename)
        exporter.filter = args.filter
        print(fields_to_export)
        exporter.export_pcap_to_csv(inputFilename, outputFilename, fields_to_export)
    elif args.subparser_name == "csv2sql":
        inputFilename = args.inputCsv
        outputFilename = get_basename(inputFilename, "sqlite")
        convert_csv_to_sql(inputFilename, outputFilename, "connections")
    elif args.subparser_name == "pcap2sql":
        inputFilename = args.inputPcap
        outputFilename = get_basename(inputFilename, "sqlite")
        exporter.export_pcap_to_sql(inputFilename, outputFilename)
    else:
        parser.print_help()
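
# Hedged sketch (illustration only, not part of the original module): the
# pcap2csv branch of main() driven programmatically. File names are
# hypothetical; the calls simply mirror the branch above.
def example_pcap2csv():
    exporter = TsharkConfig("tshark", profile=None)
    exporter.tcp_relative_seq = True
    exporter.filter = ""  # optional wireshark display filter
    with open("fields.json") as fd:  # hypothetical field-mapping file
        fields_to_export = load_fields_to_export_from_file(fd)
    exporter.export_pcap_to_csv("capture.pcap", "capture.csv", fields_to_export)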
class MpTcpAnalyzerCmdApp(cmd2.Cmd):
    """
    mptcpanalyzer can run into 3 modes:

    #. interactive mode (default): an interpreter with some basic completion will
       accept your commands. There is also some help embedded.
    #. if a filename is passed as argument, it will load commands from this file
    #. otherwise, it will consider the unknown arguments as one command, the same
       that could be used interactively
    """

    intro = textwrap.dedent("""
        Press ? to list the available commands and `help <command>` or `<command> -h`
        for a detailed help of the command
        """.format(__version__))

    def stevedore_error_handler(manager, entrypoint, exception):
        print("Error while loading entrypoint [%s]" % entrypoint)

    def __init__(self, cfg: MpTcpAnalyzerConfig, stdin=sys.stdin, **kwargs) -> None:
        """
        Args:
            cfg (MpTcpAnalyzerConfig): A valid configuration

        Attributes:
            prompt (str): Prompt seen by the user, displays the currently loaded pcap
            config: configuration to get user parameters
            data: dataframe currently in use
        """
        self.shortcuts.update({
            'lc': 'list_connections',
            'ls': 'list_subflows',
            'lr': 'list_reinjections'
        })
        super().__init__(completekey='tab', stdin=stdin)
        self.prompt = FG_COLORS['blue'] + "Ready>" + color_off
        self.data = None  # type: pd.DataFrame
        self.config = cfg
        self.tshark_config = TsharkConfig(
            delimiter=cfg["mptcpanalyzer"]["delimiter"],
            profile=cfg["mptcpanalyzer"]["wireshark_profile"],
        )

        # cmd2 specific initialization
        self.abbrev = True  # when no ambiguities, run the command
        self.allow_cli_args = True  # disable autoload of transcripts
        self.allow_redirection = True  # allow pipes in commands
        self.default_to_shell = False
        self.debug = True  # for now
        self.set_posix_shlex = True  # need cmd2 >= 0.8

        # Load Plots
        ######################
        # you can list available plots under the namespace
        # https://pypi.python.org/pypi/entry_point_inspector
        # https://docs.openstack.org/stevedore/latest/reference/index.html#stevedore.extension.ExtensionManager
        # mgr = driver.DriverManager(
        self.plot_mgr = extension.ExtensionManager(
            namespace='mptcpanalyzer.plots',
            invoke_on_load=True,
            verify_requirements=True,
            invoke_args=(self.tshark_config,),
            # invoke_kwds
            propagate_map_exceptions=True,
            on_load_failure_callback=self.stevedore_error_handler
        )

        self.cmd_mgr = extension.ExtensionManager(
            namespace='mptcpanalyzer.cmds',
            invoke_on_load=True,
            verify_requirements=True,
            invoke_args=(),
            propagate_map_exceptions=False,
            on_load_failure_callback=self.stevedore_error_handler
        )

        # do_plot parser
        ######################
        # not my first choice but to accommodate cmd2 constraints
        # see https://github.com/python-cmd2/cmd2/issues/498
        subparsers = MpTcpAnalyzerCmdApp.plot_parser.add_subparsers(
            dest="plot_type", title="Subparsers", help='sub-command help',)
        subparsers.required = True  # type: ignore

        def register_plots(ext, subparsers):
            """Adds a parser per plot"""
            # check if data is loaded
            parser = ext.obj.default_parser()
            assert parser, "Forgot to return parser"
            subparsers.add_parser(ext.name, parents=[parser], add_help=False)

        self.plot_mgr.map(register_plots, subparsers)
        # will raise NoMatches when no plot available

        # if loading commands from a file, we disable the prompt not to pollute output
        if stdin != sys.stdin:
            log.info("Disabling prompt because reading from stdin")
            self.use_rawinput = False
            self.prompt = ""
            self.intro = ""

        """
        The optional arguments stdin and stdout specify the input and output file objects
        that the Cmd instance or subclass instance will use for input and output. If not
        specified, they will default to sys.stdin and sys.stdout.
        """
        print("WARNING: mptcpanalyzer may require a custom wireshark. "
              "Check github for mptcp patches upstreaming.")

    @property
    def plot_manager(self):
        return self.plot_mgr

    @plot_manager.setter
    def plot_manager(self, mgr):
        """
        Override the default plot manager, only used for testing

        :param mgr: a stevedore plugin manager
        """
        self.plot_mgr = mgr

    def load_plugins(self, mgr=None):
        """
        This function monkey patches the class to inject Command plugins

        Attrs:
            mgr: override the default plugin manager when set. Useful to run tests
        """
        mgr = mgr if mgr is not None else self.cmd_mgr

        def _inject_cmd(ext, data):
            log.debug("Injecting plugin %s" % ext.name)
            for prefix in ["do", "help", "complete"]:
                method_name = prefix + "_" + ext.name
                try:
                    obj = getattr(ext.obj, prefix)
                    if obj:
                        setattr(MpTcpAnalyzerCmdApp, method_name, obj)
                except AttributeError:
                    log.debug("Plugin does not provide %s" % method_name)

        # there is also map_method available
        try:
            mgr.map(_inject_cmd, self)
        except stevedore.exception.NoMatches as e:
            log.error("stevedore: No matches (%s)" % e)

    def precmd(self, line):
        """
        Here we can preprocess line, with for instance shlex.split() ?

        Note:
            This is only called when using cmdloop, not with onecmd !
        """
        # default behavior
        print(">>> %s" % line)
        return line

    def cmdloop(self, intro=None):
        """
        overrides baseclass just to be able to catch exceptions
        """
        try:
            super().cmdloop()
        except KeyboardInterrupt as e:
            pass
        # Exception raised by sys.exit(), which is called by argparse;
        # we don't want the program to finish just when there is an input error
        except SystemExit as e:
            self.cmdloop()
        except mp.MpTcpException as e:
            print(e)
            self.cmdloop()
        except Exception as e:
            log.critical("Unknown error, aborting...")
            log.critical("%s" % e)
            print("Displaying backtrace:\n")
            traceback.print_exc()

    def postcmd(self, stop, line):
        """
        Override baseclass; returning True will stop the program
        """
        log.debug("postcmd result for line [%s] => %r", line, stop)
        return True if stop is True else False

    parser = MpTcpAnalyzerParser(description="List subflows of an MPTCP connection")
    filter_stream = parser.add_argument("mptcpstream", action="store", type=int,
                                        help="Equivalent to wireshark mptcp.stream id")
    # TODO for tests only, fix
    setattr(filter_stream, argparse_completer.ACTION_ARG_CHOICES, [0, 1, 2])

    @with_argparser(parser)
    @with_category(CAT_MPTCP)
    @is_loaded
    def do_list_subflows(self, args):
        """
        list mptcp subflows
            [mptcp.stream id]

        Example:
            ls 0
        """
        self.list_subflows(args.mptcpstream)

    @is_loaded
    def list_subflows(self, mptcpstreamid: int):
        try:
            con = MpTcpConnection.build_from_dataframe(self.data, mptcpstreamid)
            self.poutput("mptcp.stream %d has %d subflow(s) (client/server): "
                         % (mptcpstreamid, len(con.subflows())))
            for sf in con.subflows():
                self.poutput("\t%s" % sf)
        except mp.MpTcpException as e:
            self.perror(e)

    # def help_list_subflows(self):
    #     print("Use parser -h")

    # def complete_list_subflows(self, text, line, begidx, endidx):
    #     """ help to complete the args """
    #     # conversion to set removes duplicate keys
    #     l = list(set(self.data["mptcpstream"]))
    #     # convert items to str else it won't be used for completion
    #     l = [str(x) for x in l]
    #     return l

    # parser = gen_pcap_parser({"pcap": PreprocessingActions.FilterStream | PreprocessingActions.Merge}, protocol="tcp")
    parser = argparse_completer.ACArgumentParser(
        description='''
        This function tries to map a tcp.stream id from one pcap
        to one in another pcap in another dataframe.
        '''
    )
    # TODO could use LoadSinglePcap
    load_pcap1 = parser.add_argument("pcap1", action="store", help="first pcap to load")
    load_pcap2 = parser.add_argument("pcap2", action="store", help="second pcap")
    # cmd2.Cmd.path_complete ?
    # setattr(action_stream, argparse_completer.ACTION_ARG_CHOICES, range(0, 10))
    # use path_filter
    setattr(load_pcap1, argparse_completer.ACTION_ARG_CHOICES, ('path_complete', ))
    setattr(load_pcap2, argparse_completer.ACTION_ARG_CHOICES, ('path_complete', ))
    parser.add_argument("tcpstreamid", action="store", type=int,
                        help="tcp.stream id visible in wireshark for pcap1")
    parser.add_argument("--json", action="store_true", default=False,
                        help="Machine readable summary.")
    parser.add_argument('-v', '--verbose', dest="verbose", default=False, action="store_true",
                        help="how to display each connection")
    parser.epilog = '''
    Examples:
        map_tcp_connection examples/client_1_tcp_only.pcap examples/server_1_tcp_only.pcap 0
    '''

    @with_argparser(parser)
    @with_category(CAT_TCP)
    def do_map_tcp_connection(self, args):

        df1 = load_into_pandas(args.pcap1, self.tshark_config)
        df2 = load_into_pandas(args.pcap2, self.tshark_config)

        main_connection = TcpConnection.build_from_dataframe(df1, args.tcpstreamid)

        mappings = map_tcp_stream(df2, main_connection)

        self.poutput("Trying to map %s" % (main_connection,))
        self.poutput("%d mapping(s) found" % len(mappings))

        for match in mappings:
            # formatted_output = main.format_mapping(match)
            # output = "{c1.tcpstreamid} <-> {c2.tcpstreamid} with score={score}"
            # formatted_output = output.format(
            #     c1=main_connection,
            #     c2=match,
            #     score=score
            # )
            # print(formatted_output)
            self.poutput("%s" % str(match))

    parser = MpTcpAnalyzerParser(
        description="This function tries to map a mptcp.stream from a dataframe "
                    "(aka pcap) to a mptcp.stream in another dataframe."
    )
    load_pcap1 = parser.add_argument("pcap1", action="store", type=str, help="first pcap to load")
    load_pcap2 = parser.add_argument("pcap2", action="store", type=str, help="second pcap")
    setattr(load_pcap1, argparse_completer.ACTION_ARG_CHOICES, ('path_complete', ))
    setattr(load_pcap2, argparse_completer.ACTION_ARG_CHOICES, ('path_complete', ))
    parser.add_argument("mptcpstreamid", action="store", type=int,
                        help="mptcp.stream id to filter")
    parser.add_argument("--trim", action="store", type=float, default=0,
                        help="Remove mappings with a score below this threshold")
    parser.add_argument("--limit", action="store", type=int, default=2,
                        help="Limit display to the --limit best mappings")
    parser.add_argument('-v', '--verbose', dest="verbose", default=False, action="store_true",
                        help="display all candidates")

    @with_argparser(parser)
    @with_category(CAT_MPTCP)
    @experimental
    def do_map_mptcp_connection(self, args):
        """
        Tries to map mptcp.streams from different pcaps.
        Score based mechanism

        Todo:
            - Limit number of displayed matches
        """
        df1 = load_into_pandas(args.pcap1, self.tshark_config)
        df2 = load_into_pandas(args.pcap2, self.tshark_config)

        main_connection = MpTcpConnection.build_from_dataframe(df1, args.mptcpstreamid)
        mappings = map_mptcp_connection(df2, main_connection)

        self.poutput("%d mapping(s) found" % len(mappings))
        mappings.sort(key=lambda x: x.score, reverse=True)

        for rank, match in enumerate(mappings):
            if rank >= args.limit:
                self.pfeedback("ignoring mappings left")
                break

            winner_like = match.score == float('inf')

            output = "{c1.mptcpstreamid} <-> {c2.mptcpstreamid} with score={score} {extra}"
            formatted_output = output.format(
                c1=main_connection,
                c2=match.mapped,
                score=FG_COLORS['red'] + str(match.score) + color_off,
                extra=" <-- should be a correct match" if winner_like else ""
            )

            if match.score < args.trim:
                continue

            # match = MpTcpMapping(match.mapped, match.score, mapped_subflows)
            def _print_subflow(x):
                return "\n-" + x[0].format_mapping(x[1])

            formatted_output += ''.join([_print_subflow(x) for x in match.subflow_mappings])

            self.poutput(formatted_output)

    # def parser_summary():
    #     """ """
    #     pass

    summary_parser = MpTcpAnalyzerParser(description="Prints a summary of the mptcp connection")
    action_stream = summary_parser.add_argument(
        "mptcpstream", type=MpTcpStreamId, action=mp.parser.retain_stream("pcap"),
        help="mptcp.stream id")
    # TODO update the stream id autocompletion dynamically ?
    # setattr(action_stream, argparse_completer.ACTION_ARG_CHOICES, range(0, 10))
    summary_parser.add_argument(
        'destination',
        # mp.DestinationChoice,
        action="store", choices=mp.DestinationChoice,
        type=lambda x: mp.ConnectionRoles[x],
        help='Filter flows according to their direction '
             '(towards the client or the server). '
             'Depends on mptcpstream')
    summary_parser.add_argument("--json", action="store_true", default=False,
                                help="Machine readable summary.")

    @with_argparser_test(summary_parser, preload_pcap=True)
    @is_loaded
    def do_summary(self, args, unknown):
        """
        Naive summary of the contributions of the mptcp connection
        See summary_extended for more details
        """
        df = self.data

        # myNs = Namespace()
        # myNs._dataframes = {"pcap": self.data}
        # args = parser.parse_args(args, myNs)
        mptcpstream = args.mptcpstream

        success, ret = stats.mptcp_compute_throughput(
            self.data, args.mptcpstream, args.destination
        )
        if success is not True:
            self.perror("Throughput computation failed:")
            self.perror(ret)
            return

        if args.json:
            import json
            # TODO use self.poutput
            # or use a stream, it must just be testable
            val = json.dumps(ret, ensure_ascii=False)
            self.poutput(val)
            return

        mptcp_transferred = ret["mptcp_throughput_bytes"]
        self.poutput("mptcpstream %d transferred %d bytes."
                     % (ret["mptcpstreamid"], mptcp_transferred))
% (ret["mptcpstreamid"], mptcp_transferred)) for tcpstream, sf_bytes in map(lambda sf: (sf["tcpstreamid"], sf["throughput_bytes"]), ret["subflow_stats"]): subflow_load = sf_bytes/mptcp_transferred self.poutput("tcpstream {} transferred {sf_tput} bytes out of {mptcp_tput}, " "accounting for {tput_ratio:.2f}%".format( tcpstream, sf_tput=sf_bytes, mptcp_tput=mptcp_transferred, tput_ratio=subflow_load*100 )) parser = gen_pcap_parser({"pcap": PreprocessingActions.Preload}) parser.description = "Export connection(s) to CSV" parser.epilog = ''' ''' # faut qu'il prenne le pcap ici sinon je ne peux pas autofiltrer : parser.add_argument("output", action="store", help="Output filename") group = parser.add_mutually_exclusive_group(required=False) group.add_argument('--tcpstream', action=functools.partial(FilterStream, "pcap", False), type=TcpStreamId) group.add_argument('--mptcpstream', action=functools.partial(FilterStream, "pcap", True), type=MpTcpStreamId) # parser.add_argument("protocol", action="store", choices=["mptcp", "tcp"], help="tcp.stream id visible in wireshark") # TODO check ? parser.add_argument("--destination", action="store", choices=mp.DestinationChoice, help="tcp.stream id visible in wireshark") parser.add_argument("--drop-syn", action="store_true", default=False, help="Helper just for my very own specific usecase") @is_loaded @with_argparser(parser) def do_tocsv(self, args): """ Selects tcp/mptcp/udp connection and exports it to csv """ df = self.data # TODO let the parser do it # if args.tcpstream: # # df = df[ df.tcpstream == args.tcpstream] # self.poutput("Filtering tcpstream") # con = TcpConnection.build_from_dataframe(df, args.tcpstream) # if args.destination: # self.poutput("Filtering destination") # q = con.generate_direction_query(args.destination) # df = df.query(q) # elif args.mptcpstream: # self.poutput("Unsupported yet") # df = df[ df.mptcpstream == args.mptcpstream] # need to compute the destinations before dropping syn from the dataframe # df['tcpdest'] = np.nan; for streamid, subdf in df.groupby("tcpstream"): con = TcpConnection.build_from_dataframe(df, streamid) df = mpdata.tcpdest_from_connections(df, con) if args.drop_syn: # use subdf ? self.poutput("drop-syn Unsupported yet") df.drop(subdf.head(3).index, inplace=True) # drop 3 first packets of each connection ? # this should be a filter syns = df[df.tcpflags == mp.TcpFlags.SYN] # df = df[ df.flags ] # if args.destination: # if args.tcpstream: # TODO we should filter destination self.poutput("Writing to %s" % args.output) pandas_to_csv(df, args.output) parser = gen_bicap_parser("mptcp", True) parser.add_argument("--json", action="store_true", default=False, help="Machine readable summary.") parser.description = """ Look into more details of an mptcp connection """ parser.epilog = """ summary_extended examples/client_2_redundant.pcapng 0 examples/server_2_redundant.pcapng 0 """ @with_argparser(parser) def do_summary_extended(self, args): """ Summarize contributions of each subflow For now it is naive, does not look at retransmissions ? """ print("%r" % args) df_pcap1 = load_into_pandas(args.pcap1, self.tshark_config) destinations = args.destinations # or list(mp.ConnectionRoles) for destination in destinations: success, basic_stats = stats.mptcp_compute_throughput( # TODO here we should load the pcap before hand ! 
df_pcap1, args.pcap1stream, args.destinations ) if success is not True: self.perror("Error %s" % basic_stats) # TODO already be done # TODO we should have the parser do it df = load_merged_streams_into_pandas( args.pcap1, args.pcap2, args.pcap1stream, args.pcap2stream, True, self.tshark_config ) success, ret = stats.mptcp_compute_throughput_extended( df, stats=basic_stats, destination=destination ) if success is not True: self.perror("Throughput computation failed:") self.perror(ret) return if args.json: import json # TODO use self.poutput # or use a stream, it must just be testable val = json.dumps(ret, ensure_ascii=False) self.poutput(val) return # TODO display goodput/ratio total_transferred = ret["mptcp_throughput_bytes"] # (ret["mptcpstreamid"], ret["mptcp_bytes"])) msg = "mptcpstream {mptcpstreamid} throughput/goodput {mptcp_throughput_bytes}/{mptcp_goodput_bytes}" self.poutput(msg.format(**ret)) for sf in ret["subflow_stats"]: subflow_load = sf_bytes/ret["mptcp_bytes"] msg = """ tcpstream {tcpstreamid} analysis: - throughput: transferred {} out of {mptcp_throughput_bytes}, accounting for {.2f:throughput_contribution}% - goodput: transferred {mptcp_goodput} out of {mptcp_goodput_bytes}, accounting for {.2f:goodput_contribution}% """ self.poutput( msg.format( mptcp_tput=ret["mptcp_throughput_bytes"], **ret, **sf )) # @is_loaded @with_category(CAT_TCP) def do_list_tcp_connections(self, *args): """ List tcp connections via their ids (tcp.stream) """ streams = self.data.groupby("tcpstream") self.poutput('%d tcp connection(s)' % len(streams)) for tcpstream, group in streams: # self.list_subflows(mptcpstream) self.data.tcp.connection(tcpstream) con = TcpConnection.build_from_dataframe(self.data, tcpstream) self.poutput(con) self.poutput("\n") @is_loaded @with_category(CAT_MPTCP) def do_list_mptcp_connections(self, *args): """ List mptcp connections via their ids (mptcp.stream) """ streams = self.data.groupby("mptcpstream") self.poutput('%d mptcp connection(s)' % len(streams)) for mptcpstream, group in streams: self.list_subflows(mptcpstream) self.poutput("\n") # def generate_namespace(self) -> argparse.Namespace: # myNamespace = Namespace() # myNamespace.toto = self.data # parser = argparse_completer.ACArgumentParser( # description=""" # Mptcpanalyzer filters pcaps to keep only tcp packets. # This may explain why printed packet ids dont map # """ # ) load_pcap1 = parser.add_argument("imported_pcap", type=str, help="Capture file to cleanup.") setattr(load_pcap1, argparse_completer.ACTION_ARG_CHOICES, ('path_complete', )) parser.add_argument("exported_pcap", type=str, help="Cleaned up file") @with_argparser(parser) def do_clean_pcap(self, args): """ toto """ self.poutput("Exporting a clean version of {} in {}".format( args.imported_pcap, args.exported_pcap)) self.tshark_config.filter_pcap(args.imported_pcap, args.exported_pcap) # TODO it should be able to print for both parser = gen_bicap_parser("tcp", True) parser.description = """This function tries merges a tcp stream from 2 pcaps in an attempt to print owds. 
See map_tcp_connection first maybe.""" # TODO add a limit of packets or use ppaged() # parser.add_argument("protocol", action="store", choices=["mptcp", "tcp"], # help="tcp.stream id visible in wireshark") # give a choice "hash" / "stochastic" parser.add_argument( '-v', '--verbose', dest="verbose", default=False, action="store_true", help="how to display each connection" ) parser.add_argument("--csv", action="store", default=None, help="Machine readable summary.") parser.epilog = ''' You can run for example: map_tcp_connection examples/client_1_tcp_only.pcap examples/server_1_tcp_only.pcap 0 ''' @with_argparser(parser) @experimental def do_print_owds(self, args): """ TODO options to diagnose errors: - print unmapped packets - print abnormal OWDs (negative etc) """ self.poutput("Loading merged streams") df = args._dataframes["pcap"] result = df print(result.head(10)) # print("%r" % result) # print(result[mpdata.TCP_DEBUG_FIELDS].head(20)) # for key, subdf in df.groupby(_sender("tcpdest")) # todo sort by chronological order ? # for row in df.itertuples(); # self.ppaged() if args.csv: self.pfeedback("Exporting to csv") with open(args.csv, "w") as fd: df.to_csv( fd, sep="|", index=False, header=True, ) # print unmapped packets print("print_owds finished") # print("TODO display before doing plots") # TODO display errors print(result[["owd"]].head(20)) # print(result.columns) mpdata.print_weird_owds(result) # print(result[["owd"]].head(20)) def do_check_tshark(self, line): """ Check your tshark/wireshark version """ self.poutput("TODO implement automated check") self.poutput("you need a wireshark > 19 June 2018 with commit dac91db65e756a3198616da8cca11d66a5db6db7...") parser = gen_bicap_parser("mptcp", dest=True) parser.description = """ Qualify reinjections of the connection. You might want to run map_mptcp_connection first to find out what map to which """ parser.add_argument("--failed", action="store_true", default=False, help="List failed reinjections too.") parser.add_argument("--csv", action="store_true", default=False, help="Machine readable summary.") parser.add_argument("--debug", action="store_true", default=False, help="Explain decision for every reinjection.") @with_argparser_and_unknown_args(parser) @with_category(CAT_MPTCP) @experimental def do_qualify_reinjections(self, args, unknown): """ test with: mp qualify_reinjections 0 TODO move the code into a proper function """ # TODO this should be done automatically right ? df_all = load_merged_streams_into_pandas( args.pcap1, args.pcap2, args.pcap1stream, args.pcap2stream, mptcp=True, tshark_config=self.tshark_config ) # adds a redundant column df = classify_reinjections(df_all) # print(df_all[ pd.notnull(df_all[_sender("reinjection_of")])] [ # _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) + # _receiver(["packetid", "reltime"]) # ]) # to help debug # df.to_excel("temp.xls") def _print_reinjection_comparison(original_packet, reinj, ): """ Expects tuples of original and reinjection packets """ # original_packet = sender_df.loc[ sender_df.packetid == initial_packetid, ].iloc[0] row = reinj reinjection_packetid = getattr(row, _sender("packetid")), reinjection_start = getattr(row, _sender("abstime")), reinjection_arrival = getattr(row, _receiver("abstime")), original_start = original_packet[_sender("abstime")], original_arrival = original_packet[_receiver("abstime")] if reinj.redundant == False: # print(original_packet["packetid"]) msg = ("packet {pktid} is a successful reinjection of {initial_packetid}." 
" It arrived at {reinjection_arrival} to compare with {original_arrival}" " while being transmitted at {reinjection_start} to compare with " "{original_start}, i.e., {reinj_delta} before") # TODO use assert instead if getattr(row, _receiver("abstime")) > original_packet[ _receiver("abstime") ]: print("BUG: this is not a valid reinjection after all ?") elif args.failed: # only de msg = "packet {pktid} is a failed reinjection of {initial_packetid}." else: return msg = msg.format( pktid = reinjection_packetid, initial_packetid = initial_packetid, reinjection_start = reinjection_start, reinjection_arrival = reinjection_arrival, original_start = original_start, original_arrival = original_arrival, reinj_delta = reinj.reinj_delta, ) self.poutput(msg) # with pd.option_context('display.max_rows', None, 'display.max_columns', 300): # print(reinjected_packets[["packetid", "packetid_receiver", *_receiver(["reinjected_in", "reinjection_of"])]].head()) # TODO filter depending on --failed and --destinations if args.csv: self.pfeedback("Exporting to csv") # keep redundant # only export a subset ? # for # df1 = df[['a','d']] # smalldf = df.drop() columns = _sender(["abstime", "reinjection_of", "reinjected_in", "packetid", "tcpstream", "mptcpstream", "tcpdest", "mptcpdest"]) columns += _receiver(["abstime", "packetid"]) columns += ["redundant", "owd", "reinj_delta"] df[columns].to_csv( self.stdout, sep="|", index=False, header=True, ) return for destination in ConnectionRoles: if args.destinations and destination not in args.destinations: log.debug("ignoring destination %s " % destination) continue self.poutput("looking for reinjections towards mptcp %s" % destination) sender_df = df[df.mptcpdest == destination] log.debug("%d reinjections in that direction" % (len(sender_df), )) # TODO we now need to display successful reinjections reinjections = sender_df[pd.notnull(sender_df[_sender("reinjection_of")])] successful_reinjections = reinjections[reinjections.redundant == False] self.poutput("%d successful reinjections" % len(successful_reinjections)) # print(successful_reinjections[ _sender(["packetid", "reinjection_of"]) + _receiver(["packetid"]) ]) for row in reinjections.itertuples(index=False): # loc ? this is an array, sort it and take the first one ? initial_packetid = row.reinjection_of[0] # print("initial_packetid = %r %s" % (initial_packetid, type(initial_packetid))) original_packet = df_all.loc[df_all.packetid == initial_packetid].iloc[0] # print("original packet = %r %s" % (original_packet, type(original_packet))) # if row.redundant == True and args.failed: # _print_failed_reinjection(original_packet, row, debug=args.debug) _print_reinjection_comparison(original_packet, row, ) parser = MpTcpAnalyzerParser( description="Listing reinjections of the connection" ) parser.add_argument("mptcpstream", type=MpTcpStreamId, help="mptcp.stream id") parser.add_argument("--summary", action="store_true", default=False, help="Just count reinjections") @is_loaded @with_category(CAT_MPTCP) @with_argparser_test(parser) def do_list_reinjections(self, args): """ List reinjections We want to be able to distinguish between good and bad reinjections (like good and bad RTOs). 
        A good reinjection is a reinjection for which either:
        - the segment arrives first at the receiver
        - the cumulative DACK arrives at the sender sooner thanks to that reinjection

        To do that, we need to take into account latencies
        """
        df = self.data
        df = self.data[df.mptcpstream == args.mptcpstream]
        if df.empty:
            self.poutput("No packet with mptcp.stream == %d" % args.mptcpstream)
            return

        # known : Set[int] = set()
        # print(df.columns)
        # TODO move to outer function ?
        # TODO use ppaged
        reinjections = df.dropna(axis=0, subset=["reinjection_of"])
        total_nb_reinjections = 0
        output = ""
        for row in reinjections.itertuples():
            # if row.packetid not in known:
            # ','.join(map(str, row.reinjection_of))
            output += ("packetid=%d (tcp.stream %d) is a reinjection of %d packet(s): "
                       % (row.packetid, row.tcpstream, len(row.reinjection_of)))

            # print("reinjOf=", row.reinjection_of)
            # assuming packetid is the index
            for pktId in row.reinjection_of:
                # print("packetId %d" % pktId)
                # entry = self.data.iloc[pktId - 1]
                entry = self.data.loc[pktId]
                # entry = df.loc[df.packetid == pktId]
                # print("packetId %r" % entry)
                output += ("- packet %d (tcp.stream %d)" % (entry.packetid, entry.tcpstream))
            # known.update([row.packetid] + row.reinjection)

        self.ppaged(output)
        # reinjections = df["reinjection_of"].dropna(axis=0, )
        # print("number of reinjections of ")

    parser = MpTcpAnalyzerParser(
        description="Loads a pcap to analyze"
    )
    parser.add_argument("input_file", action=LoadSinglePcap,
                        help="Either a pcap or a csv file. "
                             "When a pcap is passed, mptcpanalyzer looks for a cached csv, "
                             "else it generates a csv from the pcap "
                             "with the external tshark program.")

    @with_argparser(parser)
    def do_load_pcap(self, args):
        """
        Load the file as the current one
        """
        print(args)
        # args = shlex.split(args)
        # print(args)
        # parser = self.do_load_pcap.argparser
        # print(parser)
        # args = parser.parse_args(args)

        self.poutput("Loading %s" % args.input_file)
        self.data = args._dataframes["input_file"]
        self.prompt = "%s> " % os.path.basename(args.input_file)

    def do_list_available_plots(self, args):
        """
        Print available plots. Mostly for debug, you should use 'plot'.
        """
        plot_names = self.list_available_plots()
        print(plot_names)

    def list_available_plots(self):
        return self.plot_mgr.names()

    def pcap_loaded(self):
        return isinstance(self.data, pd.DataFrame)

    plot_parser = MpTcpAnalyzerParser(prog='plot', description='Generate plots')
    # TODO complete the help
    # plot throughput tcp examples/client_2_redundant.pcapng 0 examples/server_2_redundant.pcapng 0 3" "quit"
    plot_parser.epilog = '''
    You can run for example:
        plot owd tcp examples/client_2_filtered.pcapng 0 examples/server_2_filtered.pcapng 0 --display
    '''

    @with_argparser_and_unknown_args(plot_parser)
    def do_plot(self, args, unknown):
        """
        Entry point shared by the plot subcommands.

        * Loads required dataframes when necessary
        """
        # Allocate plot object
        plotter = self.plot_mgr[args.plot_type].obj

        # TODO reparse with the definitive parser ?
        # 'converts' the namespace to a dict
        dargs = vars(args)
        print("%s" % dargs)
        dataframes = dargs.pop("_dataframes")

        # workaround argparse limitations to set as default both directions
        # TODO replace that with an action ?
        # destinations = dargs.get("destinations", list(mp.ConnectionRoles))
        # dargs.update(destinations=destinations)
        # log.debug("Selecting destinations %s" % (destinations,))
        # dataframes = plotter.preprocess(**dargs)
        print("%s" % args)
        # dataframes = args._dataframes.values()
        assert dataframes is not None, "Preprocess must return a list"
        # pass unknown_args too ?
        result = plotter.run(**dataframes, **dargs)

        # to save to file for instance
        plotter.postprocess(result, **dargs)

    @with_category(CAT_GENERAL)
    def do_clean_cache(self, line):
        """
        mptcpanalyzer saves pcap-to-csv converted files in a cache folder
        (most likely $XDG_CACHE_HOME/mptcpanalyzer). This command clears the cache.
        """
        cache = mp.get_cache()
        self.poutput("Cleaning cache [%s]" % cache.folder)
        cache.clean()

    def do_dump(self, args):
        """
        Dumps content of the csv file, with columns selected by the user.
        Mostly used for debug
        """
        parser = argparse.ArgumentParser(description="dumps csv content")
        parser.add_argument('columns', default=["ipsrc", "ipdst"],
                            choices=self.data.columns, nargs="*")
        parser.add_argument('-n', default=10, action="store",
                            help="Number of results to display")
        args = parser.parse_args(shlex.split(args))
        print(self.data[args.columns])

    def complete_dump(self, text, line, begidx, endidx):
        """
        Should return a list of possibilities
        """
        l = [x for x in self.data.columns if x.startswith(text)]
        return l

    # not needed in cmd2
    def do_quit(self, *args):
        """
        Quit/exit program
        """
        print("Thanks for flying with mptcpanalyzer.")
        return True

    def do_EOF(self, line):
        """
        Keep it to be able to exit with CTRL+D
        """
        return True

    def preloop(self):
        """
        Executed once when cmdloop is called
        """
        histfile = self.config["mptcpanalyzer"]['history']
        if readline and os.path.exists(histfile):
            log.debug("Loading history from %s" % histfile)
            readline.read_history_file(histfile)

    def postloop(self):
        histfile = self.config["mptcpanalyzer"]['history']
        if readline:
            log.debug("Saving history to %s" % histfile)
            # cap the saved history (arbitrary default, adjust as needed)
            histfile_size = 1000
            readline.set_history_length(histfile_size)
            readline.write_history_file(histfile)
def load_into_pandas(
    input_file: str,
    config: TsharkConfig,
    # clock_offset: int = 0,
    **extra
) -> pd.DataFrame:
    """
    load mptcp data into pandas

    Args:
        input_file: pcap filename
        config: Hard, keep changing
        load_cb: callback to use if cache not available
        extra: extra arguments to forward to load_cb
    """
    log.debug("Asked to load simple pcap %s" % input_file)

    filename = getrealpath(input_file)
    cache = mp.get_cache()

    tshark_dtypes = {
        fullname: field.type for fullname, field in config.fields.items()
    }
    artifical_dtypes = {
        name: field.type for name, field in per_pcap_artificial_fields.items()
    }
    dtypes = dict(tshark_dtypes, **artifical_dtypes)

    # TODO add per_pcap_artificial_fields hash
    pseudohash = hash(config) + hash(frozenset(dtypes.items()))
    uid = cache.cacheuid(
        '',  # prefix (might want to shorten it a bit)
        [filename],  # dependencies
        str(pseudohash) + '.csv')

    is_cache_valid, csv_filename = cache.get(uid)
    logging.debug("cache validity=%d cachename: %s" % (is_cache_valid, csv_filename))
    if not is_cache_valid:
        logging.info("Cache invalid .. Converting %s " % (filename, ))
        with tempfile.NamedTemporaryFile(mode='w+', prefix="mptcpanalyzer-",
                                         delete=False) as out:
            tshark_fields = [field.fullname for _, field in config.fields.items()]
            retcode, stderr = config.export_to_csv(filename, out, tshark_fields)
            log.info("exporter exited with code=%d", retcode)
            if retcode == 0:
                out.close()
                cache.put(uid, out.name)
            else:
                raise Exception(stderr)

    log.debug("Loading a csv file %s" % csv_filename)

    try:
        with open(csv_filename) as fd:
            converters = {
                f.fullname: f.converter for _, f in config.fields.items() if f.converter
            }
            converters.update({
                name: f.converter
                for name, f in per_pcap_artificial_fields.items() if f.converter
            })
            # print("converters\n", converters)

            dtypes = {
                field.fullname: field.type for _, field in config.fields.items()
            }
            log.debug("Dtypes before load: %s" % dtypes)
            data = pd.read_csv(
                fd,
                comment='#',
                sep=config.delimiter,
                dtype=dtypes,
                # seems like for now we can't change the default representation apart
                # from converting the column to a string !!!
                # https://stackoverflow.com/questions/46930201/pandas-to-datetime-is-not-formatting-the-datetime-value-in-the-desired-format
                # date_parser=_convert_timestamp,
                # parse_dates=["frame.time_epoch"],
                converters=converters,
                # float_precision="high",  # might be necessary
                # nrows=10,  # useful for debugging purpose
            )
            # 1 to 1 -> can't add new columns
            data.rename(inplace=True, columns={
                f.fullname: name for name, f in config.fields.items()
            })
            # add new columns
            data = data.assign(
                **{name: np.nan for name in per_pcap_artificial_fields.keys()})
            column_names = set(data.columns)
            # print("column_names", column_names)
            data = data.astype(dtype=artifical_dtypes, copy=False)

            # we want the packetid column to survive merges/dataframe transformations
            # so keep it as a column
            # TODO remove ? let other functions do it ?
            data.set_index("packetid", drop=False, inplace=True)
            log.debug("Column names: %s" % data.columns)

            hashing_fields = [
                name for name, field in config.fields.items() if field.hash
            ]
            log.debug("Hashing over fields %s" % hashing_fields)

            # won't work because it passes a Serie (mutable)
            # TODO generate hashing fields from Fields
            temp = pd.DataFrame(data, columns=hashing_fields)
            data["hash"] = temp.apply(lambda x: hash(tuple(x)), axis=1)

    except Exception as e:
        logging.error("You may need to filter your pcap further to keep only mptcp packets")
        raise e

    log.info("Finished loading dataframe for %s. Size=%d" % (input_file, len(data)))

    # print("FINAL_DTYPES")
    log.debug(data.dtypes)
    # print(data.head(5))
    return data
def load_into_pandas(
    input_file: str,
    config: TsharkConfig,
    **extra
) -> pd.DataFrame:
    """
    load mptcp data into pandas

    Args:
        input_file: pcap filename
        config: Hard, keep changing
        load_cb: callback to use if cache not available
        extra: extra arguments to forward to load_cb
    """
    log.debug("Asked to load simple pcap %s", input_file)

    filename = getrealpath(input_file)
    cache = mp.get_cache()

    # {fullname: field.type for fullname, field in config.fields.items()}
    tshark_dtypes = get_dtypes(config.fields)
    artifical_dtypes = get_dtypes(per_pcap_artificial_fields)
    dtypes = dict(tshark_dtypes, **artifical_dtypes)

    # TODO add per_pcap_artificial_fields hash
    pseudohash = hash(config) + hash(frozenset(dtypes.items()))
    uid = cache.cacheuid(
        '',  # prefix (might want to shorten it a bit)
        [filename],  # dependencies
        str(pseudohash) + '.csv')

    # print(config.fields)
    is_cache_valid, csv_filename = cache.get(uid)
    log.debug("cache validity=%d cachename: %s", is_cache_valid, csv_filename)
    if not is_cache_valid:
        log.info("Cache invalid .. Converting %s", filename)
        with tempfile.NamedTemporaryFile(mode='w+', prefix="mptcpanalyzer-",
                                         delete=False) as out:
            # tshark_fields = [field.fullname for _, field in config.fields.items()]
            tshark_fields = {
                field.fullname: name for name, field in config.fields.items()
            }
            retcode, _, stderr = config.export_to_csv(filename, out, tshark_fields)
            log.info("exporter exited with code=%d", retcode)
            if retcode == 0:
                out.close()
                cache.put(uid, out.name)
            else:
                raise Exception(stderr)

    log.debug("Loading a csv file %s", csv_filename)

    try:
        with open(csv_filename) as fd:
            # gets a list of fields to convert
            # we don't want to modify the passed parameter
            fields = config.fields.copy()
            fields.update(per_pcap_artificial_fields)

            converters = get_converters(config.fields)

            # builds a list of fields to be parsed as dates
            # (since converter/types don't seem to be great)
            date_cols = get_date_cols(config.fields)

            dtypes = get_dtypes(config.fields)

            log.log(mp.TRACE, "Dtypes before load:\n%s", pp.pformat(dtypes))
            log.log(mp.TRACE, "Converters before load:\n%s", pp.pformat(converters))
            log.log(mp.TRACE, "Fields to load as times:\n%s", pp.pformat(date_cols))

            # keep this commented code to help diagnosing pandas problems
            # from mptcpanalyzer.debug import read_csv_debug
            fields = [f.fullname for _, f in config.fields.items()]
            # fields = ["tcp.options.mptcp.sendkey"]
            # data = mptcpanalyzer.debug.read_csv_debug(fields,
            data = pd.read_csv(
                fd,
                comment='#',
                sep=config.delimiter,
                dtype=dtypes,
                date_parser=date_converter,
                parse_dates=date_cols,
                # ideally DON'T use converters but pandas bugs...
                converters=converters,
                # float_precision="high",  # might be necessary
            )
            log.debug("Finished loading CSV file")

            # 1 to 1 -> can't add new columns
            data.rename(inplace=True, columns={
                f.fullname: name for name, f in config.fields.items()
            })
            # add new columns
            data = data.assign(
                **{name: np.nan for name in per_pcap_artificial_fields.keys()})
            column_names = set(data.columns)
            data = data.astype(dtype=artifical_dtypes, copy=False)

            # we want the packetid column to survive merges/dataframe transformations
            # so keep it as a column
            # TODO remove ? let other functions do it ?
            data.set_index("packetid", drop=False, inplace=True)

            hashing_fields = [
                name for name, field in config.fields.items() if field.hash
            ]
            log.debug("Hashing over fields %s", hashing_fields)

            # won't work because it passes a Serie (mutable)
            # TODO generate hashing fields from Fields
            # TODO reference stack overflow problem
            temp = pd.DataFrame(data, columns=hashing_fields)
            data["hash"] = temp.apply(lambda x: hash(tuple(x)), axis=1)

    except TypeError as e:
        log.error("You may need to filter your pcap further to keep only mptcp packets")
        raise e
    except Exception as e:
        log.error("You may need to filter your pcap further to keep only mptcp packets")
        raise e

    log.info("Finished loading dataframe for %s. Size=%d", input_file, len(data))

    return data