def plot_mptcp(self, df, fig, fields, pcap_destinations, **kwargs):
    axes = fig.gca()

    fields = ["tcpdest", "tcpstream", "mptcpdest"]
    destinations = pcap_destinations
    label_fmt = "Stream {tcpstream}"
    if len(destinations) > 1:
        label_fmt = label_fmt + " towards {dest}"

    print("pcap", pcap_destinations)
    for idx, subdf in df.groupby(_sender(fields), sort=False):
        tcpdest, tcpstream, mptcpdest = idx
        if mptcpdest not in destinations:
            log.debug("Ignoring destination %s", mptcpdest)
            continue

        # print("OWD")
        # with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        #     # more options can be specified also
        #     # print(df)
        #     print(df.owd)
        # "Subflow %d towards tcp %s" % (tcpstream, tcpdest),  # seems to be a bug

        pplot = subdf.plot(
            # gca = get current axes (Axes), create one if necessary
            ax=axes,
            legend=True,
            # TODO should depend on the destination
            x=_sender("abstime"),
            y="owd",
            label=label_fmt.format(
                tcpstream=tcpstream,
                dest=mp.ConnectionRoles(mptcpdest).to_string())
        )
def plot_tcp(self, df, fig, fields, **kwargs):
    axes = fig.gca()

    # fields = ["tcpdest", "tcpstream"]
    # ConnectionRoles doesn't support '<', so the groupby must not sort
    for idx, subdf in df.groupby(_sender(fields), sort=False):
        print("t= %r" % (idx,))
        print("len= %r" % len(subdf))
        tcpdest, tcpstream = idx

        # if tcpdest not in kwargs["destinations"]:
        #     log.debug("skipping TCP dest %s" % tcpdest)
        #     continue
        # print("tcpdest= %r" % tcpdest)
        # print("=== less than 0\n", subdf[subdf.owd < 0.050])
        # print("=== tail\n", subdf.tail())
        # df = debug_convert(df)

        pplot = subdf.plot.line(
            # gca = get current axes (Axes), create one if necessary
            ax=axes,
            legend=True,
            # TODO should depend on the destination
            x=_sender("abstime"),
            y="owd",
            label="towards %s" % tcpdest,  # seems to be a bug
            # grid=True,
            # xticks=tcpstreams["reltime"],
            # rot=45,  # rotation for ticks
            # lw=3,
        )
def plot_mptcp(self, df, fig, fields, **kwargs):
    axes = fig.gca()

    fields = ["tcpdest", "tcpstream", "mptcpdest"]
    for idx, subdf in df.groupby(_sender(fields), sort=False):
        print("t= %r" % (idx,))
        print("len= %r" % len(subdf))
        tcpdest, tcpstream, mptcpdest = idx

        # if mptcpdest not in kwargs["destinations"]:
        #     log.debug("skipping MPTCP dest %s" % mptcpdest)
        #     continue
        # df = debug_convert(df)

        pplot = subdf.plot(
            # gca = get current axes (Axes), create one if necessary
            ax=axes,
            legend=True,
            # TODO should depend on the destination
            x=_sender("abstime"),
            y="owd",
            label="Subflow %d towards tcp %s" % (tcpstream, tcpdest),  # seems to be a bug
            # grid=True,
            # xticks=tcpstreams["reltime"],
            # rot=45,  # rotation for ticks
            # lw=3,
        )
def plot_tcp(self, df, fig, fields, **kwargs):
    axes = fig.gca()

    # fields = ["tcpdest", "tcpstream"]
    label_fmt = "Stream {tcpstream} towards {tcpdest}"
    for idx, subdf in df.groupby(_sender(fields), sort=False):
        # print("t= %r" % (idx,))
        print("len= %r" % len(subdf))
        tcpdest, tcpstream = idx
        # print("tcpdest= %r" % tcpdest)
        # print("=== less than 0\n", subdf[subdf.owd < 0.050])
        # print("=== tail\n", subdf.tail())

        debug_dataframe(subdf, "subdf stream %d destination %r" % (tcpstream, tcpdest))

        pplot = subdf.plot.line(
            # gca = get current axes (Axes), create one if necessary
            ax=axes,
            legend=True,
            x=_sender("abstime"),
            y="owd",
            label=label_fmt.format(tcpstream=tcpstream, tcpdest=tcpdest),
        )
def plot(self, pcap, pcapstream, **kwargs):
    """
    getcallargs
    """
    df = pcap
    # Need to compute reinjections
    df.mptcp.fill_dest(pcapstream)
    df = classify_reinjections(df)

    fig = plt.figure()
    # log.info("%d streams in the MPTCP flow" % len(tcpstreams))
    log.info("Plotting reinjections")
    axes = fig.gca()

    fields = ["tcpstream", "mptcpdest"]
    fig.suptitle(
        "Reinjections CDF",
        verticalalignment="top",
    )

    # destinations have not been set yet at this point!
    debug_dataframe(df, "DATASET HEAD")
    for idx, subdf in df.groupby(_sender(fields), sort=False):
        log.info("len(subdf)= %d" % len(subdf))
        # TODO check destination
        # TODO skip if no reinjection
        debug_dataframe(subdf, "DATASET HEAD")

        # for idx, (streamid, ds) in enumerate(tcpstreams):
        # subdf[_sender("reinj_delta")].plot.line(
        #     x="abstime",
        #     ax=axes,
        #     # use_index=False,
        #     legend=False,
        #     grid=True,
        # )
        subdf[_sender("reinj_delta")].hist(cumulative=True, density=True, bins=100)

    axes.set_xlabel("Time (s)")
    axes.set_ylabel("Reinjection delay")

    handles, labels = axes.get_legend_handles_labels()

    # Generate "subflow X" labels
    # location: 3 => bottom left, 4 => bottom right
    axes.legend(handles, ["Subflow %d" % x for x, _ in enumerate(labels)], loc=4)

    return fig
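
# A minimal, self-contained sketch of the CDF idiom used in the plot above:
# a cumulative, normalized histogram. The delay values here are synthetic;
# the real plot feeds the reinj_delta values computed by classify_reinjections().
def _demo_reinjection_cdf():
    import numpy as np
    import matplotlib.pyplot as plt

    delays = np.random.exponential(scale=0.05, size=200)  # fake reinjection delays (s)
    fig, ax = plt.subplots()
    # cumulative=True + density=True turns the histogram into an empirical CDF
    ax.hist(delays, bins=100, cumulative=True, density=True)
    ax.set_xlabel("Reinjection delay (s)")
    ax.set_ylabel("CDF")
    return fig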
def plot(self, pcap, protocol, **kwargs):
    """
    Ideally it should be mapped automatically.
    For now this plots only one direction, but a wrapper could plot the
    forward OWDs, then the backward OWDs.

    Disclaimer: keep in mind this assumes a perfect synchronization between
    nodes, i.e., it relies on the pcap absolute time field. While this holds
    in discrete time simulators such as ns-3, it is rarely the case between
    real hosts.
    """
    fig = plt.figure()
    axes = fig.gca()

    res = pcap
    res[_sender("abstime")] = pd.to_datetime(res[_sender("abstime")], unit="s")

    # TODO here we should rewrite
    debug_fields = _sender(TCP_DEBUG_FIELDS) + _receiver(TCP_DEBUG_FIELDS) + ["owd"]
    print("columns", pcap)
    print("columns", res.columns)
    print("info", res.info())
    print(res.loc[res._merge == "both", debug_fields])

    df = res
    print("STARTING LOOP")
    print("DESTINATION=%r" % kwargs.get("pcapdestinations", []))
    # df = df[df.owd > 0.010]
    fields = ["tcpdest", "tcpstream"]

    # if True:
    if protocol == "mptcp":
        self.plot_mptcp(df, fig, fields, **kwargs)
    else:
        self.plot_tcp(df, fig, fields, **kwargs)

    # TODO add units
    axes.set_xlabel("Time (s)")
    axes.set_ylabel("One Way Delay (s)")

    self.title = "One Way Delays for {} streams {} <-> {} {dest}".format(
        protocol,
        kwargs.get("pcap1stream"),
        kwargs.get("pcap2stream"),
        dest="")

    return fig
def plot(self, pcap, protocol, **kwargs):
    """
    Ideally it should be mapped automatically.
    For now this plots only one direction, but a wrapper could plot the
    forward OWDs, then the backward OWDs.

    Disclaimer: keep in mind this assumes a perfect synchronization between
    nodes, i.e., it relies on the pcap absolute time field. While this holds
    in discrete time simulators such as ns-3, it is rarely the case between
    real hosts.
    """
    fig = plt.figure()
    axes = fig.gca()

    res = pcap
    destinations = kwargs.get("pcap_destinations")

    # should already be done
    # res[_sender("abstime")] = pd.to_datetime(res[_sender("abstime")], unit="s")

    # TODO here we should rewrite
    debug_fields = _sender(TCP_DEBUG_FIELDS) + _receiver(TCP_DEBUG_FIELDS) + ["owd"]
    # print("columns", pcap)
    debug_dataframe(res, "owd dataframe")
    # print(res.loc[res.merge_status == "both", debug_fields])

    df = res
    # print("DESTINATION=%r" % destinations)
    # df = df[df.owd > 0.010]
    fields = ["tcpdest", "tcpstream"]

    # TODO: use Protocol.MPTCP
    if protocol == "mptcp":
        self.plot_mptcp(df, fig, fields, **kwargs)
    elif protocol == "tcp":
        self.plot_tcp(df, fig, fields, **kwargs)
    else:
        raise Exception("Unsupported protocol %r" % protocol)

    self.title_fmt = "One Way Delays for {protocol}"
    if len(destinations) == 1:
        self.title_fmt = self.title_fmt + " towards {dest}"

    self.title_fmt = self.title_fmt.format(
        protocol=protocol,
        # kwargs.get("pcap1stream"),
        # kwargs.get("pcap2stream"),
        dest=destinations[0].to_string() if destinations else "",
    )
    return fig
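
# A toy illustration of the synchronization assumption discussed in the
# docstring above: with perfectly aligned clocks, the OWD is simply the
# receiver timestamp minus the sender timestamp. The column names below are
# hypothetical stand-ins for the suffixed columns produced by
# _sender()/_receiver(); the sample timestamps are invented.
def _demo_owd_computation():
    import pandas as pd

    df = pd.DataFrame({
        "abstime_sender":   [1.000, 1.010, 1.025],
        "abstime_receiver": [1.040, 1.052, 1.064],
    })
    # any clock offset between the two hosts shifts these values directly,
    # which is why load_merged_streams_into_pandas exposes clock_offset1/2
    df["owd"] = df["abstime_receiver"] - df["abstime_sender"]
    return df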
def plot(self, df, pcapstream, field, **kwargs):
    """
    getcallargs
    """
    fig = plt.figure()
    # tcpstreams = dat.groupby('tcpstream')
    # log.info("%d streams in the MPTCP flow" % len(tcpstreams))
    log.info("Plotting field %s" % field)
    log.info("len(df)= %d" % len(df))
    axes = fig.gca()
    fields = ["tcpstream", "mptcpdest"]
    fig.suptitle(
        "Plot of subflow %s" % field,
        verticalalignment="top",
        # x=0.1, y=.95,
    )

    # destinations have not been set yet at this point!
    print("DATASET HEAD")
    print(df.head())

    for idx, subdf in df.groupby(_sender(fields), sort=False):
        log.info("len(subdf)= %d" % len(subdf))
        # TODO check destination
        # for idx, (streamid, ds) in enumerate(tcpstreams):
        subdf[field].plot.line(
            x="abstime",
            ax=axes,
            # use_index=False,
            legend=False,
            grid=True,
        )

    axes.set_xlabel("Time (s)")
    axes.set_ylabel(self._attributes[field])

    handles, labels = axes.get_legend_handles_labels()

    # Generate "subflow X" labels
    # location: 3 => bottom left, 4 => bottom right
    axes.legend(
        handles,
        ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
        loc=4)

    return fig
def mptcp_compute_throughput(
    rawdf,
    mptcpstreamid,
    destination: ConnectionRoles
    # mptcpstreamid2=None
) -> Tuple[bool, Any]:
    """
    Very raw computation: subtract the lowest DSN seen from the highest one
    over the elapsed time.

    Returns:
        a tuple (True/False, dict)
    """
    df = rawdf[rawdf.mptcpstream == mptcpstreamid]
    if df.empty:
        return False, "No packet with mptcp.stream == %d" % mptcpstreamid

    con = MpTcpConnection.build_from_dataframe(df, mptcpstreamid)
    q = con.generate_direction_query(destination)
    df = unidirectional_df = df.query(q)

    dsn_min = df.dss_dsn.min()
    dsn_max = df.dss_dsn.max()
    total_transferred = dsn_max - dsn_min
    d = df.groupby(_sender('tcpstream'))
    subflow_stats: List[Any] = []
    for tcpstream, group in d:
        # TODO drop retransmitted segments
        subflow_load = group.drop_duplicates(subset="dss_dsn").dss_length.sum()
        subflow_load = subflow_load if not math.isnan(subflow_load) else 0
        subflow_stats.append({
            'tcpstreamid': tcpstream,
            'throughput_bytes': int(subflow_load)
        })

    return True, {
        'mptcpstreamid': mptcpstreamid,
        # TODO append bytes
        'mptcp_goodput_bytes': total_transferred,
        'mptcp_throughput_bytes': sum(map(lambda x: x['throughput_bytes'], subflow_stats)),
        'subflow_stats': subflow_stats,
    }
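
# A worked example of the raw computation above, with made-up numbers: if the
# lowest DSN observed in one direction is 1000 and the highest is 51000, the
# MPTCP-level goodput is 50000 bytes, while each subflow's contribution sums
# dss_length over unique dss_dsn values so the same data is not counted twice.
# The sum of subflow loads can exceed the goodput when data was reinjected.
def _demo_raw_goodput():
    dsn_min, dsn_max = 1_000, 51_000
    mptcp_goodput_bytes = dsn_max - dsn_min      # 50_000 bytes carried end to end
    subflow_loads = [30_000, 22_000]             # hypothetical per-subflow sums of dss_length
    mptcp_throughput_bytes = sum(subflow_loads)  # 52_000: the extra 2_000 hints at reinjections
    return mptcp_goodput_bytes, mptcp_throughput_bytes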
def map_mptcp_connection(rawdf2: pd.DataFrame, main: MpTcpConnection) -> List[MpTcpMapping]:
    """
    warn: do not trust the results yet, WIP!

    Returns:
        List of (connection, score), best mapping first.

    This function tries to map an mptcp.stream from one dataframe (aka pcap)
    to an mptcp.stream in another dataframe. For now it only looks at IP-level
    information, without considering the subflow mapping score.
    """
    log.warning("mapping between datasets is not considered trustable yet")
    results: List[MpTcpMapping] = []

    for mptcpstream2 in rawdf2[_sender("mptcpstream")].dropna().unique():
        other = MpTcpConnection.build_from_dataframe(rawdf2, mptcpstream2)
        mapping = map_mptcp_connection_from_known_streams(main, other)
        results.append(mapping)

    results.sort(key=lambda x: x.score, reverse=True)
    return results
def map_mptcp_connection(rawdf2: pd.DataFrame, main: MpTcpConnection) -> List[MpTcpMapping]:
    """
    warn: do not trust the results yet, WIP!

    Returns:
        List of (connection, score), best mapping first.

    This function tries to map an mptcp.stream from one dataframe (aka pcap)
    to an mptcp.stream in another dataframe. For now it only looks at IP-level
    information, without considering the subflow mapping score.
    """
    log.warning("mapping between datasets is not considered trustable yet")
    results = []  # type: List[MpTcpMapping]
    # mappings = {}  # type: Dict[int, Tuple[Any, float]]
    score = -1  # type: float

    # print("%r" % main)
    # print(rawdf2["mptcpstream"].unique().dropna())
    for mptcpstream2 in rawdf2[_sender("mptcpstream")].dropna().unique():
        other = MpTcpConnection.build_from_dataframe(rawdf2, mptcpstream2)
        mapping = map_mptcp_connection_from_known_streams(main, other)
        # score = main.score(other)
        # if score > float('-inf'):
        #     # (other, score)
        #     mapped_subflows = _map_subflows(main, other)
        #     mapping = MpTcpMapping(mapped=other, score=score,
        #                            subflow_mappings=mapped_subflows)
        results.append(mapping)

    # sort based on the score, best mapping first
    results.sort(key=lambda x: x.score, reverse=True)
    return results
def do_qualify_reinjections(self, args, unknown):
    """
    test with:
        mp qualify_reinjections 0

    TODO move the code into a proper function
    """
    # TODO this should be done automatically, right?
    df_all = load_merged_streams_into_pandas(
        args.pcap1,
        args.pcap2,
        args.pcap1stream,
        args.pcap2stream,
        mptcp=True,
        tshark_config=self.tshark_config
    )

    # adds a "redundant" column
    df = classify_reinjections(df_all)

    # print(df_all[pd.notnull(df_all[_sender("reinjection_of")])][
    #     _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) +
    #     _receiver(["packetid", "reltime"])
    # ])

    # to help debug
    # df.to_excel("temp.xls")

    def _print_reinjection_comparison(original_packet, reinj):
        """
        Expects the original packet and the reinjection row.
        """
        row = reinj
        # note: no trailing commas here, they would turn the values into tuples
        reinjection_packetid = getattr(row, _sender("packetid"))
        reinjection_start = getattr(row, _sender("abstime"))
        reinjection_arrival = getattr(row, _receiver("abstime"))
        original_start = original_packet[_sender("abstime")]
        original_arrival = original_packet[_receiver("abstime")]

        if reinj.redundant == False:
            # print(original_packet["packetid"])
            msg = ("packet {pktid} is a successful reinjection of {initial_packetid}."
                   " It arrived at {reinjection_arrival} to compare with {original_arrival}"
                   " while being transmitted at {reinjection_start} to compare with "
                   "{original_start}, i.e., {reinj_delta} before")
            # TODO use assert instead
            if getattr(row, _receiver("abstime")) > original_packet[_receiver("abstime")]:
                print("BUG: this is not a valid reinjection after all?")
        elif args.failed:
            # only de
            msg = "packet {pktid} is a failed reinjection of {initial_packetid}."
        else:
            return

        msg = msg.format(
            pktid=reinjection_packetid,
            initial_packetid=initial_packetid,
            reinjection_start=reinjection_start,
            reinjection_arrival=reinjection_arrival,
            original_start=original_start,
            original_arrival=original_arrival,
            reinj_delta=reinj.reinj_delta,
        )
        self.poutput(msg)

    # with pd.option_context('display.max_rows', None, 'display.max_columns', 300):
    #     print(reinjected_packets[["packetid", "packetid_receiver",
    #           *_receiver(["reinjected_in", "reinjection_of"])]].head())

    # TODO filter depending on --failed and --destinations
    if args.csv:
        self.pfeedback("Exporting to csv")
        # keep redundant entries; only export a subset of the columns
        columns = _sender(["abstime", "reinjection_of", "reinjected_in", "packetid",
                           "tcpstream", "mptcpstream", "tcpdest", "mptcpdest"])
        columns += _receiver(["abstime", "packetid"])
        columns += ["redundant", "owd", "reinj_delta"]

        df[columns].to_csv(
            self.stdout,
            sep="|",
            index=False,
            header=True,
        )
        return

    for destination in ConnectionRoles:
        if args.destinations and destination not in args.destinations:
            log.debug("ignoring destination %s", destination)
            continue

        self.poutput("looking for reinjections towards mptcp %s" % destination)
        sender_df = df[df.mptcpdest == destination]
        log.debug("%d reinjections in that direction", len(sender_df))

        # TODO we now need to display successful reinjections
        reinjections = sender_df[pd.notnull(sender_df[_sender("reinjection_of")])]
        successful_reinjections = reinjections[reinjections.redundant == False]
        self.poutput("%d successful reinjections" % len(successful_reinjections))
        # print(successful_reinjections[_sender(["packetid", "reinjection_of"])
        #       + _receiver(["packetid"])])

        for row in reinjections.itertuples(index=False):
            # reinjection_of is an array; sort it and take the first one?
            initial_packetid = row.reinjection_of[0]
            # print("initial_packetid = %r %s" % (initial_packetid, type(initial_packetid)))

            original_packet = df_all.loc[df_all.packetid == initial_packetid].iloc[0]
            # print("original packet = %r %s" % (original_packet, type(original_packet)))

            # if row.redundant == True and args.failed:
            #     _print_failed_reinjection(original_packet, row, debug=args.debug)

            _print_reinjection_comparison(original_packet, row)
def plot(self, df, tcpstream, fields, destinations, **kwargs):
    """
    getcallargs
    """
    fig = plt.figure()
    # tcpstreams = dat.groupby('tcpstream')
    # print("%d streams in the MPTCP flow" % len(tcpstream))
    log.debug("Plotting field(s) %s" % fields)
    axes = fig.gca()

    # for idx, (streamid, ds) in enumerate(tcpstreams):
    tcpdf = df  # [df.tcpstream == tcpstream]
    # if dropsyn:
    #     tcpdf[field].iloc[3:]  # .iloc would drop the syn/ack packets

    labels = []  # type: List[str]
    # print("DTYPES")
    # print(tcpdf.dtypes)

    for dest, ddf in tcpdf.groupby(_sender("tcpdest")):
        # print("dest %r in %r" % (dest, destinations))
        # TODO remove?
        if dest in destinations:
            for field in fields:
                # print("dest", dest, " in ", destinations)
                ddf[field].plot.line(
                    x=_sender("abstime"),
                    ax=axes,
                    # use_index=False,
                    legend=False,
                    grid=True,
                )
                labels.append("%s towards %s" % (self._attributes[field], dest))

    axes.set_xlabel("Time (s)")
    if len(fields) == 1:
        y_label = self._attributes[fields[0]]
    else:
        y_label = "/".join(fields)
    axes.set_ylabel(y_label)

    handles, _labels = axes.get_legend_handles_labels()

    # TODO generate correct labels?
    # print(tcpdf[field].iloc[3:])
    # Generate "subflow X" labels
    # location: 3 => bottom left, 4 => bottom right
    axes.legend(
        handles,
        labels
        # ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
        # loc=4
    )

    fig.suptitle("%s" % y_label)

    return fig
def plot(self, dat, destinations, protocol, **kwargs):
    """
    getcallargs
    """
    fig = plt.figure()
    axes = fig.gca()
    mptcp_plot = (protocol == "mptcp")

    # success, ret = mptcp_compute_throughput(dat, mptcpstream, destination)
    # if success is not True:
    #     print("Failure: %s", ret)
    #     return
    # data = map(lambda x: x['bytes'], ret['subflow_stats'])
    # s = pd.DataFrame(data=pd.Series(data))
    # print(s)

    title = "TCP throughput/goodput"
    fields = ["tcpdest", "tcpstream"]
    if mptcp_plot:
        fields.append("mptcpdest")
        title = "MPTCP throughput/goodput"

    for idx, subdf in dat.groupby(_sender(fields), sort=False):
        # pad the index with fillers so unpacking works for both protocols
        # (mptcpdest is only present in the mptcp case)
        tcpdest, tcpstream, mptcpdest, *_fillers = (*idx, "filler1", "filler2")  # type: ignore

        filtereddest = mptcpdest if mptcp_plot else tcpdest
        if filtereddest not in destinations:
            continue

        tput_df = compute_goodput(subdf, kwargs.get("window"))
        tput_df.plot.line(
            ax=axes,
            legend=True,
            x=_sender("dt_abstime"),
            y="tput",
            # y="gput",
            label="Xput towards %s" % filtereddest,  # seems to be a bug
        )

    # TODO plot the throughput on one y-axis and the goodput on the other
    axes.set_xlabel("Time (s)")
    axes.set_ylabel("contribution")
    fig.suptitle(title)

    # handles, labels = axes.get_legend_handles_labels()
    # # Generate "subflow X" labels
    # # location: 3 => bottom left, 4 => bottom right
    # axes.legend(
    #     handles,
    #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
    #     loc=4
    # )

    return fig
def compute_goodput(df, averaging_window):
    """
    A wireshark example can be found in ui/qt/tcp_stream_dialog.cpp,
    void TCPStreamDialog::fillThroughput():
        // Throughput Graph - rate of sent bytes
        // Goodput Graph - rate of ACKed bytes

    TODO should make it work with dack/ack; the problem is we don't support SACK :'(

    Adds the following columns to the dataframe:
    - tput
    - gput
    - dt_abstime: abstime converted to datetime format so that one can apply
      "rolling" windows
    """
    import re

    # we can use mptcp.ack, or tcp.ack which is relative
    # rolling windows can use a time offset when the index is a datetime
    # assert (field == "tcpack" or field == "dack")
    df[_sender("dt_abstime")] = pd.to_datetime(df[_sender("abstime")], unit="s")
    print(df[_sender("dt_abstime")])

    string1 = averaging_window
    # TODO we should retrieve the unit afterwards
    averaging_window_int = int(re.search(r'\d+', string1).group())

    def _compute_tput(x):
        """
        Not an exact one; it does not account for TCP SACK, for instance.
        """
        print("compute_tput called !!")
        # x is a Series covering the current window
        return (x.max() - x.min()) / averaging_window_int

    # TODO test; use dt_abstime as the index so the rolling window can be time-based
    newdf = df.set_index(_sender("dt_abstime"), drop=False)
    print(newdf[["abstime", "tcpack"]])
    newdf["tput"] = newdf["tcpack"].rolling(
        averaging_window,
        # closed="right",
        # center=True,
    ).apply(
        _compute_tput,
        raw=False,
    )

    print("AFTER rolling")
    print(newdf[["abstime", "tcpack", "tput"]].head(5))
    return newdf
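
# A standalone sketch of the rolling-window rate computation used above,
# assuming a cumulative byte counter indexed by time. The sample values are
# invented; the real code applies the same idea to the tcpack column.
def _demo_rolling_rate():
    import pandas as pd

    times = pd.to_datetime([0.0, 0.5, 1.2, 1.9, 2.4, 3.1], unit="s")
    acked = pd.Series([0, 1000, 2600, 4100, 5200, 7000], index=times)

    window_seconds = 1
    # rate of ACKed bytes: span of the cumulative counter inside each
    # 1-second window, divided by the window duration
    rate = acked.rolling("%ds" % window_seconds).apply(
        lambda x: (x.max() - x.min()) / window_seconds, raw=False)
    return rate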
def plot(self, pcap, pcapstream, fields, pcap_destinations, **kwargs):
    """
    getcallargs
    """
    log.debug("Plotting field(s) %s", fields)
    fig = plt.figure()
    axes = fig.gca()

    tcpdf = pcap
    # should be done when filtering the stream
    tcpdf.tcp.fill_dest(pcapstream)

    labels = []  # type: List[str]
    print(pcap)
    print(tcpdf)

    for dest, ddf in tcpdf.groupby(_sender("tcpdest")):
        if dest not in pcap_destinations:
            log.debug("Ignoring destination %s", dest)
            continue  # was missing: without it the destination was logged but plotted anyway

        log.debug("Plotting destination %s", dest)

        for field in fields:
            # print("dest", dest, " in ", destinations)
            final = ddf.drop_duplicates(subset=field)
            print("dataframe to plot")
            print(final)
            # log.debug("Plotting field %s" % field)
            # print("len(ddf[field])=%d" % len(ddf[field]))
            if len(final) <= 0:
                log.info("No datapoint to plot")
                continue

            # the astype is a workaround for a pandas failure
            debug_dataframe(final, "tcp_attr")
            final.plot(
                x="abstime",
                y=field,
                ax=axes,
                use_index=False,
                legend=False,
                grid=True,
            )
            label_fmt = "{field} towards {dest}"
            labels.append(label_fmt.format(field=self._attributes[field], dest=str(dest)))

    self.x_label = "Time (s)"
    if len(fields) == 1:
        y_label = self._attributes[fields[0]]
    else:
        y_label = "/".join(fields)
    self.y_label = y_label

    handles, _labels = axes.get_legend_handles_labels()

    # TODO generate correct labels?
    # Generate "subflow X" labels
    # location: 3 => bottom left, 4 => bottom right
    axes.legend(
        handles,
        labels
        # ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
        # loc=4
    )

    # TODO fix dest
    self.title_fmt = "%s" % y_label

    return fig
def classify_reinjections(df_all: pd.DataFrame) -> pd.DataFrame:
    """
    Look at reinjections on the receiver side and see which copy arrived first.

    Packets with reinjected_in_receiver should be (at least in theory) the
    first copy of their DSN to arrive.

    Returns:
        a new dataframe with the added columns "redundant" and "reinj_delta"
    """
    log.info("Classifying reinjections")

    if df_all.merged.already_classified():
        log.debug("Already classified, aborting")
        return df_all

    df_all = df_all.assign(redundant=False, reinj_delta=np.nan)
    df = df_all[df_all.merge_status == "both"]

    # print(df_all[pd.notnull(df_all[_sender("reinjection_of")])][
    #     _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) +
    #     _receiver(["packetid", "reltime"])
    # ])

    for destination in ConnectionRoles:
        log.debug("Looking at mptcp destination %r", destination)
        sender_df = df[df.mptcpdest == destination]
        # print(sender_df[sender_df.reinjected_in.notna()][["packetid", "reinjected_in"]])

        # select only packets that have been reinjected
        # debug_dataframe(sender_df, "reinjections", usecols=["reinjection_of"])
        reinjected_packets = sender_df.dropna(
            axis='index', subset=[_sender("reinjection_of")])
        log.debug("%d reinjected packets", len(reinjected_packets))

        # with pd.option_context('display.max_rows', None, 'display.max_columns', 300):
        #     print(reinjected_packets[
        #         _sender(["packetid", "reinjected_in", "reinjection_of"])
        #         + _receiver(["reinjected_in", "reinjection_of"])
        #     ].head())

        for reinjection in reinjected_packets.itertuples():
            # if there are packets in _receiver(reinjected_in), the reinjection
            # arrived before the other copies of the same segment, which makes
            # those copies useless
            # useless_reinjections = getattr(reinjection, _receiver("reinjected_in"), [])

            # give up if the reinjection was not correctly mapped
            if reinjection.merge_status != "both":
                log.log(mp.TRACE, "reinjection %d could not be mapped, giving up...",
                        reinjection.packetid)
                continue

            initial_packetid = reinjection.reinjection_of[0]
            original_packet = df_all.loc[df_all.packetid == initial_packetid].iloc[0]

            if original_packet.merge_status != "both":
                # TODO count missed classifications?
                log.log(mp.TRACE, "Original packet %d could not be mapped, giving up...",
                        original_packet.packetid)
                continue

            orig_arrival = getattr(original_packet, _receiver("reltime"))
            reinj_arrival = getattr(reinjection, _receiver("reltime"))
            reinj_pktid = getattr(reinjection, _sender("packetid"))

            reinj_delta = orig_arrival - reinj_arrival
            df_all.loc[reinj_pktid, "reinj_delta"] = reinj_delta

            if reinj_delta < pd.Timedelta(0):
                # the original copy arrived first: this is a failed (redundant) reinjection
                df_all.loc[df_all[_sender("packetid")] == reinjection.packetid,
                           "redundant"] = True
                # TODO set reinj_delta for reinjection.packetid
            # else: this was a successful reinjection

    return df_all
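
# A small sketch of the sign convention used above: reinj_delta is the
# original copy's arrival time minus the reinjection's arrival time, so a
# negative delta means the original arrived first and the reinjection was
# redundant. The timestamps below are invented.
def _demo_reinj_delta():
    import pandas as pd

    orig_arrival = pd.Timestamp("2020-01-01 00:00:02.000")
    reinj_arrival = pd.Timestamp("2020-01-01 00:00:01.500")

    reinj_delta = orig_arrival - reinj_arrival  # +0.5s
    redundant = reinj_delta < pd.Timedelta(0)   # False: the reinjection won the race
    return reinj_delta, redundant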
def load_merged_streams_into_pandas(
    pcap1: str,
    pcap2: str,
    streamid1: int,
    streamid2: int,
    # TODO change to a protocol enum
    mptcp: bool,
    tshark_config: TsharkConfig,
    clock_offset1: int = 0,
    clock_offset2: int = 0,
    mapping_mode: PacketMappingMode = PacketMappingMode.HASH,
    **extra
):
    """
    Arguments:
        protocol: mptcp or tcp
        mapping_mode: only HASH works for now
        clock_offset: untested

    Returns a dataframe with columns... owd?
    """
    protocolStr = "mptcp" if mptcp else "tcp"
    # both parts must be f-strings, else the placeholders are printed literally
    log.debug(f"Asked to load {protocolStr} merged streams {streamid1} and "
              f"{streamid2} from pcaps {pcap1} and {pcap2}")

    cache = mp.get_cache()
    cacheid = cache.cacheuid(
        "merged",
        [getrealpath(pcap1), getrealpath(pcap2)],
        protocolStr + "_" + str(streamid1) + "_" + str(streamid2) + ".csv")

    # if we can't load that file from the cache
    try:
        merged_df = pd.DataFrame()
        res = pd.DataFrame()

        valid, cachename = cache.get(cacheid)
        log.info("Cache validity=%s and cachename=%s" % (valid, cachename))

        # TODO disable when clock_offset is set
        if not valid:
            df1 = load_into_pandas(pcap1, tshark_config, clock_offset=clock_offset1)
            df2 = load_into_pandas(pcap2, tshark_config, clock_offset=clock_offset2)

            main_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            other_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            if mptcp:
                main_connection = MpTcpConnection.build_from_dataframe(
                    df1, MpTcpStreamId(streamid1))
                other_connection = MpTcpConnection.build_from_dataframe(
                    df2, MpTcpStreamId(streamid2))

                # for now we use known streams exclusively; it might be
                # interesting to use merge_tcp_dataframes later
                merged_df = merge_mptcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))
            else:
                main_connection = TcpConnection.build_from_dataframe(
                    df1, TcpStreamId(streamid1))
                other_connection = TcpConnection.build_from_dataframe(
                    df2, TcpStreamId(streamid2))

                # for now we use known streams exclusively; it might be
                # interesting to use merge_tcp_dataframes later
                merged_df = merge_tcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            assert cachename
            log.info("Saving into %s" % cachename)

            # trying to export lists correctly
            # print(merged_df.reinjected_in.dropna().head())
            # convert arrays back to strings
            # merged_df.apply(",".join()
            # or abstime?
            # TODO rechange the flags to hex()
            merged_df.to_csv(
                cachename,
                # columns=columns,
                index=False,
                header=True,
                sep=tshark_config.delimiter,
            )

            # tcpdest had become an object instead of a CategoricalDtype,
            # see https://github.com/pandas-dev/pandas/issues/22361
            log.log(mp.TRACE, "saving with dtypes=%s", dict(merged_df.dtypes))
        else:
            log.info("Loading from cache %s", cachename)

            date_cols = get_date_cols(tshark_config.fields)

            with open(cachename) as fd:
                # generate the field list
                def _gen_fields(fields):
                    gfields = {}  # type: ignore
                    for _name in [_first, _second]:
                        gfields.update({_name(k): v for k, v in fields.items()})
                    return gfields

                # reltime is discarded on save?
                tshark_config.fields.pop("reltime")
                gfields = _gen_fields(tshark_config.fields)
                merge_dtypes = get_dtypes(gfields)
                # log.log(mp.TRACE, "Using gfields %s" % pp.pformat(gfields))

                # we don't need any converters
                converters = {}
                date_cols = get_date_cols(gfields)

                log.log(mp.TRACE, "Using date_cols %s" % pp.pformat(date_cols))
                log.log(mp.TRACE, "Using dtypes %s" % pp.pformat(merge_dtypes))
                # log.log(mp.TRACE, "Using converters %s" % pp.pformat(converters))

                merged_df = pd.read_csv(
                    fd,
                    skip_blank_lines=True,
                    # we don't need 'header' when metadata is stored in comments
                    comment='#',
                    sep=tshark_config.delimiter,
                    # memory_map=True,  # could speed up processing
                    dtype=merge_dtypes,
                    converters=converters,
                    # date_parser=date_converter,
                    parse_dates=date_cols,
                )

                # at this stage, destinations are NaN
                debug_fields = ["abstime", "tcpstream", "tcpdest", "mptcpdest"]
                mptcpanalyzer.debug.debug_dataframe(
                    merged_df, "Merged dataframe",
                    usecols=(_first(debug_fields) + _second(debug_fields)))

                # workaround for https://github.com/pandas-dev/pandas/issues/25448
                def _convert_to_enums():
                    # per_pcap_artificial_fields
                    for col in [_first("tcpdest"), _first("mptcpdest"),
                                _second("tcpdest"), _second("mptcpdest")]:
                        merged_df[col] = merged_df[col].apply(
                            _convert_role, convert_dtype=False)

        # we fix the clocks a posteriori so that the cache is still usable
        log.debug("Postprocessing clock if needed")
        # merged_df[_first('abstime')] += clock_offset1
        # merged_df[_second('abstime')] += clock_offset2

        log.debug("Converting dataframes to be sender/receiver based...")

        # in both cases
        # TODO here we should attribute the definite mptcp role
        if mptcp:
            log.error("We should correct the clocks if the argument is passed!")
            # raise mp.MpTcpException("Implement mptcp merge")
            res = convert_to_sender_receiver(merged_df)
            # TODO fill MPTCP dest?
        else:
            # tcp
            res = convert_to_sender_receiver(merged_df)

        # log.debug("Sorting by sender abstime")
        # merged_df.sort_values(by=_sender("abstime"), ascending=True, inplace=True)
        # debug_dataframe(res, "checking merge", usecols=["merge_status"])
        # print("%d nan values" % len(res[res.merge_status == np.nan]))

        log.debug("Computing owds")
        debug_dataframe(res, "before owds")
        # TODO we don't necessarily need to generate the OWDs here; this could move out
        res['owd'] = res[_receiver('abstime')] - res[_sender('abstime')]

        debug_dataframe(
            res, "owd",
            usecols=["owd", _sender('abstime'), _receiver('abstime')])
        # with pd.option_context('float_format', '{:f}'.format):
        #     print(res[_sender(["ipsrc", "ipdst", "abstime"])
        #               + _receiver(["abstime", "packetid"]) + TCP_DEBUG_FIELDS + ["owd"]])

    except Exception as e:
        log.exception("exception happened while merging")
        # re-raise: silently returning an empty dataframe would hide the failure
        raise e

    log.log(mp.TRACE, "Dtypes after load:\n%s", pp.pformat(res.dtypes))
    log.info("Finished loading. merged dataframe size: %d", len(res))

    return res
def convert_to_sender_receiver(df) -> pd.DataFrame:
    """
    Convert a dataframe from the X_HOST1 | X_HOST2 layout to X_SENDER | X_RECEIVER.
    Each packet has a destination marker.
    Assumes the clocks are correct here!
    """
    log.debug("Converting from host_1/host_2 to sender/receiver format")

    # filled up as we go
    total = pd.DataFrame()

    for tcpstream, subdf in df.groupby(_first("tcpstream")):
        min_h1 = subdf.iloc[0, subdf.columns.get_loc(_first('abstime'))]
        min_h2 = subdf.iloc[0, subdf.columns.get_loc(_second('abstime'))]

        def _rename_column(col_name, suffixes) -> str:
            for suffix_to_replace, new_suffix in suffixes.items():
                if col_name.endswith(suffix_to_replace):
                    return col_name.replace(suffix_to_replace, new_suffix)
            return col_name

        log.debug(f"Comparing {min_h1} (h1) with {min_h2} (h2)")
        assert min_h1 != min_h2, (
            f"Same sending {min_h1} and receiving time {min_h2}."
            " Either the clock is not precise enough or it's a bug"
            " (more likely)")

        if min_h1 < min_h2:
            log.debug("Looks like h1 is the tcp client")
            # suffixes = {HOST1_SUFFIX: SENDER_SUFFIX, HOST2_SUFFIX: RECEIVER_SUFFIX}
            h1_role = ConnectionRoles.Client
        else:
            log.debug("Looks like h2 is the tcp client")
            h1_role = ConnectionRoles.Server

        for tcpdest, tdf in subdf.groupby(_first("tcpdest"), sort=False):
            if tcpdest == h1_role:
                suffixes = {HOST2_SUFFIX: SENDER_SUFFIX, HOST1_SUFFIX: RECEIVER_SUFFIX}
            else:
                suffixes = {HOST1_SUFFIX: SENDER_SUFFIX, HOST2_SUFFIX: RECEIVER_SUFFIX}

            log.debug("suffixes: %s" % suffixes)
            rename_func = functools.partial(_rename_column, suffixes=suffixes)
            log.debug("total df size = %d" % len(total))

            with pd.option_context('precision', 20):
                debug_cols = _first(["abstime", "tcpdest"]) + _second(["abstime", "tcpdest"])
                log.log(mp.TRACE, "before rename\n%s", tdf[debug_cols])
                tdf = tdf.rename(columns=rename_func, copy=True, inplace=False)
                debug_cols = _sender(["abstime", "tcpdest"]) + _receiver(["abstime", "tcpdest"])
                log.log(mp.TRACE, "after rename\n%s" % tdf[debug_cols])
                # debug_dataframe(tdf, "temporary dataframe")

            total = pd.concat([total, tdf], ignore_index=True, sort=False)
            # print("total df size = %d" % len(total))

    # debug_dataframe(total, "total")
    log.debug("Converted to sender/receiver format")
    log.log(mp.TRACE, "Comparing #unique entries %d vs #all %d",
            total[_sender("abstime")].count(), len(total[_sender("abstime")]))
    # assert total[_sender("abstime")].count() == len(total[_sender("abstime")])

    return total
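
# A minimal sketch of the suffix swap performed above, with hypothetical
# suffix values ("_h1", "_h2", "_sender", "_receiver"); the real constants
# (HOST1_SUFFIX, SENDER_SUFFIX, ...) live in the package.
def _demo_suffix_swap():
    import pandas as pd

    suffixes = {"_h1": "_sender", "_h2": "_receiver"}  # assume h1 is the sender here

    def _rename_column(col_name, suffixes):
        for old, new in suffixes.items():
            if col_name.endswith(old):
                return col_name.replace(old, new)
        return col_name

    df = pd.DataFrame({"abstime_h1": [1.0], "abstime_h2": [1.2]})
    # columns become ['abstime_sender', 'abstime_receiver']
    return df.rename(columns=lambda c: _rename_column(c, suffixes))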
def plot(self, pcap, pcapstream, **kwargs):
    """
    getcallargs
    """
    fig = plt.figure()
    df = pcap
    window = kwargs.get("window")
    destinations = kwargs.get("pcap_destinations")
    print("Destinations", destinations)

    con = df.tcp.connection(pcapstream)
    df = con.fill_dest(df)

    debug_dataframe(df, "plotting TCP throughput")

    # we should resample here
    pd_abstime = pd.to_datetime(
        df[_sender("abstime")],
        unit="s",
        errors='raise',
    )
    df.set_index(pd_abstime, inplace=True)
    df.sort_index(inplace=True)

    # TODO at some point here, we lose the dest type :'(
    for dest, subdf in df.groupby("tcpdest"):
        if dest not in destinations:
            log.debug("Ignoring destination %s", dest)
            continue

        log.debug("Plotting destination %s", dest)
        label_fmt = "TCP stream {stream}"
        if len(destinations) >= 2:
            label_fmt = label_fmt + " towards {dest}"

        plot_tput(
            fig,
            subdf["tcplen"],
            # subdf["tcpack"],
            subdf.index,  # instead of subdf["abstime"]
            window,
            label=label_fmt.format(stream=pcapstream,
                                   dest=mp.ConnectionRoles(dest).to_string()))

    self.y_label = "Throughput (bytes/second)"
    # TODO fix connection towards a direction?
    self.title_fmt = "TCP Throughput (averaging window of {window}) for:\n{con:c<->s}".format(
        window=window, con=con)
    # self.title = "TCP Throughput (Average window of %s)" % window

    # handles, labels = axes.get_legend_handles_labels()
    # # Generate "subflow X" labels
    # # location: 3 => bottom left, 4 => bottom right
    # axes.legend(
    #     handles,
    #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
    #     loc=4
    # )

    return fig
def classify_reinjections(df_all: pd.DataFrame) -> pd.DataFrame:
    """
    The idea here is to look at reinjections on the receiver side and see
    which copy arrived first.

    Packets with reinjected_in_receiver should be (at least in theory) the
    first copy of their DSN to arrive.

    Returns a new dataframe with an added column "redundant".
    """
    df_all["redundant"] = False
    df_all["reinj_delta"] = np.nan

    # rename to df_both?
    df = df_all[df_all._merge == "both"]

    # print(df_all[pd.notnull(df_all[_sender("reinjection_of")])][
    #     _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) +
    #     _receiver(["packetid", "reltime"])
    # ])

    for destination in ConnectionRoles:
        sender_df = df[df.mptcpdest == destination]
        # print(sender_df[sender_df.reinjected_in.notna()][["packetid", "reinjected_in"]])

        # select only packets that have been reinjected
        # print("%d sender_df packets" % len(sender_df))
        reinjected_packets = sender_df.dropna(
            axis='index', subset=[_sender("reinjection_of")])
        logging.debug("%d reinjected packets" % len(reinjected_packets))

        # with pd.option_context('display.max_rows', None, 'display.max_columns', 300):
        #     print(reinjected_packets[
        #         _sender(["packetid", "reinjected_in", "reinjection_of"])
        #         + _receiver(["reinjected_in", "reinjection_of"])
        #     ].head())

        for reinjection in reinjected_packets.itertuples():
            # if there are packets in _receiver(reinjected_in), the reinjection
            # arrived before other copies of the same segment, making them useless

            # reinjection._merge does not exist because itertuples() renames
            # columns starting with an underscore to positional names, hence _1
            if reinjection._1 != "both":
                # TODO count missed classifications?
                log.debug("reinjection %d could not be mapped, giving up..."
                          % reinjection.packetid)
                continue

            initial_packetid = reinjection.reinjection_of[0]
            original_packet = df_all.loc[df_all.packetid == initial_packetid].iloc[0]

            if original_packet._merge != "both":
                # TODO count missed classifications?
                logging.debug("Original packet %d could not be mapped, giving up..."
                              % original_packet.packetid)
                continue

            orig_arrival = getattr(original_packet, _receiver("reltime"))
            reinj_arrival = getattr(reinjection, _receiver("reltime"))
            reinj_pktid = getattr(reinjection, _sender("packetid"))

            reinj_delta = orig_arrival - reinj_arrival
            df_all.loc[reinj_pktid, "reinj_delta"] = reinj_delta

            if reinj_delta < 0:
                # the original copy arrived first: the reinjection is redundant
                df_all.loc[df_all[_sender("packetid")] == reinjection.packetid,
                           "redundant"] = True
                # TODO set reinj_delta for reinjection.packetid

    return df_all
def load_merged_streams_into_pandas(
    pcap1: str,
    pcap2: str,
    streamid1: int,  # Union[MpTcpStreamId, TcpStreamId]
    streamid2: int,
    mptcp: bool,
    tshark_config: TsharkConfig,
    clock_offset1: int = 0,
    clock_offset2: int = 0,
    mapping_mode: PacketMappingMode = PacketMappingMode.HASH,
    **extra
):
    """
    Arguments:
        protocol: mptcp or tcp
        mapping_mode: only HASH works for now

    Returns a dataframe with columns... owd?
    """
    log.debug("Asked to load merged tcp streams %d and %d from pcaps %s and %s"
              % (streamid1, streamid2, pcap1, pcap2))

    cache = mp.get_cache()
    protocolStr = "mptcp" if mptcp else "tcp"
    cacheid = cache.cacheuid(
        "merged",
        [getrealpath(pcap1), getrealpath(pcap2)],
        protocolStr + "_" + str(streamid1) + "_" + str(streamid2) + ".csv")

    # if we can't load that file from the cache
    try:
        merged_df = pd.DataFrame()
        res = pd.DataFrame()

        valid, cachename = cache.get(cacheid)
        log.info("Cache validity=%s and cachename=%s" % (valid, cachename))

        # TODO disable when clock_offset is set
        if not valid:
            df1 = load_into_pandas(pcap1, tshark_config, clock_offset=clock_offset1)
            df2 = load_into_pandas(pcap2, tshark_config, clock_offset=clock_offset2)

            main_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            other_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            if mptcp:
                main_connection = MpTcpConnection.build_from_dataframe(df1, streamid1)
                other_connection = MpTcpConnection.build_from_dataframe(df2, streamid2)
                # TODO generate map_mptcp_connection()

                # for now we use known streams exclusively; it might be
                # interesting to use merge_tcp_dataframes later
                merged_df = merge_mptcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))
            else:
                main_connection = TcpConnection.build_from_dataframe(df1, streamid1)
                other_connection = TcpConnection.build_from_dataframe(df2, streamid2)

                # for now we use known streams exclusively; it might be
                # interesting to use merge_tcp_dataframes later
                merged_df = merge_tcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            assert cachename
            logging.info("Saving into %s" % cachename)

            # trying to export lists correctly
            # print(merged_df.reinjected_in.dropna().head())
            # convert arrays back to strings
            # merged_df.apply(",".join()
            merged_df.to_csv(
                cachename,
                # columns=columns,
                index=False,
                header=True,
                sep=tshark_config.delimiter,
            )

            # here we lose the dtype: tcpdest has become 'object'
            print("saving with dtypes=", dict(merged_df.dtypes))
            # print("MERGED_DF", merged_df[TCP_DEBUG_FIELDS].head(20))

            # if log level >= DEBUG then save a copy to xls too!
            # if True:
            #     filename = cachename + ".xls"
            #     logging.debug("Saved a debug excel copy at %s" % filename)
            #     merged_df.to_excel(filename)
        else:
            logging.info("Loading from cache %s" % cachename)

            # dtypes = {k: v for k, v in temp.items()
            #           if v is not None or k not in ["tcpflags"]}
            def _gen_dtypes(fields) -> Dict[str, Any]:
                dtypes = {}  # type: ignore
                for _name in [_first, _second]:
                    # TODO this could be simplified
                    for k, v in fields.items():
                        if v is not None or k not in ["tcpflags"]:
                            dtypes.setdefault(_name(k), v)

                    # add the dtypes of the generated fields
                    dtypes.update({
                        _name(f.fullname): f.type
                        for f in per_pcap_artificial_fields.values()
                    })

                # these are overrides of the generated dtypes
                dtypes.update({
                    # during the merge, we join even unmapped packets, so some
                    # entries may be empty => float64
                    _first("packetid"): np.float64,
                    _second("packetid"): np.float64,
                })

                return dtypes

            def _gen_converters() -> Dict[str, Callable]:
                fields = dict(tshark_config.fields)
                fields.update(per_pcap_artificial_fields)
                converters = {}
                # no need to convert tcpflags
                default_converters = {
                    name: f.converter
                    for name, f in fields.items()
                    if f.converter and name != "tcpflags"
                }
                for name, converter in default_converters.items():
                    converters.update({
                        _first(name): converter,
                        _second(name): converter
                    })
                return converters

            with open(cachename) as fd:
                dtypes = _gen_dtypes({
                    name: field.type
                    for name, field in tshark_config.fields.items()
                })
                converters = _gen_converters()
                # more recent versions can do without it
                # pd.set_option('display.max_rows', 200)
                # pd.set_option('display.max_colwidth', -1)
                # print("converters=", converters)
                merged_df = pd.read_csv(
                    fd,
                    skip_blank_lines=True,
                    # we don't need 'header' when metadata is stored in comments
                    comment='#',
                    sep=tshark_config.delimiter,
                    # memory_map=True,  # could speed up processing
                    dtype=dtypes,
                    converters=converters,
                )
                # log.debug("Column names after loading from cache: %s", merged_df.columns)
                # TODO: "No columns to parse from file"

        # we fix the clocks a posteriori so that the cache is still usable
        logging.debug("Postprocessing clock if needed")
        merged_df[_first('abstime')] += clock_offset1
        merged_df[_second('abstime')] += clock_offset2

        logging.debug("Converting dataframes to be sender/receiver based...")

        # in both cases
        # TODO here we should attribute the definite mptcp role and compute the owd
        if mptcp:
            print("Should be merging OWDs")
            logging.error("We should correct the clocks if the argument is passed!")
            # raise mp.MpTcpException("Implement mptcp merge")
            res = convert_to_sender_receiver(merged_df)
        else:
            # tcp: this is where the clocks get corrected and the host1/host2
            # columns are renamed to the _sender/_receiver suffixes
            res = convert_to_sender_receiver(merged_df)
            # don't do it here, else we might repeat it
            # data["abstime"] += clock_offset

        logging.debug("Computing owds")
        log.debug("Column names: %s", res.columns)
        log.debug("Dtypes after load:\n%s" % dict(res.dtypes))

        # TODO we don't necessarily need to generate the OWDs here; this could move out
        res['owd'] = res[_receiver('abstime')] - res[_sender('abstime')]

        with pd.option_context('float_format', '{:f}'.format):
            print(res[_sender(["ipsrc", "ipdst", "abstime"])
                      + _receiver(["abstime", "packetid"])
                      + TCP_DEBUG_FIELDS + ["owd"]])

    except Exception:
        logging.exception("exception happened while merging")
        # re-raise: silently returning an empty dataframe would hide the failure
        raise

    # pd.set_option('display.max_rows', 200)
    # pd.set_option('display.max_colwidth', -1)
    # log.debug("Dtypes after load:%s\n" % pp.pformat(merged_df.dtypes))
    log.info("Finished loading. merged dataframe size: %d" % len(res))

    return res
def plot(self, pcap, pcapstream, window, **kwargs):
    """
    TODO: for now this only plots the subflows; plot the MPTCP aggregate
    or mptcpack instead?
    """
    fig = plt.figure()
    df = pcap
    destinations = kwargs.get("pcap_destinations")
    con = df.mptcp.connection(pcapstream)
    df = con.fill_dest(df)

    if len(destinations) == 1:
        suffix = " towards MPTCP %s" % destinations[0].to_string()
        self.title_fmt = self.title_fmt + suffix

    # use abstime as the origin of the index
    pd_abstime = pd.to_datetime(
        df[_sender("abstime")],
        unit="s",
        errors='raise',
    )
    df.set_index(pd_abstime, inplace=True)
    df.sort_index(inplace=True)

    # plot the MPTCP-level throughput
    ##################################################
    label_fmt = "MPTCP"
    if len(destinations) >= 2:
        label_fmt = label_fmt + " towards {mptcpdest}"

    for mptcpdest, subdf in df.groupby(_sender("mptcpdest")):
        # tcpdest, tcpstream, mptcpdest = idx
        mptcpdest = mp.ConnectionRoles(mptcpdest)
        if mptcpdest not in destinations:
            log.debug("Ignoring destination %s", mptcpdest)
            continue

        log.debug("Plotting mptcp destination %s", mptcpdest)
        plot_tput(fig,
                  subdf["tcplen"],
                  subdf["abstime"],
                  window,
                  label=label_fmt.format(mptcpdest=mptcpdest.to_string()))

    # then plot the subflows
    ##################################################
    fields = ["tcpstream", "tcpdest", "mptcpdest"]
    label_fmt = "Subflow {tcpstream}"
    if len(destinations) >= 2:
        label_fmt = label_fmt + " towards MPTCP {mptcpdest}"

    for idx, subdf in df.groupby(fields, sort=False):
        tcpstream, tcpdest, mptcpdest = idx
        mptcpdest = mp.ConnectionRoles(mptcpdest)
        if mptcpdest not in destinations:
            log.debug("Ignoring MPTCP destination %s", mptcpdest)
            continue

        log.debug("Plotting tcp destination %s", tcpdest)

        # basically the same as for tcp
        plot_tput(
            fig,
            subdf["tcplen"],
            subdf.index,  # instead of subdf["abstime"]
            window,
            label=label_fmt.format(tcpstream=tcpstream,
                                   mptcpdest=mptcpdest.to_string()))

    # return {
    #     'fig': fig
    # }
    return fig
def plot(self, pcap, pcapstream, window, **kwargs):
    """
    Should be very similar to the throughput one, except that redundant
    packets are dropped first.
    """
    fig = plt.figure()
    axes = fig.gca()

    fields = ["tcpdest", "tcpstream", "mptcpdest"]
    # TODO this should be configured in the parser
    # destinations = kwargs.get("destinations", list(mp.ConnectionRoles))
    destinations = kwargs.get("pcap_destinations")
    skipped = kwargs.get("skipped_subflows", [])
    df = pcap

    df_classified = classify_reinjections(df)

    # then it's the same as for throughput
    log.debug("Dropping redundant packets")
    df_useful = df_classified[df_classified.redundant == False]
    df_useful = df_useful.copy()
    df_useful.dropna(
        axis="index",
        subset=[_sender("abstime")],
        inplace=True,
    )
    # print("after dropna")
    # print(df_useful)

    pd_abstime = pd.to_datetime(df_useful[_sender("abstime")],
                                unit="s", errors="raise")
    df_useful.set_index(pd_abstime, inplace=True)
    df_useful.sort_index(inplace=True)

    suffix = " towards MPTCP {mptcpdest}"

    # plot the MPTCP-level goodput
    ##################################################
    label_fmt = "Aggregated" + (suffix if len(destinations) > 1 else "")
    for mptcpdest, subdf in df_useful.groupby("mptcpdest"):
        if mptcpdest not in destinations:
            log.debug("Ignoring destination %s", mptcpdest)
            continue

        log.debug("Plotting mptcp destination %s", mptcpdest)
        plot_tput(
            fig,
            subdf["tcplen"],
            subdf["abstime"],
            window,
            label=label_fmt.format(
                mptcpdest=mp.ConnectionRoles(mptcpdest).to_string()),
        )

    label_fmt = "Subflow {tcpstream}"
    if len(destinations) == 1:
        # TODO as we look at acks, it should be swapped!
        self.title_fmt = self.title_fmt + suffix
    else:
        label_fmt = label_fmt + suffix

    for idx, subdf in df_useful.groupby(_sender(fields), as_index=False, sort=False):
        # print("len= %r" % len(subdf))
        tcpdest, tcpstream, mptcpdest = idx
        print("tcpdest= %r, tcpstream %r mptcpdest %r"
              % (tcpdest, tcpstream, mptcpdest))

        if mptcpdest not in destinations:
            log.debug("skipping MPTCP dest %s", mptcpdest)
            continue
        if tcpstream in skipped:
            log.debug("skipping subflow %d", tcpstream)
            continue

        # log.debug("plotting MPTCP dest %s" % mptcpdest)
        plot_tput(
            fig,
            # subdf["dack"],
            subdf["tcplen"],
            subdf.index,  # no need for abstime here
            window,
            label=label_fmt.format(
                tcpstream=tcpstream,
                mptcpdest=mp.ConnectionRoles(mptcpdest).to_string()),
        )

    self.title_fmt = self.title_fmt.format(
        tcpstream=tcpstream,
        mptcpdest=mp.ConnectionRoles(mptcpdest).to_string())

    return fig