Example #1
    def plot_mptcp(self, df, fig, fields, pcap_destinations, **kwargs):
        axes = fig.gca()
        fields = ["tcpdest", "tcpstream", "mptcpdest"]
        destinations = pcap_destinations

        label_fmt = "Stream {tcpstream}"

        if len(destinations) > 1:
            label_fmt = label_fmt + " towards {dest}"

        print("pcap", pcap_destinations)

        for idx, subdf in df.groupby(_sender(fields), sort=False):

            tcpdest, tcpstream, mptcpdest = idx
            if mptcpdest not in destinations:
                log.debug("Ignoring destination %s", mptcpdest)
                continue

            # print("OWD")
            # with pd.option_context('display.max_rows', None, 'display.max_columns', None):
            #     # more options can be specified also
            #     # print(df)
            #     print(df.owd)

# "Subflow %d towards tcp %s" % (tcpstream, tcpdest),  # seems to be a bug
            pplot = subdf.plot(
                # gca = get current axes (Axes), create one if necessary
                ax=axes,
                legend=True,
                # TODO should depend from
                x=_sender("abstime"),
                y="owd",
                label=label_fmt.format(tcpstream=tcpstream, dest=mp.ConnectionRoles(mptcpdest).to_string())
            )
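The _sender() and _receiver() helpers used throughout these examples map field names to the corresponding columns of a merged, per-direction dataframe. A minimal sketch of what they are assumed to do, accepting a single field name or a list of them (the actual mptcpanalyzer helpers and suffix spellings may differ):

def _suffix_fields(fields, suffix):
    # accept a single field name or a list of field names
    if isinstance(fields, str):
        return fields + suffix
    return [f + suffix for f in fields]

def _sender(fields):
    return _suffix_fields(fields, "_sender")    # hypothetical suffix

def _receiver(fields):
    return _suffix_fields(fields, "_receiver")  # hypothetical suffix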
Example #2
    def plot_tcp(self, df, fig, fields, **kwargs):
        axes = fig.gca()
        # fields = ["tcpdest", "tcpstream"]

        # ConnectionRoles doesn't support comparison ("<"), hence sort=False below
        for idx, subdf in df.groupby(_sender(fields), sort=False):

            print("t= %r" % (idx, ))
            print("len= %r" % len(subdf))
            tcpdest, tcpstream = idx

            # if protocol == tcpdest not in kwargs.destinations:
            #     log.debug("skipping TCP dest %s" % tcpdest)
            #     continue

            # print("tcpdest= %r" % tcpdest)
            # print("=== less than 0\n", subdf[subdf.owd < 0.050])
            # print("=== less than 0\n", subdf.tail())

            # if tcpdest
            # df = debug_convert(df)
            pplot = subdf.plot.line(
                # gca = get current axes (Axes), create one if necessary
                ax=axes,
                legend=True,
                # TODO should depend from
                x=_sender("abstime"),
                y="owd",
                label="towards %s" % tcpdest,  # seems to be a bug
                # grid=True,
                # xticks=tcpstreams["reltime"],
                # rotation for ticks
                # rot=45,
                # lw=3
            )
Example #3
    def plot_mptcp(self, df, fig, fields, **kwargs):
        axes = fig.gca()
        fields = ["tcpdest", "tcpstream", "mptcpdest"]

        for idx, subdf in df.groupby(_sender(fields), sort=False):

            print("t= %r" % (idx, ))
            print("len= %r" % len(subdf))
            tcpdest, tcpstream, mptcpdest = idx

            # if protocol == tcpdest not in kwargs.destinations:
            #     log.debug("skipping TCP dest %s" % tcpdest)
            #     continue

            # if tcpdest
            # df = debug_convert(df)
            pplot = subdf.plot(
                # gca = get current axes (Axes), create one if necessary
                ax=axes,
                legend=True,
                # TODO should depend from
                x=_sender("abstime"),
                y="owd",
                label="Subflow %d towards tcp %s" %
                (tcpstream, tcpdest),  # seems to be a bug
                # grid=True,
                # xticks=tcpstreams["reltime"],
                # rotation for ticks
                # rot=45,
                # lw=3
            )
Example #4
    def plot_tcp(self, df, fig, fields, **kwargs):
        axes = fig.gca()
        # fields = ["tcpdest", "tcpstream"]


        label_fmt = "Stream {tcpstream} towards {tcpdest}"
        for idx, subdf in df.groupby(_sender(fields), sort=False):

            # print("t= %r" % (idx,))
            print("len= %r" % len(subdf))
            tcpdest, tcpstream = idx

            # print("tcpdest= %r" % tcpdest)
            # print("=== less than 0\n", subdf[subdf.owd < 0.050])
            # print("=== less than 0\n", subdf.tail())

            # if tcpdest
            debug_dataframe(subdf, "subdf stream %d destination %r" % (tcpstream, tcpdest))

            pplot = subdf.plot.line(
                # gca = get current axes (Axes), create one if necessary
                ax=axes,
                legend=True,
                # TODO should depend from
                x=_sender("abstime"),
                y="owd",
                label=label_fmt.format(tcpstream=tcpstream, tcpdest=tcpdest),
            )
Example #5
    def plot(self, pcap, pcapstream, **kwargs):
        """
        getcallargs
        """
        df = pcap

        # Need to compute reinjections
        df.mptcp.fill_dest(pcapstream)
        df = classify_reinjections(df)

        fig = plt.figure()

        # log.info("%d streams in the MPTCP flow" % len(tcpstreams))
        log.info("Plotting reinjections ")

        axes = fig.gca()

        fields = ["tcpstream", "mptcpdest"]

        fig.suptitle(
            "Reinjections CDF ",
            verticalalignment="top",
        )

        # destinations have not been set yet !!
        debug_dataframe(df, "DATASET HEAD")
        for idx, subdf in df.groupby(_sender(fields), sort=False):
            log.info("len(df)= %d" % len(df))

            # TODO check destination
            # TODO skip if no reinjection
            debug_dataframe(subdf, "DATASET HEAD")

            # for idx, (streamid, ds) in enumerate(tcpstreams):
            # subdf[_sender("reinj_delta")].plot.line(
            #     x="abstime",
            #     ax=axes,
            #     # use_index=False,
            #     legend=False,
            #     grid=True,
            # )
            subdf[_sender("reinj_delta")].hist(cumulative=True,
                                               density=1,
                                               bins=100)

        axes.set_xlabel("Reinjection delay (s)")
        axes.set_ylabel("CDF")

        handles, labels = axes.get_legend_handles_labels()

        # Generate "subflow X" labels
        # location: 3 => bottom left, 4 => bottom right
        axes.legend(handles,
                    ["Subflow %d" % (x) for x, _ in enumerate(labels)],
                    loc=4)
        return fig
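The cumulative histogram above (hist with cumulative=True and density set) is one way to draw a CDF. An equivalent explicit construction, as a sketch assuming reinj_delta holds numeric delays in a pandas Series:

import numpy as np

def plot_ecdf(axes, series, label=None):
    # empirical CDF: sorted samples on x, cumulative fraction on y
    x = np.sort(series.dropna().to_numpy())
    y = np.arange(1, len(x) + 1) / len(x)
    axes.step(x, y, where="post", label=label)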
Example #6
    def plot(self, pcap, protocol, **kwargs):
        """
        Ideally it should be mapped automatically.
        For now this plots only one direction, but a wrapper could plot forward
        OWDs, then backward OWDs.
        Disclaimer: keep in mind this assumes perfect clock synchronization
        between nodes, i.e., it relies on the pcap absolute time field. While
        this holds in discrete time simulators such as ns3, it rarely does for
        real captures.

        """
        fig = plt.figure()
        axes = fig.gca()
        res = pcap
        res[_sender("abstime")] = pd.to_datetime(res[_sender("abstime")],
                                                 unit="s")

        # TODO here we should rewrite
        debug_fields = _sender(TCP_DEBUG_FIELDS) + _receiver(
            TCP_DEBUG_FIELDS) + ["owd"]

        print("columns", pcap)
        print("columns", res.columns)
        print("info", res.info())
        print(res.loc[res._merge == "both", debug_fields])

        df = res

        print("STARTING LOOP")
        print("DESTINATION=%r" % kwargs.get("pcapdestinations", []))
        # df= df[df.owd > 0.010]

        fields = [
            "tcpdest",
            "tcpstream",
        ]
        # if True:
        if protocol == "mptcp":
            self.plot_mptcp(df, fig, fields, **kwargs)
        else:
            self.plot_tcp(df, fig, fields, **kwargs)

        # TODO add units
        axes.set_xlabel("Time (s)")
        axes.set_ylabel("One Way Delay (s)")

        self.title = "One Way Delays for {} streams {} <-> {} {dest}".format(
            protocol,
            kwargs.get("pcap1stream"),
            kwargs.get("pcap2stream"),
            dest="")

        return fig
Example #7
        def _print_reinjection_comparison(original_packet, reinj, ):
            """
            Expects tuples of original and reinjection packets
            """
            # original_packet  = sender_df.loc[ sender_df.packetid == initial_packetid, ].iloc[0]
            row = reinj

            reinjection_packetid = getattr(row, _sender("packetid"))
            reinjection_start    = getattr(row, _sender("abstime"))
            reinjection_arrival  = getattr(row, _receiver("abstime"))
            original_start       = original_packet[_sender("abstime")]
            original_arrival     = original_packet[_receiver("abstime")]

            if not reinj.redundant:
                # print(original_packet["packetid"])
                msg = ("packet {pktid} is a successful reinjection of {initial_packetid}."
                        " It arrived at {reinjection_arrival} to compare with {original_arrival}"
                        " while being transmitted at {reinjection_start} to compare with "
                        "{original_start}, i.e., {reinj_delta} before")
                # TODO use assert instead
                if getattr(row, _receiver("abstime")) > original_packet[ _receiver("abstime") ]:
                    print("BUG: this is not a valid reinjection after all ?")

            elif args.failed:
                # only de
                msg = "packet {pktid} is a failed reinjection of {initial_packetid}."
            else:
                return

            msg = msg.format(
                pktid               = reinjection_packetid,
                initial_packetid    = initial_packetid,

                reinjection_start   = reinjection_start,
                reinjection_arrival = reinjection_arrival,
                original_start      = original_start,
                original_arrival    = original_arrival,
                reinj_delta         = reinj.reinj_delta,
            )
            self.poutput(msg)
Example #8
    def plot(self, pcap, protocol, **kwargs):
        """
        Ideally it should be mapped automatically.
        For now this plots only one direction, but a wrapper could plot forward
        OWDs, then backward OWDs.
        Disclaimer: keep in mind this assumes perfect clock synchronization
        between nodes, i.e., it relies on the pcap absolute time field. While
        this holds in discrete time simulators such as ns3, it rarely does for
        real captures.

        """
        fig = plt.figure()
        axes = fig.gca()
        res = pcap
        destinations = kwargs.get("pcap_destinations")
        # should already be done
        # res[_sender("abstime")] = pd.to_datetime(res[_sender("abstime")], unit="s")

        # TODO here we should rewrite
        debug_fields = _sender(TCP_DEBUG_FIELDS) + _receiver(TCP_DEBUG_FIELDS) + ["owd"]

        # print("columns", pcap)
        debug_dataframe(res, "owd dataframe")
        # print(res.loc[res.merge_status == "both", debug_fields])

        df = res

        # print("DESTINATION=%r" % destinations)
        # df= df[df.owd > 0.010]

        fields = ["tcpdest", "tcpstream", ]
        # if True:
        # TODO: use Protocol.MPTCP:
        if protocol == "mptcp":
            self.plot_mptcp(df, fig, fields, **kwargs)
        elif protocol == "tcp":
            self.plot_tcp(df, fig, fields, **kwargs)
        else:
            raise Exception("Unsupported protocol %r" % protocol)


        self.title_fmt = "One Way Delays for {protocol}"
        if len(destinations) == 1:
            self.title_fmt = self.title_fmt + " towards {dest}"

        self.title_fmt = self.title_fmt.format(
            protocol=protocol,
            # kwargs.get("pcap1stream"),
            # kwargs.get("pcap2stream"),
            dest=destinations[0].to_string()
        )

        return fig
Example #9
    def plot(self, df, pcapstream, field, **kwargs):
        """
        getcallargs
        """
        fig = plt.figure()
        # tcpstreams = dat.groupby('tcpstream')

        # log.info("%d streams in the MPTCP flow" % len(tcpstreams))
        log.info("Plotting field %s" % field)
        log.info("len(df)= %d" % len(df))

        axes = fig.gca()

        fields = ["tcpstream", "mptcpdest"]

        fig.suptitle(
            "Plot of subflow %s" % field,
            verticalalignment="top",
            # x=0.1, y=.95,
        )

        # destinations have not been set yet !!
        print("DATASET HEAD")
        print(df.head())
        for idx, subdf in df.groupby(_sender(fields), sort=False):
            log.info("len(df)= %d" % len(df))

            # TODO check destination

            # for idx, (streamid, ds) in enumerate(tcpstreams):
            subdf[field].plot.line(
                x="abstime",
                ax=axes,
                # use_index=False,
                legend=False,
                grid=True,
            )

        axes.set_xlabel("Time (s)")
        axes.set_ylabel(self._attributes[field])

        handles, labels = axes.get_legend_handles_labels()

        # Generate "subflow X" labels
        # location: 3 => bottom left, 4 => bottom right
        axes.legend(
            handles,
            ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
            loc=4)
        return fig
Example #10
def mptcp_compute_throughput(
        rawdf, mptcpstreamid, destination: ConnectionRoles
    # mptcpstreamid2=None
) -> Tuple[bool, Any]:
    """
    Very raw computation: subtract the lowest DSN from the highest over the elapsed time

    Returns:
        a tuple (True/false, dict)
    """

    df = rawdf[rawdf.mptcpstream == mptcpstreamid]
    if df.empty:
        return False, "No packet with mptcp.stream == %d" % mptcpstreamid

    con = MpTcpConnection.build_from_dataframe(df, mptcpstreamid)
    q = con.generate_direction_query(destination)
    df = unidirectional_df = df.query(q)

    dsn_min = df.dss_dsn.min()
    dsn_max = df.dss_dsn.max()
    total_transferred = dsn_max - dsn_min
    d = df.groupby(_sender('tcpstream'))
    subflow_stats: List[Any] = []
    for tcpstream, group in d:
        # TODO drop retransmitted
        subflow_load = group.drop_duplicates(subset="dss_dsn").dss_length.sum()
        subflow_load = subflow_load if not math.isnan(subflow_load) else 0
        subflow_stats.append({
            'tcpstreamid': tcpstream,
            'throughput_bytes': int(subflow_load)
        })

    return True, {
        'mptcpstreamid': mptcpstreamid,
        # TODO append bytes
        'mptcp_goodput_bytes': total_transferred,
        'mptcp_throughput_bytes': sum(sf['throughput_bytes'] for sf in subflow_stats),
        'subflow_stats': subflow_stats,
    }
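A hypothetical call site for mptcp_compute_throughput; the stream id and the ConnectionRoles member are illustrative, not taken from the source:

success, stats = mptcp_compute_throughput(df, mptcpstreamid=0,
                                          destination=ConnectionRoles.Server)
if not success:
    print("Failure:", stats)  # stats is an error string in that case
else:
    for sf in stats["subflow_stats"]:
        print("subflow %d carried %d bytes"
              % (sf["tcpstreamid"], sf["throughput_bytes"]))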
Example #11
def map_mptcp_connection(rawdf2: pd.DataFrame,
                         main: MpTcpConnection) -> List[MpTcpMapping]:
    """
    warning: work in progress, do not trust the results yet!

    Returns:
        List of (connection, score) with the best mapping first

    This function tries to map a mptcp.stream from a dataframe (aka pcap) to mptcp.stream
    in another dataframe. For now it just looks at IP level stuff without considering subflow
    mapping score
    """
    log.warning("mapping between datasets is not considered trustable yet")
    results: List[MpTcpMapping] = []

    for mptcpstream2 in rawdf2[_sender("mptcpstream")].dropna().unique():
        other = MpTcpConnection.build_from_dataframe(rawdf2, mptcpstream2)
        mapping = map_mptcp_connection_from_known_streams(main, other)
        results.append(mapping)

    results.sort(key=lambda x: x.score, reverse=True)

    return results
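A sketch of how the returned mappings might be consumed, assuming MpTcpMapping exposes the mapped and score attributes hinted at in Example #12:

mappings = map_mptcp_connection(rawdf2, main_connection)
if mappings:
    best = mappings[0]  # results are sorted, best score first
    print("best candidate %r with score %f" % (best.mapped, best.score))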
Example #12
def map_mptcp_connection(rawdf2: pd.DataFrame,
                         main: MpTcpConnection) -> List[MpTcpMapping]:
    """
    warning: work in progress, do not trust the results yet!

    Returns:
        List of (connection, score) with the best mapping first

    This function tries to map a mptcp.stream from a dataframe (aka pcap) to mptcp.stream
    in another dataframe. For now it just looks at IP level stuff without considering subflow
    mapping score
    """
    log.warning("mapping between datasets is not considered trustable yet")
    results = []  # type: List[MpTcpMapping]

    # mappings = {}  # type: Dict[int,Tuple[Any, float]]


    # print("%r" % main)
    # print(rawdf2["mptcpstream"].unique().dropna())

    for mptcpstream2 in rawdf2[_sender("mptcpstream")].dropna().unique():
        other = MpTcpConnection.build_from_dataframe(rawdf2, mptcpstream2)
        mapping = map_mptcp_connection_from_known_streams(main, other)
        # score = main.score(other)
        # if score > float('-inf'):
        #     # (other, score)
        #     mapped_subflows = _map_subflows(main, other)
        #     mapping = MpTcpMapping(mapped=other, score=score, subflow_mappings=mapped_subflows)
        results.append(mapping)

    # sort based on the score
    results.sort(key=lambda x: x.score, reverse=True)

    return results
Example #13
    def do_qualify_reinjections(self, args, unknown):
        """
        test with:
            mp qualify_reinjections 0

        TODO move the code into a proper function
        """
        # TODO this should be done automatically right ?
        df_all = load_merged_streams_into_pandas(
            args.pcap1,
            args.pcap2,
            args.pcap1stream,
            args.pcap2stream,
            mptcp=True,
            tshark_config=self.tshark_config
        )

        # adds a redundant column
        df = classify_reinjections(df_all)

        # print(df_all[ pd.notnull(df_all[_sender("reinjection_of")])] [
        #     _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) +
        #     _receiver(["packetid", "reltime"])
        # ])

        # to help debug
        # df.to_excel("temp.xls")

        def _print_reinjection_comparison(original_packet, reinj, ):
            """
            Expects tuples of original and reinjection packets
            """
            # original_packet  = sender_df.loc[ sender_df.packetid == initial_packetid, ].iloc[0]
            row = reinj

            reinjection_packetid = getattr(row, _sender("packetid"))
            reinjection_start    = getattr(row, _sender("abstime"))
            reinjection_arrival  = getattr(row, _receiver("abstime"))
            original_start       = original_packet[_sender("abstime")]
            original_arrival     = original_packet[_receiver("abstime")]

            if not reinj.redundant:
                # print(original_packet["packetid"])
                msg = ("packet {pktid} is a successful reinjection of {initial_packetid}."
                        " It arrived at {reinjection_arrival} to compare with {original_arrival}"
                        " while being transmitted at {reinjection_start} to compare with "
                        "{original_start}, i.e., {reinj_delta} before")
                # TODO use assert instead
                if getattr(row, _receiver("abstime")) > original_packet[ _receiver("abstime") ]:
                    print("BUG: this is not a valid reinjection after all ?")

            elif args.failed:
                # only de
                msg = "packet {pktid} is a failed reinjection of {initial_packetid}."
            else:
                return

            msg = msg.format(
                pktid               = reinjection_packetid,
                initial_packetid    = initial_packetid,

                reinjection_start   = reinjection_start,
                reinjection_arrival = reinjection_arrival,
                original_start      = original_start,
                original_arrival    = original_arrival,
                reinj_delta         = reinj.reinj_delta,
            )
            self.poutput(msg)


        # with pd.option_context('display.max_rows', None, 'display.max_columns', 300):
        #     print(reinjected_packets[["packetid", "packetid_receiver", *_receiver(["reinjected_in", "reinjection_of"])]].head())
        # TODO filter depending on --failed and --destinations

        if args.csv:
            self.pfeedback("Exporting to csv")
            # keep redundant
            # only export a subset ?
            # for 
            # df1 = df[['a','d']]
            # smalldf = df.drop()
            columns = _sender(["abstime", "reinjection_of", "reinjected_in", "packetid", "tcpstream", "mptcpstream", "tcpdest", "mptcpdest"])
            columns += _receiver(["abstime", "packetid"])
            columns += ["redundant", "owd", "reinj_delta"]

            df[columns].to_csv(
                self.stdout,
                sep="|",
                index=False,
                header=True,
            )
            return

        for destination in ConnectionRoles:

            if args.destinations and destination not in args.destinations:
                log.debug("ignoring destination %s " % destination)
                continue

            self.poutput("looking for reinjections towards mptcp %s" % destination)
            sender_df = df[df.mptcpdest == destination]
            log.debug("%d reinjections in that direction" % (len(sender_df), ))

            # TODO we now need to display successful reinjections
            reinjections = sender_df[pd.notnull(sender_df[_sender("reinjection_of")])]

            successful_reinjections = reinjections[reinjections.redundant == False]

            self.poutput("%d successful reinjections" % len(successful_reinjections))
            # print(successful_reinjections[ _sender(["packetid", "reinjection_of"]) + _receiver(["packetid"]) ])

            for row in reinjections.itertuples(index=False):

                # loc ? this is an array, sort it and take the first one ?
                initial_packetid = row.reinjection_of[0]
                # print("initial_packetid = %r %s" % (initial_packetid, type(initial_packetid)))

                original_packet  = df_all.loc[df_all.packetid == initial_packetid].iloc[0]
                # print("original packet = %r %s" % (original_packet, type(original_packet)))

                # if row.redundant == True and args.failed:
                    # _print_failed_reinjection(original_packet, row, debug=args.debug)

                _print_reinjection_comparison(original_packet, row, )
Example #14
    def plot(self, df, tcpstream, fields, destinations, **kwargs):
        """
        getcallargs
        """
        fig = plt.figure()
        # tcpstreams = dat.groupby('tcpstream')

        # print("%d streams in the MPTCP flow" % len(tcpstream))
        log.debug("Plotting field(s) %s" % fields)

        axes = fig.gca()

        # for idx, (streamid, ds) in enumerate(tcpstreams):
        tcpdf = df
        # [df.tcpstream == tcpstream]

        # if dropsyn
        # tcpdf[field].iloc[3:]

        labels = []  # type: List[str]

        # TODO the .iloc slice allows dropping the syn/ack packets
        # print("DTYPES")
        # print(tcpdf.dtypes)
        for dest, ddf in tcpdf.groupby(_sender("tcpdest")):
            # print("dest %r in %r" %( dest , destinations))
            # TODO remove ?
            if dest in destinations:

                for field in fields:
                    # print("dest", dest, " in " , destinations)

                    ddf[field].plot.line(
                        x=_sender("abstime"),
                        ax=axes,
                        # use_index=False,
                        legend=False,
                        grid=True,
                    )
                    labels.append("%s towards %s" %
                                  (self._attributes[field], dest))

        axes.set_xlabel("Time (s)")
        if len(fields) == 1:
            y_label = self._attributes[fields[0]]
        else:
            y_label = "/".join(fields)
        axes.set_ylabel(y_label)

        handles, _labels = axes.get_legend_handles_labels()

        # TODO generate correct labels ?

        # print(tcpdf[field].iloc[3:])
        # Generate "subflow X" labels
        # location: 3 => bottom left, 4 => bottom right
        axes.legend(
            handles, labels
            #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
            #     loc=4
        )

        fig.suptitle(" %s " % y_label)

        return fig
Example #15
    def plot(self, dat, destinations, protocol, **kwargs):
        """
        getcallargs
        """

        fig = plt.figure()
        axes = fig.gca()
        mptcp_plot = (protocol == "mptcp")
        # success, ret = mptcp_compute_throughput(dat, mptcpstream, destination)
        # if success is not True:
        #     print("Failure: %s", ret)
        #     return

        # data = map(lambda x: x['bytes'], ret['subflow_stats'])
        # s = pd.DataFrame(data=pd.Series(data))
        # print (s)

        # gca = get current axes (Axes), create one if necessary
        axes = fig.gca()

        title = "TCP throughput/goodput"

        fields = [
            "tcpdest",
            "tcpstream",
        ]
        if mptcp_plot:
            fields.append("mptcpdest")
            title = "MPTCP throughput/goodput"

        for idx, subdf in dat.groupby(_sender(fields), sort=False):

            # pad with fillers so the unpacking works for both the tcp (2 keys)
            # and mptcp (3 keys) groupings; idx follows the order of `fields`
            tcpdest, stream, mptcpdest, *_ = (*idx, "filler1", "filler2")  # type: ignore

            filtereddest = mptcpdest if mptcp_plot else tcpdest
            if filtereddest not in kwargs.get("destinations", []):
                continue

            tput_df = compute_goodput(subdf, kwargs.get("window"))
            tput_df.plot.line(
                ax=axes,
                legend=True,
                # TODO should depend from
                x=_sender("dt_abstime"),
                y="tput",
                # y="gput",
                label="Xput towards %s" % filtereddest,  # seems to be a bug
            )

        # TODO plot on one y the throughput; on the other the goodput
        axes.set_xlabel("Time (s)")
        axes.set_ylabel("contribution")
        fig.suptitle(title)

        # handles, labels = axes.get_legend_handles_labels()

        # # Generate "subflow X" labels
        # # location: 3 => bottom left, 4 => bottom right
        # axes.legend(
        #     handles,
        #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
        #     loc=4
        # )

        return fig
Example #16
def compute_goodput(df, averaging_window):
    """
    wireshark example can be found in:
    ui/qt/tcp_stream_dialog.cpp: void TCPStreamDialog::fillThroughput()

    // Throughput Graph - rate of sent bytes
    // Goodput Graph - rate of ACKed bytes

    todo should make it work with dack/ack
    problem is we don't support sack :'(

    Adds following columns to the dataframe:
    - tput
    - gput 
    - dt_abstime: abstime but in datetime format so that one can apply "rolling" features
    """
    # df.rolling(on="bytes")
    # we can use mptcp.ack
    # we can use tcp.ack that are relative
    # rolling window can use offset

    # assert (field == "tcpack" or field "dack")

    df[_sender("dt_abstime")] = pd.to_datetime(df[_sender("abstime")],
                                               unit="s")

    print(df["dt_abstime"])
    import re
    string1 = averaging_window
    # TODO I should retrieve the unit afterwards
    averaging_window_int = int(re.search(r'\d+', string1).group())

    # TODO use it as index to use the rolling ?
    # win_type=
    # rolling
    def _compute_tput(x, ):
        """
        Not an exact one, does not account for TCP sack for instance
        """
        print("compute_tput called !!")
        # print("%r" % x )
        # so now it gets a series
        return (x.max() - x.min()) / averaging_window_int

    # TODO test
    newdf = df.set_index("dt_abstime", drop=False)

    print(newdf[["abstime", "tcpack"]])
    newdf["tput"] = newdf["tcpack"].rolling(
        # 3,
        averaging_window,
        # on="tcpack",
        # closed="right",
        # center=True
        # ).mean()
    ).apply(
        _compute_tput,
        raw=False,
    )  # args=(), kwargs={}

    print("AFTER rolling ")
    print(newdf[["abstime", "tcpack", "tput"]].head(5))
    return newdf
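The core of compute_goodput is the time-based rolling window. A toy illustration of the same idea on a synthetic cumulative ACK counter, assuming a 1-second window:

import pandas as pd

times = pd.to_datetime([0.0, 0.5, 1.0, 1.5, 2.0], unit="s")
acked = pd.Series([0, 500, 1500, 1800, 3000], index=times)

# rate over a sliding 1s window: counter delta divided by the window length
tput = acked.rolling("1s").apply(lambda x: (x.max() - x.min()) / 1.0, raw=False)
print(tput)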
Example #17
    def plot(self, pcap, pcapstream, fields, pcap_destinations, **kwargs):
        """
        getcallargs
        """
        log.debug("Plotting field(s) %s", fields)

        fig = plt.figure()
        axes = fig.gca()

        tcpdf = pcap

        # should be done when filtering the stream
        tcpdf.tcp.fill_dest(pcapstream)

        labels = []  # type: List[str]

        print(pcap)
        print(tcpdf)

        for dest, ddf in tcpdf.groupby(_sender("tcpdest")):
            if dest not in pcap_destinations:
                log.debug("Ignoring destination %s", dest)
                continue

            log.debug("Plotting destination %s", dest)

            for field in fields:
                # print("dest", dest, " in " , destinations)

                final = ddf.drop_duplicates(subset=field)
                print("dataframe to plot")
                print(final)

                # log.debug("Plotting field %s" % field)
                # print("len len(ddf[field])=%d" % len(ddf[field]))
                if len(final) <= 0:
                    log.info("No datapoint to plot")
                    continue

                # drop duplicate ?
                # the astype is a workaround pandas failure

                debug_dataframe(final, "tcp_attr")
                final.plot(
                    x="abstime",
                    y=field,
                    ax=axes,
                    use_index=False,
                    legend=False,
                    grid=True,
                )
                label_fmt = "{field} towards {dest}"
                labels.append(
                    label_fmt.format(field=self._attributes[field],
                                     dest=str(dest)))

        self.x_label = "Time (s)"
        if len(fields) == 1:
            y_label = self._attributes[fields[0]]
        else:
            y_label = "/".join(fields)
        self.y_label = y_label

        handles, _labels = axes.get_legend_handles_labels()

        # TODO generate correct labels ?

        # print(tcpdf[field].iloc[3:])
        # Generate "subflow X" labels
        # location: 3 => bottom left, 4 => bottom right
        axes.legend(
            handles, labels
            #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
            #     loc=4
        )

        # TODO fix dest
        self.title_fmt = " %s " % y_label

        return fig
Example #18
def classify_reinjections(df_all: pd.DataFrame) -> pd.DataFrame:
    """
    Look at reinjections on the receiver side to see which copy arrived first.
    Packets with reinjected_in_receiver are (or at least should be) the first
    arrival for a given DSN.

    Returns:
        a new dataframe with an added column "redundant" and "time_delta"
    """
    log.info("Classifying reinjections")

    if df_all.merged.already_classified():
        log.debug("Already classified, aborting")
        return df_all

    df_all = df_all.assign(redundant=False, reinj_delta=np.nan)

    df = df_all[df_all.merge_status == "both"]

    # print(df_all[ pd.notnull(df_all[_sender("reinjection_of")])] [
    #     _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) +
    #     _receiver(["packetid", "reltime"])
    # ])

    for destination in ConnectionRoles:

        log.debug("Looking at mptcp destination %r", destination)
        sender_df = df[df.mptcpdest == destination]

        # print(sender_df[ sender_df.reinjected_in.notna() ][["packetid", "reinjected_in"]])
        # print("successful reinjections" % len(reinjected_in))

        # select only packets that have been reinjected

        # debug_dataframe(sender_df, "reinjections", usecols=["reinjection_of"])
        reinjected_packets = sender_df.dropna(
            axis='index', subset=[_sender("reinjection_of")])

        log.debug("%d reinjected packets", len(reinjected_packets))
        # with pd.option_context('display.max_rows', None, 'display.max_columns', 300):
        #     print(reinjected_packets[
        #         _sender(["packetid", "reinjected_in", "reinjection_of"])
        #          + _receiver(["reinjected_in", "reinjection_of"])
        #         ].head())

        for reinjection in reinjected_packets.itertuples():
            # here we look at all the reinjected packets

            # print("full reinjection %r" % (reinjection,))

            # if there are packets in _receiver(reinjected_in), it means the reinjections
            # arrived before other similar segments and thus these segments are useless
            # it should work because
            # useless_reinjections = getattr(reinjection, _receiver("reinjected_in"), [])

            # if it was correctly mapped
            if reinjection.merge_status != "both":
                log.log(mp.TRACE,
                        "reinjection %d could not be mapped, giving up...",
                        reinjection.packetid)
                continue

            # print("%r" % reinjection.reinjection_of)
            initial_packetid = reinjection.reinjection_of[0]
            # print("initial_packetid = %r %s" % (initial_packetid, type(initial_packetid)))

            original_packet = df_all.loc[df_all.packetid ==
                                         initial_packetid].iloc[0]

            if original_packet.merge_status != "both":
                # TODO count missed classifications ?
                log.log(
                    mp.TRACE,
                    "Original packet %d could not be mapped, giving up...",
                    original_packet.packetid)
                continue

            orig_arrival = getattr(original_packet, _receiver("reltime"))
            reinj_arrival = getattr(reinjection, _receiver("reltime"))
            reinj_pktid = getattr(reinjection, _sender("packetid"))

            reinj_delta = orig_arrival - reinj_arrival
            df_all.loc[reinj_pktid, "reinj_delta"] = reinj_delta

            if reinj_delta < pd.Timedelta(0):
                # print("GOT A failed reinjection")
                df_all.loc[df_all[_sender("packetid")] == reinjection.packetid,
                           "redundant"] = True
                #TODO set reinj_delta for reinjection.packetid
            else:
                # print("GOT a successful reinjection")
                pass

    return df_all
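A sketch of post-classification filtering, reusing the columns set above; merged_df is assumed to come from load_merged_streams_into_pandas:

classified = classify_reinjections(merged_df)

# keep only the rows that actually are reinjections
reinj = classified[classified[_sender("reinjection_of")].notna()]
failed = reinj[reinj.redundant]       # the original copy arrived first
successful = reinj[~reinj.redundant]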
Example #19
def load_merged_streams_into_pandas(
        pcap1: str,
        pcap2: str,
        streamid1: int,
        streamid2: int,
        # TODO changed to protocol
        mptcp: bool,
        tshark_config: TsharkConfig,
        clock_offset1: int = 0,
        clock_offset2: int = 0,
        mapping_mode: PacketMappingMode = PacketMappingMode.HASH,
        **extra):
    """
    Arguments:
        protocol: mptcp or tcp
        mapping_mode: Only HASH works for now
        clock_offset: untested

    Returns
        a dataframe with columns... owd ?
    """
    protocolStr = "mptcp" if mptcp else "tcp"
    log.debug(f"Asked to load {protocolStr} merged streams {streamid1} and "
              "{streamid2} from pcaps {pcap1} and {pcap2}")

    cache = mp.get_cache()

    cacheid = cache.cacheuid(
        "merged", [getrealpath(pcap1), getrealpath(pcap2)],
        protocolStr + "_" + str(streamid1) + "_" + str(streamid2) + ".csv")

    # if we can't load that file from cache
    try:
        merged_df = pd.DataFrame()
        res = pd.DataFrame()

        valid, cachename = cache.get(cacheid)
        log.info("Cache validity=%s and cachename=%s" % (valid, cachename))

        # TODO disable when clock_offset is set
        if not valid:
            df1 = load_into_pandas(pcap1,
                                   tshark_config,
                                   clock_offset=clock_offset1)
            df2 = load_into_pandas(pcap2,
                                   tshark_config,
                                   clock_offset=clock_offset2)

            main_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            other_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            if mptcp:
                main_connection = MpTcpConnection.build_from_dataframe(
                    df1, MpTcpStreamId(streamid1))
                other_connection = MpTcpConnection.build_from_dataframe(
                    df2, MpTcpStreamId(streamid2))

                # for now we use known streams exclusively
                # might be interested to use merge_tcp_dataframes later
                merged_df = merge_mptcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            else:
                main_connection = TcpConnection.build_from_dataframe(
                    df1, TcpStreamId(streamid1))
                other_connection = TcpConnection.build_from_dataframe(
                    df2, TcpStreamId(streamid2))

                # for now we use known streams exclusively
                # might be interested to use merge_tcp_dataframes later
                merged_df = merge_tcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            assert cachename
            log.info("Saving into %s" % cachename)
            # trying to export lists correctly
            # print(merged_df.reinjected_in.dropna().head())
            # convert arrays back to strings
            # merged_df.apply(",".join()
            # or abstime ?

            # TODO rechange the flags hex()
            merged_df.to_csv(
                cachename,
                # columns=columns,
                index=False,
                header=True,
                sep=tshark_config.delimiter,
            )

            # tcpdest had become an object instead of a CategoricalDtype
            # see https://github.com/pandas-dev/pandas/issues/22361
            log.log(mp.TRACE, "saving with dtypes=%s", dict(merged_df.dtypes))

        else:
            log.info("Loading from cache %s", cachename)

            date_cols = get_date_cols(tshark_config.fields)

            with open(cachename) as fd:
                # generate fieldlist
                def _gen_fields(fields):
                    gfields = {}  # type: ignore
                    for _name in [_first, _second]:
                        gfields.update(
                            {_name(k): v
                             for k, v in fields.items()})
                    return gfields

                # reltime discarded on save ?
                tshark_config.fields.pop("reltime")
                gfields = _gen_fields(tshark_config.fields)
                merge_dtypes = get_dtypes(gfields)
                # log.log(mp.TRACE, "Using gfields %s" % pp.pformat(gfields))

                # we don't need any converters
                converters = {}
                date_cols = get_date_cols(gfields)

                log.log(mp.TRACE, "Using date_cols %s" % pp.pformat(date_cols))
                log.log(mp.TRACE, "Using dtypes %s" % pp.pformat(merge_dtypes))
                # log.log(mp.TRACE, "Using converters %s" % (pp.pformat(converters)))
                merged_df = pd.read_csv(
                    fd,
                    skip_blank_lines=True,
                    comment='#',
                    # we don't need 'header' when metadata is with comment
                    sep=tshark_config.delimiter,
                    # memory_map=True, # could speed up processing
                    dtype=merge_dtypes,  # poping still generates
                    converters=converters,
                    # date_parser=date_converter,
                    parse_dates=date_cols,
                )
                # at this stage, destinations are nan

                debug_fields = ["abstime", "tcpstream", "tcpdest", "mptcpdest"]
                mptcpanalyzer.debug.debug_dataframe(
                    merged_df,
                    "Merged dataframe",
                    usecols=(_first(debug_fields) + _second(debug_fields)))

                # workaround bug https://github.com/pandas-dev/pandas/issues/25448
                def _convert_to_enums():
                    # per_pcap_artificial_fields
                    for col in [
                            _first("tcpdest"),
                            _first("mptcpdest"),
                            _second("tcpdest"),
                            _second("mptcpdest")
                    ]:
                        merged_df[col] = merged_df[col].apply(
                            _convert_role, convert_dtype=False)

        # we fix the clocks a posteriori so that the cache is still usable
        log.debug("Postprocessing clock if needed")
        # merged_df[_first('abstime')] += clock_offset1
        # merged_df[_second('abstime')] += clock_offset2

        log.debug("Converting dataframes to be sender/receiver based...")

        # in both cases
        # TODO here we should attribute the definite mptcprole
        if mptcp:
            log.error(
                "We should correct the clocks if the argument is passed !")
            # raise mp.MpTcpException("Implement mptcp merge")

            res = convert_to_sender_receiver(merged_df)
            # fill MPTCP dest ?
        else:
            # tcp
            res = convert_to_sender_receiver(merged_df)

        # log.debug("Sorting by sender abstime")
        # merged_df.sort_values(by=_sender("abstime"), ascending=True, inplace=True)
        # debug_dataframe(res, "checking merge", usecols=["merge_status"])
        # print("%d nan values" % len(res[res.merge_status == np.nan]))

        log.debug("Computing owds")

        debug_dataframe(res, "before owds")
        # TODO we don't necessarely need to generate the OWDs here, might be put out
        res['owd'] = res[_receiver('abstime')] - res[_sender('abstime')]

        debug_dataframe(
            res,
            "owd",
            usecols=["owd", _sender('abstime'),
                     _receiver('abstime')])
        # with pd.option_context('float_format', '{:f}'.format):
        #     print(
        #         res[_sender(["ipsrc", "ipdst", "abstime"])
        #          + _receiver(["abstime", "packetid"]) + TCP_DEBUG_FIELDS + ["owd"] ]
        #     )

    except Exception as e:
        log.exception("exception happened while merging")

    # pd.set_option('display.max_rows', 200)
    # pd.set_option('display.max_colwidth', -1)
    # print("dtypes=", dict(dtypes))
    log.log(mp.TRACE, "Dtypes after load:%s\n", pp.pformat(res.dtypes))
    log.info("Finished loading. merged dataframe size: %d", len(res))

    return res
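The OWD computed near the end of the loader is simply the receiver arrival time minus the sender departure time for each merged packet. A toy illustration with hypothetical suffixed column names:

import pandas as pd

merged = pd.DataFrame({
    "abstime_sender":   [1.00, 1.10, 1.25],
    "abstime_receiver": [1.04, 1.16, 1.29],
})
merged["owd"] = merged["abstime_receiver"] - merged["abstime_sender"]
print(merged.owd.round(3).tolist())  # [0.04, 0.06, 0.04]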
Example #20
def convert_to_sender_receiver(df) -> pd.DataFrame:
    """
    Convert dataframe from  X_HOST1 | X_HOST2 to X_SENDER | X_RECEIVER

    each packet has a destination marker
    Assume clocks are fine here !
    """
    log.debug("Converting from host_1/host_2 to sender/receiver format")

    # fill up afterwards
    total = pd.DataFrame()

    for tcpstream, subdf in df.groupby(_first("tcpstream")):

        min_h1 = subdf.iloc[0, subdf.columns.get_loc(_first('abstime'))]
        min_h2 = subdf.iloc[0, subdf.columns.get_loc(_second('abstime'))]

        #         def _rename_columns(h1_role: ConnectionRoles):
        #             """
        # client_suffix, server_suffix
        #             Params:
        #                 client_suffix must be one of HOST1_SUFFIX or HOST2_SUFFIX
        #                 server_suffix can be deduced
        #             """
        def _rename_column(col_name, suffixes) -> str:

            for suffix_to_replace, new_suffix in suffixes.items():
                if col_name.endswith(suffix_to_replace):
                    return col_name.replace(suffix_to_replace, new_suffix)
            return col_name

            # total = pd.concat([total, subdf], ignore_index=True)

        log.debug(f"Comparing {min_h1} (h1) with {min_h2} (h2)")

        assert min_h1 != min_h2, (
            f"Same sending {min_h1} and receiving time {min_h2}."
            "Either the clock is not precise enough or it's a bug"
            " (more likely)")
        if min_h1 < min_h2:
            log.debug("Looks like h1 is the tcp client")
            # suffixes = { HOST1_SUFFIX: SENDER_SUFFIX, HOST2_SUFFIX: RECEIVER_SUFFIX }
            h1_role = ConnectionRoles.Client

        else:
            # the assert above already rules out min_h1 == min_h2
            log.debug("Looks like h2 is the tcp client")
            h1_role = ConnectionRoles.Server

        # _rename_columns(role)
        for tcpdest, tdf in subdf.groupby(_first("tcpdest"), sort=False):
            if tcpdest == h1_role:
                suffixes = {
                    HOST2_SUFFIX: SENDER_SUFFIX,
                    HOST1_SUFFIX: RECEIVER_SUFFIX
                }
            else:
                suffixes = {
                    HOST1_SUFFIX: SENDER_SUFFIX,
                    HOST2_SUFFIX: RECEIVER_SUFFIX
                }

            log.debug("suffixes: %s" % suffixes)
            rename_func = functools.partial(_rename_column, suffixes=suffixes)
            log.log(mp.TRACE, "renaming columns")

            log.debug("total df size = %d" % len(total))
            with pd.option_context('precision', 20):
                debug_cols = _first(["abstime", "tcpdest"]) + _second(
                    ["abstime", "tcpdest"])
                log.log(mp.TRACE, "before rename \n%s", tdf[debug_cols])
                tdf = tdf.rename(columns=rename_func, copy=True, inplace=False)

                debug_cols = _sender(["abstime", "tcpdest"]) + _receiver(
                    ["abstime", "tcpdest"])
                log.log(mp.TRACE, "After rename \n%s" % tdf[debug_cols])
                # print(tdf[debug_cols])
                # debug_dataframe(tdf, "temporary dataframe")
                total = pd.concat(
                    [total, tdf],
                    ignore_index=True,
                    sort=False,
                )
                # print("total df size = %d" % len(total))

        # subdf[ _first("tcpdest") == ConnectionRole.Client] .rename(columns=_rename_cols, inplace=True)
        # print(subdf.columns)
        # print(total.columns)
    # debug_dataframe(total, "total")

    log.debug("Converted to sender/receiver format")
    log.log(mp.TRACE, "Comparing #unique entries %d vs #all %d",
            total[_sender("abstime")].count(), len(total[_sender("abstime")]))
    # assert total[_sender("abstime")].count() == len(total[_sender("abstime")])
    return total
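A toy check of the suffix swap performed by _rename_column, assuming the HOST1/HOST2 suffixes are plain string suffixes (the real spellings may differ):

suffixes = {"_h1": "_sender", "_h2": "_receiver"}  # hypothetical spellings

def rename(col):
    for old, new in suffixes.items():
        if col.endswith(old):
            return col.replace(old, new)
    return col

print([rename(c) for c in ["abstime_h1", "abstime_h2", "tcpdest_h1"]])
# -> ['abstime_sender', 'abstime_receiver', 'tcpdest_sender']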
Example #21
    def plot(self, pcap, pcapstream, **kwargs):
        """
        getcallargs
        """

        fig = plt.figure()

        df = pcap
        window = kwargs.get("window")
        destinations = kwargs.get("pcap_destinations")

        print("Destinations", destinations)

        con = df.tcp.connection(pcapstream)
        df = con.fill_dest(df)

        debug_dataframe(df, "plotting TCP throughput")

        # we should resample here
        pd_abstime = pd.to_datetime(
            df[_sender("abstime")],
            unit="s",
            errors='raise',
        )
        df.set_index(pd_abstime, inplace=True)
        df.sort_index(inplace=True)

        # TODO at some point here, we lose the dest type :'(
        for dest, subdf in df.groupby("tcpdest"):
            if dest not in destinations:
                log.debug("Ignoring destination %s", dest)
                continue

            log.debug("Plotting destination %s", dest)

            label_fmt = "TCP stream {stream}"
            if len(destinations) >= 2:
                label_fmt = label_fmt + " towards {dest}"

            plot_tput(
                fig,
                subdf["tcplen"],
                # subdf["tcpack"],
                # subdf["abstime"],
                subdf.index,
                window,
                label=label_fmt.format(
                    stream=pcapstream,
                    dest=mp.ConnectionRoles(dest).to_string()))

        self.y_label = "Throughput (bytes/second)"

        # TODO fix connection towards a direction ?
        self.title_fmt = "TCP Throughput (Averaging window of {window}) for:\n{con:c<->s}".format(
            window=window, con=con)
        # self.title = "TCP Throughput (Average window of %s)" % window

        # handles, labels = axes.get_legend_handles_labels()

        # # Generate "subflow X" labels
        # # location: 3 => bottom left, 4 => bottom right
        # axes.legend(
        #     handles,
        #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
        #     loc=4
        # )

        return fig
Example #22
def classify_reinjections(df_all: pd.DataFrame) -> pd.DataFrame:
    """
    Here the idea is to look at reinjections on the receiver side to see which
    copy arrived first. Packets with reinjected_in_receiver are (or at least
    should be) the first arrival for a given DSN.

    Returns
        a new dataframe with an added column "redundant"
    """

    df_all["redundant"] = False
    df_all["reinj_delta"] = np.nan

    # rename to df_both ?
    df = df_all[df_all._merge == "both"]

    # print(df_all[ pd.notnull(df_all[_sender("reinjection_of")])] [
    #     _sender(["reinjection_of", "reinjected_in", "packetid", "reltime"]) +
    #     _receiver(["packetid", "reltime"])
    # ])

    for destination in ConnectionRoles:

        sender_df = df[df.mptcpdest == destination]

        # print(sender_df[ sender_df.reinjected_in.notna() ][["packetid", "reinjected_in"]])
        # print("successful reinjections" % len(reinjected_in))

        # select only packets that have been reinjected

        # print("%d sender_df packets" % len(sender_df))
        # print(sender_df["reinjection_of"])
        reinjected_packets = sender_df.dropna(
            axis='index', subset=[_sender("reinjection_of")])

        logging.debug("%d reinjected packets" % len(reinjected_packets))
        # with pd.option_context('display.max_rows', None, 'display.max_columns', 300):
        #     print(reinjected_packets[
        #         _sender(["packetid", "reinjected_in", "reinjection_of"]) + _receiver(["reinjected_in", "reinjection_of"])
        #         ].head())

        for reinjection in reinjected_packets.itertuples():
            # here we look at all the reinjected packets

            # print("full reinjection %r" % (reinjection,))

            # if there are packets in _receiver(reinjected_in), it means the reinjections
            # arrived before other similar segments and thus these segments are useless
            # it should work because
            # useless_reinjections = getattr(reinjection, _receiver("reinjected_in"), [])

            # if it was correctly mapped
            # itertuples() renames columns that are not valid Python identifiers
            # ("_merge" starts with an underscore), hence the positional _1 below
            if reinjection._1 != "both":
                # TODO count missed classifications ?
                log.debug("reinjection %d could not be mapped, giving up..." %
                          (reinjection.packetid))
                continue

            # print("%r" % reinjection.reinjection_of)
            initial_packetid = reinjection.reinjection_of[0]
            # print("initial_packetid = %r %s" % (initial_packetid, type(initial_packetid)))

            original_packet = df_all.loc[df_all.packetid ==
                                         initial_packetid].iloc[0]

            if original_packet._merge != "both":
                # TODO count missed classifications ?
                logging.debug(
                    "Original packet %d could not be mapped, giving up..." %
                    (original_packet.packetid))
                continue

            orig_arrival = getattr(original_packet, _receiver("reltime"))
            reinj_arrival = getattr(reinjection, _receiver("reltime"))
            reinj_pktid = getattr(reinjection, _sender("packetid"))

            reinj_delta = orig_arrival - reinj_arrival
            df_all.loc[reinj_pktid, "reinj_delta"] = reinj_delta

            if reinj_delta < 0:
                # print("GOT A MATCH")
                df_all.loc[df_all[_sender("packetid")] == reinjection.packetid,
                           "redundant"] = True
                #TODO set reinj_delta for reinjection.packetid

    return df_all
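As noted in the loop above, itertuples() renames columns that are not valid Python identifiers ("_merge" starts with an underscore) to positional names such as _1. A sketch of a workaround, which the variant in Example #18 effectively adopts via a merge_status column:

df_all = df_all.rename(columns={"_merge": "merge_status"})
for row in df_all.itertuples():
    if row.merge_status != "both":
        continue  # skip packets that were not mapped on both sides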
Example #23
def load_merged_streams_into_pandas(
        pcap1: str,
        pcap2: str,
        streamid1: int,  # Union[MpTcpStreamId, TcpStreamId],
        streamid2: int,
        mptcp: bool,
        tshark_config: TsharkConfig,
        clock_offset1: int = 0,
        clock_offset2: int = 0,
        mapping_mode: PacketMappingMode = PacketMappingMode.HASH,
        **extra):
    """
    Arguments:
        protocol: mptcp or tcp

        mapping_mode: Only HASH works for now

    Returns
        a dataframe with columns... owd ?
    """
    log.debug(
        "Asked to load merged tcp streams %d and %d from pcaps %s and %s" %
        (streamid1, streamid2, pcap1, pcap2))

    cache = mp.get_cache()
    protocolStr = "mptcp" if mptcp else "tcp"

    cacheid = cache.cacheuid(
        "merged", [
            getrealpath(pcap1),
            getrealpath(pcap2),
        ], protocolStr + "_" + str(streamid1) + "_" + str(streamid2) + ".csv")

    # if we can't load that file from cache
    try:
        merged_df = pd.DataFrame()
        res = pd.DataFrame()

        valid, cachename = cache.get(cacheid)
        log.info("Cache validity=%s and cachename=%s" % (valid, cachename))

        # TODO disable when clock_offset is set
        if not valid:
            df1 = load_into_pandas(pcap1,
                                   tshark_config,
                                   clock_offset=clock_offset1)
            df2 = load_into_pandas(pcap2,
                                   tshark_config,
                                   clock_offset=clock_offset2)

            main_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            other_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            if mptcp:
                main_connection = MpTcpConnection.build_from_dataframe(
                    df1, streamid1)
                other_connection = MpTcpConnection.build_from_dataframe(
                    df2, streamid2)

                # TODO generate
                # map_mptcp_connection()

                # for now we use known streams exclusively
                # might be interested to use merge_tcp_dataframes later
                merged_df = merge_mptcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            else:
                main_connection = TcpConnection.build_from_dataframe(
                    df1, streamid1)
                other_connection = TcpConnection.build_from_dataframe(
                    df2, streamid2)

                # for now we use known streams exclusively
                # might be interested to use merge_tcp_dataframes later
                merged_df = merge_tcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            assert cachename
            logging.info("Saving into %s" % cachename)
            # trying to export lists correctly
            # print(merged_df.reinjected_in.dropna().head())
            # convert arrays back to strings
            # merged_df.apply(",".join()
            merged_df.to_csv(
                cachename,
                # columns=columns,
                index=False,
                header=True,
                sep=tshark_config.delimiter,
            )

            # here we lost the dtype: tcpdest became an object
            print("saving with dtypes=", dict(merged_df.dtypes))
            # print("MERGED_DF", merged_df[TCP_DEBUG_FIELDS].head(20))

            # if log level >= DEBUG then save to xls too !
            # if True:
            #     filename = cachename + ".xls"
            #     logging.debug("Saved a debug excel copy at %s" % filename)
            #     merged_df.to_excel(filename)

        else:
            logging.info("Loading from cache %s" % cachename)

            # dtypes = {k: v for k, v in temp.items() if v is not None or k not in ["tcpflags"]}

            def _gen_dtypes(fields) -> Dict[str, Any]:
                dtypes = {}  # type: ignore
                for _name in [_first, _second]:

                    # TODO this could be simplified
                    for k, v in fields.items():
                        # tcpflags has no dtype: a converter handles it instead
                        if v is None and k == "tcpflags":
                            continue
                        dtypes.setdefault(_name(k), v)

                    # add generated field dtypes
                    dtypes.update({
                        _name(f.fullname): f.type
                        for f in per_pcap_artificial_fields.values()
                    })

                # these are overrides of the generated dtypes
                dtypes.update({
                    # during the merge we join even unmapped packets, so some
                    # entries may be empty => float64
                    _first("packetid"): np.float64,
                    _second("packetid"): np.float64,
                })

                return dtypes
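            # For illustration (the "_h1"/"_h2" suffixes are hypothetical):
            # with fields {"packetid": np.int64}, _gen_dtypes returns roughly
            # {"packetid_h1": np.float64, "packetid_h2": np.float64, ...},
            # packetid being overridden to float64 by the block above.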

            def _gen_converters() -> Dict[str, Callable]:

                fields = dict(tshark_config.fields)
                fields.update(per_pcap_artificial_fields)
                # no need to convert tcpflags
                default_converters = {
                    name: f.converter
                    for name, f in fields.items()
                    if f.converter and name != "tcpflags"
                }
                converters = {}  # type: Dict[str, Callable]
                for name, converter in default_converters.items():
                    converters.update({
                        _first(name): converter,
                        _second(name): converter
                    })

                return converters
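            # For illustration (field name hypothetical): if "reinjected_in"
            # carries a converter parsing "1,2" into [1, 2], _gen_converters
            # returns {"reinjected_in_h1": <parser>, "reinjected_in_h2": <parser>}.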

            with open(cachename) as fd:
                dtypes = _gen_dtypes({
                    name: field.type
                    for name, field in tshark_config.fields.items()
                })
                converters = _gen_converters()
                merged_df = pd.read_csv(
                    fd,
                    skip_blank_lines=True,
                    comment='#',
                    # no explicit 'header' needed: metadata lines are comments
                    sep=tshark_config.delimiter,
                    # memory_map=True,  # could speed up processing
                    dtype=dtypes,
                    converters=converters,
                )

                # log.debug("Column names after loading from cache: %s", merged_df.columns)

                # TODO: handle the "No columns to parse from file" error that
                # read_csv raises on an empty cache file

        # we fix the clocks a posteriori so that the cache stays usable
        # regardless of the offsets

        log.debug("Postprocessing clocks if needed")
        merged_df[_first('abstime')] += clock_offset1
        merged_df[_second('abstime')] += clock_offset2

        logging.debug("Converting dataframes to be sender/receiver based...")
        # in both cases
        # TODO here we should attribute the definite mptcprole
        # compute owd
        if mptcp:
            # TODO: correct the clocks here too when an offset is passed
            log.error("We should correct the clocks if the argument is passed!")
            # raise mp.MpTcpException("Implement mptcp merge")

            res = convert_to_sender_receiver(merged_df)
        else:
            # tcp: this is where the times get corrected and where the
            # host1/host2 columns are renamed to _sender/_receiver
            res = convert_to_sender_receiver(merged_df)

            # don't adjust abstime here, or the offset would be applied twice
            # data["abstime"] += clock_offset

        logging.debug("Computing owds")
        log.debug("Column names: %s", res.columns)
        log.debug("Dtypes after load:%s\n" % dict(res.dtypes))
        print("res=")
        # TODO we don't necessarely need to generate the OWDs here, might be put out
        res['owd'] = res[_receiver('abstime')] - res[_sender('abstime')]
        # .head(40))
        with pd.option_context('float_format', '{:f}'.format):
            print(res[_sender(["ipsrc", "ipdst", "abstime"]) +
                      _receiver(["abstime", "packetid"]) + TCP_DEBUG_FIELDS +
                      ["owd"]])

    except Exception:
        log.exception("Exception happened while merging")

    log.info("Finished loading; merged dataframe size: %d", len(merged_df))

    return res
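A minimal usage sketch of the loader above. The enclosing function's name is not
visible in this excerpt, so load_merged_streams, the paths, stream ids and
tshark_config below are all assumptions:

    # hypothetical invocation; adapt names to the actual module layout
    merged = load_merged_streams(
        pcap1="client.pcap", pcap2="server.pcap",
        streamid1=0, streamid2=0,
        mptcp=False,
        tshark_config=tshark_config,
        clock_offset1=0, clock_offset2=0,
    )
    print(merged["owd"].describe())  # one-way delay statistics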
Beispiel #24
0
    def plot(self, pcap, pcapstream, window, **kwargs):
        """
        TODO for now only plots subflows
        plots the mptcp aggregate or mptcpack instead ?
        """
        fig = plt.figure()

        df = pcap
        destinations = kwargs.get("pcap_destinations")

        con = df.mptcp.connection(pcapstream)
        df = con.fill_dest(df)

        if len(destinations) == 1:
            suffix = " towards MPTCP %s" % (destinations[0].to_string())
            self.title_fmt = self.title_fmt + suffix

        # use the sender timestamps as the (datetime) index
        pd_abstime = pd.to_datetime(
            df[_sender("abstime")],
            unit="s",
            errors='raise',
        )
        df.set_index(pd_abstime, inplace=True)
        df.sort_index(inplace=True)

        # plot MPTCP-level throughput first
        ##################################################
        label_fmt = "MPTCP"
        if len(destinations) >= 2:
            label_fmt = label_fmt + " towards {mptcpdest}"

        for mptcpdest, subdf in df.groupby(_sender("mptcpdest")):
            mptcpdest = mp.ConnectionRoles(mptcpdest)
            if mptcpdest not in destinations:
                log.debug("Ignoring destination %s", mptcpdest)
                continue

            log.debug("Plotting mptcp destination %s", mptcpdest)

            plot_tput(fig,
                      subdf["tcplen"],
                      subdf.index,  # the datetime index built from abstime above
                      window,
                      label=label_fmt.format(mptcpdest=mptcpdest.to_string()))

        # ... then per-subflow throughput
        ##################################################
        fields = ["tcpstream", "tcpdest", "mptcpdest"]

        label_fmt = "Subflow {tcpstream}"
        if len(destinations) >= 2:
            label_fmt = label_fmt + " towards MPTCP {mptcpdest}"

        for idx, subdf in df.groupby(fields, sort=False):
            tcpstream, tcpdest, mptcpdest = idx
            mptcpdest = mp.ConnectionRoles(mptcpdest)
            if mptcpdest not in destinations:
                log.debug("Ignoring MPTCP destination %s", mptcpdest)
                continue

            log.debug("Plotting tcp destination %s", tcpdest)

            # basically the same as for tcp
            plot_tput(
                fig,
                subdf["tcplen"],
                subdf.index,  # subdf["abstime"],
                window,
                label=label_fmt.format(tcpstream=tcpstream,
                                       mptcpdest=mptcpdest.to_string()))

        # return {
        #     'fig': fig
        # }
        return fig
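plot_tput itself is not shown in this excerpt. Below is a minimal sketch of
what such a helper could look like, computing a rolling-window byte rate; every
name and the exact semantics are assumptions, and window is taken to be an
integer number of seconds:

    import pandas as pd

    def plot_tput(fig, values, times, window, label=None):
        # hypothetical reimplementation: sum bytes over a sliding `window`
        # seconds and plot the resulting rate on the figure's axes
        idx = times if isinstance(times, pd.DatetimeIndex) \
            else pd.to_datetime(times, unit="s")
        series = pd.Series(values.to_numpy(), index=idx)
        rate = series.rolling("%ds" % window).sum() / window  # bytes per second
        axes = fig.gca()
        axes.plot(rate.index, rate.to_numpy(), label=label)
        axes.legend()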
Beispiel #25
0
    def plot(self, pcap, pcapstream, window, **kwargs):
        """
        Should be very similar to the thoughput one, except with

        """
        fig = plt.figure()
        axes = fig.gca()
        fields = ["tcpdest", "tcpstream", "mptcpdest"]

        # TODO this should be configured in the parser
        # destinations = kwargs.get("destinations", list(mp.ConnectionRoles))
        destinations = kwargs.get("pcap_destinations")
        skipped = kwargs.get("skipped_subflows", [])
        df = pcap

        # flag redundant reinjections
        df_classified = classify_reinjections(df)

        # then it's the same as for throughput: keep only useful packets
        log.debug("Dropping redundant packets")
        df_useful = df_classified[df_classified.redundant == False].copy()
        df_useful.dropna(
            axis="index",
            subset=[_sender("abstime")],
            inplace=True,
        )
        # print("after dropna")
        # print(df_useful)

        pd_abstime = pd.to_datetime(df_useful[_sender("abstime")],
                                    unit="s",
                                    errors="raise")

        df_useful.set_index(pd_abstime, inplace=True)
        df_useful.sort_index(inplace=True)

        suffix = " towards MPTCP {mptcpdest}"

        # plots MPTCP level goodput
        ##################################################
        label_fmt = "Aggregated" + (suffix if len(destinations) > 1 else "")

        for mptcpdest, subdf in df_useful.groupby("mptcpdest"):
            mptcpdest = mp.ConnectionRoles(mptcpdest)
            if mptcpdest not in destinations:
                log.debug("Ignoring destination %s", mptcpdest)
                continue

            log.debug("Plotting mptcp destination %s", mptcpdest)

            # add id
            plot_tput(
                fig,
                subdf["tcplen"],
                subdf.index,  # the datetime index built from abstime above
                window,
                label=label_fmt.format(mptcpdest=mptcpdest.to_string()),
            )

        label_fmt = "Subflow {tcpstream}"
        if len(destinations) == 1:
            # TODO as we look at acks, it should be swapped !
            self.title_fmt = self.title_fmt + suffix
        else:
            # label_suffix = suffix
            label_fmt = label_fmt + suffix

        for idx, subdf in df_useful.groupby(_sender(fields),
                                            as_index=False,
                                            sort=False):

            # print("len= %r" % len(subdf))
            tcpdest, tcpstream, mptcpdest = idx
            print("tcpdest= %r, tcpstream %r mptcpdest %r" %
                  (tcpdest, tcpstream, mptcpdest))

            if mptcpdest not in destinations:
                log.debug("skipping MPTCP dest %s", tcpdest)
                continue

            if tcpstream in skipped:
                log.debug("skipping subflow %d", tcpstream)
                continue

            # log.debug("plotting MPTCP dest %s" % tcpdest)
            # if len(destinations) >= 2:
            #     label_fmt = label_fmt + suffix

            plot_tput(
                fig,
                # subdf["dack"],
                subdf["tcplen"],
                subdf.index,  # the datetime index already carries abstime
                window,
                label=label_fmt.format(
                    tcpstream=tcpstream,
                    mptcpdest=mp.ConnectionRoles(mptcpdest).to_string()),
            )

        # NB: this reuses the last loop iteration's values; it would fail if
        # the groupby above yielded nothing
        self.title_fmt = self.title_fmt.format(
            tcpstream=tcpstream,
            mptcpdest=mp.ConnectionRoles(mptcpdest).to_string())

        return fig
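A hypothetical invocation of this goodput plot; the class name MptcpGoodput and
the exact argument spelling are assumptions mirroring the signature above:

    plot = MptcpGoodput()  # hypothetical plugin class hosting this method
    fig = plot.plot(
        pcap=df, pcapstream=0, window=3,
        pcap_destinations=[mp.ConnectionRoles.Server],
        skipped_subflows=[],
    )
    fig.savefig("goodput.png")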