Example #1
    def plot_tcp(self, df, fig, fields, **kwargs):
        axes = fig.gca()
        # fields = ["tcpdest", "tcpstream"]


        label_fmt = "Stream {tcpstream} towards {tcpdest}"
        for idx, subdf in df.groupby(_sender(fields), sort=False):
            tcpdest, tcpstream = idx
            debug_dataframe(subdf, "subdf stream %d destination %r" % (tcpstream, tcpdest))

            pplot = subdf.plot.line(
                # gca = get current axes (Axes), create one if necessary
                ax=axes,
                legend=True,
                # TODO: the x field should depend on the sender/receiver orientation
                x=_sender("abstime"),
                y="owd",
                label=label_fmt.format(tcpstream=tcpstream, tcpdest=tcpdest),
            )
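
A minimal, self-contained sketch of the grouping pattern above, with a toy dataframe standing in for the sender-oriented capture (the column names here are illustrative; the real code prefixes them via `_sender`):

import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({
    "tcpdest": ["Server", "Server", "Client", "Client"],
    "tcpstream": [0, 0, 0, 0],
    "abstime": [0.0, 1.0, 0.5, 1.5],
    "owd": [0.010, 0.012, 0.020, 0.018],
})

fig, axes = plt.subplots()
label_fmt = "Stream {tcpstream} towards {tcpdest}"
# one line per (tcpdest, tcpstream) group, all drawn on the same Axes
for (tcpdest, tcpstream), subdf in df.groupby(["tcpdest", "tcpstream"], sort=False):
    subdf.plot.line(ax=axes, x="abstime", y="owd", legend=True,
                    label=label_fmt.format(tcpstream=tcpstream, tcpdest=tcpdest))
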
Example #2
    def plot(self, pcap, pcapstream, **kwargs):
        """
        getcallargs
        """
        df = pcap

        # Need to compute reinjections
        df.mptcp.fill_dest(pcapstream)
        df = classify_reinjections(df)

        fig = plt.figure()

        # log.info("%d streams in the MPTCP flow" % len(tcpstreams))
        log.info("Plotting reinjections ")

        axes = fig.gca()

        fields = ["tcpstream", "mptcpdest"]

        fig.suptitle(
            "Reinjections CDF",
            verticalalignment="top",
        )

        # the destinations have not been computed yet!
        debug_dataframe(df, "DATASET HEAD")
        for idx, subdf in df.groupby(_sender(fields), sort=False):
            log.info("len(df)= %d" % len(df))

            # TODO check destination
            # TODO skip if no reinjection
            debug_dataframe(subdf, "DATASET HEAD")

            # for idx, (streamid, ds) in enumerate(tcpstreams):
            # subdf[_sender("reinj_delta")].plot.line(
            #     x="abstime",
            #     ax=axes,
            #     # use_index=False,
            #     legend=False,
            #     grid=True,
            # )
            subdf[_sender("reinj_delta")].hist(cumulative=True,
                                               density=1,
                                               bins=100)

        axes.set_xlabel("Reinjection delay (s)")
        axes.set_ylabel("Cumulative fraction")

        handles, labels = axes.get_legend_handles_labels()

        # Generate "subflow X" labels
        # location: 3 => bottom left, 4 => bottom right
        axes.legend(handles,
                    ["Subflow %d" % (x) for x, _ in enumerate(labels)],
                    loc=4)
        return fig
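
Pandas' `hist(cumulative=True, density=True)` is what turns the raw delays into an empirical CDF. A standalone sketch with synthetic delays (the exponential distribution is just a stand-in):

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

reinj_delta = pd.Series(np.random.exponential(scale=0.05, size=500))

fig, axes = plt.subplots()
# density=True normalizes the histogram; cumulative=True accumulates it,
# so the curve reaches 1.0 at the largest delay
reinj_delta.hist(ax=axes, cumulative=True, density=True, bins=100)
axes.set_xlabel("Reinjection delay (s)")
axes.set_ylabel("Cumulative fraction")
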
Example #3
    def plot(self, pcap, protocol, **kwargs):
        """
        Ideally it should be mapped automatically
        For now plots only one direction but there could be a wrapper to plot forward owd, then backward OWDs
        Disclaimer: Keep in mind this assumes a perfect synchronization between nodes, i.e.,
        it relies on the pcap absolute time field.
        While this is true in discrete time simulators such as ns3

        """
        fig = plt.figure()
        axes = fig.gca()
        res = pcap
        destinations = kwargs.get("pcap_destinations")
        # should already be done
        # res[_sender("abstime")] = pd.to_datetime(res[_sender("abstime")], unit="s")

        # TODO here we should rewrite
        debug_fields = _sender(TCP_DEBUG_FIELDS) + _receiver(TCP_DEBUG_FIELDS) + ["owd"]

        # print("columns", pcap)
        debug_dataframe(res, "owd dataframe")
        # print(res.loc[res.merge_status == "both", debug_fields])

        df = res

        # print("DESTINATION=%r" % destinations)
        # df= df[df.owd > 0.010]

        fields = ["tcpdest", "tcpstream", ]
        # if True:
        # TODO: use Protocol.MPTCP:
        if protocol == "mptcp":
            self.plot_mptcp(df, fig, fields, **kwargs)
        elif protocol == "tcp":
            self.plot_tcp(df, fig, fields, **kwargs)
        else:
            raise Exception("Unsupported protocol %r" % protocol)


        self.title_fmt = "One Way Delays for {protocol}"
        if len(destinations) == 1:
            self.title_fmt = self.title_fmt + " towards {dest}"

        self.title_fmt = self.title_fmt.format(
            protocol=protocol,
            # kwargs.get("pcap1stream"),
            # kwargs.get("pcap2stream"),
            dest=destinations[0].to_string()
        )

        return fig
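
Since the whole plot hinges on the synchronized-clock assumption from the docstring, here is the underlying arithmetic in isolation: the OWD is simply the receiver-side timestamp minus the sender-side timestamp of the same packet (column names are hypothetical; the merged dataframes actually use `_sender`/`_receiver` prefixes):

import pandas as pd

merged = pd.DataFrame({
    "abstime_sender":   [0.000, 0.100, 0.200],
    "abstime_receiver": [0.012, 0.109, 0.215],
})
# only meaningful if both capture clocks are synchronized,
# as in discrete-time simulators such as ns3
merged["owd"] = merged["abstime_receiver"] - merged["abstime_sender"]
print(merged["owd"])
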
Example #4
    def __call__(self, parser, namespace, values, option_string=None):
        # super(argparse.Action).__call__(parser, namespace, values, option_string)

        # make sure the dataframe is available
        df = self.get_dataframe(namespace)

        log.debug("Filtering stream %s", (values))

        field = "tcpstream"
        protocol = mp.Protocol.TCP
        if isinstance(values, MpTcpStreamId):
            field = "mptcpstream"
            protocol = mp.Protocol.MPTCP
            log.debug("Mptcp instance")
        elif isinstance(values, TcpStreamId):
            pass
        else:
            parser.error(
                "Unsupported 'type' %s. Set it to TcpStreamId or MpTcpStreamId" % type(values)
            )

        log.debug("Assign filter to %s", self.dest)
        setattr(namespace, self.dest, values)
        query = self.query_tpl.format(field=field, streamid=values)

        log.log(mp.TRACE, "Applying query [%s]", query)
        debug_dataframe(df, "after query")  # usecolds ['tcpstream']

        import pandas as pd
        log.log(mp.TRACE, "use numexpr? %d", pd.get_option('compute.use_numexpr'))
        # hack to prevent NA errors with pandas
        df.dropna(subset=[field], inplace=True)

        # TODO: avoid query() as long as
        # https://github.com/pandas-dev/pandas/issues/25369 is not fixed
        df.query(query, inplace=True, engine="python")
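A compact reproduction of the dropna-then-query step above: rows with NA in the filtered column are dropped first, since comparisons against NA inside query() can misbehave (hence the "hack" comment). Toy data, hypothetical stream ids:

import pandas as pd

df = pd.DataFrame({
    "tcpstream": [0.0, 1.0, None, 0.0],
    "tcplen": [10, 20, 30, 40],
})
df.dropna(subset=["tcpstream"], inplace=True)
# same shape as the query_tpl.format(...) result, e.g. "tcpstream == 0"
df.query("tcpstream == 0", inplace=True, engine="python")
print(df)
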
Example #5
def map_tcp_packets_via_hash(
        # TODO rename, these are not host1/host2 anymore
        host1_df,
        host2_df,
        *args,
        **kwargs):
    """
    Merge on hash of different fields
    Resulting dataframe has H1_SUFFIX / H2_SUFFIX
    """
    log.info("Merging dataframes via hash")
    debug_cols = ["packetid", "hash", "abstime"]
    # debug_dataframe(total, "concatenated df",
    #     usecols=_first(["abstime", "tcpdest"]) + _second(["abstime", "tcpdest"]))

    debug_dataframe(
        host1_df,
        "host1_df",
    )
    debug_dataframe(host2_df, "host2 df")

    # TODO: we could now use merge_asof
    # TODO: we should be able to drop the duplicated columns here
    try:
        # deduplicate identical hashes on each side, otherwise the
        # one_to_one validation of the merge below would fail

        host1_df = deal_with_duplicated_hash(host1_df)
        host2_df = deal_with_duplicated_hash(host2_df)

        res = pd.merge(
            host1_df,
            host2_df,
            on="hash",
            suffixes=(HOST1_SUFFIX, HOST2_SUFFIX),  # column suffixes
            how="outer",  # we want to keep packets from both hosts
            # we want to know how many packets were not mapped correctly;
            # adds a column that takes the values "left_only", "right_only" or "both"
            indicator="merge_status",
            # run additional checks against duplicate hashes
            validate="one_to_one",  # can slow down the process
        )

    except pd.errors.MergeError as e:
        # TODO: we shouldn't print here, just let the error propagate
        print("An error happened during the merge of the 2 pcaps")
        print(e)
        raise

    # local override of TCP_DEBUG_FIELDS
    TCP_DEBUG_FIELDS = ["packetid", "abstime"]
    debug_cols = _first(TCP_DEBUG_FIELDS) + _second(TCP_DEBUG_FIELDS)
    debug_dataframe(res, "Result of merging by hash", usecols=debug_cols)
    return res
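
The heart of the function is the single pd.merge call; here it is on two tiny frames so the roles of indicator and validate are visible (explicit suffixes stand in for HOST1_SUFFIX/HOST2_SUFFIX):

import pandas as pd

h1 = pd.DataFrame({"hash": [1, 2, 3], "abstime": [0.00, 0.10, 0.20]})
h2 = pd.DataFrame({"hash": [2, 3, 4], "abstime": [0.15, 0.25, 0.35]})

res = pd.merge(
    h1, h2,
    on="hash",
    suffixes=("_h1", "_h2"),
    how="outer",               # keep packets seen by only one host
    indicator="merge_status",  # "left_only" / "right_only" / "both"
    validate="one_to_one",     # raises pd.errors.MergeError on duplicated hashes
)
print(res[res.merge_status != "both"])  # packets that could not be mapped
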
Example #6
    def plot(self, pcap, pcapstream, **kwargs):
        """
        getcallargs
        """

        fig = plt.figure()

        df = pcap
        window = kwargs.get("window")
        destinations = kwargs.get("pcap_destinations")

        print("Destinations", destinations)

        con = df.tcp.connection(pcapstream)
        df = con.fill_dest(df)

        debug_dataframe(df, "plotting TCP throughput")

        # here we should resample
        pd_abstime = pd.to_datetime(
            df[_sender("abstime")],
            unit="s",
            errors='raise',
        )
        df.set_index(pd_abstime, inplace=True)
        df.sort_index(inplace=True)

        # TODO at some point here, we lose the dest type :'(
        for dest, subdf in df.groupby("tcpdest"):
            if dest not in destinations:
                log.debug("Ignoring destination %s", dest)
                continue

            log.debug("Plotting destination %s", dest)

            label_fmt = "TCP stream {stream}"
            if len(destinations) >= 2:
                label_fmt = label_fmt + " towards {dest}"

            plot_tput(
                fig,
                subdf["tcplen"],
                # subdf["tcpack"],
                # subdf["abstime"],
                subdf.index,
                window,
                label=label_fmt.format(
                    stream=pcapstream,
                    dest=mp.ConnectionRoles(dest).to_string()))

        self.y_label = "Throughput (bytes/second)"

        # TODO fix connection towards a direction ?
        self.title_fmt = "TCP Throughput (Averaging window of {window}) for:\n{con:c<->s}".format(
            window=window, con=con)
        # self.title = "TCP Throughput (Average window of %s)" % window

        # handles, labels = axes.get_legend_handles_labels()

        # # Generate "subflow X" labels
        # # location: 3 => bottom left, 4 => bottom right
        # axes.legend(
        #     handles,
        #     ["%s for Subflow %d" % (field, x) for x, _ in enumerate(labels)],
        #     loc=4
        # )

        return fig
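
plot_tput itself is not shown here, but once the frame is indexed by a DatetimeIndex (as done above), a throughput estimate is a resample-and-sum away. A sketch of that idea, under the assumption that tcplen carries the per-packet payload size:

import pandas as pd

df = pd.DataFrame({
    "abstime": [0.0, 0.2, 0.4, 1.1, 1.5],
    "tcplen": [1500, 1500, 1000, 1500, 500],
})
df = df.set_index(pd.to_datetime(df["abstime"], unit="s")).sort_index()
# bytes summed per 1-second window ~ throughput in bytes/second
tput = df["tcplen"].resample("1s").sum()
print(tput)
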
Example #7
    def plot(self, pcap, pcapstream, fields, pcap_destinations, **kwargs):
        """
        getcallargs
        """
        log.debug("Plotting field(s) %s", fields)

        fig = plt.figure()
        axes = fig.gca()

        tcpdf = pcap

        # should be done when filtering the stream
        tcpdf.tcp.fill_dest(pcapstream)

        labels = []  # type: List[str]

        debug_dataframe(tcpdf, "tcp fields dataframe")

        for dest, ddf in tcpdf.groupby(_sender("tcpdest")):
            if dest not in pcap_destinations:
                log.debug("Ignoring destination %s", dest)
                continue

            log.debug("Plotting destination %s", dest)

            for field in fields:
                # keep only the rows where the field changes
                final = ddf.drop_duplicates(subset=field)
                if final.empty:
                    log.info("No datapoint to plot")
                    continue

                debug_dataframe(final, "tcp_attr")
                final.plot(
                    x="abstime",
                    y=field,
                    ax=axes,
                    use_index=False,
                    legend=False,
                    grid=True,
                )
                label_fmt = "{field} towards {dest}"
                labels.append(
                    label_fmt.format(field=self._attributes[field],
                                     dest=str(dest)))

        self.x_label = "Time (s)"
        if len(fields) == 1:
            y_label = self._attributes[fields[0]]
        else:
            y_label = "/".join(fields)
        self.y_label = y_label

        handles, _labels = axes.get_legend_handles_labels()

        # TODO: generate correct labels?
        axes.legend(handles, labels)

        # TODO fix dest
        self.title_fmt = " %s " % y_label

        return fig
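
The drop_duplicates step above only keeps rows where the plotted field takes a new value, which thins monotone, step-like fields without changing the line's shape. In isolation (the field name rwnd is just an example):

import matplotlib.pyplot as plt
import pandas as pd

df = pd.DataFrame({
    "abstime": [0.0, 0.1, 0.2, 0.3, 0.4],
    "rwnd":    [1000, 1000, 2000, 2000, 4000],
})
final = df.drop_duplicates(subset="rwnd")  # keeps rows 0, 2 and 4

fig, axes = plt.subplots()
final.plot(x="abstime", y="rwnd", ax=axes, use_index=False, legend=False, grid=True)
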
Example #8
def mptcp_compute_throughput(rawdf, mptcpstreamid: MpTcpStreamId,
                             destination: ConnectionRoles,
                             merged_df: bool) -> MpTcpUnidirectionalStats:
    """
    Very raw computation: substract highest dsn from lowest by the elapsed time
    Args:
        merged_df: True if merged_df

    Returns:
        a tuple (True/false, dict)
    """
    assert isinstance(destination,
                      ConnectionRoles), "destination is %r" % destination

    con = rawdf.mptcp.connection(mptcpstreamid)
    q = con.generate_direction_query(destination)
    df = unidirectional_df = rawdf.query(q, engine="python")

    # -1 because of syn
    dsn_range, dsn_max, dsn_min = transmitted_seq_range(df, "dsn")

    msg = "dsn_range ({}) = {} (dsn_max) - {} (dsn_min) - 1"
    log.debug(msg.format(dsn_range, dsn_max, dsn_min))

    _col = _sender if merged_df else lambda x: x

    # print("test _sender %s" % _col("toto"))
    # Could groupby destination as well
    groups = df.groupby(_col('tcpstream'))

    subflow_stats: List[TcpUnidirectionalStats] = []
    for tcpstream, subdf in groups:
        # subdf.iloc[0, subdf.columns.get_loc(_second('abstime'))]
        # debug_dataframe(subdf, "subdf for stream %d" % tcpstream)
        dest = subdf.iloc[0, subdf.columns.get_loc(_col('tcpdest'))]
        sf_stats = tcp_get_stats(
            subdf,
            tcpstream,
            # work around a pandas issue (for now dest is a float)
            ConnectionRoles(dest),
            True)

        fields = ["tcpdest", "mptcpdest", "dss_dsn", "dss_length"]
        # debug_dataframe(subdf, "Debugging", usecols=[fields])

        # DSNs can be discontinuous, so we have to look at each packet
        # we drop duplicates
        transmitted_dsn_df = subdf.drop_duplicates(subset="dsn")

        sf_stats.mptcp_application_bytes = transmitted_dsn_df["tcplen"].sum()

        # + 1 to deal with syn oddity
        assert sf_stats.mptcp_application_bytes <= sf_stats.tcp_byte_range + 1, sf_stats

        log.log(mp.TRACE, "Adding subflow stats %r", sf_stats)
        subflow_stats.append(sf_stats)

    times = df["abstime"]
    duration = times.iloc[-1] - times.iloc[0]

    total_tput = sum(map(lambda x: x.throughput_bytes, subflow_stats))

    for sf in subflow_stats:
        # can be > 1 in case of redundant packets
        if total_tput > 0:
            sf.throughput_contribution = sf.throughput_bytes.bytes / total_tput
        else:
            sf.throughput_contribution = 0
            log.warn("Total Throughput <= 0. Something fishy possibly ?")
    """
    If it's a merged df, then we can classify reinjections and give more results
    on the goodput
    """
    if merged_df:
        df = classify_reinjections(unidirectional_df)

        debug_dataframe(df, "after reinjections have been analyzed")

        # mptcp_application_bytes = df.loc[df.redundant == False, "tcplen"].sum()
        for sf in subflow_stats:
            log.debug("for tcpstream %d" % sf.tcpstreamid)
            # columns.get_loc(_first('abstime'))]
            df_sf = df[df.tcpstream == sf.tcpstreamid]

            non_redundant_pkts = df_sf.loc[df_sf.redundant == False, "tcplen"]
            # print("non_redundant_pkts")
            # print(non_redundant_pkts)
            sf.mptcp_application_bytes = non_redundant_pkts.sum()
            # print("sf.mptcp_application_bytes" , sf.mptcp_application_bytes)

            sf.goodput_contribution = sf.mptcp_application_bytes / dsn_range

    return MpTcpUnidirectionalStats(
        mptcpstreamid=mptcpstreamid,
        mptcp_application_bytes=Byte(dsn_range),
        mptcp_duration=duration,
        subflow_stats=subflow_stats,
    )
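
The goodput logic boils down to counting each DSN once: reinjections carry a DSN that was already transmitted, so deduplicating on dsn before summing tcplen excludes the redundant bytes. Standalone, with made-up numbers:

import pandas as pd

df = pd.DataFrame({
    "dsn":    [1, 1, 2, 3, 3],        # DSNs 1 and 3 were reinjected
    "tcplen": [100, 100, 100, 100, 100],
})
transmitted_dsn_df = df.drop_duplicates(subset="dsn")
mptcp_application_bytes = transmitted_dsn_df["tcplen"].sum()
print(mptcp_application_bytes)  # 300, not 500
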
Example #9
def merge_tcp_dataframes_known_streams(
        con1: Tuple[pd.DataFrame, TcpConnection],
        con2: Tuple[pd.DataFrame, TcpConnection]) -> pd.DataFrame:
    """
    Generates an intermediate file with the owds.

    1/ clean up dataframe to keep
    2/ identify which dataframe is server's/client's

    Args:
        con1: Tuple dataframe/tcpstream id
        con2: same

    Returns:
        A dataframe with a "merge_status" column and valid tcp/mptcp destinations
        To ease debug we want to see packets in chronological order
    """
    h1_df, main_connection = con1
    h2_df, mapped_connection = con2

    log.info(
        "Trying to merge connection {} to {} of respective sizes {} and {}".
        format(main_connection, mapped_connection, len(h1_df), len(h2_df)))

    # cleanup the dataframes to contain only the current stream packets
    h1_df = h1_df[h1_df.tcpstream == main_connection.tcpstreamid]
    h2_df = h2_df[h2_df.tcpstream == mapped_connection.tcpstreamid]

    # TODO reorder columns to have packet ids first !
    total = pd.DataFrame()

    for tcpdest in ConnectionRoles:

        log.debug("Merging tcp destination %s" % tcpdest)
        q = main_connection.generate_direction_query(tcpdest)
        h1_unidirectional_df = h1_df.query(q, engine="python")
        q = mapped_connection.generate_direction_query(tcpdest)
        h2_unidirectional_df = h2_df.query(q, engine="python")

        res = map_tcp_packets(h1_unidirectional_df, h2_unidirectional_df)

        # pandas trick to avoid losing dtype
        # see https://github.com/pandas-dev/pandas/issues/22361#issuecomment-413147667
        # no need to set _second (as they are just opposite)
        # TODO this should be done somewhere else
        # else summary won't work
        res[_first('tcpdest')][:] = tcpdest
        res[_second('tcpdest')][:] = tcpdest

        # generate_mptcp_direction_query
        # TODO this is not always reached ?
        log.info("con of TYPE %r", main_connection)
        if isinstance(main_connection, MpTcpSubflow):
            log.debug("This is a subflow, setting mptcp destinations...")
            mptcpdest = main_connection.mptcp_dest_from_tcpdest(tcpdest)
            res[_first('mptcpdest')][:] = mptcpdest
            res[_second('mptcpdest')][:] = mptcpdest
            log.debug("Setting mptcpdest to %s", mptcpdest)

        total = pd.concat([res, total])
        debugcols = _first(["abstime", "tcpdest", "mptcpdest"]) + \
            _second(["abstime", "tcpdest", "mptcpdest"])
        debug_dataframe(total, "concatenated df", usecols=debugcols)

    log.info(
        "Resulting merged tcp dataframe of size {} ({} mapped packets vs {} unmapped) "
        "with input dataframes of size {} and {}.".format(
            len(total), len(total[total.merge_status == "both"]),
            len(total[total.merge_status != "both"]), len(h1_df), len(h2_df)))

    # print("unmapped packets:")
    # print(total.loc[total._merge != "both", _sender(TCP_DEBUG_FIELDS) + _receiver(TCP_DEBUG_FIELDS) ])
    return total
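
The per-destination split/merge/concat skeleton, reduced to its bones (the real merge is map_tcp_packets; a trivial stand-in is used here):

import pandas as pd

h1_df = pd.DataFrame({"tcpdest": ["Client", "Server", "Client"], "packetid": [1, 2, 3]})

total = pd.DataFrame()
for tcpdest in ["Client", "Server"]:  # stand-in for iterating over ConnectionRoles
    unidirectional_df = h1_df[h1_df.tcpdest == tcpdest].copy()
    res = unidirectional_df            # the real code merges both hosts here
    res["merge_status"] = "both"       # placeholder for pd.merge's indicator column
    total = pd.concat([res, total])
print(total)
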
Example #10
def load_merged_streams_into_pandas(
        pcap1: str,
        pcap2: str,
        streamid1: int,
        streamid2: int,
        # TODO changed to protocol
        mptcp: bool,
        tshark_config: TsharkConfig,
        clock_offset1: int = 0,
        clock_offset2: int = 0,
        mapping_mode: PacketMappingMode = PacketMappingMode.HASH,
        **extra):
    """
    Arguments:
        protocol: mptcp or tcp
        mapping_mode: Only HASH works for now
        clock_offset: untested

    Returns
        a dataframe with columns... owd ?
    """
    protocolStr = "mptcp" if mptcp else "tcp"
    log.debug(f"Asked to load {protocolStr} merged streams {streamid1} and "
              "{streamid2} from pcaps {pcap1} and {pcap2}")

    cache = mp.get_cache()

    cacheid = cache.cacheuid(
        "merged", [getrealpath(pcap1), getrealpath(pcap2)],
        protocolStr + "_" + str(streamid1) + "_" + str(streamid2) + ".csv")

    # if we can't load that file from cache
    try:
        merged_df = pd.DataFrame()
        res = pd.DataFrame()

        valid, cachename = cache.get(cacheid)
        log.info("Cache validity=%s and cachename=%s" % (valid, cachename))

        # TODO disable when clock_offset is set
        if not valid:
            df1 = load_into_pandas(pcap1,
                                   tshark_config,
                                   clock_offset=clock_offset1)
            df2 = load_into_pandas(pcap2,
                                   tshark_config,
                                   clock_offset=clock_offset2)

            main_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            other_connection = None  # type: Union[MpTcpConnection, TcpConnection]
            if mptcp:
                main_connection = MpTcpConnection.build_from_dataframe(
                    df1, MpTcpStreamId(streamid1))
                other_connection = MpTcpConnection.build_from_dataframe(
                    df2, MpTcpStreamId(streamid2))

                # for now we use known streams exclusively
                # might be interested to use merge_tcp_dataframes later
                merged_df = merge_mptcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            else:
                main_connection = TcpConnection.build_from_dataframe(
                    df1, TcpStreamId(streamid1))
                other_connection = TcpConnection.build_from_dataframe(
                    df2, TcpStreamId(streamid2))

                # for now we use known streams exclusively
                # might be interested to use merge_tcp_dataframes later
                merged_df = merge_tcp_dataframes_known_streams(
                    (df1, main_connection), (df2, other_connection))

            assert cachename
            log.info("Saving into %s" % cachename)
            # TODO: export list columns (e.g. reinjected_in) correctly,
            # converting arrays back to strings
            # TODO: re-encode the flags with hex()
            merged_df.to_csv(
                cachename,
                # columns=columns,
                index=False,
                header=True,
                sep=tshark_config.delimiter,
            )

            # tcpdest became an object dtype instead of a CategoricalDtype
            # see https://github.com/pandas-dev/pandas/issues/22361
            log.log(mp.TRACE, "saving with dtypes=%s", dict(merged_df.dtypes))

        else:
            log.info("Loading from cache %s", cachename)

            date_cols = get_date_cols(tshark_config.fields)

            with open(cachename) as fd:
                # generate fieldlist
                def _gen_fields(fields):
                    gfields = {}  # type: ignore
                    for _name in [_first, _second]:
                        gfields.update(
                            {_name(k): v
                             for k, v in fields.items()})
                    return gfields

                # reltime discarded on save ?
                tshark_config.fields.pop("reltime")
                gfields = _gen_fields(tshark_config.fields)
                merge_dtypes = get_dtypes(gfields)
                # log.log(mp.TRACE, "Using gfields %s" % pp.pformat(gfields))

                # we don't need any converters
                converters = {}
                date_cols = get_date_cols(gfields)

                log.log(mp.TRACE, "Using date_cols %s" % pp.pformat(date_cols))
                log.log(mp.TRACE, "Using dtypes %s" % pp.pformat(merge_dtypes))
                # log.log(mp.TRACE, "Using converters %s" % (pp.pformat(converters)))
                merged_df = pd.read_csv(
                    fd,
                    skip_blank_lines=True,
                    comment='#',
                    # we don't need 'header' when metadata is with comment
                    sep=tshark_config.delimiter,
                    # memory_map=True, # could speed up processing
                    dtype=merge_dtypes,  # popping still generates
                    converters=converters,
                    # date_parser=date_converter,
                    parse_dates=date_cols,
                )
                # at this stage, destinations are nan

                debug_fields = ["abstime", "tcpstream", "tcpdest", "mptcpdest"]
                mptcpanalyzer.debug.debug_dataframe(
                    merged_df,
                    "Merged dataframe",
                    usecols=(_first(debug_fields) + _second(debug_fields)))

                # workaround bug https://github.com/pandas-dev/pandas/issues/25448
                def _convert_to_enums():
                    # per_pcap_artificial_fields
                    for col in [
                            _first("tcpdest"),
                            _first("mptcpdest"),
                            _second("tcpdest"),
                            _second("mptcpdest")
                    ]:
                        merged_df[col] = merged_df[col].apply(
                            _convert_role, convert_dtype=False)

                # the helper was defined but never called; invoke it so the
                # destination columns actually become enums again
                _convert_to_enums()

        # we fix the clocks a posteriori so that the cache is still usable
        log.debug("Postprocessing clock if needed")
        # merged_df[_first('abstime')] += clock_offset1
        # merged_df[_second('abstime')] += clock_offset2

        log.debug("Converting dataframes to be sender/receiver based...")

        # in both cases
        # TODO here we should attribute the definite mptcprole
        if mptcp:
            log.error(
                "We should correct the clocks if the argument is passed !")
            # raise mp.MpTcpException("Implement mptcp merge")

            res = convert_to_sender_receiver(merged_df)
            # fill MPTCP dest ?
        else:
            # tcp
            res = convert_to_sender_receiver(merged_df)

        # log.debug("Sorting by sender abstime")
        # merged_df.sort_values(by=_sender("abstime"), ascending=True, inplace=True)
        # debug_dataframe(res, "checking merge", usecols=["merge_status"])
        # print("%d nan values" % len(res[res.merge_status == np.nan]))

        log.debug("Computing owds")

        debug_dataframe(res, "before owds")
        # TODO we don't necessarely need to generate the OWDs here, might be put out
        res['owd'] = res[_receiver('abstime')] - res[_sender('abstime')]

        debug_dataframe(
            res,
            "owd",
            usecols=["owd", _sender('abstime'),
                     _receiver('abstime')])
        # with pd.option_context('float_format', '{:f}'.format):
        #     print(
        #         res[_sender(["ipsrc", "ipdst", "abstime"])
        #          + _receiver(["abstime", "packetid"]) + TCP_DEBUG_FIELDS + ["owd"] ]
        #     )

    except Exception:
        log.exception("exception happened while merging")

    # pd.set_option('display.max_rows', 200)
    # pd.set_option('display.max_colwidth', -1)
    # print("dtypes=", dict(dtypes))
    log.log(mp.TRACE, "Dtypes after load:%s\n", pp.pformat(res.dtypes))
    log.info("Finished loading. merged dataframe size: %d", len(res))

    return res
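
The dtype plumbing around read_csv exists because a CSV round-trip silently downgrades categorical columns to plain object dtype (the pandas issue linked above). A minimal demonstration:

import io
import pandas as pd

df = pd.DataFrame({"tcpdest": pd.Categorical(["Client", "Server"])})
buf = io.StringIO()
df.to_csv(buf, index=False)
buf.seek(0)
reloaded = pd.read_csv(buf)
print(df["tcpdest"].dtype, "->", reloaded["tcpdest"].dtype)  # category -> object
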
Example #11
    def plot(self, pcap, pcapstream, pcap_destinations, dack=False, relative=None, **args):
        """
        Might be

        """
        dack_str = "dss_rawack"
        dsn_str = "dss_dsn"
        # dsn_str = "dsn"


        debug_dataframe(pcap, "dss")

        rawdf = pcap.set_index("reltime")

        print("pcapstream", pcapstream)
        con = rawdf.mptcp.connection(pcapstream)
        df = con.fill_dest(rawdf)

        # only select entries with a dss_dsn
        # df_forward = self.preprocess(rawdf, destination=destination, extra_query="dss_dsn > 0", **args)

        # kinda buggy
        destination = pcap_destinations[0]
        print("destination:", destination)

        # tcpdest or mptcpdest
        df_forward = df[df.mptcpdest == destination]
        df_forward = df_forward[df_forward[dsn_str] > 0]
        debug_dataframe(df_forward, "Forward dest", usecols=["dsn", "mptcpdest", "dss_dsn", "dss_length"])

        # compute limits of the plot
        # ymin, ymax = float('inf'), 0
        # ymin, ymax = min(ymin, df_forward[dsn_str].min()), max(ymax, df_forward[dsn_str].max())
        ymin, ymax = df_forward[dsn_str].min(), df_forward[dsn_str].max()
        print("setting ymin/ymax", ymin, ymax)

        fig = plt.figure()
        axes = fig.gca()

        # plt.vlines([0, 1], 0, 3)

        def show_dss(idx, row, style):
            """
            Draw one DSS mapping as a vertical arrow.
            Returns a FancyArrow.
            """
            print("Arrow settings: origin=%s/%d with length %d" % (idx, int(row[dsn_str]), row["dss_length"]))
            res = axes.arrow(
                idx,
                int(row[dsn_str]),  # x, y
                0,
                row["dss_length"],    # dx, dy
                # 20,
                # head_width=0.05,
                head_width=0.08,
                # head_length=0.00002
                head_length=0.1,
                # length_includes_head=True,
                **style
            )
            res.set_label("hello")
            return res


        # TODO cycle manually through
        cycler = mpl.rcParams['axes.prop_cycle']
        styles = cycle(cycler)
        legends = []
        legend_artists = []

        df_forward.set_index("abstime", inplace=True)

        ### Plot dss dsn (forward)
        ######################################################
        for tcpstream, df in df_forward.groupby('tcpstream'):
            style = next(styles)
            print("arrows for tcpstream %d" % tcpstream)

            # style = next(styles)

            artist_recorded = False
            # TODO itertuples should be faster
            for index, row in df.iterrows():
                artist = show_dss(
                    index,
                    # row["packetid"],
                    row,
                    style
                )
                print("artists %r" % artist)
                if not artist_recorded:
                    legend_artists.append(artist)
                    artist_recorded = True

            if artist_recorded:
                legends.append("dss for Subflow %d" % tcpstream)


        ### if enabled, plot dack (backward)
        ######################################################
        # TODO fix
        if dack:
            df_backward = self.preprocess(rawdf, **args,
                                          destination=mp.reverse_destination(destination),
                                          extra_query=dack_str + " >= 0")

            for tcpstream, df in df_backward.groupby('tcpstream'):
                # marker = next(markers)
                if df.empty:
                    log.debug("No dack for tcpstream %d", tcpstream)
                else:
                    ax1 = df[dack_str].plot.line(ax=axes, legend=False)
                    lines, labels = ax1.get_legend_handles_labels()
                    legend_artists.append(lines[-1])
                    legends.append("dack for sf %d" % tcpstream)

        # location: 3 => bottom left, 4 => bottom right
        axes.legend(legend_artists, legends, loc=4)

        # xmin, xmax = 0, 5000
        # axes.set_xlim([xmin, xmax])
        # axes.set_ylim([ymin, ymax])
        axes.relim()
        axes.autoscale_view()
        # axes.autoscale(enable=True, axis="both")

        return fig
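
Arrows created with axes.arrow are plain patches: they get no automatic legend entry, which is why the code above collects one artist per subflow and passes it to axes.legend explicitly. A minimal reproduction with made-up DSS coordinates:

import matplotlib.pyplot as plt

fig, axes = plt.subplots()
artists = []
# one vertical arrow per DSS mapping: x=time, y=first DSN, dy=mapping length
for abstime, dsn, length in [(0.0, 100, 50), (0.5, 150, 50), (1.0, 200, 80)]:
    artists.append(axes.arrow(abstime, dsn, 0, length,
                              head_width=0.08, head_length=0.1))
axes.legend([artists[0]], ["dss for Subflow 0"], loc=4)
axes.relim()
axes.autoscale_view()
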