Example #1
def last_k_instalment_features_with_fractions(gr, periods, fraction_periods):
    gr_ = gr.copy()

    features = {}
    features_temp = {}

    for period in periods:
        gr_period = gr_[gr_['days'] <= period]

        features_temp = utils.add_features_in_group(
            features_temp, gr_period, 'installments',
            ['mean', 'var', 'skew', 'kurt', 'iqr'], 'last_{}_'.format(period))

        features_temp = utils.add_features_in_group(
            features_temp, gr_period, 'purchase_amount',
            ['sum', 'max', 'mean', 'var', 'skew', 'kurt', 'iqr'],
            'last_{}_'.format(period))

    for short_period, long_period in fraction_periods:
        short_feature_names = utils._get_feature_names(features_temp,
                                                       short_period)
        long_feature_names = utils._get_feature_names(features_temp,
                                                      long_period)

        for short_feature, long_feature in zip(short_feature_names,
                                               long_feature_names):
            old_name_chunk = '_{}_'.format(short_period)
            new_name_chunk = '_{}by{}_fraction_'.format(
                short_period, long_period)
            fraction_feature_name = short_feature.replace(
                old_name_chunk, new_name_chunk)
            features[fraction_feature_name] = utils.safe_div(
                features_temp[short_feature], features_temp[long_feature])
    return pd.Series(features)
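
Note: every snippet on this page calls a safe_div helper (imported directly or via a project-specific utils module), and none of them include its definition. As a rough sketch only, a scalar variant consistent with how these pandas features are computed might look like the following; the zero fallback value is an assumption, not taken from any of the source projects.

def safe_div(numerator, denominator, fallback=0.0):
    # Hypothetical helper: return `fallback` instead of raising
    # ZeroDivisionError when the denominator is zero.
    if denominator == 0:
        return fallback
    return numerator / denominator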
Example #2
def minmax(x, y=None):
    # batch-wise pre-processing
    axis = tuple(range(1, len(x.shape)))

    # MIN-MAX
    x_max = tf.math.reduce_max(x, axis=axis, keepdims=True)
    x_min = tf.math.reduce_min(x, axis=axis, keepdims=True)
    x = safe_div(x-x_min, x_max-x_min)
    if y is not None:
        return x, y
    return x
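
The TensorFlow-based snippets on this page (this one and the metric classes further down) need an elementwise, tensor-aware safe_div. tf.math.divide_no_nan already provides that behaviour, returning 0 wherever the denominator is 0, so a plausible sketch is just a thin wrapper; this is an assumption about the helper, not its actual source.

import tensorflow as tf

def safe_div(x, y):
    # Elementwise x / y that yields 0 wherever y == 0,
    # instead of inf or NaN.
    return tf.math.divide_no_nan(x, y)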
Example #3
def qcd_ratio(qcd_only, method=""):
    ratio = None
    if method == "abcd":
        q = qcd_only
        r = utils.safe_div(q[:-1, :], q[1:, :])
        r = np.delete(r, r.shape[1] - 1, 1)  # delete last column
        r = np.insert(r, 0, ufloat(np.nan, np.nan), 1)  # insert first column
        r = np.insert(r, 0, ufloat(np.nan, np.nan), 0)  # insert first row
        ratio = r
    elif method == "double":
        q = qcd_only
        r = utils.safe_div(q[:-1, :], q[1:, :])
        rr = utils.safe_div(r[:, 1:-1] * r[:, 1:-1], r[:, :-2])
        rr = np.insert(rr, (0, 0), ufloat(np.nan, np.nan),
                       1)  # insert first two columns
        rr = np.insert(rr, 0, ufloat(np.nan, np.nan), 0)  # insert first row
        ratio = rr
    else:  # "raw"
        q = qcd_only
        r = utils.safe_div(q[:-1, :], q[1:, :])
        r = np.insert(r, 0, ufloat(np.nan, np.nan), 0)  # insert first row
        ratio = r
    return ratio
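
The QCD-ratio snippet above divides NumPy object arrays holding uncertainties.ufloat values, where dividing by an exact zero raises ZeroDivisionError. A hedged sketch of a compatible utils.safe_div, chosen to match the NaN padding the function inserts, could be the following; the real helper may behave differently.

import numpy as np
from uncertainties import ufloat

def safe_div(num, den):
    # Elementwise division of ufloat object arrays; cells with a
    # zero denominator become nan+/-nan instead of raising.
    out = np.empty(num.shape, dtype=object)
    for idx in np.ndindex(num.shape):
        d = den[idx]
        out[idx] = (num[idx] / d if d.nominal_value != 0
                    else ufloat(np.nan, np.nan))
    return out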
Example #4
def top_conversations_by_chars(self_name: str,
                               conversations: List[NamedConversation],
                               exhaustive_lists: bool):
    """
    Generates a list of top conversations, ordered by the number of
    characters exchanged, from the specified list of conversations.

    :param self_name: name of the person who should be considered "myself"
    :param conversations: list of conversations
    :param exhaustive_lists: whether the list should include all conversations
    :return:
    """
    conversation_counts = {}
    total_messages = 0  # Used for computing threshold when not using exhaustive lists.

    for name, participants, messages in conversations:
        total_messages += len(messages)

        # Create a counter for any conversation we have not seen yet.
        if name not in conversation_counts:
            conversation_counts[name] = [0, 0]  # others, me

        for sender, text, _ in messages:
            if sender == self_name:
                conversation_counts[name][1] += 0 if text is None else len(
                    text)
            else:
                conversation_counts[name][0] += 0 if text is None else len(
                    text)

    top_conversations = reversed(
        sorted((value[0] + value[1], key, value)
               for (key, value) in conversation_counts.items()))

    # Threshold is used to avoid outputting a lot of conversations
    # with very few messages. Currently it is calculated as the
    # average message count per conversation.
    threshold = safe_div(total_messages, len(conversation_counts))

    print('Conversations by characters exchanged:')
    for characters, conversation_name, counts in top_conversations:
        if characters != 0:
            if exhaustive_lists or characters > threshold:
                print(
                    f'{conversation_name}\t{characters} ({counts[1]} sent, {counts[0]} received)'
                )

    if not exhaustive_lists:
        print('And more...')
Example #5
    def result(self):
        # Location-sensitive detection performance
        ER = safe_div(self.S + self.D + self.I, self.Nref)

        prec = safe_div(self.TP, self.Nsys)
        recall = safe_div(self.TP, self.Nref)
        F = safe_div(2 * prec * recall, prec + recall)

        # Class-sensitive localization performance
        if self.DE_TP > 0:
            DE = safe_div(self.total_DE, self.DE_TP)
        else:
            # When the total number of predictions is zero
            DE = tf.ones([], tf.float32) * 180

        DE_prec = safe_div(self.DE_TP, self.Nsys)
        DE_recall = safe_div(self.DE_TP, self.Nref)
        DE_F = safe_div(2 * DE_prec * DE_recall, DE_prec + DE_recall)

        return ER, F, DE, DE_F
Example #6
def evaluate_net(model, test_data, ctx):
    triplet_loss = gluon.loss.TripletLoss(margin=0)
    sum_correct = 0
    sum_all = 0
    rate = 0.0
    for i, (data, _) in enumerate(test_data):
        data = data.as_in_context(ctx)

        anc_ins, pos_ins, neg_ins = data[:, 0], data[:, 1], data[:, 2]
        inter1 = model(anc_ins)  # combined during training
        inter2 = model(pos_ins)
        inter3 = model(neg_ins)
        loss = triplet_loss(inter1, inter2, inter3)

        loss = loss.asnumpy()
        n_all = loss.shape[0]
        n_correct = np.sum(np.where(loss == 0, 1, 0))

        sum_correct += n_correct
        sum_all += n_all
        rate = safe_div(sum_correct, sum_all)

    print('Accuracy: %.4f (%s / %s)' % (rate, sum_correct, sum_all))
    return rate
Example #7
def parse_pcap(sim_dir, out_dir):
    """ Parse a PCAP file. """
    print(f"Parsing: {sim_dir}")
    sim = utils.Sim(sim_dir)
    assert sim.unfair_flws > 0, f"No unfair flows to analyze: {sim_dir}"

    # Construct the output filepaths.
    out_flp = path.join(out_dir, f"{sim.name}.npz")
    # If the output file exists, then we do not need to parse this file.
    if path.exists(out_flp):
        print(f"    Already parsed: {sim_dir}")
        return

    # Process PCAP files from unfair senders and receivers.
    #
    # The final output, with one entry per unfair flow.
    unfair_flws = []
    for unfair_idx in range(sim.unfair_flws):
        # Since this will not be used in practice, we can calculate
        # the min one-way delay using the simulation's parameters.
        one_way_us = sim.btl_delay_us + 2 * sim.edge_delays[unfair_idx]

        # Packet lists are of tuples of the form:
        #     (seq, sender, timestamp us, timestamp option)
        sent_pkts = utils.parse_packets(path.join(
            sim_dir, f"{sim.name}-{unfair_idx + 2}-0.pcap"),
                                        sim.payload_B,
                                        direction="data")
        recv_pcap_flp = path.join(
            sim_dir,
            (f"{sim.name}-{unfair_idx + 2 + sim.unfair_flws + sim.fair_flws}-0"
             ".pcap"))
        recv_pkts = utils.parse_packets(recv_pcap_flp,
                                        sim.payload_B,
                                        direction="data")
        # Ack packets for RTT calculation
        ack_pkts = utils.parse_packets(recv_pcap_flp,
                                       sim.payload_B,
                                       direction="ack")

        # State that the windowed metrics need to track across packets.
        win_state = {
            win: {
                # The index at which this window starts.
                "window_start_idx": 0,
                # The "loss event rate".
                "loss_interval_weights": make_interval_weight(8),
                "loss_event_intervals": collections.deque(),
                "current_loss_event_start_idx": 0,
                "current_loss_event_start_time": 0,
                # For "loss rate true".
                "loss_queue_true": collections.deque(),
                # For "loss rate estimated".
                "loss_queue_estimate": collections.deque()
            }
            for win in WINDOWS
        }

        # The final output. -1 implies that a value was unable to be
        # calculated.
        output = np.empty(len(recv_pkts), dtype=DTYPE)
        output.fill(-1)
        # Total number of packet losses up to the current received
        # packet.
        pkt_loss_total_true = 0
        pkt_loss_total_estimate = 0
        # Loss rate estimation.
        prev_pkt_seq = 0
        highest_seq = 0
        # RTT estimation.
        ack_idx = 0

        for j, recv_pkt in enumerate(recv_pkts):
            # Regular metrics.
            recv_pkt_seq = recv_pkt[0]
            output[j]["seq"] = recv_pkt_seq
            recv_time_cur = recv_pkt[2]
            output[j]["arrival time us"] = recv_time_cur

            if j > 0:
                # Receiver-side RTT estimation using the TCP timestamp
                # option. Attempt to find a new RTT estimate. Move
                # ack_idx to the first occurrence of the timestamp
                # option TSval corresponding to the current packet's
                # TSecr.
                tsval = ack_pkts[ack_idx][3][0]
                tsecr = recv_pkt[3][1]
                ack_idx_old = ack_idx
                while tsval != tsecr and ack_idx < len(ack_pkts) - 1:
                    ack_idx += 1
                    tsval = ack_pkts[ack_idx][3][0]
                if tsval == tsecr:
                    # If we found a timestamp option match, then
                    # update the RTT estimate.
                    rtt_estimate_us = recv_time_cur - ack_pkts[ack_idx][2]
                else:
                    # Otherwise, use the previous RTT estimate and
                    # reset ack_idx to search again for the next
                    # packet.
                    rtt_estimate_us = output[j - 1][make_ewma_metric(
                        "RTT estimate us", alpha=1.)]
                    ack_idx = ack_idx_old
                # Update the min RTT estimate.
                min_rtt_us = utils.safe_min(output[j - 1]["min RTT us"],
                                            rtt_estimate_us)
                output[j]["min RTT us"] = min_rtt_us
                # Compute the new RTT ratio.
                rtt_estimate_ratio = utils.safe_div(rtt_estimate_us,
                                                    min_rtt_us)

                # Calculate the inter-arrival time.
                recv_time_prev = recv_pkts[j - 1][2]
                interarr_time_us = recv_time_cur - recv_time_prev
            else:
                rtt_estimate_us = -1
                rtt_estimate_ratio = -1
                min_rtt_us = -1
                recv_time_prev = -1
                interarr_time_us = -1

            # Calculate the true packet loss rate. Count the number of
            # dropped packets by checking if the sequence numbers at
            # sender and receiver are the same. If not, the packet is
            # dropped, and the pkt_loss_total_true counter increases
            # by one to keep the index offset at the sender.
            sent_pkt_seq = sent_pkts[j + pkt_loss_total_true][0]
            pkt_loss_total_true_prev = pkt_loss_total_true
            while sent_pkt_seq != recv_pkt_seq:
                # Packet loss
                pkt_loss_total_true += 1
                sent_pkt_seq = sent_pkts[j + pkt_loss_total_true][0]
            # Calculate how many packets were lost since receiving the
            # last packet.
            pkt_loss_cur_true = pkt_loss_total_true - pkt_loss_total_true_prev

            # Receiver-side loss rate estimation. Estimate the losses
            # since the last packet.
            pkt_loss_cur_estimate = math.ceil(
                0 if recv_pkt_seq == prev_pkt_seq + sim.payload_B else ((
                    (recv_pkt_seq - highest_seq - sim.payload_B) /
                    sim.payload_B
                ) if recv_pkt_seq > highest_seq + sim.payload_B else (1 if (
                    recv_pkt_seq < prev_pkt_seq and prev_pkt_seq != highest_seq
                ) else 0)))
            pkt_loss_total_estimate += pkt_loss_cur_estimate
            prev_pkt_seq = recv_pkt_seq
            highest_seq = max(highest_seq, prev_pkt_seq)

            # Calculate the true RTT and RTT ratio. Look up the send
            # time of this packet to calculate the true
            # sender-receiver delay. Assume that, on the reverse path,
            # packets will experience no queuing delay.
            rtt_true_us = (recv_time_cur -
                           sent_pkts[j + pkt_loss_total_true][2] + one_way_us)
            rtt_true_ratio = rtt_true_us / (2 * one_way_us)

            # EWMA metrics.
            for (metric, _), alpha in itertools.product(EWMAS, ALPHAS):
                metric = make_ewma_metric(metric, alpha)
                if "interarrival time us" in metric:
                    new = interarr_time_us
                elif ("throughput p/s" in metric
                      and "mathis model" not in metric):
                    # Do not use the existing interarrival EWMA to
                    # calculate the throughput. Instead, use the true
                    # interarrival time so that the value used to
                    # update the throughput EWMA is not "EWMA-ified"
                    # twice. Divide by 1e6 to convert from
                    # microseconds to seconds.
                    new = utils.safe_div(1,
                                         utils.safe_div(interarr_time_us, 1e6))
                elif "RTT estimate us" in metric:
                    new = rtt_estimate_us
                elif "RTT estimate ratio" in metric:
                    new = rtt_estimate_ratio
                elif "RTT true us" in metric:
                    new = rtt_true_us
                elif "RTT true ratio" in metric:
                    new = rtt_true_ratio
                elif "loss rate estimate" in metric:
                    # See comment in case for "loss rate true".
                    new = pkt_loss_cur_estimate / (pkt_loss_cur_estimate + 1)
                elif "loss rate true" in metric:
                    # Divide the pkt_loss_cur_true by
                    # (pkt_loss_cur_true + 1) because over the course
                    # of sending (pkt_loss_cur_true + 1) packets, one
                    # got through and pkt_loss_cur_true were lost.
                    new = pkt_loss_cur_true / (pkt_loss_cur_true + 1)
                elif "queue occupancy" in metric:
                    # Queue occupancy is calculated using the router
                    # logs, below.
                    continue
                elif "mathis model throughput p/s" in metric:
                    # Use the estimated loss rate to compute the
                    # Mathis model fair throughput. Contrary to the
                    # decision for interarrival time, above, here we
                    # use the value of another EWMA (loss rate
                    # estimate) to compute the new value for the
                    # Mathis model throughput EWMA. I believe that
                    # this is desirable because we want to see how the
                    # metric as a whole reacts to a certain degree of
                    # memory.
                    loss_rate_estimate = (pkt_loss_total_estimate /
                                          j if j > 0 else -1)
                    # Use "safe" operations in case any of the
                    # supporting values are -1 (unknown).
                    new = (-1 if loss_rate_estimate <= 0 else utils.safe_div(
                        MATHIS_C,
                        utils.safe_div(
                            utils.safe_mul(
                                min_rtt_us,
                                utils.safe_sqrt(loss_rate_estimate)), 1e6)))
                elif "mathis model label" in metric:
                    # Use the current throughput and the Mathis model
                    # fair throughput to compute the Mathis model
                    # label.
                    output[j][metric] = utils.safe_mathis_label(
                        output[j][make_ewma_metric("throughput p/s", alpha)],
                        output[j][make_ewma_metric(
                            "mathis model throughput p/s", alpha)])
                    # Continue because the value of this metric is not
                    # an EWMA.
                    continue
                else:
                    raise Exception(f"Unknown EWMA metric: {metric}")
                # Update the EWMA.
                output[j][metric] = utils.safe_update_ewma(
                    -1 if j == 0 else output[j - 1][metric], new, alpha)

            # Windowed metrics.
            for (metric, _), win in itertools.product(WINDOWED, WINDOWS):
                metric = make_win_metric(metric, win)
                # If we have not been able to estimate the min RTT
                # yet, then we cannot compute any of the windowed
                # metrics.
                if min_rtt_us == -1:
                    continue
                win_size_us = win * min_rtt_us

                # Move the start of the window forward.
                while ((recv_time_cur -
                        recv_pkts[win_state[win]["window_start_idx"]][2]) >
                       win_size_us):
                    win_state[win]["window_start_idx"] += 1
                win_start_idx = win_state[win]["window_start_idx"]

                if "average interarrival time us" in metric:
                    new = ((recv_time_cur - recv_pkts[win_start_idx][2]) /
                           (j - win_start_idx + 1))
                elif "average throughput p/s" in metric:
                    # We base the throughput calculation on the
                    # average interarrival time over the window.
                    avg_interarr_time_us = output[j][make_win_metric(
                        "average interarrival time us", win)]
                    # Divide by 1e6 to convert from microseconds to
                    # seconds.
                    new = utils.safe_div(
                        1, utils.safe_div(avg_interarr_time_us, 1e6))
                elif "average RTT estimate us" in metric:
                    new = utils.safe_mean(
                        output[make_ewma_metric("RTT estimate us", alpha=1.)],
                        win_start_idx, j)
                elif "average RTT estimate ratio" in metric:
                    new = utils.safe_mean(
                        output[make_ewma_metric("RTT estimate ratio",
                                                alpha=1.)], win_start_idx, j)
                elif "average RTT true us" in metric:
                    new = utils.safe_mean(
                        output[make_ewma_metric("RTT true us", alpha=1.)],
                        win_start_idx, j)
                elif "average RTT true ratio" in metric:
                    new = utils.safe_mean(
                        output[make_ewma_metric("RTT true ratio", alpha=1.)],
                        win_start_idx, j)
                elif "loss event rate" in metric and "1/sqrt" not in metric:
                    rtt_estimate_us = output[j][make_win_metric(
                        "average RTT estimate us", win)]
                    if rtt_estimate_us == -1:
                        # The RTT estimate is -1 (unknown), so we
                        # cannot compute the loss event rate.
                        continue

                    cur_start_idx = win_state[win][
                        "current_loss_event_start_idx"]
                    cur_start_time = win_state[win][
                        "current_loss_event_start_time"]
                    if pkt_loss_cur_estimate > 0:
                        # There was a loss since the last packet.
                        #
                        # The index of the first packet in the current
                        # loss event.
                        new_start_idx = (j + pkt_loss_total_estimate -
                                         pkt_loss_cur_estimate)

                        if cur_start_idx == 0:
                            # This is the first loss event.
                            #
                            # Naive fix for the loss event rate
                            # calculation. The method described in the
                            # RFC is complicated for handling the
                            # first event.
                            cur_start_idx = 1
                            cur_start_time = 0
                            new = 1 / j
                        else:
                            # This is not the first loss event. See if
                            # any of the newly-lost packets start a
                            # new loss event.
                            #
                            # The average time between when packets
                            # should have arrived, since we received
                            # the last packet.
                            loss_interval = ((recv_time_cur - recv_time_prev) /
                                             (pkt_loss_cur_estimate + 1))

                            # Look at each lost packet...
                            for k in range(pkt_loss_cur_estimate):
                                # Compute the approximate time at
                                # which the packet should have been
                                # received if it had not been lost.
                                loss_time = (recv_time_prev +
                                             (k + 1) * loss_interval)

                                # If the time of this loss is more
                                # than one RTT from the time of the
                                # start of the current loss event,
                                # then this is a new loss event.
                                if (loss_time - cur_start_time >=
                                        rtt_estimate_us):
                                    # Record the number of packets
                                    # between the start of the new
                                    # loss event and the start of the
                                    # previous loss event.
                                    win_state[win][
                                        "loss_event_intervals"].appendleft(
                                            new_start_idx - cur_start_idx)
                                    # Potentially discard an old event.
                                    if len(win_state[win]
                                           ["loss_event_intervals"]) > win:
                                        win_state[win][
                                            "loss_event_intervals"].pop()

                                    cur_start_idx = new_start_idx
                                    cur_start_time = loss_time
                                # Advance to the index of the next
                                # lost packet.
                                new_start_idx += 1

                            new = compute_weighted_average(
                                (j + pkt_loss_total_estimate - cur_start_idx),
                                win_state[win]["loss_event_intervals"],
                                win_state[win]["loss_interval_weights"])
                    elif pkt_loss_total_estimate > 0:
                        # There have been no losses since the last
                        # packet, but the total loss is nonzero.
                        # Increase the size of the current loss event.
                        new = compute_weighted_average(
                            j + pkt_loss_total_estimate - cur_start_idx,
                            win_state[win]["loss_event_intervals"],
                            win_state[win]["loss_interval_weights"])
                    else:
                        # There have never been any losses, so the
                        # loss event rate is 0.
                        new = 0

                    # Record the new values of the state variables.
                    win_state[win][
                        "current_loss_event_start_idx"] = cur_start_idx
                    win_state[win][
                        "current_loss_event_start_time"] = cur_start_time
                elif "1/sqrt loss event rate" in metric:
                    # Use the loss event rate to compute
                    # 1 / sqrt(loss event rate).
                    new = utils.safe_div(
                        1,
                        utils.safe_sqrt(output[j][make_win_metric(
                            "loss event rate", win)]))
                elif "loss rate estimate" in metric:
                    # We do not need to check whether recv_time_prev
                    # is -1 (unknown) because the windowed metrics
                    # skip the case where j == 0.
                    win_state[win]["loss_queue_estimate"], new = loss_rate(
                        win_state[win]["loss_queue_estimate"], win_start_idx,
                        pkt_loss_cur_estimate, recv_time_cur, recv_time_prev,
                        win_size_us, j)
                elif "loss rate true" in metric:
                    # We do not need to check whether recv_time_prev
                    # is -1 (unknown) because the windowed metrics
                    # skip the case where j == 0.
                    win_state[win]["loss_queue_true"], new = loss_rate(
                        win_state[win]["loss_queue_true"], win_start_idx,
                        pkt_loss_cur_true, recv_time_cur, recv_time_prev,
                        win_size_us, j)
                elif "queue occupancy" in metric:
                    # Queue occupancy is calculated using the router
                    # logs, below.
                    continue
                elif "mathis model throughput p/s" in metric:
                    # Use the loss event rate to compute the Mathis
                    # model fair throughput.
                    loss_rate_estimate = (pkt_loss_total_estimate /
                                          j if j > 0 else -1)
                    new = utils.safe_div(
                        MATHIS_C,
                        utils.safe_div(
                            utils.safe_mul(
                                min_rtt_us,
                                utils.safe_sqrt(loss_rate_estimate)), 1e6))
                elif "mathis model label" in metric:
                    # Use the current throughput and Mathis model
                    # fair throughput to compute the Mathis model
                    # label.
                    new = utils.safe_mathis_label(
                        output[j][make_win_metric("average throughput p/s",
                                                  win)],
                        output[j][make_win_metric(
                            "mathis model throughput p/s", win)])
                else:
                    raise Exception(f"Unknown windowed metric: {metric}")
                output[j][metric] = new
        unfair_flws.append(output)

    # Save memory by explicitly deleting the sent and received packets
    # after they have been parsed. This happens outside of the above
    # for-loop because earlier iterations' packet lists are freed
    # automatically when the sent_pkts and recv_pkts variables are
    # rebound on the next iteration; only the last iteration's lists
    # are still alive here.
    del sent_pkts
    del recv_pkts

    # Process pcap files from the bottleneck router to determine queue
    # occupancy. Packet lists are of tuples of the form:
    #     (seq, sender, timestamp us, timestamp option)
    router_pkts = utils.parse_packets(path.join(sim_dir,
                                                f"{sim.name}-1-0.pcap"),
                                      sim.payload_B,
                                      direction="data")
    # State pertaining to each flow.
    flw_state = {
        flw: {
            # Index of the output array where the queue occupancy
            # results should be appended.
            "output_idx": 0,
            # The number of other flows' packets that have arrived
            # since the last packet for this flow.
            "packets_since_last": 0,
            # The number of packets from this flow currently in the
            # window.
            "window_flow_packets": {win: 0
                                    for win in WINDOWS}
        }
        for flw in range(sim.unfair_flws)
    }
    # The index of the first packet in the window, for every window
    # size.
    win_start_idxs = {win: 0 for win in WINDOWS}

    # Loop over all of the packets received by the bottleneck
    # router. Note that we process all flows at once.
    for j, router_pkt in enumerate(router_pkts):
        _, sender, curr_time, _ = router_pkt
        # Process only packets that are part of one of the unfair
        # flows. Discard packets that did not make it to the receiver
        # (e.g., at the end of the experiment).
        if (sender < sim.unfair_flws and flw_state[sender]["output_idx"] <
                unfair_flws[sender].shape[0]):
            # We cannot move this above the if-statement condition
            # because it is valid only if sender < sim.unfair_flws.
            output_idx = flw_state[sender]["output_idx"]

            # EWMA metrics.
            for (metric, _), alpha in itertools.product(EWMAS, ALPHAS):
                metric = make_ewma_metric(metric, alpha)
                if "interarrival time us" in metric:
                    # The interarrival time is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "throughput p/s" in metric:
                    # The throughput is calculated using the sender
                    # and/or receiver logs, above.
                    continue
                if "RTT estimate us" in metric:
                    # The RTT is calculated using the sender and/or
                    # receiver logs, above.
                    continue
                if "RTT estimate ratio" in metric:
                    # The RTT ratio is calculated using the sender
                    # and/or receiver logs, above.
                    continue
                if "RTT true us" in metric:
                    # The RTT is calculated using the sender and/or
                    # receiver logs, above.
                    continue
                if "RTT true ratio" in metric:
                    # The RTT ratio is calculated using the sender
                    # and/or receiver logs, above.
                    continue
                if "loss rate estimate" in metric:
                    # The estimated loss rate is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "loss rate true" in metric:
                    # The true loss rate is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "queue occupancy" in metric:
                    # The instantaneous queue occupancy is 1 divided
                    # by the number of packets that have entered the
                    # queue since the last packet from the same
                    # flow. This is the fraction of packets added to
                    # the queue corresponding to this flow, over the
                    # time since when the flow's last packet arrived.
                    new = utils.safe_div(
                        1, flw_state[sender]["packets_since_last"])
                elif "mathis model throughput p/s" in metric:
                    # The Mathis model fair throughput is calculated
                    # using the sender and/or receiver logs, above.
                    continue
                elif "mathis model label" in metric:
                    # The Mathis model label is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                else:
                    raise Exception(f"Unknown EWMA metric: {metric}")
                unfair_flws[sender][output_idx][metric] = (
                    utils.safe_update_ewma(
                        unfair_flws[sender][output_idx - 1][metric], new,
                        alpha))

            # Windowed metrics.
            for (metric, _), win in itertools.product(WINDOWED, WINDOWS):
                metric = make_win_metric(metric, win)
                if "average interarrival time us" in metric:
                    # The average interarrival time is calculated
                    # using the sender and/or receiver logs, above.
                    continue
                if "average throughput p/s" in metric:
                    # The average throughput is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "average RTT estimate us" in metric:
                    # The average RTT is calculated using the sender
                    # and/or receiver logs, above.
                    continue
                if "average RTT estimate ratio" in metric:
                    # The average RTT ratio is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "average RTT true us" in metric:
                    # The average RTT is calculated using the sender
                    # and/or receiver logs, above.
                    continue
                if "average RTT true ratio" in metric:
                    # The average RTT ratio is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "loss event rate" in metric:
                    # The loss event rate is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "1/sqrt loss event rate" in metric:
                    # The reciprocal of the square root of the loss
                    # event rate is calculated using the sender and/or
                    # receiver logs, above.
                    continue
                if "loss rate estimate" in metric:
                    # The estimated loss rate is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "loss rate true" in metric:
                    # The true loss rate is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                if "queue occupancy" in metric:
                    win_start_idx = win_start_idxs[win]
                    # By definition, the window now contains one more
                    # packet from this flow.
                    win_flw_pkts = (
                        flw_state[sender]["window_flow_packets"][win] + 1)

                    # The current length of the window.
                    win_cur_us = curr_time - router_pkts[win_start_idx][2]
                    # Extract the RTT estimate.
                    rtt_estimate_us = unfair_flws[sender][output_idx][
                        make_win_metric("average RTT estimate us", win)]
                    if rtt_estimate_us == -1:
                        # The RTT estimate is -1 (unknown), so we
                        # cannot calculate the size of the window. We
                        # must record the new value of
                        # "window_flow_packets".
                        flw_state[sender]["window_flow_packets"][
                            win] = win_flw_pkts
                        continue

                    # Calculate the target length of the window.
                    win_target_us = win * rtt_estimate_us

                    # If the current window size is greater than the
                    # target window size, then shrink the window.
                    while win_cur_us > win_target_us:
                        # If the packet that will be removed from
                        # the window is from this flow, then we
                        # need to decrease our record of the
                        # number of this flow's packets in the
                        # window by one.
                        if router_pkts[win_start_idx][1] == sender:
                            win_flw_pkts -= 1
                        # Move the start of the window forward.
                        win_start_idx += 1
                        win_cur_us = curr_time - router_pkts[win_start_idx][2]

                    # If the current window size is smaller than the
                    # target window size, then grow the window.
                    while (win_start_idx > 0 and win_cur_us < win_target_us):
                        # Move the start of the window backward.
                        win_start_idx -= 1
                        win_cur_us = curr_time - router_pkts[win_start_idx][2]
                        # If the new packet that was added to the
                        # window is from this flow, then we need
                        # to increase our record of the number of
                        # this flow's packets in the window by
                        # one.
                        if router_pkts[win_start_idx][1] == sender:
                            win_flw_pkts += 1

                    # The queue occupancy is the number of this flow's
                    # packets in the window divided by the total
                    # number of packets in the window.
                    new = win_flw_pkts / (j - win_start_idx + 1)
                    # Record the new values of the state variables.
                    win_start_idxs[win] = win_start_idx
                    flw_state[sender]["window_flow_packets"][
                        win] = win_flw_pkts
                elif "mathis model throughput p/s" in metric:
                    # The Mathis model fair throughput is calculated
                    # using the sender and/or receiver logs, above.
                    continue
                elif "mathis model label" in metric:
                    # The Mathis model label is calculated using the
                    # sender and/or receiver logs, above.
                    continue
                else:
                    raise Exception(f"Unknown windowed metric: {metric}")
                unfair_flws[sender][output_idx][metric] = new
            flw_state[sender]["output_idx"] += 1
            # For the current packet's flow, the number of packets
            # since the last packet in this flow is now 1.
            flw_state[sender]["packets_since_last"] = 1
        # For each unfair flow except the current packet's flow,
        # increment the number of packets since the last packet from
        # that flow.
        for flw in range(sim.unfair_flws):
            if flw != sender:
                flw_state[flw]["packets_since_last"] += 1

    # Determine if there are any NaNs or Infs in the results. For the
    # results for each unfair flow, look through all features
    # (columns) and make a note of the features that have bad
    # values. Flatten these lists of feature names, using a set
    # comprehension to remove duplicates.
    bad_fets = {
        fet
        for flw_dat in unfair_flws for fet in flw_dat.dtype.names
        if not np.isfinite(flw_dat[fet]).all()
    }
    if bad_fets:
        print(f"    Simulation {sim_dir} has NaNs of Infs in features: "
              f"{bad_fets}")

    # Save the results.
    if path.exists(out_flp):
        print(f"    Output already exists: {out_flp}")
    else:
        print(f"    Saving: {out_flp}")
        np.savez_compressed(
            out_flp, **{str(k + 1): v
                        for k, v in enumerate(unfair_flws)})
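
parse_pcap leans on a family of "safe" helpers from utils that treat -1 as "unknown" (see the note that -1 means a value could not be calculated, and the comments around the Mathis model computation). None of them appear in this listing; the following minimal sketch is inferred from the call sites above rather than copied from the project.

import math

UNKNOWN = -1

def safe_div(a, b):
    # An unknown operand or a zero denominator yields unknown.
    return UNKNOWN if a == UNKNOWN or b in (UNKNOWN, 0) else a / b

def safe_mul(a, b):
    return UNKNOWN if UNKNOWN in (a, b) else a * b

def safe_sqrt(a):
    return UNKNOWN if a < 0 else math.sqrt(a)

def safe_min(a, b):
    # A min that ignores unknown operands.
    if a == UNKNOWN:
        return b
    if b == UNKNOWN:
        return a
    return min(a, b)

def safe_update_ewma(prev, new, alpha):
    # Start the EWMA at the first known sample; keep the previous
    # value if the new sample is unknown.
    if new == UNKNOWN:
        return prev
    if prev == UNKNOWN:
        return new
    return alpha * new + (1 - alpha) * prev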
Example #8
 def calc_avg(self, video):
     return utils.safe_div(sum(self.iouList[video]),
                           len(self.iouList[video]))
Example #9
    had_data_title = "Had data (taken from SM MC)"
    had_data_raw = copy(had_data)
    if use_data is True:
        had_data = had_data / diff
        had_data_title = "Had data (trigger eff corrected)"

    tab.newpage("Hadronic trigger and data")
    tab.add_table(diff, "Rebinned had trigger effs")
    tab.add_table(had_data_raw, "Had data raw")
    tab.add_table(had_data, had_data_title)

    tab.newpage("Hadronic data and MC yields")
    tab.add_table(had_data, had_data_title)
    tab.add_table(had_sm, "Had SM MC")
    tab.add_table(utils.safe_div(had_data, had_sm), "Had data / SM MC")
    tab.add_table(utils.safe_div(had_data, had_ewk), "Had data / EWK MC")

    tab.newpage("Hadronic data and MC yields")
    had_data_raw = copy(had_data)
    if use_data is True:
        had_data = had_data / diff
        tab.add_table(had_data, "Had data (trigger eff corrected)")
    else:
        tab.add_table(had_data, "Had data (taken from SM MC)")
    tab.add_table(had_sm, "Had SM MC")
    tab.add_table(utils.safe_div(had_data, had_sm), "Had data / SM MC")

    tab.newpage("Hadronic EWK and QCD yields from MC")
    tab.add_table(had_ewk, "Had EWK MC")
    tab.add_table(had_qcd, "Had QCD MC")
Example #10
    def update_block_states(self, y_true_block, y_pred_block):
        sed_true, doa_true = y_true_block
        sed_pred, doa_pred = y_pred_block
        sed_pred = tf.cast(sed_pred > 0.5, sed_pred.dtype)

        # change doa shape from [..., n_classes*3] to [..., n_classes, 3]
        doa_true = tf.reshape(doa_true, (*doa_true.shape[:-1], 3, -1))
        doa_pred = tf.reshape(doa_pred, (*doa_pred.shape[:-1], 3, -1))
        perm = [
            *range(doa_true.ndim - 2), doa_true.ndim - 1, doa_true.ndim - 2
        ]
        doa_true = tf.transpose(doa_true, perm=perm)
        doa_pred = tf.transpose(doa_pred, perm=perm)

        # whether a particular class exists in a block
        # true_classes, pred_classes: [..., 1, n_classes] shaped Tensor
        true_classes = tf.math.reduce_max(sed_true, axis=-2, keepdims=True)
        pred_classes = tf.math.reduce_max(sed_pred, axis=-2, keepdims=True)

        self.Nref += tf.math.reduce_sum(true_classes)
        self.Nsys += tf.math.reduce_sum(pred_classes)
        self.TN += tf.math.reduce_sum((1 - true_classes) * (1 - pred_classes))

        false_negative = true_classes * (1 - pred_classes)
        false_positive = (1 - true_classes) * pred_classes

        self.FN += tf.math.reduce_sum(false_negative)
        self.FP += tf.math.reduce_sum(false_positive)
        loc_FN = tf.math.reduce_sum(false_negative, axis=(-2, -1))
        loc_FP = tf.math.reduce_sum(false_positive, axis=(-2, -1))
        ''' when a class exists in both y_true and y_pred '''
        true_positives = true_classes * pred_classes
        frames_true = sed_true * true_positives
        frames_pred = sed_pred * true_positives
        frames_matched = frames_true * frames_pred

        # [..., 1, n_classes]
        total_matched_frames = tf.reduce_sum(frames_matched,
                                             axis=-2,
                                             keepdims=True)
        matched_frames_exist = tf.cast(total_matched_frames > 0,
                                       total_matched_frames.dtype)
        self.DE_TP += tf.math.reduce_sum(matched_frames_exist)

        false_negative = true_positives * (1 - matched_frames_exist)
        self.FN += tf.math.reduce_sum(false_negative)
        loc_FN += tf.math.reduce_sum(false_negative, axis=(-2, -1))

        # [..., n_frames, n_classes]
        angular_distances = distance_between_cartesian_coordinates(
            doa_true * tf.expand_dims(frames_matched, -1),
            doa_pred * tf.expand_dims(frames_matched, -1))
        average_distances = safe_div(
            tf.reduce_sum(angular_distances, -2, keepdims=True),
            total_matched_frames)
        self.total_DE += tf.reduce_sum(average_distances)

        close_angles = tf.cast(average_distances <= self.doa_threshold,
                               average_distances.dtype)
        self.TP += tf.reduce_sum(close_angles * matched_frames_exist)

        false_negative = (1 - close_angles) * matched_frames_exist
        self.FN += tf.reduce_sum(false_negative)
        loc_FN += tf.reduce_sum(false_negative, axis=(-2, -1))

        self.S += tf.reduce_sum(tf.math.minimum(loc_FP, loc_FN))
        self.D += tf.reduce_sum(tf.math.maximum(0, loc_FN - loc_FP))
        self.I += tf.reduce_sum(tf.math.maximum(0, loc_FP - loc_FN))
Example #11
    def __init__(self, team_stats=None):
        self.goals = 0
        self.ball_possession = 0
        self.own_half_ball_losses = 0
        self.opponent_half_ball_recoveries = 0
        self.own_half_ball_recoveries = 0
        self.successful_tackles = 0
        self.fouls = 0
        self.yellow_cards = 0
        self.red_cards = 0
        self.penalty_kick_goals = 0
        self.shots_on_goal = 0
        self.shots_inside_the_area = 0
        self.shots_outside_the_area = 0
        self.shots_on_target = 0
        self.shots_off_target = 0
        self.shots_after_left_side_attacks = 0
        self.shots_after_center_attacks = 0
        self.shots_after_right_side_attacks = 0
        self.direct_crosses_into_the_area = 0
        self.attacking_passes = 0
        self.key_passes = 0
        self.air_challenges_won = 0
        self.ground_challenges_won = 0
        self.dribbles_won = 0

        if team_stats is None:
            return

        # Ball Possession
        tmp_lost_balls = team_stats.get('lostBall', 0)
        tmp_own_half_lost_ball = team_stats.get('ownHalfLostBall', 0)
        tmp_ball_possession = team_stats.get('ballPossession', 0)
        tmp_ball_recovery = team_stats.get('ballRecovery', 0)
        tmp_ball_recovery_in_opponent_half = team_stats.get(
            'ballRecoveryInOppHalf', 0)
        tmp_ball_recovery_in_own_half = team_stats.get('ballRecoveryInOwnHalf',
                                                       0)

        ball_possession = tmp_ball_possession / 100
        own_half_ball_losses = safe_div(tmp_own_half_lost_ball, tmp_lost_balls)
        opponent_half_ball_recoveries = safe_div(
            tmp_ball_recovery_in_opponent_half, tmp_ball_recovery)
        own_half_ball_recoveries = safe_div(tmp_ball_recovery_in_own_half,
                                            tmp_ball_recovery)

        # Cards
        tmp_tackles = team_stats.get('tackles', 0)
        tmp_successful_tackles = team_stats.get('tacklesSuccess', 0)
        tmp_fouls = team_stats.get('foul', 0)
        tmp_yellow_cards = team_stats.get('YellowCard', 0)
        tmp_red_cards = team_stats.get('RedCard', 0)

        successful_tackles = safe_div(tmp_successful_tackles, tmp_tackles)
        fouls = safe_div(tmp_fouls, tmp_tackles)
        yellow_cards = safe_div(tmp_yellow_cards, tmp_fouls)
        red_cards = safe_div(tmp_red_cards, tmp_fouls)

        # Penalties
        tmp_penalty_kicks = team_stats.get('PenaltyKick', 0)
        tmp_penalty_kick_goals = team_stats.get('PenaltyShot_Goal', 0)
        # tmp_missed_penalty = team_stats.get('MissedPenalty', 0)

        penalty_kick_goals = safe_div(tmp_penalty_kick_goals,
                                      tmp_penalty_kicks)

        # Goals
        tmp_regular_goals = team_stats.get('GoalRegular', 0)
        tmp_attempts_on_goal = team_stats.get('AttemptonGoal', 0)
        tmp_shots_inside_the_area = team_stats.get('ShotInsidetheArea', 0)
        tmp_shots_outside_the_area = team_stats.get('ShotOutsidetheArea', 0)
        tmp_shots_on_target = team_stats.get('OnTarget', 0)
        tmp_shots_off_target = team_stats.get('missedShot', 0)
        # tmp_blocked_shots = team_stats.get('blockedShot', 0)
        tmp_left_side_attacks = team_stats.get('leftSideAttack', 0)
        tmp_left_side_attacks_with_shot = team_stats.get(
            'leftSideAttackWithShot', 0)
        tmp_center_attacks = team_stats.get('centerAttack', 0)
        tmp_center_attacks_with_shot = team_stats.get('centerAttackWithShot',
                                                      0)
        tmp_right_side_attacks = team_stats.get('rightSideAttack', 0)
        tmp_right_side_attacks_with_shot = team_stats.get(
            'rightSideAttackWithShot', 0)

        shots_on_goal = safe_div(tmp_regular_goals, tmp_attempts_on_goal)
        shots_inside_the_area = safe_div(tmp_shots_inside_the_area,
                                         tmp_attempts_on_goal)
        shots_outside_the_area = safe_div(tmp_shots_outside_the_area,
                                          tmp_attempts_on_goal)
        shots_on_target = safe_div(tmp_shots_on_target, tmp_attempts_on_goal)
        shots_off_target = safe_div(tmp_shots_off_target, tmp_attempts_on_goal)
        shots_after_left_side_attacks = safe_div(
            tmp_left_side_attacks_with_shot, tmp_left_side_attacks)
        shots_after_center_attacks = safe_div(tmp_center_attacks_with_shot,
                                              tmp_center_attacks)
        shots_after_right_side_attacks = safe_div(
            tmp_right_side_attacks_with_shot, tmp_right_side_attacks)

        # Crossing
        tmp_crosses = team_stats.get('Cross', 0)
        tmp_direct_crosses_into_the_area = team_stats.get(
            'DirectCrossintotheArea', 0)
        # tmp_headers = team_stats.get('Header', 0)

        direct_crosses_into_the_area = safe_div(
            tmp_direct_crosses_into_the_area, tmp_crosses)

        # Passing
        tmp_passes = team_stats.get('passes', 0)
        # tmp_non_attacking_passes = team_stats.get('nonAttackingPasses', 0)
        tmp_attacking_passes = team_stats.get('attackingPasses', 0)
        # tmp_accurate_passes = team_stats.get('accuratePasses', 0)
        tmp_key_passes = team_stats.get('keyPasses', 0)
        # tmp_long_balls = team_stats.get('longBall', 0)
        # tmp_accurate_long_balls = team_stats.get('accurateLongBall', 0)

        attacking_passes = safe_div(tmp_attacking_passes, tmp_passes)
        key_passes = safe_div(tmp_key_passes, tmp_attacking_passes)

        # Challenges
        tmp_air_challenges = team_stats.get('airChallenge', 0)
        tmp_air_challenges_won = team_stats.get('wonAirChallenge', 0)
        tmp_ground_challenges = team_stats.get('groundChallenge', 0)
        tmp_ground_challenges_won = team_stats.get('wonGroundChallenge', 0)
        tmp_dribbles = team_stats.get('dribble', 0)
        tmp_dribbles_won = team_stats.get('wonDribble', 0)

        air_challenges_won = safe_div(tmp_air_challenges_won,
                                      tmp_air_challenges)
        ground_challenges_won = safe_div(tmp_ground_challenges_won,
                                         tmp_ground_challenges)
        dribbles_won = safe_div(tmp_dribbles_won, tmp_dribbles)

        self.goals = tmp_regular_goals
        self.ball_possession = ball_possession
        self.own_half_ball_losses = own_half_ball_losses
        self.opponent_half_ball_recoveries = opponent_half_ball_recoveries
        self.own_half_ball_recoveries = own_half_ball_recoveries
        self.successful_tackles = successful_tackles
        self.fouls = fouls
        self.yellow_cards = yellow_cards
        self.red_cards = red_cards
        self.penalty_kick_goals = penalty_kick_goals
        self.shots_on_goal = shots_on_goal
        self.shots_inside_the_area = shots_inside_the_area
        self.shots_outside_the_area = shots_outside_the_area
        self.shots_on_target = shots_on_target
        self.shots_off_target = shots_off_target
        self.shots_after_left_side_attacks = shots_after_left_side_attacks
        self.shots_after_center_attacks = shots_after_center_attacks
        self.shots_after_right_side_attacks = shots_after_right_side_attacks
        self.direct_crosses_into_the_area = direct_crosses_into_the_area
        self.attacking_passes = attacking_passes
        self.key_passes = key_passes
        self.air_challenges_won = air_challenges_won
        self.ground_challenges_won = ground_challenges_won
        self.dribbles_won = dribbles_won
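
For context, a hedged usage sketch: the constructor above only needs the raw counter dict, and because every ratio goes through safe_div, missing counters degrade gracefully instead of raising (assuming safe_div returns 0 for a zero denominator). The class name TeamStats is hypothetical; the snippet does not show it.

raw = {
    'ballPossession': 55,
    'tackles': 20,
    'tacklesSuccess': 14,
    'foul': 9,
    'YellowCard': 2,
}
stats = TeamStats(raw)           # hypothetical class name
print(stats.successful_tackles)  # 14 / 20 = 0.7
print(stats.yellow_cards)        # 2 / 9 ~= 0.222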
Example #12
    def add_player_stats(self, player_stats: Player):
        self.own_half_ball_losses += safe_div(
            player_stats.own_half_lost_ball, player_stats.lost_ball) / 11
        self.opponent_half_ball_recoveries += safe_div(
            player_stats.ball_recovery_in_opp_half,
            player_stats.ball_recovery) / 11
        self.own_half_ball_recoveries += safe_div(
            player_stats.ball_recovery_in_own_half,
            player_stats.ball_recovery) / 11
        self.successful_tackles += safe_div(
            player_stats.tackles_success, player_stats.tackles) / 11
        self.fouls += safe_div(player_stats.foul, player_stats.tackles) / 11
        self.yellow_cards += safe_div(
            player_stats.yellow_card, player_stats.foul) / 11
        self.red_cards += safe_div(
            player_stats.red_card, player_stats.foul) / 11
        self.penalty_kick_goals += safe_div(
            player_stats.penalty_shot_goal, player_stats.penalty_kick) / 11
        self.shots_on_goal += safe_div(
            player_stats.regular_goals, player_stats.attempts_on_goal) / 11
        self.shots_inside_the_area += safe_div(
            player_stats.shots_inside_the_area,
            player_stats.attempts_on_goal) / 11
        self.shots_outside_the_area += safe_div(
            player_stats.shots_outside_the_area,
            player_stats.attempts_on_goal) / 11
        self.shots_on_target += safe_div(
            player_stats.shots_on_target, player_stats.attempts_on_goal) / 11
        self.shots_off_target += safe_div(
            player_stats.shots_off_target, player_stats.attempts_on_goal) / 11
        self.shots_after_right_side_attacks += safe_div(
            player_stats.right_side_attacks_with_shot,
            player_stats.right_side_attacks) / 11
        self.shots_after_center_attacks += safe_div(
            player_stats.center_attacks_with_shot,
            player_stats.center_attacks) / 11
        self.shots_after_left_side_attacks += safe_div(
            player_stats.left_side_attacks_with_shot,
            player_stats.left_side_attacks) / 11
        self.direct_crosses_into_the_area += safe_div(
            player_stats.direct_crosses_into_the_area,
            player_stats.crosses) / 11
        self.attacking_passes += safe_div(
            player_stats.attacking_passes, player_stats.passes) / 11
        self.key_passes += safe_div(
            player_stats.key_passes, player_stats.attacking_passes) / 11
        self.air_challenges_won += safe_div(
            player_stats.air_challenge_won, player_stats.air_challenge) / 11
        self.ground_challenges_won += safe_div(
            player_stats.ground_challenge_won,
            player_stats.ground_challenge) / 11
        self.dribbles_won += safe_div(
            player_stats.won_dribbles, player_stats.dribbles) / 11
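
The per-player ratios above stay bounded only because safe_div guards against zero denominators (a player with no tackles, no fouls, or no penalty kicks). The helper itself never appears on this page; a minimal sketch of the assumed behaviour, where the fallback value is an assumption:

def safe_div(numerator, denominator, fallback=0.0):
    # Sketch only: return `fallback` instead of raising ZeroDivisionError.
    # Other examples may use a different sentinel; Beispiel #17, for
    # instance, treats -1 as "could not be calculated".
    if denominator == 0:
        return fallback
    return numerator / denominator

safe_div(3, 4)  # 0.75
safe_div(3, 0)  # 0.0 (fallback)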
Beispiel #14
0
    tab.add_table(effs[index][3], "Had trigger effs (stat. uncert.)")
    tab.alphat_bins(alphat_bins)
    tab.mhtmet_bins(mhtmet_bins)
    tab.add_table(diff, "Rebinned had trigger effs (stat. uncert.)")
    #tab.add_table(effs[index][5],"Had trigger syst. uncert.")
    #tab.add_table(mu_effs*mu_syst,"Muon trigger effs (total uncert.)")

    tab.newpage("Hadronic data and MC yields")
    had_data_raw = copy(had_data)
    if use_data is True:
        had_data = had_data / diff
        tab.add_table(had_data, "Had data (trigger eff corrected)")
    else:
        tab.add_table(had_data, "Had data (taken from SM MC)")
    tab.add_table(had_sm, "Had SM MC (scaled to correct lumi)")
    tab.add_table(utils.safe_div(had_data, had_sm),
                  "Had data / SM MC (stat. uncert.)")

    tab.newpage("Hadronic EWK and QCD yields from MC")
    tab.add_table(had_ewk, "Had EWK MC (scaled to correct lumi)")
    tab.add_table(had_qcd, "Had QCD MC (scaled to correct lumi)")
    tab.add_table(utils.safe_div(had_qcd, had_sm),
                  "Fraction QCD/SM MC (scaled to correct lumi)")

    tab.newpage("Muon data and MC yields")
    mu_data_raw = copy(mu_data)
    if use_data is True:
        mu_data = mu_data / (mu_effs * mu_syst)
        tab.add_table(mu_data, "Mu data (trigger eff corrected)")
    else:
        tab.add_table(mu_data, "Mu data (taken from SM MC)")
Beispiel #15
0
def ewk_tf( signal, control ) :
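    # Transfer factor from the control region to the signal region;
    # safe_div guards against empty control bins.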
    return utils.safe_div(signal,control)
Beispiel #16
0
    tab.add_table(effs[index][3],"Had trigger effs (stat. uncert.)")
    tab.alphat_bins(alphat_bins)
    tab.mhtmet_bins(mhtmet_bins)
    tab.add_table(diff,"Rebinned had trigger effs (stat. uncert.)")
    #tab.add_table(effs[index][5],"Had trigger syst. uncert.")
    #tab.add_table(mu_effs*mu_syst,"Muon trigger effs (total uncert.)")

    tab.newpage("Hadronic data and MC yields")
    had_data_raw = copy(had_data)
    if use_data is True :
        had_data = had_data/diff
        tab.add_table(had_data,"Had data (trigger eff corrected)")
    else :
        tab.add_table(had_data,"Had data (taken from SM MC)")
    tab.add_table(had_sm,"Had SM MC (scaled to correct lumi)")
    tab.add_table(utils.safe_div(had_data,had_sm),"Had data / SM MC (stat. uncert.)")

    tab.newpage("Hadronic EWK and QCD yields from MC")
    tab.add_table(had_ewk,"Had EWK MC (scaled to correct lumi)")
    tab.add_table(had_qcd,"Had QCD MC (scaled to correct lumi)")
    tab.add_table(utils.safe_div(had_qcd,had_sm),"Fraction QCD/SM MC (scaled to correct lumi)")

    tab.newpage("Muon data and MC yields")
    mu_data_raw = copy(mu_data)
    if use_data is True :
        mu_data = mu_data/(mu_effs*mu_syst)
        tab.add_table(mu_data,"Mu data (trigger eff corrected)")
    else :
        tab.add_table(mu_data,"Mu data (taken from SM MC)")
    tab.add_table(mu_sm,"Mu SM MC (scaled to correct lumi)")
    tab.add_table(utils.safe_div(mu_data,mu_sm),"Mu data / SM MC (stat. uncert.)")
Beispiel #17
0
def parse_opened_exp(exp, exp_flp, exp_dir, out_flp, skip_smoothed):
    """ Parses an experiment. Returns the smallest safe window size. """
    print(f"Parsing: {exp_flp}")
    if exp.name.startswith("FAILED"):
        print(f"Error: Experimant failed: {exp_flp}")
        return -1
    if exp.tot_flws == 0:
        print(f"Error: No flows to analyze in: {exp_flp}")
        return -1

    # Determine flow src and dst ports.
    params_flp = path.join(exp_dir, f"{exp.name}.json")
    if not path.exists(params_flp):
        print(f"Error: Cannot find params file ({params_flp}) in: {exp_flp}")
        return -1
    with open(params_flp, "r") as fil:
        params = json.load(fil)
    # Dictionary mapping a flow to its CCA. Each flow is a tuple of the
    # form: (client port, server port)
    #
    # { (client port, server port): CCA }
    flw_to_cca = {(client_port, flw[4]): flw[0]
                  for flw in params["flowsets"] for client_port in flw[3]}
    flws = list(flw_to_cca.keys())

    client_pcap = path.join(exp_dir, f"client-tcpdump-{exp.name}.pcap")
    server_pcap = path.join(exp_dir, f"server-tcpdump-{exp.name}.pcap")
    if not (path.exists(client_pcap) and path.exists(server_pcap)):
        print(f"Warning: Missing pcap file in: {exp_flp}")
        return -1
    flw_to_pkts_client = utils.parse_packets(client_pcap, flw_to_cca)
    flw_to_pkts_server = utils.parse_packets(server_pcap, flw_to_cca)

    # Determine the path to the bottleneck queue log file.
    toks = exp.name.split("-")
    q_log_flp = path.join(
        exp_dir,
        "-".join(toks[:-1]) + "-forward-bottleneckqueue-" + toks[-1] + ".log")
    q_log = None
    if path.exists(q_log_flp):
        q_log = list(enumerate(utils.parse_queue_log(q_log_flp)))

    # Transform absolute times into relative times to make life easier.
    #
    # Determine the absolute earliest time observed in the experiment.
    earliest_time_us = min(first_time_us for bounds in [
        get_time_bounds(flw_to_pkts_client, direction="data"),
        get_time_bounds(flw_to_pkts_client, direction="ack"),
        get_time_bounds(flw_to_pkts_server, direction="data"),
        get_time_bounds(flw_to_pkts_server, direction="ack")
    ] for first_time_us, _ in bounds)
    # Subtract the earliest time from all times.
    for flw in flws:
        flw_to_pkts_client[flw][0][
            features.ARRIVAL_TIME_FET] -= earliest_time_us
        flw_to_pkts_client[flw][1][
            features.ARRIVAL_TIME_FET] -= earliest_time_us
        flw_to_pkts_server[flw][0][
            features.ARRIVAL_TIME_FET] -= earliest_time_us
        flw_to_pkts_server[flw][1][
            features.ARRIVAL_TIME_FET] -= earliest_time_us

        assert (flw_to_pkts_client[flw][0][features.ARRIVAL_TIME_FET] >=
                0).all()
        assert (flw_to_pkts_client[flw][1][features.ARRIVAL_TIME_FET] >=
                0).all()
        assert (flw_to_pkts_server[flw][0][features.ARRIVAL_TIME_FET] >=
                0).all()
        assert (flw_to_pkts_server[flw][1][features.ARRIVAL_TIME_FET] >=
                0).all()

    flws_time_bounds = get_time_bounds(flw_to_pkts_server, direction="data")

    # Process PCAP files from senders and receivers.
    # The final output, with one entry per flow.
    flw_results = {}

    # Keep track of the number of erroneous throughputs (i.e., higher than the
    # experiment bandwidth) for each window size.
    win_to_errors = {win: 0 for win in features.WINDOWS}

    # Create the (super-complicated) dtype. The dtype combines each metric at
    # multiple granularities.
    dtype = (features.REGULAR +
             ([] if skip_smoothed else features.make_smoothed_features()))

    for flw_idx, flw in enumerate(flws):
        cca = flw_to_cca[flw]
        # Copa and PCC Vivace use packet-based sequence numbers as opposed to
        # TCP's byte-based sequence numbers.
        packet_seq = cca in {"copa", "vivace"}
        snd_data_pkts, snd_ack_pkts = flw_to_pkts_client[flw]
        recv_data_pkts, recv_ack_pkts = flw_to_pkts_server[flw]

        first_data_time_us = recv_data_pkts[0][features.ARRIVAL_TIME_FET]

        # The final output. -1 implies that a value could not be calculated.
        output = np.full(len(recv_data_pkts), -1, dtype=dtype)

        # If this flow does not have any packets, then skip it.
        skip = False
        if snd_data_pkts.shape[0] == 0:
            skip = True
            print(f"Warning: No data packets sent for flow {flw_idx} in: "
                  f"{exp_flp}")
        if recv_data_pkts.shape[0] == 0:
            skip = True
            print(f"Warning: No data packets received for flow {flw_idx} in: "
                  f"{exp_flp}")
        if recv_ack_pkts.shape[0] == 0:
            skip = True
            print(f"Warning: No ACK packets sent for flow {flw_idx} in: "
                  f"{exp_flp}")
        if skip:
            flw_results[flw] = output
            continue

        # State that the windowed metrics need to track across packets.
        win_state = {
            win: {
                # The index at which this window starts.
                "window_start_idx": 0,
                # The "loss event rate".
                "loss_interval_weights": make_interval_weight(8),
                "loss_event_intervals": collections.deque(),
                "current_loss_event_start_idx": 0,
                "current_loss_event_start_time": 0,
            }
            for win in features.WINDOWS
        }
        # Total number of packet losses up to the current received
        # packet.
        pkt_loss_total_estimate = 0
        # Loss rate estimation.
        prev_seq = None
        prev_payload_B = None
        highest_seq = None
        # Use for Copa RTT estimation.
        snd_ack_idx = 0
        snd_data_idx = 0
        # Use for TCP and PCC Vivace RTT estimation.
        recv_ack_idx = 0

        # Track which packets are definitely retransmissions. Ignore these
        # packets when estimating the RTT. Note that because we are doing
        # receiver-side retransmission tracking, it is possible that there are
        # other retransmissions that we cannot detect.
        #
        # All sequence numbers that have been received.
        unique_pkts = set()
        # Sequence numbers that have been received multiple times.
        retrans_pkts = set()

        for j, recv_pkt in enumerate(recv_data_pkts):
            if j % 1000 == 0:
                print(f"\tFlow {flw_idx + 1}/{exp.tot_flws}: "
                      f"{j}/{len(recv_data_pkts)} packets")
            # Whether this is the first packet.
            first = j == 0
            # Note that Copa and Vivace use packet-level sequence numbers
            # instead of TCP's byte-level sequence numbers.
            recv_seq = recv_pkt[features.SEQ_FET]
            output[j][features.SEQ_FET] = recv_seq
            retrans = (recv_seq in unique_pkts or
                       (prev_seq is not None and prev_payload_B is not None and
                        (prev_seq +
                         (1 if packet_seq else prev_payload_B)) > recv_seq))
            if retrans:
                # If this packet is a multiple retransmission, then this line
                # has no effect.
                retrans_pkts.add(recv_seq)
            # If this packet has already been seen, then this line has no
            # effect.
            unique_pkts.add(recv_seq)

            recv_time_cur_us = recv_pkt[features.ARRIVAL_TIME_FET]
            output[j][features.ARRIVAL_TIME_FET] = recv_time_cur_us

            payload_B = recv_pkt[features.PAYLOAD_FET]
            wirelen_B = recv_pkt[features.WIRELEN_FET]
            output[j][features.PAYLOAD_FET] = payload_B
            output[j][features.WIRELEN_FET] = wirelen_B
            output[j][features.TOTAL_SO_FAR_FET] = (
                (0 if first else output[j - 1][features.TOTAL_SO_FAR_FET]) +
                wirelen_B)
            output[j][features.PAYLOAD_SO_FAR_FET] = (
                (0 if first else output[j - 1][features.PAYLOAD_SO_FAR_FET]) +
                payload_B)

            # Count how many flows were active when this packet was captured.
            active_flws = sum(
                1 for first_time_us, last_time_us in flws_time_bounds
                if first_time_us <= recv_time_cur_us <= last_time_us)
            assert active_flws > 0, \
                (f"Error: No active flows detected for packet {j} of "
                 f"flow {flw_idx} in: {exp_flp}")

            output[j][features.ACTIVE_FLOWS_FET] = active_flws
            output[j][features.BW_FAIR_SHARE_FRAC_FET] = utils.safe_div(
                1, active_flws)
            output[j][features.BW_FAIR_SHARE_BPS_FET] = utils.safe_div(
                exp.bw_bps, active_flws)

            # Calculate RTT-related metrics.
            rtt_us = -1
            if not first and recv_seq != -1 and not retrans:
                if cca == "copa":
                    # In a Copa ACK, the sender timestamp is the time at which
                    # the corresponding data packet was sent. The receiver
                    # timestamp is the time that the data packet was received
                    # and the ACK was sent. This enables sender-side RTT
                    # estimation. However, because the sender does not echo a
                    # value back to the receiver, this cannot be used for
                    # receiver-side RTT estimation.
                    #
                    # For now, we will just do sender-side RTT estimation. When
                    # selecting which packets to use for the RTT estimate, we
                    # will select the packet/ACK pair whose ACK arrived soonest
                    # before packet j was sent. This means that the sender would
                    # have been able to calculate this RTT estimate before
                    # sending packet j, and could very well have included the
                    # RTT estimate in packet j's header.
                    #
                    # First, find the index of the ACK that was received soonest
                    # before packet j was sent.
                    snd_ack_idx = utils.find_bound(
                        snd_ack_pkts[features.SEQ_FET],
                        recv_seq,
                        snd_ack_idx,
                        snd_ack_pkts.shape[0] - 1,
                        which="before")
                    snd_ack_seq = snd_ack_pkts[snd_ack_idx][features.SEQ_FET]
                    # Then, find this ACK's data packet.
                    snd_data_seq = snd_data_pkts[snd_data_idx][
                        features.SEQ_FET]
                    while snd_data_idx < snd_data_pkts.shape[0]:
                        snd_data_seq = snd_data_pkts[snd_data_idx][
                            features.SEQ_FET]
                        if snd_data_seq == snd_ack_seq:
                            # Third, the RTT is the difference between the
                            # sending time of the data packet and the arrival
                            # time of its ACK.
                            rtt_us = (snd_ack_pkts[snd_ack_idx][
                                features.ARRIVAL_TIME_FET] -
                                      snd_data_pkts[snd_data_idx][
                                          features.ARRIVAL_TIME_FET])
                            assert rtt_us >= 0, \
                                (f"Error: Calculated negative RTT ({rtt_us} "
                                 f"us) for packet {j} of flow {flw} in: "
                                 f"{exp_flp}")
                            break
                        snd_data_idx += 1
                elif cca == "vivace":
                    # UDT ACKs may contain the RTT. Find the last ACK to be sent
                    # by the receiver before packet j was received.
                    recv_ack_idx = utils.find_bound(
                        recv_ack_pkts[features.ARRIVAL_TIME_FET],
                        recv_time_cur_us,
                        recv_ack_idx,
                        recv_ack_pkts.shape[0] - 1,
                        which="before")
                    udt_rtt_us = recv_ack_pkts[recv_ack_idx][features.TS_1_FET]
                    if udt_rtt_us > 0:
                        # The RTT is an optional field in UDT ACK packets. I
                        # assume that this means that if the RTT is not
                        # included, then the field will be 0.
                        rtt_us = udt_rtt_us
                else:
                    # This is a TCP flow. Do receiver-side RTT estimation using
                    # the TCP timestamp option. Attempt to find a new RTT
                    # estimate. Move recv_ack_idx to the first occurrence of the
                    # timestamp option TSval corresponding to the current
                    # packet's TSecr.
                    recv_ack_idx_old = recv_ack_idx
                    tsval = recv_ack_pkts[recv_ack_idx][features.TS_1_FET]
                    tsecr = recv_pkt[features.TS_2_FET]
                    while recv_ack_idx < recv_ack_pkts.shape[0]:
                        tsval = recv_ack_pkts[recv_ack_idx][features.TS_1_FET]
                        if tsval == tsecr:
                            # If we found a timestamp option match, then update
                            # the RTT estimate.
                            rtt_us = (recv_time_cur_us -
                                      recv_ack_pkts[recv_ack_idx][
                                          features.ARRIVAL_TIME_FET])
                            break
                        recv_ack_idx += 1
                    else:
                        # If we never found a matching tsval, then use the
                        # previous RTT estimate and reset recv_ack_idx to search
                        # again on the next packet.
                        rtt_us = output[j - 1][features.RTT_FET]
                        recv_ack_idx = recv_ack_idx_old

            recv_time_prev_us = (-1 if first else
                                 output[j - 1][features.ARRIVAL_TIME_FET])
            interarr_time_us = utils.safe_sub(recv_time_cur_us,
                                              recv_time_prev_us)
            output[j][features.INTERARR_TIME_FET] = interarr_time_us
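            # 8 * 1e6 * wirelen_B / interarr_time_us converts wire bytes and
            # microseconds into an instantaneous arrival rate in bits per
            # second.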
            output[j][features.INV_INTERARR_TIME_FET] = utils.safe_mul(
                8 * 1e6 * wirelen_B, utils.safe_div(1, interarr_time_us))

            output[j][features.RTT_FET] = rtt_us
            min_rtt_us = utils.safe_min(
                sys.maxsize if first else output[j - 1][features.MIN_RTT_FET],
                rtt_us)
            output[j][features.MIN_RTT_FET] = min_rtt_us
            rtt_estimate_ratio = utils.safe_div(rtt_us, min_rtt_us)
            output[j][features.RTT_RATIO_FET] = rtt_estimate_ratio

            # Receiver-side loss rate estimation. Estimate the number of lost
            # packets since the last packet. Do not try anything complex or
            # prone to edge cases. Consider only the simple case where the last
            # packet and current packet are in order and not retransmissions.
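            # The expected sequence number is prev_seq + 1 (packet-based
            # numbering) or prev_seq + prev_payload_B (byte-based); the gap
            # beyond that, divided by the payload size, approximates the
            # number of packets lost since the last packet.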
            pkt_loss_cur_estimate = (
                -1 if (recv_seq == -1 or prev_seq is None or prev_seq == -1
                       or prev_payload_B is None or prev_payload_B <= 0
                       or payload_B <= 0 or highest_seq is None or
                       # The last packet was a retransmission.
                       highest_seq != prev_seq or
                       # The current packet is a retransmission.
                       retrans) else round(
                           (recv_seq -
                            (1 if packet_seq else prev_payload_B) - prev_seq) /
                           (1 if packet_seq else payload_B)))

            if pkt_loss_cur_estimate != -1:
                pkt_loss_total_estimate += pkt_loss_cur_estimate
            loss_rate_cur = utils.safe_div(
                pkt_loss_cur_estimate, utils.safe_add(pkt_loss_cur_estimate,
                                                      1))

            output[j][features.PACKETS_LOST_FET] = pkt_loss_cur_estimate
            output[j][features.LOSS_RATE_FET] = loss_rate_cur

            # EWMA metrics.
            for (metric,
                 _), alpha in itertools.product(features.EWMAS,
                                                features.ALPHAS):
                if skip_smoothed:
                    continue

                metric = features.make_ewma_metric(metric, alpha)
                if metric.startswith(features.INTERARR_TIME_FET):
                    new = interarr_time_us
                elif metric.startswith(features.INV_INTERARR_TIME_FET):
                    # Do not use the interarrival time EWMA to calculate the
                    # inverse interarrival time. Instead, use the true inverse
                    # interarrival time so that the value used to update the
                    # inverse interarrival time EWMA is not "EWMA-ified" twice.
                    new = output[j][features.INV_INTERARR_TIME_FET]
                elif metric.startswith(features.RTT_FET):
                    new = rtt_us
                elif metric.startswith(features.RTT_RATIO_FET):
                    new = rtt_estimate_ratio
                elif metric.startswith(features.LOSS_RATE_FET):
                    new = loss_rate_cur
                elif metric.startswith(features.MATHIS_TPUT_FET):
                    # tput = (MSS / RTT) * (C / sqrt(p))
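                    # Here MSS is approximated by the current packet's
                    # payload (converted to bits), the RTT is converted from
                    # us to seconds, and p is the current loss rate estimate,
                    # so the result is in bits per second. MATHIS_C is the
                    # model constant (commonly sqrt(3/2)).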
                    new = utils.safe_mul(
                        utils.safe_div(
                            utils.safe_mul(8, output[j][features.PAYLOAD_FET]),
                            utils.safe_div(output[j][features.RTT_FET], 1e6)),
                        utils.safe_div(MATHIS_C,
                                       utils.safe_sqrt(loss_rate_cur)))
                else:
                    raise Exception(f"Unknown EWMA metric: {metric}")
                # Update the EWMA. If this is the first value, then use -1
                # (unknown) as the old value.
                output[j][metric] = utils.safe_update_ewma(
                    -1 if first else output[j - 1][metric], new, alpha)

            # If we cannot estimate the min RTT, then we cannot compute any
            # windowed metrics.
            if min_rtt_us != -1:
                # Move the window start indices later in time. The min RTT
                # estimate will never increase, so we do not need to investigate
                # whether the start of the window moved earlier in time.
                for win in features.WINDOWS:
                    win_state[win]["window_start_idx"] = utils.find_bound(
                        output[features.ARRIVAL_TIME_FET],
                        target=recv_time_cur_us - (win * min_rtt_us),
                        min_idx=win_state[win]["window_start_idx"],
                        max_idx=j,
                        which="after")

            # Windowed metrics.
            for (metric, _), win in itertools.product(features.WINDOWED,
                                                      features.WINDOWS):
                # If we cannot estimate the min RTT, then we cannot compute any
                # windowed metrics.
                if skip_smoothed or min_rtt_us == -1:
                    continue

                # Calculate windowed metrics only if an entire window has
                # elapsed since the start of the flow.
                win_size_us = win * min_rtt_us
                if recv_time_cur_us - first_data_time_us < win_size_us:
                    continue

                # A window requires at least two packets. Note that this means
                # that the first packet will always be skipped.
                win_start_idx = win_state[win]["window_start_idx"]
                if win_start_idx == j:
                    continue

                metric = features.make_win_metric(metric, win)
                if metric.startswith(features.INTERARR_TIME_FET):
                    new = utils.safe_div(
                        utils.safe_sub(
                            recv_time_cur_us,
                            output[win_start_idx][features.ARRIVAL_TIME_FET]),
                        j - win_start_idx)
                elif metric.startswith(features.INV_INTERARR_TIME_FET):
                    new = utils.safe_mul(
                        8 * 1e6 * wirelen_B,
                        utils.safe_div(
                            1, output[j][features.make_win_metric(
                                features.INTERARR_TIME_FET, win)]))
                elif metric.startswith(features.TPUT_FET):
                    # Treat the first packet in the window as the beginning of
                    # time. Calculate the average throughput over all but the
                    # first packet.
                    #
                    # Sum up the wire lengths of the packets in the window.
                    total_bytes = utils.safe_sum(output[features.WIRELEN_FET],
                                                 start_idx=win_start_idx + 1,
                                                 end_idx=j)
                    # Divide by the duration of the window.
                    start_time_us = (
                        output[win_start_idx][features.ARRIVAL_TIME_FET]
                        if win_start_idx >= 0 else -1)
                    end_time_us = output[j][features.ARRIVAL_TIME_FET]
                    tput_bps = utils.safe_div(
                        utils.safe_mul(total_bytes, 8),
                        utils.safe_div(
                            utils.safe_sub(end_time_us, start_time_us), 1e6))
                    # If the throughput exceeds the bandwidth, then record a
                    # warning and do not record this throughput.
                    if tput_bps != -1 and tput_bps > exp.bw_bps:
                        win_to_errors[win] += 1
                        continue
                elif metric.startswith(features.TPUT_SHARE_FRAC_FET):
                    # This is calculated at the end.
                    continue
                elif metric.startswith(features.TOTAL_TPUT_FET):
                    # This is calculated at the end.
                    continue
                elif metric.startswith(features.TPUT_FAIR_SHARE_BPS_FET):
                    # This is calculated at the end.
                    continue
                elif metric.startswith(features.TPUT_TO_FAIR_SHARE_RATIO_FET):
                    # This is calculated at the end.
                    continue
                elif metric.startswith(features.RTT_FET):
                    new = utils.safe_mean(output[features.RTT_FET],
                                          win_start_idx, j)
                elif metric.startswith(features.RTT_RATIO_FET):
                    new = utils.safe_mean(output[features.RTT_RATIO_FET],
                                          win_start_idx, j)
                elif metric.startswith(features.LOSS_EVENT_RATE_FET):
                    rtt_us = output[j][features.make_win_metric(
                        features.RTT_FET, win)]
                    if rtt_us == -1:
                        # The RTT estimate is -1 (unknown), so we
                        # cannot compute the loss event rate.
                        continue

                    cur_start_idx = win_state[win][
                        "current_loss_event_start_idx"]
                    cur_start_time = win_state[win][
                        "current_loss_event_start_time"]
                    if pkt_loss_cur_estimate > 0:
                        # There was a loss since the last packet.
                        #
                        # The index of the first packet in the current
                        # loss event.
                        new_start_idx = (j + pkt_loss_total_estimate -
                                         pkt_loss_cur_estimate)

                        if cur_start_idx == 0:
                            # This is the first loss event.
                            #
                            # Naive fix for the loss event rate
                            # calculation: the method described in
                            # the RFC is complicated for handling
                            # the first event.
                            cur_start_idx = 1
                            cur_start_time = 0
                            new = 1 / j
                        else:
                            # This is not the first loss event. See if
                            # any of the newly-lost packets start a
                            # new loss event.
                            #
                            # The average time between when packets
                            # should have arrived, since we received
                            # the last packet.
                            loss_interval = (
                                (recv_time_cur_us - recv_time_prev_us) /
                                (pkt_loss_cur_estimate + 1))

                            # Look at each lost packet...
                            for k in range(pkt_loss_cur_estimate):
                                # Compute the approximate time at
                                # which the packet should have been
                                # received if it had not been lost.
                                loss_time = (recv_time_prev_us +
                                             (k + 1) * loss_interval)

                                # If the time of this loss is more
                                # than one RTT from the time of the
                                # start of the current loss event,
                                # then this is a new loss event.
                                if loss_time - cur_start_time >= rtt_us:
                                    # Record the number of packets
                                    # between the start of the new
                                    # loss event and the start of the
                                    # previous loss event.
                                    win_state[win][
                                        "loss_event_intervals"].appendleft(
                                            new_start_idx - cur_start_idx)
                                    # Potentially discard an old event.
                                    if len(win_state[win]
                                           ["loss_event_intervals"]) > win:
                                        win_state[win][
                                            "loss_event_intervals"].pop()

                                    cur_start_idx = new_start_idx
                                    cur_start_time = loss_time
                                # Advance to the index of the next
                                # lost packet, which is where any new
                                # loss event would begin.
                                new_start_idx += 1

                            new = compute_weighted_average(
                                (j + pkt_loss_total_estimate - cur_start_idx),
                                win_state[win]["loss_event_intervals"],
                                win_state[win]["loss_interval_weights"])
                    elif pkt_loss_total_estimate > 0:
                        # There have been no losses since the last
                        # packet, but the total loss is nonzero.
                        # Increase the size of the current loss event.
                        new = compute_weighted_average(
                            j + pkt_loss_total_estimate - cur_start_idx,
                            win_state[win]["loss_event_intervals"],
                            win_state[win]["loss_interval_weights"])
                    else:
                        # There have never been any losses, so the
                        # loss event rate is 0.
                        new = 0

                    # Record the new values of the state variables.
                    win_state[win][
                        "current_loss_event_start_idx"] = cur_start_idx
                    win_state[win][
                        "current_loss_event_start_time"] = cur_start_time
                elif metric.startswith(features.SQRT_LOSS_EVENT_RATE_FET):
                    # Use the loss event rate to compute
                    # 1 / sqrt(loss event rate).
                    new = utils.safe_div(
                        1,
                        utils.safe_sqrt(output[j][features.make_win_metric(
                            features.LOSS_EVENT_RATE_FET, win)]))
                elif metric.startswith(features.LOSS_RATE_FET):
                    win_losses = utils.safe_sum(
                        output[features.PACKETS_LOST_FET], win_start_idx + 1,
                        j)
                    new = utils.safe_div(win_losses,
                                         win_losses + (j - win_start_idx))
                elif metric.startswith(features.MATHIS_TPUT_FET):
                    # tput = (MSS / RTT) * (C / sqrt(p))
                    new = utils.safe_mul(
                        utils.safe_div(
                            utils.safe_mul(8, output[j][features.PAYLOAD_FET]),
                            utils.safe_div(output[j][features.RTT_FET], 1e6)),
                        utils.safe_div(
                            MATHIS_C,
                            utils.safe_sqrt(output[j][features.make_win_metric(
                                features.LOSS_EVENT_RATE_FET, win)])))
                else:
                    raise Exception(f"Unknown windowed metric: {metric}")
                output[j][metric] = new

            prev_seq = recv_seq
            prev_payload_B = payload_B
            highest_seq = (prev_seq if highest_seq is None else max(
                highest_seq, prev_seq))
            # In the event of sequence number wraparound, reset the sequence
            # number tracking.
            #
            # TODO: Test sequence number wraparound logic.
            if (recv_seq != -1
                    and recv_seq + (1 if packet_seq else payload_B) > 2**32):
                print(
                    "Warning: Sequence number wraparound detected for packet "
                    f"{j} of flow {flw} in: {exp_flp}")
                highest_seq = None
                prev_seq = None

        # Get the sequence number of the last received packet.
        last_seq = output[-1][features.SEQ_FET]
        if last_seq == -1:
            print("Warning: Unable to calculate retransmission or bottleneck "
                  "queue drop rates due to unknown last sequence number for "
                  f"(UDP?) flow {flw_idx} in: {exp_flp}")
        else:
            # Calculate the true number of retransmissions using the sender
            # traces.
            #
            # Truncate the sent packets at the last occurrence of the last packet to
            # be received.
            #
            # Find when the last received packet was sent. Assume that if this
            # packet was retransmitted, then the last retransmission is the one
            # that arrived at the receiver (which may be an incorrect
            # assumption).
            snd_idx = len(snd_data_pkts) - 1
            while snd_idx >= 0:
                if snd_data_pkts[snd_idx][features.SEQ_FET] == last_seq:
                    # unique_snd_pkts, counts = np.unique(
                    #     snd_data_pkts[:snd_idx + 1][features.SEQ_FET],
                    #     return_counts=True)
                    # unique_snd_pkts = unique_snd_pkts.tolist()
                    # counts = counts.tolist()
                    # all_retrans = [
                    #     (seq, counts)
                    #     for seq, counts in zip(unique_snd_pkts, counts)
                    #     if counts > 1]

                    # tot_pkts = snd_idx + 1

                    # The retransmission rate is:
                    #     1 - unique packets / total packets.
                    output[-1][features.RETRANS_RATE_FET] = (
                        1 -
                        # Find the number of unique sequence numbers, from the
                        # beginning up until when the last received packet was
                        # sent.
                        np.unique(snd_data_pkts[:snd_idx + 1][features.SEQ_FET]
                                  ).shape[0] /
                        # Convert from index to packet count.
                        (snd_idx + 1))
                    break
                snd_idx -= 1
            else:
                print("Warning: Did not find when the last received packet "
                      f"(seq: {last_seq}) was sent for flow {flw_idx} in: "
                      f"{exp_flp}")

            # Calculate the true drop rate at the bottleneck queue using the
            # bottleneck queue logs.
            client_port = flw[0]
            deq_idx = None
            drop_rate = None
            if q_log is None:
                print(
                    f"Warning: Unable to find bottleneck queue log: {q_log_flp}"
                )
            else:
                # Find the dequeue log corresponding to the last packet that was
                # received.
                for record_idx, record in reversed(q_log):
                    if (record[0] == "deq" and record[2] == client_port
                            and record[3] == last_seq):
                        deq_idx = record_idx
                        break
            if deq_idx is None:
                print("Warning: Did not find when the last received packet "
                      f"(seq: {last_seq}) was dequeued for flow {flw_idx} in: "
                      f"{exp_flp}")
            else:
                # Find the most recent stats log before the last received
                # packet was dequeued.
                for _, record in reversed(q_log[:deq_idx]):
                    if record[0] == "stats" and record[1] == client_port:
                        drop_rate = record[4] / (record[2] + record[4])
                        break
            if drop_rate is None:
                print(
                    "Warning: Did not calculate the drop rate at the bottleneck "
                    f"queue for flow {flw_idx} in: {exp_flp}")
            else:
                output[-1][features.DROP_RATE_FET] = drop_rate

        # Make sure that all output rows were used.
        used_rows = np.sum(output[features.ARRIVAL_TIME_FET] != -1)
        total_rows = output.shape[0]
        assert used_rows == total_rows, \
            (f"Error: Used only {used_rows} of {total_rows} rows for flow "
             f"{flw_idx} in: {exp_flp}")

        flw_results[flw] = output

    # Save memory by explicitly deleting the sent and received packets
    # after they have been parsed. This happens outside of the above
    # for-loop because earlier iterations' packets are freed automatically
    # when the *_pkts variables are overwritten on the next iteration;
    # only the last iteration's packets are still referenced here.
    del snd_data_pkts
    del recv_data_pkts
    del recv_ack_pkts

    if not skip_smoothed:
        # Maps each window to the index of the packet at the start of that
        # window.
        win_to_start_idx = {win: 0 for win in features.WINDOWS}

        # Merge the flow data into a unified timeline.
        combined = []
        for flw in flws:
            num_pkts = flw_results[flw].shape[0]
            merged = np.empty((num_pkts, ),
                              dtype=[(features.WIRELEN_FET, "int32"),
                                     (features.MIN_RTT_FET, "int32"),
                                     ("client port", "int32"),
                                     ("server port", "int32"),
                                     ("index", "int32")])
            merged[features.WIRELEN_FET] = flw_results[flw][
                features.WIRELEN_FET]
            merged[features.MIN_RTT_FET] = flw_results[flw][
                features.MIN_RTT_FET]
            merged["client port"].fill(flw[0])
            merged["server port"].fill(flw[1])
            merged["index"] = np.arange(num_pkts)
            combined.append(merged)
        zipped_arr_times, zipped_dat = utils.zip_timeseries(
            [flw_results[flw][features.ARRIVAL_TIME_FET] for flw in flws],
            combined)

        for j in range(zipped_arr_times.shape[0]):
            min_rtt_us = zipped_dat[j][features.MIN_RTT_FET]
            if min_rtt_us == -1:
                continue

            for win in features.WINDOWS:
                # The bounds should never go backwards, so start the
                # search at the current bound.
                win_to_start_idx[win] = utils.find_bound(
                    zipped_arr_times,
                    target=(zipped_arr_times[j] -
                            (win * zipped_dat[j][features.MIN_RTT_FET])),
                    min_idx=win_to_start_idx[win],
                    max_idx=j,
                    which="after")
                # If the window's trailing edge caught up with its
                # leading edge, then skip this window.
                if win_to_start_idx[win] >= j:
                    continue

                total_tput_bps = utils.safe_div(
                    utils.safe_mul(
                        # Accumulate the bytes received across all flows
                        # during this window. When calculating the average
                        # throughput, we must exclude the first packet in
                        # the window.
                        utils.safe_sum(zipped_dat[features.WIRELEN_FET],
                                       start_idx=win_to_start_idx[win] + 1,
                                       end_idx=j),
                        8 * 1e6),
                    utils.safe_sub(zipped_arr_times[j],
                                   zipped_arr_times[win_to_start_idx[win]]))
                # Check if this throughput is erroneous.
                if total_tput_bps > exp.bw_bps:
                    win_to_errors[win] += 1
                else:
                    # Extract the flow to which this packet belongs, as well as
                    # its index in its flow.
                    flw = tuple(zipped_dat[j][["client port",
                                               "server port"]].tolist())
                    index = zipped_dat[j]["index"]
                    flw_results[flw][index][features.make_win_metric(
                        features.TOTAL_TPUT_FET, win)] = total_tput_bps
                    # Use the total throughput and the number of active flows to
                    # calculate the throughput fair share.
                    flw_results[flw][index][features.make_win_metric(
                        features.TPUT_FAIR_SHARE_BPS_FET,
                        win)] = (utils.safe_div(
                            total_tput_bps,
                            flw_results[flw][index][features.ACTIVE_FLOWS_FET])
                                 )
                    # Divide the flow's throughput by the total throughput.
                    tput_share = utils.safe_div(
                        flw_results[flw][index][features.make_win_metric(
                            features.TPUT_FET, win)], total_tput_bps)
                    flw_results[flw][index][features.make_win_metric(
                        features.TPUT_SHARE_FRAC_FET, win)] = tput_share
                    # Calculate the ratio of tput share to bandwidth fair share.
                    flw_results[flw][index][features.make_win_metric(
                        features.TPUT_TO_FAIR_SHARE_RATIO_FET,
                        win)] = (utils.safe_div(
                            tput_share, flw_results[flw][index][
                                features.BW_FAIR_SHARE_FRAC_FET]))

    print(f"\tFinal window durations in: {exp_flp}:")
    for win in features.WINDOWS:
        print(
            f"\t\t{win}:",
            ", ".join(f"{dur_us} us" if dur_us > 0 else "unknown"
                      for dur_us in (win * np.asarray([
                          res[-1][features.MIN_RTT_FET]
                          for res in flw_results.values()
                      ])).tolist()))
    print(f"\tWindow errors in: {exp_flp}")
    for win in features.WINDOWS:
        print(f"\t\t{win}:", win_to_errors[win])
    smallest_safe_win = 0
    for win in sorted(features.WINDOWS):
        if win_to_errors[win] == 0:
            print(f"\tSmallest safe window size is {win} in: {exp_flp}")
            smallest_safe_win = win
            break
    else:
        print(f"Warning: No safe window sizes in: {exp_flp}")

    # Determine if there are any NaNs or Infs in the results. For each flow's
    # results, look through all features (columns) and make a note of the
    # features that have bad values. Flatten these lists of feature names,
    # using a set comprehension to remove duplicates.
    bad_fets = {
        fet
        for flw_dat in flw_results.values() for fet in flw_dat.dtype.names
        if not np.isfinite(flw_dat[fet]).all()
    }
    if bad_fets:
        print(f"Warning: Experiment {exp_flp} has NaNs of Infs in features: "
              f"{bad_fets}")

    # Save the results.
    if path.exists(out_flp):
        print(f"\tOutput already exists: {out_flp}")
    else:
        print(f"\tSaving: {out_flp}")
        np.savez_compressed(
            out_flp, **{
                str(k + 1): v
                for k, v in enumerate(flw_results[flw] for flw in flws)
            })

    return smallest_safe_win
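
The loss event rate above leans on two helpers that this page does not show, make_interval_weight and compute_weighted_average. A simplified sketch of what they are assumed to do, following the average-loss-interval method from the RFC referenced in the comments (the full method also compares the average with and without the open interval, which is omitted here):

import collections

def make_interval_weight(num_intervals):
    # The newest half of the intervals get full weight; older ones decay
    # linearly (for 8 intervals: 1, 1, 1, 1, 0.8, 0.6, 0.4, 0.2).
    return [1.0 if i < num_intervals / 2
            else 2.0 * (num_intervals - i) / (num_intervals + 2)
            for i in range(num_intervals)]

def compute_weighted_average(current_interval, intervals, weights):
    # Weighted average of the loss intervals, with the still-open current
    # interval as the newest one; the loss event rate is its reciprocal.
    pairs = list(zip([current_interval] + list(intervals), weights))
    total_weight = sum(w for _, w in pairs)
    if total_weight == 0:
        return -1
    avg_interval = sum(i * w for i, w in pairs) / total_weight
    return 0 if avg_interval == 0 else 1 / avg_interval

rate = compute_weighted_average(
    50, collections.deque([100, 200]), make_interval_weight(8))
# rate is roughly 1 / 116.7, i.e. about 0.0086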
Beispiel #18
0
    had_data_title = "Had data (taken from SM MC)"
    had_data_raw = copy(had_data)
    if use_data is True :
        had_data = had_data/diff
        had_data_title = "Had data (trigger eff corrected)"

    tab.newpage("Hadronic trigger and data")
    tab.add_table(diff,"Rebinned had trigger effs")
    tab.add_table(had_data_raw,"Had data raw")
    tab.add_table(had_data,had_data_title)

    tab.newpage("Hadronic data and MC yields")
    tab.add_table(had_data,had_data_title)
    tab.add_table(had_sm,"Had SM MC")
    tab.add_table(utils.safe_div(had_data,had_sm),"Had data / SM MC")
    tab.add_table(utils.safe_div(had_data,had_ewk),"Had data / EWK MC")

    tab.newpage("Hadronic data and MC yields")
    had_data_raw = copy(had_data)
    if use_data is True :
        had_data = had_data/diff
        tab.add_table(had_data,"Had data (trigger eff corrected)")
    else :
        tab.add_table(had_data,"Had data (taken from SM MC)")
    tab.add_table(had_sm,"Had SM MC")
    tab.add_table(utils.safe_div(had_data,had_sm),"Had data / SM MC")

    tab.newpage("Hadronic EWK and QCD yields from MC")
    tab.add_table(had_ewk,"Had EWK MC")
    tab.add_table(had_qcd,"Had QCD MC")
Beispiel #19
0
def ewk_tf(signal, control):
    return utils.safe_div(signal, control)