def last_k_instalment_features_with_fractions(gr, periods, fraction_periods):
    gr_ = gr.copy()
    features = {}
    features_temp = {}

    for period in periods:
        gr_period = gr_[gr_['days'] <= period]
        features_temp = utils.add_features_in_group(
            features_temp, gr_period, 'installments',
            ['mean', 'var', 'skew', 'kurt', 'iqr'],
            'last_{}_'.format(period))
        features_temp = utils.add_features_in_group(
            features_temp, gr_period, 'purchase_amount',
            ['sum', 'max', 'mean', 'var', 'skew', 'kurt', 'iqr'],
            'last_{}_'.format(period))

    for short_period, long_period in fraction_periods:
        short_feature_names = utils._get_feature_names(features_temp, short_period)
        long_feature_names = utils._get_feature_names(features_temp, long_period)

        for short_feature, long_feature in zip(short_feature_names, long_feature_names):
            old_name_chunk = '_{}_'.format(short_period)
            new_name_chunk = '_{}by{}_fraction_'.format(short_period, long_period)
            fraction_feature_name = short_feature.replace(old_name_chunk, new_name_chunk)
            features[fraction_feature_name] = utils.safe_div(
                features_temp[short_feature], features_temp[long_feature])

    return pd.Series(features)
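# Every snippet in this collection leans on a `safe_div` helper from a
# project-specific `utils` module that is not reproduced here. As a point of
# reference only, a minimal sketch of such a helper might look like the
# following (the zero fallback and NumPy handling are assumptions; the real
# utils.safe_div may instead return -1, NaN, or propagate uncertainties).
import numpy as np

def safe_div(num, denom, fallback=0.0):
    """Element-wise num / denom that returns `fallback` wherever denom is 0."""
    if np.isscalar(num) and np.isscalar(denom):
        return num / denom if denom != 0 else fallback
    num, denom = np.broadcast_arrays(np.asarray(num, dtype=float),
                                     np.asarray(denom, dtype=float))
    out = np.full(num.shape, fallback, dtype=float)
    np.divide(num, denom, out=out, where=denom != 0)
    return out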
def minmax(x, y=None):
    # batch-wise pre-processing
    axis = tuple(range(1, len(x.shape)))

    # MIN-MAX
    x_max = tf.math.reduce_max(x, axis=axis, keepdims=True)
    x_min = tf.math.reduce_min(x, axis=axis, keepdims=True)
    x = safe_div(x - x_min, x_max - x_min)

    if y is not None:
        return x, y
    return x
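# In the batch-wise min-max normalisation above, `safe_div` has to operate on
# tensors and should yield 0 for constant-valued examples (x_max == x_min).
# A minimal, hedged tensor equivalent is a thin wrapper around
# tf.math.divide_no_nan; the project's own helper may differ.
import tensorflow as tf

def safe_div(numerator, denominator):
    # Element-wise division that returns 0.0 wherever the denominator is 0.
    return tf.math.divide_no_nan(numerator, denominator)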
def qcd_ratio(qcd_only, method=""):
    ratio = None
    if method == "abcd":
        q = qcd_only
        r = utils.safe_div(q[:-1, :], q[1:, :])
        r = np.delete(r, r.shape[1] - 1, 1)             # delete last row
        r = np.insert(r, 0, ufloat(np.nan, np.nan), 1)  # insert first row
        r = np.insert(r, 0, ufloat(np.nan, np.nan), 0)  # insert first column
        ratio = r
    elif method == "double":
        q = qcd_only
        r = utils.safe_div(q[:-1, :], q[1:, :])
        rr = utils.safe_div(r[:, 1:-1] * r[:, 1:-1], r[:, :-2])
        rr = np.insert(rr, (0, 0), ufloat(np.nan, np.nan), 1)  # insert first two rows
        rr = np.insert(rr, 0, ufloat(np.nan, np.nan), 0)       # insert first column
        ratio = rr
    else:  # "raw"
        q = qcd_only
        r = utils.safe_div(q[:-1, :], q[1:, :])
        r = np.insert(r, 0, ufloat(np.nan, np.nan), 0)  # insert first column
        ratio = r
    return ratio
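# qcd_ratio works on 2-D arrays of uncertainties.ufloat values and pads the
# result with NaN entries so the output keeps the shape of the input grid.
# A hedged usage sketch follows; the yields are purely illustrative and it
# assumes utils.safe_div handles object arrays of ufloats.
import numpy as np
from uncertainties import ufloat

q = np.array([[ufloat(10, 3), ufloat(8, 3), ufloat(6, 2)],
              [ufloat(5, 2), ufloat(4, 2), ufloat(3, 2)],
              [ufloat(2, 1), ufloat(2, 1), ufloat(1, 1)]], dtype=object)

ratios = qcd_ratio(q, method="raw")  # bin-by-bin ratios of adjacent slices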
def top_conversations_by_chars(self_name: str,
                               conversations: List[NamedConversation],
                               exhaustive_lists: bool):
    """
    Generates a list of top conversations ordered by characters exchanged from
    the specified list of conversations.

    :param self_name: name of the person which should be considered as "myself"
    :param conversations: list of conversations
    :param exhaustive_lists: whether the list should include all conversations
    :return: None (the ranking is printed to stdout)
    """
    conversation_counts = {}
    total_messages = 0  # Used for computing threshold when not using exhaustive lists.

    for name, participants, messages in conversations:
        total_messages += len(messages)

        # Create counters for each conversation not yet present in the mapping.
        if name not in conversation_counts:
            conversation_counts[name] = [0, 0]  # others, me

        for sender, text, _ in messages:
            if sender == self_name:
                conversation_counts[name][1] += 0 if text is None else len(text)
            else:
                conversation_counts[name][0] += 0 if text is None else len(text)

    top_conversations = reversed(
        sorted((value[0] + value[1], key, value)
               for (key, value) in conversation_counts.items()))

    # Threshold is used to prevent outputting lots of conversations with very
    # few messages. Currently it is calculated as the average message count
    # per conversation.
    threshold = safe_div(total_messages, len(conversation_counts))

    print('Conversations by characters exchanged:')
    for characters, conversation_name, counts in top_conversations:
        if characters != 0:
            if exhaustive_lists or characters > threshold:
                print(f'{conversation_name}\t{characters} ({counts[1]} sent, {counts[0]} received)')

    if not exhaustive_lists:
        print('And more...')
def result(self):
    # Location-sensitive detection performance
    ER = safe_div(self.S + self.D + self.I, self.Nref)
    prec = safe_div(self.TP, self.Nsys)
    recall = safe_div(self.TP, self.Nref)
    F = safe_div(2 * prec * recall, prec + recall)

    # Class-sensitive localization performance
    if self.DE_TP > 0:
        DE = safe_div(self.total_DE, self.DE_TP)
    else:
        # When the total number of predictions is zero
        DE = tf.ones([], tf.float32) * 180

    DE_prec = safe_div(self.DE_TP, self.Nsys)
    DE_recall = safe_div(self.DE_TP, self.Nref)
    DE_F = safe_div(2 * DE_prec * DE_recall, DE_prec + DE_recall)

    return ER, F, DE, DE_F
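# For reference, the location-sensitive scores above follow the usual SELD
# definitions ER = (S + D + I) / Nref and F = 2 * P * R / (P + R). A tiny
# worked check with illustrative counts (not taken from the metric class):
S, D, I, TP, Nref, Nsys = 2, 1, 3, 40, 50, 46
ER = (S + D + I) / Nref                    # 0.12
prec, recall = TP / Nsys, TP / Nref        # ~0.870, 0.80
F = 2 * prec * recall / (prec + recall)    # ~0.833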
def evaluate_net(model, test_data, ctx):
    triplet_loss = gluon.loss.TripletLoss(margin=0)
    sum_correct = 0
    sum_all = 0
    rate = 0.0

    for i, (data, _) in enumerate(test_data):
        data = data.as_in_context(ctx)
        anc_ins, pos_ins, neg_ins = data[:, 0], data[:, 1], data[:, 2]

        inter1 = model(anc_ins)  # same combination as used during training
        inter2 = model(pos_ins)
        inter3 = model(neg_ins)

        loss = triplet_loss(inter1, inter2, inter3)
        loss = loss.asnumpy()

        n_all = loss.shape[0]
        n_correct = np.sum(np.where(loss == 0, 1, 0))

        sum_correct += n_correct
        sum_all += n_all
        rate = safe_div(sum_correct, sum_all)
        print('Accuracy: %.4f (%s / %s)' % (rate, sum_correct, sum_all))

    return rate
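# With margin=0, a triplet contributes zero loss exactly when the anchor is at
# least as close to the positive as to the negative, so the printed rate is the
# fraction of correctly ordered triplets. A small illustrative check (the loss
# values are made up):
import numpy as np

batch_loss = np.array([0.0, 0.37, 0.0, 0.0])               # per-triplet losses
accuracy = np.sum(batch_loss == 0) / batch_loss.shape[0]   # 0.75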
def parse_pcap(sim_dir, out_dir): """ Parse a PCAP file. """ print(f"Parsing: {sim_dir}") sim = utils.Sim(sim_dir) assert sim.unfair_flws > 0, f"No unfair flows to analyze: {sim_dir}" # Construct the output filepaths. out_flp = path.join(out_dir, f"{sim.name}.npz") # If the output file exists, then we do not need to parse this file. if path.exists(out_flp): print(f" Already parsed: {sim_dir}") return # Process PCAP files from unfair senders and receivers. # # The final output, with one entry per unfair flow. unfair_flws = [] for unfair_idx in range(sim.unfair_flws): # Since this will not be used in practice, we can calculate # the min one-way delay using the simulation's parameters. one_way_us = sim.btl_delay_us + 2 * sim.edge_delays[unfair_idx] # Packet lists are of tuples of the form: # (seq, sender, timestamp us, timestamp option) sent_pkts = utils.parse_packets(path.join( sim_dir, f"{sim.name}-{unfair_idx + 2}-0.pcap"), sim.payload_B, direction="data") recv_pcap_flp = path.join( sim_dir, (f"{sim.name}-{unfair_idx + 2 + sim.unfair_flws + sim.fair_flws}-0" ".pcap")) recv_pkts = utils.parse_packets(recv_pcap_flp, sim.payload_B, direction="data") # Ack packets for RTT calculation ack_pkts = utils.parse_packets(recv_pcap_flp, sim.payload_B, direction="ack") # State that the windowed metrics need to track across packets. win_state = { win: { # The index at which this window starts. "window_start_idx": 0, # The "loss event rate". "loss_interval_weights": make_interval_weight(8), "loss_event_intervals": collections.deque(), "current_loss_event_start_idx": 0, "current_loss_event_start_time": 0, # For "loss rate true". "loss_queue_true": collections.deque(), # For "loss rate estimated". "loss_queue_estimate": collections.deque() } for win in WINDOWS } # The final output. -1 implies that a value was unable to be # calculated. output = np.empty(len(recv_pkts), dtype=DTYPE) output.fill(-1) # Total number of packet losses up to the current received # packet. pkt_loss_total_true = 0 pkt_loss_total_estimate = 0 # Loss rate estimation. prev_pkt_seq = 0 highest_seq = 0 # RTT estimation. ack_idx = 0 for j, recv_pkt in enumerate(recv_pkts): # Regular metrics. recv_pkt_seq = recv_pkt[0] output[j]["seq"] = recv_pkt_seq recv_time_cur = recv_pkt[2] output[j]["arrival time us"] = recv_time_cur if j > 0: # Receiver-side RTT estimation using the TCP timestamp # option. Attempt to find a new RTT estimate. Move # ack_idx to the first occurance of the timestamp # option TSval corresponding to the current packet's # TSecr. tsval = ack_pkts[ack_idx][3][0] tsecr = recv_pkt[3][1] ack_idx_old = ack_idx while tsval != tsecr and ack_idx < len(ack_pkts): ack_idx += 1 tsval = ack_pkts[ack_idx][3][0] if tsval == tsecr: # If we found a timestamp option match, then # update the RTT estimate. rtt_estimate_us = recv_time_cur - ack_pkts[ack_idx][2] else: # Otherwise, use the previous RTT estimate and # reset ack_idx to search again for the next # packet. rtt_estimate_us = output[j - 1][make_ewma_metric( "RTT estimate us", alpha=1.)] ack_idx = ack_idx_old # Update the min RTT estimate. min_rtt_us = utils.safe_min(output[j - 1]["min RTT us"], rtt_estimate_us) output[j]["min RTT us"] = min_rtt_us # Compute the new RTT ratio. rtt_estimate_ratio = utils.safe_div(rtt_estimate_us, min_rtt_us) # Calculate the inter-arrival time. 
recv_time_prev = recv_pkts[j - 1][2] interarr_time_us = recv_time_cur - recv_time_prev else: rtt_estimate_us = -1 rtt_estimate_ratio = -1 min_rtt_us = -1 recv_time_prev = -1 interarr_time_us = -1 # Calculate the true packet loss rate. Count the number of # dropped packets by checking if the sequence numbers at # sender and receiver are the same. If not, the packet is # dropped, and the pkt_loss_total_true counter increases # by one to keep the index offset at sender sent_pkt_seq = sent_pkts[j + pkt_loss_total_true][0] pkt_loss_total_true_prev = pkt_loss_total_true while sent_pkt_seq != recv_pkt_seq: # Packet loss pkt_loss_total_true += 1 sent_pkt_seq = sent_pkts[j + pkt_loss_total_true][0] # Calculate how many packets were lost since receiving the # last packet. pkt_loss_cur_true = pkt_loss_total_true - pkt_loss_total_true_prev # Receiver-side loss rate estimation. Estimate the losses # since the last packet. pkt_loss_cur_estimate = math.ceil( 0 if recv_pkt_seq == prev_pkt_seq + sim.payload_B else (( (recv_pkt_seq - highest_seq - sim.payload_B) / sim.payload_B ) if recv_pkt_seq > highest_seq + sim.payload_B else (1 if ( recv_pkt_seq < prev_pkt_seq and prev_pkt_seq != highest_seq ) else 0))) pkt_loss_total_estimate += pkt_loss_cur_estimate prev_pkt_seq = recv_pkt_seq highest_seq = max(highest_seq, prev_pkt_seq) # Calculate the true RTT and RTT ratio. Look up the send # time of this packet to calculate the true # sender-receiver delay. Assume that, on the reverse path, # packets will experience no queuing delay. rtt_true_us = (recv_time_cur - sent_pkts[j + pkt_loss_total_true][2] + one_way_us) rtt_true_ratio = rtt_true_us / (2 * one_way_us) # EWMA metrics. for (metric, _), alpha in itertools.product(EWMAS, ALPHAS): metric = make_ewma_metric(metric, alpha) if "interarrival time us" in metric: new = interarr_time_us elif ("throughput p/s" in metric and "mathis model" not in metric): # Do not use the existing interarrival EWMA to # calculate the throughput. Instead, use the true # interarrival time so that the value used to # update the throughput EWMA is not "EWMA-ified" # twice. Divide by 1e6 to convert from # microseconds to seconds. new = utils.safe_div(1, utils.safe_div(interarr_time_us, 1e6)) elif "RTT estimate us" in metric: new = rtt_estimate_us elif "RTT estimate ratio" in metric: new = rtt_estimate_ratio elif "RTT true us" in metric: new = rtt_true_us elif "RTT true ratio" in metric: new = rtt_true_ratio elif "loss rate estimate" in metric: # See comment in case for "loss rate true". new = pkt_loss_cur_estimate / (pkt_loss_cur_estimate + 1) elif "loss rate true" in metric: # Divide the pkt_loss_cur_true by # (pkt_loss_cur_true + 1) because over the course # of sending (pkt_loss_cur_true + 1) packets, one # got through and pkt_loss_cur_true were lost. new = pkt_loss_cur_true / (pkt_loss_cur_true + 1) elif "queue occupancy" in metric: # Queue occupancy is calculated using the router # logs, below. continue elif "mathis model throughput p/s" in metric: # Use the estimated loss rate to compute the # Mathis model fair throughput. Contrary to the # decision for interarrival time, above, here we # use the value of another EWMA (loss rate # estimate) to compute the new value for the # Mathis model throughput EWMA. I believe that # this is desirable because we want to see how the # metric as a whole reacts to a certain degree of # memory. loss_rate_estimate = (pkt_loss_total_estimate / j if j > 0 else -1) # Use "safe" operations in case any of the # supporting values are -1 (unknown). 
new = (-1 if loss_rate_estimate <= 0 else utils.safe_div( MATHIS_C, utils.safe_div( utils.safe_mul( min_rtt_us, utils.safe_sqrt(loss_rate_estimate)), 1e6))) elif "mathis model label" in metric: # Use the current throughput and the Mathis model # fair throughput to compute the Mathis model # label. output[j][metric] = utils.safe_mathis_label( output[j][make_ewma_metric("throughput p/s", alpha)], output[j][make_ewma_metric( "mathis model throughput p/s", alpha)]) # Continue because the value of this metric is not # an EWMA. continue else: raise Exception(f"Unknown EWMA metric: {metric}") # Update the EWMA. output[j][metric] = utils.safe_update_ewma( -1 if j == 0 else output[j - 1][metric], new, alpha) # Windowed metrics. for (metric, _), win in itertools.product(WINDOWED, WINDOWS): metric = make_win_metric(metric, win) # If we have not been able to estimate the min RTT # yet, then we cannot compute any of the windowed # metrics. if min_rtt_us == -1: continue win_size_us = win * min_rtt_us # Move the start of the window forward. while ((recv_time_cur - recv_pkts[win_state[win]["window_start_idx"]][2]) > win_size_us): win_state[win]["window_start_idx"] += 1 win_start_idx = win_state[win]["window_start_idx"] if "average interarrival time us" in metric: new = ((recv_time_cur - recv_pkts[win_start_idx][2]) / (j - win_start_idx + 1)) elif "average throughput p/s" in metric: # We base the throughput calculation on the # average interarrival time over the window. avg_interarr_time_us = output[j][make_win_metric( "average interarrival time us", win)] # Divide by 1e6 to convert from microseconds to # seconds. new = utils.safe_div( 1, utils.safe_div(avg_interarr_time_us, 1e6)) elif "average RTT estimate us" in metric: new = utils.safe_mean( output[make_ewma_metric("RTT estimate us", alpha=1.)], win_start_idx, j) elif "average RTT estimate ratio" in metric: new = utils.safe_mean( output[make_ewma_metric("RTT estimate ratio", alpha=1.)], win_start_idx, j) elif "average RTT true us" in metric: new = utils.safe_mean( output[make_ewma_metric("RTT true us", alpha=1.)], win_start_idx, j) elif "average RTT true ratio" in metric: new = utils.safe_mean( output[make_ewma_metric("RTT true ratio", alpha=1.)], win_start_idx, j) elif "loss event rate" in metric and "1/sqrt" not in metric: rtt_estimate_us = output[j][make_win_metric( "average RTT estimate us", win)] if rtt_estimate_us == -1: # The RTT estimate is -1 (unknown), so we # cannot compute the loss event rate. continue cur_start_idx = win_state[win][ "current_loss_event_start_idx"] cur_start_time = win_state[win][ "current_loss_event_start_time"] if pkt_loss_cur_estimate > 0: # There was a loss since the last packet. # # The index of the first packet in the current # loss event. new_start_idx = (j + pkt_loss_total_estimate - pkt_loss_cur_estimate) if cur_start_idx == 0: # This is the first loss event. # # Naive fix for the loss event rate # calculation The described method in the # RFC is complicated for the first event # handling. cur_start_idx = 1 cur_start_time = 0 new = 1 / j else: # This is not the first loss event. See if # any of the newly-lost packets start a # new loss event. # # The average time between when packets # should have arrived, since we received # the last packet. loss_interval = ((recv_time_cur - recv_time_prev) / (pkt_loss_cur_estimate + 1)) # Look at each lost packet... for k in range(pkt_loss_cur_estimate): # Compute the approximate time at # which the packet should have been # received if it had not been lost. 
loss_time = (recv_time_prev + (k + 1) * loss_interval) # If the time of this loss is more # than one RTT from the time of the # start of the current loss event, # then this is a new loss event. if (loss_time - cur_start_time >= rtt_estimate_us): # Record the number of packets # between the start of the new # loss event and the start of the # previous loss event. win_state[win][ "loss_event_intervals"].appendleft( new_start_idx - cur_start_idx) # Potentially discard an old event. if len(win_state[win] ["loss_event_intervals"]) > win: win_state[win][ "loss_event_intervals"].pop() cur_start_idx = new_start_idx cur_start_time = loss_time # Calculate the index at which the # new loss event begins. new_start_idx += 1 new = compute_weighted_average( (j + pkt_loss_total_estimate - cur_start_idx), win_state[win]["loss_event_intervals"], win_state[win]["loss_interval_weights"]) elif pkt_loss_total_estimate > 0: # There have been no losses since the last # packet, but the total loss is nonzero. # Increase the size of the current loss event. new = compute_weighted_average( j + pkt_loss_total_estimate - cur_start_idx, win_state[win]["loss_event_intervals"], win_state[win]["loss_interval_weights"]) else: # There have never been any losses, so the # loss event rate is 0. new = 0 # Record the new values of the state variables. win_state[win][ "current_loss_event_start_idx"] = cur_start_idx win_state[win][ "current_loss_event_start_time"] = cur_start_time elif "1/sqrt loss event rate" in metric: # Use the loss event rate to compute # 1 / sqrt(loss event rate). new = utils.safe_div( 1, utils.safe_sqrt(output[j][make_win_metric( "loss event rate", win)])) elif "loss rate estimate" in metric: # We do not need to check whether recv_time_prev # is -1 (unknown) because the windowed metrics # skip the case where j == 0. win_state[win]["loss_queue_estimate"], new = loss_rate( win_state[win]["loss_queue_estimate"], win_start_idx, pkt_loss_cur_estimate, recv_time_cur, recv_time_prev, win_size_us, j) elif "loss rate true" in metric: # We do not need to check whether recv_time_prev # is -1 (unknown) because the windowed metrics # skip the case where j == 0. win_state[win]["loss_queue_true"], new = loss_rate( win_state[win]["loss_queue_true"], win_start_idx, pkt_loss_cur_true, recv_time_cur, recv_time_prev, win_size_us, j) elif "queue occupancy" in metric: # Queue occupancy is calculated using the router # logs, below. continue elif "mathis model throughput p/s" in metric: # Use the loss event rate to compute the Mathis # model fair throughput. loss_rate_estimate = (pkt_loss_total_estimate / j if j > 0 else -1) new = utils.safe_div( MATHIS_C, utils.safe_div( utils.safe_mul( min_rtt_us, utils.safe_sqrt(loss_rate_estimate)), 1e6)) elif "mathis model label" in metric: # Use the current throughput and Mathis model # fair throughput to compute the Mathis model # label. new = utils.safe_mathis_label( output[j][make_win_metric("average throughput p/s", win)], output[j][make_win_metric( "mathis model throughput p/s", win)]) else: raise Exception(f"Unknown windowed metric: {metric}") output[j][metric] = new unfair_flws.append(output) # Save memory by explicitly deleting the sent and received packets # after they have been parsed. This happens outside of the above # for-loop because only the last iteration's sent and received # packets are not automatically cleaned up by now (they go out of # scope when the sent_pkts and recv_pkts variables are overwritten # by the next loop). 
del sent_pkts del recv_pkts # Process pcap files from the bottleneck router to determine queue # occupency. Packet lists are of tuples of the form: # (seq, sender, timestamp us, timestamp option) router_pkts = utils.parse_packets(path.join(sim_dir, f"{sim.name}-1-0.pcap"), sim.payload_B, direction="data") # State pertaining to each flow. flw_state = { flw: { # Index of the output array where the queue occupency # results should be appended. "output_idx": 0, # The number of other flows' packets that have arrived # since the last packet for this flow. "packets_since_last": 0, # The number of packets from this flow currently in the # window. "window_flow_packets": {win: 0 for win in WINDOWS} } for flw in range(sim.unfair_flws) } # The index of the first packet in the window, for every window # size. win_start_idxs = {win: 0 for win in WINDOWS} # Loop over all of the packets receiver by the bottleneck # router. Note that we process all flows at once. for j, router_pkt in enumerate(router_pkts): _, sender, curr_time, _ = router_pkt # Process only packets that are part of one of the unfair # flows. Discard packets that did not make it to the receiver # (e.g., at the end of the experiment). if (sender < sim.unfair_flws and flw_state[sender]["output_idx"] < unfair_flws[sender].shape[0]): # We cannot move this above the if-statement condition # because it is valid only if sender < sim.unfair_flws. output_idx = flw_state[sender]["output_idx"] # EWMA metrics. for (metric, _), alpha in itertools.product(EWMAS, ALPHAS): metric = make_ewma_metric(metric, alpha) if "interarrival time us" in metric: # The interarrival time is calculated using the # sender and/or receiver logs, above. continue if "throughput p/s" in metric: # The throughput is calculated using the sender # and/or receiver logs, above. continue if "RTT estimate us" in metric: # The RTT is calculated using the sender and/or # receiver logs, above. continue if "RTT estimate ratio" in metric: # The RTT ratio is calculated using the sender # and/or receiver logs, above. continue if "RTT true us" in metric: # The RTT is calculated using the sender and/or # receiver logs, above. continue if "RTT true ratio" in metric: # The RTT ratio is calculated using the sender # and/or receiver logs, above. continue if "loss rate estimate" in metric: # The estiamted loss rate is calculated using the # sender and/or receiver logs, above. continue if "loss rate true" in metric: # The true loss rate is calculated using the # sender and/or receiver logs, above. continue if "queue occupancy" in metric: # The instanteneous queue occupancy is 1 divided # by the number of packets that have entered the # queue since the last packet from the same # flow. This is the fraction of packets added to # the queue corresponding to this flow, over the # time since when the flow's last packet arrived. new = utils.safe_div( 1, flw_state[sender]["packets_since_last"]) elif "mathis model throughput p/s" in metric: # The Mathis model fair throughput is calculated # using the sender and/or receiver logs, above. continue elif "mathis model label" in metric: # The Mathis model label is calculated using the # sender and/or receiver logs, above. continue else: raise Exception(f"Unknown EWMA metric: {metric}") unfair_flws[sender][output_idx][metric] = ( utils.safe_update_ewma( unfair_flws[sender][output_idx - 1][metric], new, alpha)) # Windowed metrics. 
for (metric, _), win in itertools.product(WINDOWED, WINDOWS): metric = make_win_metric(metric, win) if "average interarrival time us" in metric: # The average interarrival time is calculated # using the sender and/or receiver logs, above. continue if "average throughput p/s" in metric: # The average throughput is calculated using the # sender and/or receiver logs, above. continue if "average RTT estimate us" in metric: # The average RTT is calculated using the sender # and/or receiver logs, above. continue if "average RTT estimate ratio" in metric: # The average RTT ratio is calculated using the # sender and/or receiver logs, above. continue if "average RTT true us" in metric: # The average RTT is calculated using the sender # and/or receiver logs, above. continue if "average RTT true ratio" in metric: # The average RTT ratio is calculated using the # sender and/or receiver logs, above. continue if "loss event rate" in metric: # The loss event rate is calcualted using the # sender and/or receiver logs, above. continue if "1/sqrt loss event rate" in metric: # The reciprocal of the square root of the loss # event rate is calculated using the sender and/or # receiver logs, above. continue if "loss rate estimate" in metric: # The estimated loss rate is calcualted using the # sender and/or reciever logs, above. continue if "loss rate true" in metric: # The true loss rate is calculated using the # sender and/or receiver logs, above. continue if "queue occupancy" in metric: win_start_idx = win_start_idxs[win] # By definition, the window now contains one more # packet from this flow. win_flw_pkts = ( flw_state[sender]["window_flow_packets"][win] + 1) # The current length of the window. win_cur_us = curr_time - router_pkts[win_start_idx][2] # Extract the RTT estimate. rtt_estimate_us = unfair_flws[sender][output_idx][ make_win_metric("average RTT estimate us", win)] if rtt_estimate_us == -1: # The RTT estimate is -1 (unknown), so we # cannot calculate the size of the window. We # must record the new value of # "window_flow_packets". flw_state[sender]["window_flow_packets"][ win] = win_flw_pkts continue # Calculate the target length of the window. win_target_us = win * rtt_estimate_us # If the current window size is greater than the # target window size, then shrink the window. while win_cur_us > win_target_us: # If the packet that will be removed from # the window is from this flow, then we # need to decrease our record of the # number of this flow's packets in the # window by one. if router_pkts[win_start_idx][1] == sender: win_flw_pkts -= 1 # Move the start of the window forward. win_start_idx += 1 win_cur_us = curr_time - router_pkts[win_start_idx][2] # If the current window size is smaller than the # target window size, then grow the window. while (win_start_idx > 0 and win_cur_us < win_target_us): # Move the start of the window backward. win_start_idx -= 1 win_cur_us = curr_time - router_pkts[win_start_idx][2] # If the new packet that was added to the # window is from this flow, then we need # to increase our record of the number of # this flow's packets in the window by # one. if router_pkts[win_start_idx][1] == sender: win_flw_pkts += 1 # The queue occupancy is the number of this flow's # packets in the window divided by the total # number of packets in the window. new = win_flw_pkts / (j - win_start_idx + 1) # Record the new values of the state variables. 
win_start_idxs[win] = win_start_idx flw_state[sender]["window_flow_packets"][ win] = win_flw_pkts elif "mathis model throughput p/s" in metric: # The Mathis model fair throughput is calculated # using the sender and/or receiver logs, above. continue elif "mathis model label" in metric: # The Mathis model label is calculated using the # sender and/or receiver logs, above. continue else: raise Exception(f"Unknown windowed metric: {metric}") unfair_flws[sender][output_idx][metric] = new flw_state[sender]["output_idx"] += 1 # For the current packet's flow, the number of packets # since the last packet in this flow is now 1. flw_state[sender]["packets_since_last"] = 1 # For each unfair flow except the current packet's flow, # increment the number of packets since the last packet from # that flow. for flw in range(sim.unfair_flws): if flw != sender: flw_state[flw]["packets_since_last"] += 1 # Determine if there are any NaNs or Infs in the results. For the # results for each unfair flow, look through all features # (columns) and make a note of the features that bad # values. Flatten these lists of feature names, using a set # comprehension to remove duplicates. bad_fets = { fet for flw_dat in unfair_flws for fet in flw_dat.dtype.names if not np.isfinite(flw_dat[fet]).all() } if bad_fets: print(f" Simulation {sim_dir} has NaNs of Infs in features: " f"{bad_fets}") # Save the results. if path.exists(out_flp): print(f" Output already exists: {out_flp}") else: print(f" Saving: {out_flp}") np.savez_compressed( out_flp, **{str(k + 1): v for k, v in enumerate(unfair_flws)})
def calc_avg(self, video):
    return utils.safe_div(sum(self.iouList[video]), len(self.iouList[video]))
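# calc_avg is just the mean IoU for one video, with safe_div guarding against
# an empty list. A hedged usage sketch; `metric` stands in for whatever
# instance owns iouList, which is assumed to map video ids to per-frame IoUs.
metric.iouList = {"video_01": [0.8, 0.6, 0.7]}
metric.calc_avg("video_01")    # -> 0.7
metric.iouList["video_02"] = []
metric.calc_avg("video_02")    # safe_div avoids a ZeroDivisionError here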
had_data_title = "Had data (taken from SM MC)"
had_data_raw = copy(had_data)
if use_data is True:
    had_data = had_data / diff
    had_data_title = "Had data (trigger eff corrected)"

tab.newpage("Hadronic trigger and data")
tab.add_table(diff, "Rebinned had trigger effs")
tab.add_table(had_data_raw, "Had data raw")
tab.add_table(had_data, had_data_title)

tab.newpage("Hadronic data and MC yields")
tab.add_table(had_data, had_data_title)
tab.add_table(had_sm, "Had SM MC")
tab.add_table(utils.safe_div(had_data, had_sm), "Had data / SM MC")
tab.add_table(utils.safe_div(had_data, had_ewk), "Had data / EWK MC")

tab.newpage("Hadronic data and MC yields")
had_data_raw = copy(had_data)
if use_data is True:
    had_data = had_data / diff
    tab.add_table(had_data, "Had data (trigger eff corrected)")
else:
    tab.add_table(had_data, "Had data (taken from SM MC)")
tab.add_table(had_sm, "Had SM MC")
tab.add_table(utils.safe_div(had_data, had_sm), "Had data / SM MC")

tab.newpage("Hadronic EWK and QCD yields from MC")
tab.add_table(had_ewk, "Had EWK MC")
tab.add_table(had_qcd, "Had QCD MC")
def update_block_states(self, y_true_block, y_pred_block):
    sed_true, doa_true = y_true_block
    sed_pred, doa_pred = y_pred_block
    sed_pred = tf.cast(sed_pred > 0.5, sed_pred.dtype)

    # change doa shape from [..., n_classes*3] to [..., n_classes, 3]
    doa_true = tf.reshape(doa_true, (*doa_true.shape[:-1], 3, -1))
    doa_pred = tf.reshape(doa_pred, (*doa_pred.shape[:-1], 3, -1))
    perm = [*range(doa_true.ndim - 2), doa_true.ndim - 1, doa_true.ndim - 2]
    doa_true = tf.transpose(doa_true, perm=perm)
    doa_pred = tf.transpose(doa_pred, perm=perm)

    # whether a particular class exists in a block
    # true_classes, pred_classes: [..., n_frames, n_classes] shaped Tensor
    true_classes = tf.math.reduce_max(sed_true, axis=-2, keepdims=True)
    pred_classes = tf.math.reduce_max(sed_pred, axis=-2, keepdims=True)

    self.Nref += tf.math.reduce_sum(true_classes)
    self.Nsys += tf.math.reduce_sum(pred_classes)
    self.TN += tf.math.reduce_sum((1 - true_classes) * (1 - pred_classes))

    false_negative = true_classes * (1 - pred_classes)
    false_positive = (1 - true_classes) * pred_classes
    self.FN += tf.math.reduce_sum(false_negative)
    self.FP += tf.math.reduce_sum(false_positive)
    loc_FN = tf.math.reduce_sum(false_negative, axis=(-2, -1))
    loc_FP = tf.math.reduce_sum(false_positive, axis=(-2, -1))

    ''' when a class exists in both y_true and y_pred '''
    true_positives = true_classes * pred_classes
    frames_true = sed_true * true_positives
    frames_pred = sed_pred * true_positives
    frames_matched = frames_true * frames_pred

    # [..., 1, n_classes]
    total_matched_frames = tf.reduce_sum(frames_matched, axis=-2, keepdims=True)
    matched_frames_exist = tf.cast(total_matched_frames > 0,
                                   total_matched_frames.dtype)
    self.DE_TP += tf.math.reduce_sum(matched_frames_exist)

    false_negative = true_positives * (1 - matched_frames_exist)
    self.FN += tf.math.reduce_sum(false_negative)
    loc_FN += tf.math.reduce_sum(false_negative, axis=(-2, -1))

    # [..., n_frames, n_classes]
    angular_distances = distance_between_cartesian_coordinates(
        doa_true * tf.expand_dims(frames_matched, -1),
        doa_pred * tf.expand_dims(frames_matched, -1))
    average_distances = safe_div(
        tf.reduce_sum(angular_distances, -2, keepdims=True),
        total_matched_frames)
    self.total_DE += tf.reduce_sum(average_distances)

    close_angles = tf.cast(average_distances <= self.doa_threshold,
                           average_distances.dtype)
    self.TP += tf.reduce_sum(close_angles * matched_frames_exist)

    false_negative = (1 - close_angles) * matched_frames_exist
    self.FN += tf.reduce_sum(false_negative)
    loc_FN += tf.reduce_sum(false_negative, axis=(-2, -1))

    self.S += tf.reduce_sum(tf.math.minimum(loc_FP, loc_FN))
    self.D += tf.reduce_sum(tf.math.maximum(0, loc_FN - loc_FP))
    self.I += tf.reduce_sum(tf.math.maximum(0, loc_FP - loc_FN))
def __init__(self, team_stats=None):
    self.goals = 0
    self.ball_possession = 0
    self.own_half_ball_losses = 0
    self.opponent_half_ball_recoveries = 0
    self.own_half_ball_recoveries = 0
    self.successful_tackles = 0
    self.fouls = 0
    self.yellow_cards = 0
    self.red_cards = 0
    self.penalty_kick_goals = 0
    self.shots_on_goal = 0
    self.shots_inside_the_area = 0
    self.shots_outside_the_area = 0
    self.shots_on_target = 0
    self.shots_off_target = 0
    self.shots_after_left_side_attacks = 0
    self.shots_after_center_attacks = 0
    self.shots_after_right_side_attacks = 0
    self.direct_crosses_into_the_area = 0
    self.attacking_passes = 0
    self.key_passes = 0
    self.air_challenges_won = 0
    self.ground_challenges_won = 0
    self.dribbles_won = 0

    if team_stats is None:
        return

    # Ball Possession
    tmp_lost_balls = team_stats.get('lostBall', 0)
    tmp_own_half_lost_ball = team_stats.get('ownHalfLostBall', 0)
    tmp_ball_possession = team_stats.get('ballPossession', 0)
    tmp_ball_recovery = team_stats.get('ballRecovery', 0)
    tmp_ball_recovery_in_opponent_half = team_stats.get('ballRecoveryInOppHalf', 0)
    tmp_ball_recovery_in_own_half = team_stats.get('ballRecoveryInOwnHalf', 0)

    ball_possession = tmp_ball_possession / 100
    own_half_ball_losses = safe_div(tmp_own_half_lost_ball, tmp_lost_balls)
    opponent_half_ball_recoveries = safe_div(tmp_ball_recovery_in_opponent_half,
                                             tmp_ball_recovery)
    own_half_ball_recoveries = safe_div(tmp_ball_recovery_in_own_half,
                                        tmp_ball_recovery)

    # Cards
    tmp_tackles = team_stats.get('tackles', 0)
    tmp_successful_tackles = team_stats.get('tacklesSuccess', 0)
    tmp_fouls = team_stats.get('foul', 0)
    tmp_yellow_cards = team_stats.get('YellowCard', 0)
    tmp_red_cards = team_stats.get('RedCard', 0)

    successful_tackles = safe_div(tmp_successful_tackles, tmp_tackles)
    fouls = safe_div(tmp_fouls, tmp_tackles)
    yellow_cards = safe_div(tmp_yellow_cards, tmp_fouls)
    red_cards = safe_div(tmp_red_cards, tmp_fouls)

    # Penalties
    tmp_penalty_kicks = team_stats.get('PenaltyKick', 0)
    tmp_penalty_kick_goals = team_stats.get('PenaltyShot_Goal', 0)
    # tmp_missed_penalty = team_stats.get('MissedPenalty', 0)
    penalty_kick_goals = safe_div(tmp_penalty_kick_goals, tmp_penalty_kicks)

    # Goals
    tmp_regular_goals = team_stats.get('GoalRegular', 0)
    tmp_attempts_on_goal = team_stats.get('AttemptonGoal', 0)
    tmp_shots_inside_the_area = team_stats.get('ShotInsidetheArea', 0)
    tmp_shots_outside_the_area = team_stats.get('ShotOutsidetheArea', 0)
    tmp_shots_on_target = team_stats.get('OnTarget', 0)
    tmp_shots_off_target = team_stats.get('missedShot', 0)
    # tmp_blocked_shots = team_stats.get('blockedShot', 0)
    tmp_left_side_attacks = team_stats.get('leftSideAttack', 0)
    tmp_left_side_attacks_with_shot = team_stats.get('leftSideAttackWithShot', 0)
    tmp_center_attacks = team_stats.get('centerAttack', 0)
    tmp_center_attacks_with_shot = team_stats.get('centerAttackWithShot', 0)
    tmp_right_side_attacks = team_stats.get('rightSideAttack', 0)
    tmp_right_side_attacks_with_shot = team_stats.get('rightSideAttackWithShot', 0)

    shots_on_goal = safe_div(tmp_regular_goals, tmp_attempts_on_goal)
    shots_inside_the_area = safe_div(tmp_shots_inside_the_area, tmp_attempts_on_goal)
    shots_outside_the_area = safe_div(tmp_shots_outside_the_area, tmp_attempts_on_goal)
    shots_on_target = safe_div(tmp_shots_on_target, tmp_attempts_on_goal)
    shots_off_target = safe_div(tmp_shots_off_target, tmp_attempts_on_goal)
    shots_after_left_side_attacks = safe_div(tmp_left_side_attacks_with_shot,
                                             tmp_left_side_attacks)
    shots_after_center_attacks = safe_div(tmp_center_attacks_with_shot,
                                          tmp_center_attacks)
    shots_after_right_side_attacks = safe_div(tmp_right_side_attacks_with_shot,
                                              tmp_right_side_attacks)

    # Crossing
    tmp_crosses = team_stats.get('Cross', 0)
    tmp_direct_crosses_into_the_area = team_stats.get('DirectCrossintotheArea', 0)
    # tmp_headers = team_stats.get('Header', 0)
    direct_crosses_into_the_area = safe_div(tmp_direct_crosses_into_the_area,
                                            tmp_crosses)

    # Passing
    tmp_passes = team_stats.get('passes', 0)
    # tmp_non_attacking_passes = team_stats.get('nonAttackingPasses', 0)
    tmp_attacking_passes = team_stats.get('attackingPasses', 0)
    # tmp_accurate_passes = team_stats.get('accuratePasses', 0)
    tmp_key_passes = team_stats.get('keyPasses', 0)
    # tmp_long_balls = team_stats.get('longBall', 0)
    # tmp_accurate_long_balls = team_stats.get('accurateLongBall', 0)
    attacking_passes = safe_div(tmp_attacking_passes, tmp_passes)
    key_passes = safe_div(tmp_key_passes, tmp_attacking_passes)

    # Challenges
    tmp_air_challenges = team_stats.get('airChallenge', 0)
    tmp_air_challenges_won = team_stats.get('wonAirChallenge', 0)
    tmp_ground_challenges = team_stats.get('groundChallenge', 0)
    tmp_ground_challenges_won = team_stats.get('wonGroundChallenge', 0)
    tmp_dribbles = team_stats.get('dribble', 0)
    tmp_dribbles_won = team_stats.get('wonDribble', 0)

    air_challenges_won = safe_div(tmp_air_challenges_won, tmp_air_challenges)
    ground_challenges_won = safe_div(tmp_ground_challenges_won,
                                     tmp_ground_challenges)
    dribbles_won = safe_div(tmp_dribbles_won, tmp_dribbles)

    self.goals = tmp_regular_goals
    self.ball_possession = ball_possession
    self.own_half_ball_losses = own_half_ball_losses
    self.opponent_half_ball_recoveries = opponent_half_ball_recoveries
    self.own_half_ball_recoveries = own_half_ball_recoveries
    self.successful_tackles = successful_tackles
    self.fouls = fouls
    self.yellow_cards = yellow_cards
    self.red_cards = red_cards
    self.penalty_kick_goals = penalty_kick_goals
    self.shots_on_goal = shots_on_goal
    self.shots_inside_the_area = shots_inside_the_area
    self.shots_outside_the_area = shots_outside_the_area
    self.shots_on_target = shots_on_target
    self.shots_off_target = shots_off_target
    self.shots_after_left_side_attacks = shots_after_left_side_attacks
    self.shots_after_center_attacks = shots_after_center_attacks
    self.shots_after_right_side_attacks = shots_after_right_side_attacks
    self.direct_crosses_into_the_area = direct_crosses_into_the_area
    self.attacking_passes = attacking_passes
    self.key_passes = key_passes
    self.air_challenges_won = air_challenges_won
    self.ground_challenges_won = ground_challenges_won
    self.dribbles_won = dribbles_won
def add_player_stats(self, player_stats: Player):
    self.own_half_ball_losses += safe_div(player_stats.own_half_lost_ball,
                                          player_stats.lost_ball) / 11
    self.opponent_half_ball_recoveries += safe_div(
        player_stats.ball_recovery_in_opp_half,
        player_stats.ball_recovery) / 11
    self.own_half_ball_recoveries += safe_div(
        player_stats.ball_recovery_in_own_half,
        player_stats.ball_recovery) / 11
    self.successful_tackles += safe_div(player_stats.tackles_success,
                                        player_stats.tackles) / 11
    self.fouls += safe_div(player_stats.foul, player_stats.tackles) / 11
    self.yellow_cards += safe_div(player_stats.yellow_card,
                                  player_stats.foul) / 11
    self.red_cards += safe_div(player_stats.red_card, player_stats.foul) / 11
    self.penalty_kick_goals += safe_div(player_stats.penalty_shot_goal,
                                        player_stats.penalty_kick) / 11
    self.shots_on_goal += safe_div(player_stats.regular_goals,
                                   player_stats.attempts_on_goal) / 11
    self.shots_inside_the_area += safe_div(
        player_stats.shots_inside_the_area,
        player_stats.attempts_on_goal) / 11
    self.shots_outside_the_area += safe_div(
        player_stats.shots_outside_the_area,
        player_stats.attempts_on_goal) / 11
    self.shots_on_target += safe_div(player_stats.shots_on_target,
                                     player_stats.attempts_on_goal) / 11
    self.shots_off_target += safe_div(player_stats.shots_off_target,
                                      player_stats.attempts_on_goal) / 11
    self.shots_after_right_side_attacks += safe_div(
        player_stats.right_side_attacks_with_shot,
        player_stats.right_side_attacks) / 11
    self.shots_after_center_attacks += safe_div(
        player_stats.center_attacks_with_shot,
        player_stats.center_attacks) / 11
    self.shots_after_left_side_attacks += safe_div(
        player_stats.left_side_attacks_with_shot,
        player_stats.left_side_attacks) / 11
    self.direct_crosses_into_the_area += safe_div(
        player_stats.direct_crosses_into_the_area,
        player_stats.crosses) / 11
    self.attacking_passes += safe_div(player_stats.attacking_passes,
                                      player_stats.passes) / 11
    self.key_passes += safe_div(player_stats.key_passes,
                                player_stats.attacking_passes) / 11
    self.air_challenges_won += safe_div(player_stats.air_challenge_won,
                                        player_stats.air_challenge) / 11
    self.ground_challenges_won += safe_div(
        player_stats.ground_challenge_won,
        player_stats.ground_challenge) / 11
    self.dribbles_won += safe_div(player_stats.won_dribbles,
                                  player_stats.dribbles) / 11
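# Each per-player ratio is divided by 11, so summing the contributions of a
# full starting line-up gives a team-level average of the individual ratios.
# A hedged usage sketch; `TeamStats` and `starting_eleven` are assumed names
# for the surrounding code, not taken from it.
team = TeamStats()                  # all ratio fields start at 0
for player in starting_eleven:      # assumed iterable of 11 Player objects
    team.add_player_stats(player)
# team.successful_tackles now holds the mean per-player tackle success rate.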
def ewk_tf(signal, control):
    return utils.safe_div(signal, control)
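# ewk_tf is simply an element-wise transfer factor: the EWK yield in the
# signal (hadronic) selection divided by the yield in the control (muon)
# selection. A hedged usage sketch; had_ewk_mc and mu_ewk_mc are illustrative
# names for per-bin EWK MC yields (e.g. arrays of uncertainties.ufloat).
tf_ewk = ewk_tf(had_ewk_mc, mu_ewk_mc)
ewk_prediction = tf_ewk * mu_data    # data-driven EWK estimate per bin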
tab.add_table(effs[index][3], "Had trigger effs (stat. uncert.)")
tab.alphat_bins(alphat_bins)
tab.mhtmet_bins(mhtmet_bins)
tab.add_table(diff, "Rebinned had trigger effs (stat. uncert.)")
#tab.add_table(effs[index][5], "Had trigger syst. uncert.")
#tab.add_table(mu_effs*mu_syst, "Muon trigger effs (total uncert.)")

tab.newpage("Hadronic data and MC yields")
had_data_raw = copy(had_data)
if use_data is True:
    had_data = had_data / diff
    tab.add_table(had_data, "Had data (trigger eff corrected)")
else:
    tab.add_table(had_data, "Had data (taken from SM MC)")
tab.add_table(had_sm, "Had SM MC (scaled to correct lumi)")
tab.add_table(utils.safe_div(had_data, had_sm), "Had data / SM MC (stat. uncert.)")

tab.newpage("Hadronic EWK and QCD yields from MC")
tab.add_table(had_ewk, "Had EWK MC (scaled to correct lumi)")
tab.add_table(had_qcd, "Had QCD MC (scaled to correct lumi)")
tab.add_table(utils.safe_div(had_qcd, had_sm), "Fraction QCD/SM MC (scaled to correct lumi)")

tab.newpage("Muon data and MC yields")
mu_data_raw = copy(mu_data)
if use_data is True:
    mu_data = mu_data / (mu_effs * mu_syst)
    tab.add_table(mu_data, "Mu data (trigger eff corrected)")
else:
    tab.add_table(mu_data, "Mu data (taken from SM MC)")
tab.add_table(mu_sm, "Mu SM MC (scaled to correct lumi)")
tab.add_table(utils.safe_div(mu_data, mu_sm), "Mu data / SM MC (stat. uncert.)")
def parse_opened_exp(exp, exp_flp, exp_dir, out_flp, skip_smoothed): """ Parses an experiment. Returns the smallest safe window size. """ print(f"Parsing: {exp_flp}") if exp.name.startswith("FAILED"): print(f"Error: Experimant failed: {exp_flp}") return -1 if exp.tot_flws == 0: print(f"Error: No flows to analyze in: {exp_flp}") return -1 # Determine flow src and dst ports. params_flp = path.join(exp_dir, f"{exp.name}.json") if not path.exists(params_flp): print(f"Error: Cannot find params file ({params_flp}) in: {exp_flp}") return -1 with open(params_flp, "r") as fil: params = json.load(fil) # Dictionary mapping a flow to its flow's CCA. Each flow is a tuple of the # form: (client port, server port) # # { (client port, server port): CCA } flw_to_cca = {(client_port, flw[4]): flw[0] for flw in params["flowsets"] for client_port in flw[3]} flws = list(flw_to_cca.keys()) client_pcap = path.join(exp_dir, f"client-tcpdump-{exp.name}.pcap") server_pcap = path.join(exp_dir, f"server-tcpdump-{exp.name}.pcap") if not (path.exists(client_pcap) and path.exists(server_pcap)): print(f"Warning: Missing pcap file in: {exp_flp}") return -1 flw_to_pkts_client = utils.parse_packets(client_pcap, flw_to_cca) flw_to_pkts_server = utils.parse_packets(server_pcap, flw_to_cca) # Determine the path to the bottleneck queue log file. toks = exp.name.split("-") q_log_flp = path.join( exp_dir, "-".join(toks[:-1]) + "-forward-bottleneckqueue-" + toks[-1] + ".log") q_log = None if path.exists(q_log_flp): q_log = list(enumerate(utils.parse_queue_log(q_log_flp))) # Transform absolute times into relative times to make life easier. # # Determine the absolute earliest time observed in the experiment. earliest_time_us = min(first_time_us for bounds in [ get_time_bounds(flw_to_pkts_client, direction="data"), get_time_bounds(flw_to_pkts_client, direction="ack"), get_time_bounds(flw_to_pkts_server, direction="data"), get_time_bounds(flw_to_pkts_server, direction="ack") ] for first_time_us, _ in bounds) # Subtract the earliest time from all times. for flw in flws: flw_to_pkts_client[flw][0][ features.ARRIVAL_TIME_FET] -= earliest_time_us flw_to_pkts_client[flw][1][ features.ARRIVAL_TIME_FET] -= earliest_time_us flw_to_pkts_server[flw][0][ features.ARRIVAL_TIME_FET] -= earliest_time_us flw_to_pkts_server[flw][1][ features.ARRIVAL_TIME_FET] -= earliest_time_us assert (flw_to_pkts_client[flw][0][features.ARRIVAL_TIME_FET] >= 0).all() assert (flw_to_pkts_client[flw][1][features.ARRIVAL_TIME_FET] >= 0).all() assert (flw_to_pkts_server[flw][0][features.ARRIVAL_TIME_FET] >= 0).all() assert (flw_to_pkts_server[flw][1][features.ARRIVAL_TIME_FET] >= 0).all() flws_time_bounds = get_time_bounds(flw_to_pkts_server, direction="data") # Process PCAP files from senders and receivers. # The final output, with one entry per flow. flw_results = {} # Keep track of the number of erroneous throughputs (i.e., higher than the # experiment bandwidth) for each window size. win_to_errors = {win: 0 for win in features.WINDOWS} # Create the (super-complicated) dtype. The dtype combines each metric at # multiple granularities. dtype = (features.REGULAR + ([] if skip_smoothed else features.make_smoothed_features())) for flw_idx, flw in enumerate(flws): cca = flw_to_cca[flw] # Copa and PCC Vivace use packet-based sequence numbers as opposed to # TCP's byte-based sequence numbers. 
packet_seq = cca in {"copa", "vivace"} snd_data_pkts, snd_ack_pkts = flw_to_pkts_client[flw] recv_data_pkts, recv_ack_pkts = flw_to_pkts_server[flw] first_data_time_us = recv_data_pkts[0][features.ARRIVAL_TIME_FET] # The final output. -1 implies that a value could not be calculated. output = np.full(len(recv_data_pkts), -1, dtype=dtype) # If this flow does not have any packets, then skip it. skip = False if snd_data_pkts.shape[0] == 0: skip = True print(f"Warning: No data packets sent for flow {flw_idx} in: " f"{exp_flp}") if recv_data_pkts.shape[0] == 0: skip = True print(f"Warning: No data packets received for flow {flw_idx} in: " f"{exp_flp}") if recv_ack_pkts.shape[0] == 0: skip = True print(f"Warning: No ACK packets sent for flow {flw_idx} in: " f"{exp_flp}") if skip: flw_results[flw] = output continue # State that the windowed metrics need to track across packets. win_state = { win: { # The index at which this window starts. "window_start_idx": 0, # The "loss event rate". "loss_interval_weights": make_interval_weight(8), "loss_event_intervals": collections.deque(), "current_loss_event_start_idx": 0, "current_loss_event_start_time": 0, } for win in features.WINDOWS } # Total number of packet losses up to the current received # packet. pkt_loss_total_estimate = 0 # Loss rate estimation. prev_seq = None prev_payload_B = None highest_seq = None # Use for Copa RTT estimation. snd_ack_idx = 0 snd_data_idx = 0 # Use for TCP and PCC Vivace RTT estimation. recv_ack_idx = 0 # Track which packets are definitely retransmissions. Ignore these # packets when estimating the RTT. Note that because we are doing # receiver-side retransmission tracking, it is possible that there are # other retransmissions that we cannot detect. # # All sequence numbers that have been received. unique_pkts = set() # Sequence numbers that have been received multiple times. retrans_pkts = set() for j, recv_pkt in enumerate(recv_data_pkts): if j % 1000 == 0: print(f"\tFlow {flw_idx + 1}/{exp.tot_flws}: " f"{j}/{len(recv_data_pkts)} packets") # Whether this is the first packet. first = j == 0 # Note that Copa and Vivace use packet-level sequence numbers # instead of TCP's byte-level sequence numbers. recv_seq = recv_pkt[features.SEQ_FET] output[j][features.SEQ_FET] = recv_seq retrans = (recv_seq in unique_pkts or (prev_seq is not None and prev_payload_B is not None and (prev_seq + (1 if packet_seq else prev_payload_B)) > recv_seq)) if retrans: # If this packet is a multiple retransmission, then this line # has no effect. retrans_pkts.add(recv_seq) # If this packet has already been seen, then this line has no # effect. unique_pkts.add(recv_seq) recv_time_cur_us = recv_pkt[features.ARRIVAL_TIME_FET] output[j][features.ARRIVAL_TIME_FET] = recv_time_cur_us payload_B = recv_pkt[features.PAYLOAD_FET] wirelen_B = recv_pkt[features.WIRELEN_FET] output[j][features.PAYLOAD_FET] = payload_B output[j][features.WIRELEN_FET] = wirelen_B output[j][features.TOTAL_SO_FAR_FET] = ( (0 if first else output[j - 1][features.TOTAL_SO_FAR_FET]) + wirelen_B) output[j][features.PAYLOAD_SO_FAR_FET] = ( (0 if first else output[j - 1][features.PAYLOAD_SO_FAR_FET]) + payload_B) # Count how many flows were active when this packet was captured. 
active_flws = sum( 1 for first_time_us, last_time_us in flws_time_bounds if first_time_us <= recv_time_cur_us <= last_time_us) assert active_flws > 0, \ (f"Error: No active flows detected for packet {j} of " f"flow {flw_idx} in: {exp_flp}") output[j][features.ACTIVE_FLOWS_FET] = active_flws output[j][features.BW_FAIR_SHARE_FRAC_FET] = utils.safe_div( 1, active_flws) output[j][features.BW_FAIR_SHARE_BPS_FET] = utils.safe_div( exp.bw_bps, active_flws) # Calculate RTT-related metrics. rtt_us = -1 if not first and recv_seq != -1 and not retrans: if cca == "copa": # In a Copa ACK, the sender timestamp is the time at which # the corresponding data packet was sent. The receiver # timestamp is the time that the data packet was received # and the ACK was sent. This enables sender-side RTT # estimation. However, because the sender does not echo a # value back to the receiver, this cannot be used for # receiver-size RTT estimation. # # For now, we will just do sender-side RTT estimation. When # selecting which packets to use for the RTT estimate, we # will select the packet/ACK pair whose ACK arrived soonest # before packet j was sent. This means that the sender would # have been able to calculate this RTT estimate before # sending packet j, and could very well have included the # RTT estimate in packet j's header. # # First, find the index of the ACK that was received soonest # before packet j was sent. snd_ack_idx = utils.find_bound( snd_ack_pkts[features.SEQ_FET], recv_seq, snd_ack_idx, snd_ack_pkts.shape[0] - 1, which="before") snd_ack_seq = snd_ack_pkts[snd_ack_idx][features.SEQ_FET] # Then, find this ACK's data packet. snd_data_seq = snd_data_pkts[snd_data_idx][ features.SEQ_FET] while snd_data_idx < snd_data_pkts.shape[0]: snd_data_seq = snd_data_pkts[snd_data_idx][ features.SEQ_FET] if snd_data_seq == snd_ack_seq: # Third, the RTT is the difference between the # sending time of the data packet and the arrival # time of its ACK. rtt_us = (snd_ack_pkts[snd_ack_idx][ features.ARRIVAL_TIME_FET] - snd_data_pkts[snd_data_idx][ features.ARRIVAL_TIME_FET]) assert rtt_us >= 0, \ (f"Error: Calculated negative RTT ({rtt_us} " f"us) for packet {j} of flow {flw} in: " f"{exp_flp}") break snd_data_idx += 1 elif cca == "vivace": # UDT ACKs may contain the RTT. Find the last ACK to be sent # by the receiver before packet j was received. recv_ack_idx = utils.find_bound( recv_ack_pkts[features.ARRIVAL_TIME_FET], recv_time_cur_us, recv_ack_idx, recv_ack_pkts.shape[0] - 1, which="before") udt_rtt_us = recv_ack_pkts[recv_ack_idx][features.TS_1_FET] if udt_rtt_us > 0: # The RTT is an optional field in UDT ACK packets. I # assume that this means that if the RTT is not # included, then the field will be 0. rtt_us = udt_rtt_us else: # This is a TCP flow. Do receiver-side RTT estimation using # the TCP timestamp option. Attempt to find a new RTT # estimate. Move recv_ack_idx to the first occurance of the # timestamp option TSval corresponding to the current # packet's TSecr. recv_ack_idx_old = recv_ack_idx tsval = recv_ack_pkts[recv_ack_idx][features.TS_1_FET] tsecr = recv_pkt[features.TS_2_FET] while recv_ack_idx < recv_ack_pkts.shape[0]: tsval = recv_ack_pkts[recv_ack_idx][features.TS_1_FET] if tsval == tsecr: # If we found a timestamp option match, then update # the RTT estimate. 
rtt_us = (recv_time_cur_us - recv_ack_pkts[recv_ack_idx][ features.ARRIVAL_TIME_FET]) break recv_ack_idx += 1 else: # If we never found a matching tsval, then use the # previous RTT estimate and reset recv_ack_idx to search # again on the next packet. rtt_us = output[j - 1][features.RTT_FET] recv_ack_idx = recv_ack_idx_old recv_time_prev_us = (-1 if first else output[j - 1][features.ARRIVAL_TIME_FET]) interarr_time_us = utils.safe_sub(recv_time_cur_us, recv_time_prev_us) output[j][features.INTERARR_TIME_FET] = interarr_time_us output[j][features.INV_INTERARR_TIME_FET] = utils.safe_mul( 8 * 1e6 * wirelen_B, utils.safe_div(1, interarr_time_us)) output[j][features.RTT_FET] = rtt_us min_rtt_us = utils.safe_min( sys.maxsize if first else output[j - 1][features.MIN_RTT_FET], rtt_us) output[j][features.MIN_RTT_FET] = min_rtt_us rtt_estimate_ratio = utils.safe_div(rtt_us, min_rtt_us) output[j][features.RTT_RATIO_FET] = rtt_estimate_ratio # Receiver-side loss rate estimation. Estimate the number of lost # packets since the last packet. Do not try anything complex or # prone to edge cases. Consider only the simple case where the last # packet and current packet are in order and not retransmissions. pkt_loss_cur_estimate = ( -1 if (recv_seq == -1 or prev_seq is None or prev_seq == -1 or prev_payload_B is None or prev_payload_B <= 0 or payload_B <= 0 or highest_seq is None or # The last packet was a retransmission. highest_seq != prev_seq or # The current packet is a retransmission. retrans) else round( (recv_seq - (1 if packet_seq else prev_payload_B) - prev_seq) / (1 if packet_seq else payload_B))) if pkt_loss_cur_estimate != -1: pkt_loss_total_estimate += pkt_loss_cur_estimate loss_rate_cur = utils.safe_div( pkt_loss_cur_estimate, utils.safe_add(pkt_loss_cur_estimate, 1)) output[j][features.PACKETS_LOST_FET] = pkt_loss_cur_estimate output[j][features.LOSS_RATE_FET] = loss_rate_cur # EWMA metrics. for (metric, _), alpha in itertools.product(features.EWMAS, features.ALPHAS): if skip_smoothed: continue metric = features.make_ewma_metric(metric, alpha) if metric.startswith(features.INTERARR_TIME_FET): new = interarr_time_us elif metric.startswith(features.INV_INTERARR_TIME_FET): # Do not use the interarrival time EWMA to calculate the # inverse interarrival time. Instead, use the true inverse # interarrival time so that the value used to update the # inverse interarrival time EWMA is not "EWMA-ified" twice. new = output[j][features.INV_INTERARR_TIME_FET] elif metric.startswith(features.RTT_FET): new = rtt_us elif metric.startswith(features.RTT_RATIO_FET): new = rtt_estimate_ratio elif metric.startswith(features.LOSS_RATE_FET): new = loss_rate_cur elif metric.startswith(features.MATHIS_TPUT_FET): # tput = (MSS / RTT) * (C / sqrt(p)) new = utils.safe_mul( utils.safe_div( utils.safe_mul(8, output[j][features.PAYLOAD_FET]), utils.safe_div(output[j][features.RTT_FET], 1e6)), utils.safe_div(MATHIS_C, utils.safe_sqrt(loss_rate_cur))) else: raise Exception(f"Unknown EWMA metric: {metric}") # Update the EWMA. If this is the first value, then use 0 are # the old value. output[j][metric] = utils.safe_update_ewma( -1 if first else output[j - 1][metric], new, alpha) # If we cannot estimate the min RTT, then we cannot compute any # windowed metrics. if min_rtt_us != -1: # Move the window start indices later in time. The min RTT # estimate will never increase, so we do not need to investigate # whether the start of the window moved earlier in time. 
for win in features.WINDOWS: win_state[win]["window_start_idx"] = utils.find_bound( output[features.ARRIVAL_TIME_FET], target=recv_time_cur_us - (win * min_rtt_us), min_idx=win_state[win]["window_start_idx"], max_idx=j, which="after") # Windowed metrics. for (metric, _), win in itertools.product(features.WINDOWED, features.WINDOWS): # If we cannot estimate the min RTT, then we cannot compute any # windowed metrics. if skip_smoothed or min_rtt_us == -1: continue # Calculate windowed metrics only if an entire window has # elapsed since the start of the flow. win_size_us = win * min_rtt_us if recv_time_cur_us - first_data_time_us < win_size_us: continue # A window requires at least two packets. Note that this means # the the first packet will always be skipped. win_start_idx = win_state[win]["window_start_idx"] if win_start_idx == j: continue metric = features.make_win_metric(metric, win) if metric.startswith(features.INTERARR_TIME_FET): new = utils.safe_div( utils.safe_sub( recv_time_cur_us, output[win_start_idx][features.ARRIVAL_TIME_FET]), j - win_start_idx) elif metric.startswith(features.INV_INTERARR_TIME_FET): new = utils.safe_mul( 8 * 1e6 * wirelen_B, utils.safe_div( 1, output[j][features.make_win_metric( features.INTERARR_TIME_FET, win)])) elif metric.startswith(features.TPUT_FET): # Treat the first packet in the window as the beginning of # time. Calculate the average throughput over all but the # first packet. # # Sum up the payloads of the packets in the window. total_bytes = utils.safe_sum(output[features.WIRELEN_FET], start_idx=win_start_idx + 1, end_idx=j) # Divide by the duration of the window. start_time_us = ( output[win_start_idx][features.ARRIVAL_TIME_FET] if win_start_idx >= 0 else -1) end_time_us = output[j][features.ARRIVAL_TIME_FET] tput_bps = utils.safe_div( utils.safe_mul(total_bytes, 8), utils.safe_div( utils.safe_sub(end_time_us, start_time_us), 1e6)) # If the throughput exceeds the bandwidth, then record a # warning and do not record this throughput. if tput_bps != -1 and tput_bps > exp.bw_bps: win_to_errors[win] += 1 continue elif metric.startswith(features.TPUT_SHARE_FRAC_FET): # This is calculated at the end. continue elif metric.startswith(features.TOTAL_TPUT_FET): # This is calcualted at the end. continue elif metric.startswith(features.TPUT_FAIR_SHARE_BPS_FET): # This is calculated at the end. continue elif metric.startswith(features.TPUT_TO_FAIR_SHARE_RATIO_FET): # This is calculated at the end. continue elif metric.startswith(features.RTT_FET): new = utils.safe_mean(output[features.RTT_FET], win_start_idx, j) elif metric.startswith(features.RTT_RATIO_FET): new = utils.safe_mean(output[features.RTT_RATIO_FET], win_start_idx, j) elif metric.startswith(features.LOSS_EVENT_RATE_FET): rtt_us = output[j][features.make_win_metric( features.RTT_FET, win)] if rtt_us == -1: # The RTT estimate is -1 (unknown), so we # cannot compute the loss event rate. continue cur_start_idx = win_state[win][ "current_loss_event_start_idx"] cur_start_time = win_state[win][ "current_loss_event_start_time"] if pkt_loss_cur_estimate > 0: # There was a loss since the last packet. # # The index of the first packet in the current # loss event. new_start_idx = (j + pkt_loss_total_estimate - pkt_loss_cur_estimate) if cur_start_idx == 0: # This is the first loss event. # # Naive fix for the loss event rate # calculation The described method in the # RFC is complicated for the first event # handling. cur_start_idx = 1 cur_start_time = 0 new = 1 / j else: # This is not the first loss event. 
See if # any of the newly-lost packets start a # new loss event. # # The average time between when packets # should have arrived, since we received # the last packet. loss_interval = ( (recv_time_cur_us - recv_time_prev_us) / (pkt_loss_cur_estimate + 1)) # Look at each lost packet... for k in range(pkt_loss_cur_estimate): # Compute the approximate time at # which the packet should have been # received if it had not been lost. loss_time = (recv_time_prev_us + (k + 1) * loss_interval) # If the time of this loss is more # than one RTT from the time of the # start of the current loss event, # then this is a new loss event. if loss_time - cur_start_time >= rtt_us: # Record the number of packets # between the start of the new # loss event and the start of the # previous loss event. win_state[win][ "loss_event_intervals"].appendleft( new_start_idx - cur_start_idx) # Potentially discard an old event. if len(win_state[win] ["loss_event_intervals"]) > win: win_state[win][ "loss_event_intervals"].pop() cur_start_idx = new_start_idx cur_start_time = loss_time # Calculate the index at which the # new loss event begins. new_start_idx += 1 new = compute_weighted_average( (j + pkt_loss_total_estimate - cur_start_idx), win_state[win]["loss_event_intervals"], win_state[win]["loss_interval_weights"]) elif pkt_loss_total_estimate > 0: # There have been no losses since the last # packet, but the total loss is nonzero. # Increase the size of the current loss event. new = compute_weighted_average( j + pkt_loss_total_estimate - cur_start_idx, win_state[win]["loss_event_intervals"], win_state[win]["loss_interval_weights"]) else: # There have never been any losses, so the # loss event rate is 0. new = 0 # Record the new values of the state variables. win_state[win][ "current_loss_event_start_idx"] = cur_start_idx win_state[win][ "current_loss_event_start_time"] = cur_start_time elif metric.startswith(features.SQRT_LOSS_EVENT_RATE_FET): # Use the loss event rate to compute # 1 / sqrt(loss event rate). new = utils.safe_div( 1, utils.safe_sqrt(output[j][features.make_win_metric( features.LOSS_EVENT_RATE_FET, win)])) elif metric.startswith(features.LOSS_RATE_FET): win_losses = utils.safe_sum( output[features.PACKETS_LOST_FET], win_start_idx + 1, j) new = utils.safe_div(win_losses, win_losses + (j - win_start_idx)) elif metric.startswith(features.MATHIS_TPUT_FET): # tput = (MSS / RTT) * (C / sqrt(p)) new = utils.safe_mul( utils.safe_div( utils.safe_mul(8, output[j][features.PAYLOAD_FET]), utils.safe_div(output[j][features.RTT_FET], 1e6)), utils.safe_div( MATHIS_C, utils.safe_sqrt(output[j][features.make_win_metric( features.LOSS_EVENT_RATE_FET, win)]))) else: raise Exception(f"Unknown windowed metric: {metric}") output[j][metric] = new prev_seq = recv_seq prev_payload_B = payload_B highest_seq = (prev_seq if highest_seq is None else max( highest_seq, prev_seq)) # In the event of sequence number wraparound, reset the sequence # number tracking. # # TODO: Test sequence number wraparound logic. if (recv_seq != -1 and recv_seq + (1 if packet_seq else payload_B) > 2**32): print( "Warning: Sequence number wraparound detected for packet " f"{j} of flow {flw} in: {exp_flp}") highest_seq = None prev_seq = None # Get the sequence number of the last received packet. last_seq = output[-1][features.SEQ_FET] if last_seq == -1: print("Warning: Unable to calculate retransmission or bottleneck " "queue drop rates due to unknown last sequence number for " f"(UDP?) 
flow {flw_idx} in: {exp_flp}") else: # Calculate the true number of retransmissions using the sender # traces. # # Truncate the sent packets at the last occurence of the last packet to # be received. # # Find when the last received packet was sent. Assume that if this # packet was retransmitted, then the last retransmission is the one # that arrived at the receiver (which may be an incorrect # assumption). snd_idx = len(snd_data_pkts) - 1 while snd_idx >= 0: if snd_data_pkts[snd_idx][features.SEQ_FET] == last_seq: # unique_snd_pkts, counts = np.unique( # snd_data_pkts[:snd_idx + 1][features.SEQ_FET], # return_counts=True) # unique_snd_pkts = unique_snd_pkts.tolist() # counts = counts.tolist() # all_retrans = [ # (seq, counts) # for seq, counts in zip(unique_snd_pkts, counts) # if counts > 1] # tot_pkts = snd_idx + 1 # The retransmission rate is: # 1 - unique packets / total packets. output[-1][features.RETRANS_RATE_FET] = ( 1 - # Find the number of unique sequence numbers, from the # beginning up until when the last received packet was # sent. np.unique(snd_data_pkts[:snd_idx + 1][features.SEQ_FET] ).shape[0] / # Convert from index to packet count. (snd_idx + 1)) break snd_idx -= 1 else: print("Warning: Did not find when the last received packet " f"(seq: {last_seq}) was sent for flow {flw_idx} in: " f"{exp_flp}") # Calculate the true drop rate at the bottleneck queue using the # bottleneck queue logs. client_port = flw[0] deq_idx = None drop_rate = None if q_log is None: print( f"Warning: Unable to find bottleneck queue log: {q_log_flp}" ) else: # Find the dequeue log corresponding to the last packet that was # received. for record_idx, record in reversed(q_log): if (record[0] == "deq" and record[2] == client_port and record[3] == last_seq): deq_idx = record_idx break if deq_idx is None: print("Warning: Did not find when the last received packet " f"(seq: {last_seq}) was dequeued for flow {flw_idx} in: " f"{exp_flp}") else: # Find the most recent stats log before the last received # packet was dequeued. for _, record in reversed(q_log[:deq_idx]): if record[0] == "stats" and record[1] == client_port: drop_rate = record[4] / (record[2] + record[4]) break if drop_rate is None: print( "Warning: Did not calculate the drop rate at the bottleneck " f"queue for flow {flw_idx} in: {exp_flp}") else: output[-1][features.DROP_RATE_FET] = drop_rate # Make sure that all output rows were used. used_rows = np.sum(output[features.ARRIVAL_TIME_FET] != -1) total_rows = output.shape[0] assert used_rows == total_rows, \ (f"Error: Used only {used_rows} of {total_rows} rows for flow " f"{flw_idx} in: {exp_flp}") flw_results[flw] = output # Save memory by explicitly deleting the sent and received packets # after they have been parsed. This happens outside of the above # for-loop because only the last iteration's packets are not # automatically cleaned up by now (they go out of scope when the # *_pkts variables are overwritten by the next loop). del snd_data_pkts del recv_data_pkts del recv_ack_pkts if not skip_smoothed: # Maps window the index of the packet at the start of that window. win_to_start_idx = {win: 0 for win in features.WINDOWS} # Merge the flow data into a unified timeline. 
combined = [] for flw in flws: num_pkts = flw_results[flw].shape[0] merged = np.empty((num_pkts, ), dtype=[(features.WIRELEN_FET, "int32"), (features.MIN_RTT_FET, "int32"), ("client port", "int32"), ("server port", "int32"), ("index", "int32")]) merged[features.WIRELEN_FET] = flw_results[flw][ features.WIRELEN_FET] merged[features.MIN_RTT_FET] = flw_results[flw][ features.MIN_RTT_FET] merged["client port"].fill(flw[0]) merged["server port"].fill(flw[1]) merged["index"] = np.arange(num_pkts) combined.append(merged) zipped_arr_times, zipped_dat = utils.zip_timeseries( [flw_results[flw][features.ARRIVAL_TIME_FET] for flw in flws], combined) for j in range(zipped_arr_times.shape[0]): min_rtt_us = zipped_dat[j][features.MIN_RTT_FET] if min_rtt_us == -1: continue for win in features.WINDOWS: # The bounds should never go backwards, so start the # search at the current bound. win_to_start_idx[win] = utils.find_bound( zipped_arr_times, target=(zipped_arr_times[j] - (win * zipped_dat[j][features.MIN_RTT_FET])), min_idx=win_to_start_idx[win], max_idx=j, which="after") # If the window's trailing edge caught up with its # leading edge, then skip this flow. if win_to_start_idx[win] >= j: continue total_tput_bps = utils.safe_div( utils.safe_mul( # Accumulate the bytes received by this flow during this # window. When calculating the average throughput, we # must exclude the first packet in the window. utils.safe_sum(zipped_dat[features.WIRELEN_FET], start_idx=win_to_start_idx[win] + 1, end_idx=j), 8 * 1e6), utils.safe_sub(zipped_arr_times[j], zipped_arr_times[win_to_start_idx[win]])) # Check if this throughput is erroneous. if total_tput_bps > exp.bw_bps: win_to_errors[win] += 1 else: # Extract the flow to which this packet belongs, as well as # its index in its flow. flw = tuple(zipped_dat[j][["client port", "server port"]].tolist()) index = zipped_dat[j]["index"] flw_results[flw][index][features.make_win_metric( features.TOTAL_TPUT_FET, win)] = total_tput_bps # Use the total throughput and the number of active flows to # calculate the throughput fair share. flw_results[flw][index][features.make_win_metric( features.TPUT_FAIR_SHARE_BPS_FET, win)] = (utils.safe_div( total_tput_bps, flw_results[flw][index][features.ACTIVE_FLOWS_FET]) ) # Divide the flow's throughput by the total throughput. tput_share = utils.safe_div( flw_results[flw][index][features.make_win_metric( features.TPUT_FET, win)], total_tput_bps) flw_results[flw][index][features.make_win_metric( features.TPUT_SHARE_FRAC_FET, win)] = tput_share # Calculate the ratio of tput share to bandwidth fair share. flw_results[flw][index][features.make_win_metric( features.TPUT_TO_FAIR_SHARE_RATIO_FET, win)] = (utils.safe_div( tput_share, flw_results[flw][index][ features.BW_FAIR_SHARE_FRAC_FET])) print(f"\tFinal window durations in: {exp_flp}:") for win in features.WINDOWS: print( f"\t\t{win}:", ", ".join(f"{dur_us} us" if dur_us > 0 else "unknown" for dur_us in (win * np.asarray([ res[-1][features.MIN_RTT_FET] for res in flw_results.values() ])).tolist())) print(f"\tWindow errors in: {exp_flp}") for win in features.WINDOWS: print(f"\t\t{win}:", win_to_errors[win]) smallest_safe_win = 0 for win in sorted(features.WINDOWS): if win_to_errors[win] == 0: print(f"\tSmallest safe window size is {win} in: {exp_flp}") smallest_safe_win = win break else: print(f"Warning: No safe window sizes in: {exp_flp}") # Determine if there are any NaNs or Infs in the results. 
    # For each flow's results, look through all the features (columns) and
    # note which features contain bad values. Flatten these lists of feature
    # names, using a set comprehension to remove duplicates.
    bad_fets = {
        fet for flw_dat in flw_results.values()
        for fet in flw_dat.dtype.names
        if not np.isfinite(flw_dat[fet]).all()}
    if bad_fets:
        print(f"Warning: Experiment {exp_flp} has NaNs or Infs in features: "
              f"{bad_fets}")

    # Save the results.
    if path.exists(out_flp):
        print(f"\tOutput already exists: {out_flp}")
    else:
        print(f"\tSaving: {out_flp}")
        np.savez_compressed(
            out_flp,
            **{str(k + 1): v
               for k, v in enumerate(flw_results[flw] for flw in flws)})

    return smallest_safe_win
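# The loss event rate branch above hands the recorded loss intervals to
# compute_weighted_average, in the spirit of the TFRC average-loss-interval
# calculation; that helper is not part of this excerpt. The following is a
# minimal, hypothetical sketch of such a helper (the real signature, weights,
# and edge-case handling may differ): it weights the previous loss intervals,
# considers the history both with and without the still-open interval, and
# returns the inverse of the larger average as the loss event rate.
from collections import deque


def compute_weighted_average(cur_interval, intervals, weights):
    # Sketch only. `intervals` holds the sizes of previous loss events
    # (newest first) and `weights` their relative weights; `cur_interval`
    # is the size of the current, still-open loss event.
    def weighted_mean(vals):
        pairs = list(zip(vals, weights))
        total_weight = sum(w for _, w in pairs)
        return (sum(v * w for v, w in pairs) / total_weight
                if total_weight > 0 else 0)

    avg = max(weighted_mean(list(intervals)),
              weighted_mean([cur_interval] + list(intervals)))
    return 1 / avg if avg > 0 else 0


# Example: three closed loss intervals, newest first, with decaying weights.
print(compute_weighted_average(40, deque([25, 30, 50]),
                               [1.0, 1.0, 0.5, 0.5]))  # -> ~0.029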
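# The per-packet features above also lean on utils.safe_update_ewma and on the
# Mathis model, "tput = (MSS / RTT) * (C / sqrt(p))", with MATHIS_C defined
# elsewhere (commonly taken as sqrt(3/2), though the value used here is not
# shown). A minimal sketch of both building blocks, assuming -1 is the
# "unknown" sentinel as in the surrounding code; the project's actual helpers
# may differ.
import math


def safe_update_ewma(old, new, alpha):
    # Sketch only: exponentially weighted moving average where -1 means
    # "no value yet" for either operand.
    if new == -1:
        return old
    if old == -1:
        return new
    return alpha * new + (1 - alpha) * old


def mathis_tput_bps(mss_bits, rtt_s, loss_rate, mathis_c=math.sqrt(3 / 2)):
    # Sketch only: Mathis et al. throughput model. Returns -1 when the
    # inputs do not allow a meaningful estimate.
    if rtt_s <= 0 or loss_rate <= 0:
        return -1
    return (mss_bits / rtt_s) * (mathis_c / math.sqrt(loss_rate))


# Example: 1448-byte segments, 50 ms RTT, 1% loss.
print(mathis_tput_bps(1448 * 8, 0.05, 0.01))  # roughly 2.8 Mbps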
had_data_title = "Had data (taken from SM MC)" had_data_raw = copy(had_data) if use_data is True : had_data = had_data/diff had_data_title = "Had data (trigger eff corrected)" tab.newpage("Hadronic trigger and data") tab.add_table(diff,"Rebinned had trigger effs") tab.add_table(had_data_raw,"Had data raw") tab.add_table(had_data,had_data_title) tab.newpage("Hadronic data and MC yields") tab.add_table(had_data,had_data_title) tab.add_table(had_sm,"Had SM MC") tab.add_table(utils.safe_div(had_data,had_sm),"Had data / SM MC") tab.add_table(utils.safe_div(had_data,had_ewk),"Had data / EWK MC") tab.newpage("Hadronic data and MC yields") had_data_raw = copy(had_data) if use_data is True : had_data = had_data/diff tab.add_table(had_data,"Had data (trigger eff corrected)") else : tab.add_table(had_data,"Had data (taken from SM MC)") tab.add_table(had_sm,"Had SM MC") tab.add_table(utils.safe_div(had_data,had_sm),"Had data / SM MC") tab.newpage("Hadronic EWK and QCD yields from MC") tab.add_table(had_ewk,"Had EWK MC") tab.add_table(had_qcd,"Had QCD MC")
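# In the table-building snippet above, the trigger-efficiency correction
# "had_data = had_data/diff" uses plain division, while the data/MC ratios go
# through utils.safe_div. If any efficiency bin can be zero (or carry a NaN),
# the correction could be guarded the same way. A hypothetical variant,
# assuming had_data and diff are the same yield/efficiency tables used above:
if use_data is True :
    had_data = utils.safe_div(had_data, diff)
    had_data_title = "Had data (trigger eff corrected)"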
def ewk_tf(signal, control): return utils.safe_div(signal, control)
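# ewk_tf above is the electroweak transfer factor: the ratio of signal-region
# to control-region yields, with utils.safe_div guarding empty control bins.
# The utils.safe_div used throughout this file is not reproduced here; the
# sketch below shows one plausible NumPy-based guarded division together with
# an example call. (The analysis tables actually hold ufloat values, so the
# real helper presumably handles those; plain floats are used here purely for
# illustration.)
import numpy as np


def safe_div(numerator, denominator, default=np.nan):
    # Sketch only: element-wise division that returns `default` wherever the
    # result would be inf or NaN (e.g. a zero denominator).
    numerator = np.asarray(numerator, dtype=float)
    denominator = np.asarray(denominator, dtype=float)
    with np.errstate(divide="ignore", invalid="ignore"):
        out = numerator / denominator
    return np.where(np.isfinite(out), out, default)


# Example transfer factor: per-bin signal-region vs. control-region yields.
signal = np.array([12.0, 0.0, 7.5])
control = np.array([30.0, 4.0, 0.0])
print(safe_div(signal, control))  # -> 0.4, 0.0, nan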