Example #1
    def train(self, fets, dat_in, dat_out):
        """ Fits this model to the provided dataset. """
        utils.assert_tensor(dat_in=dat_in, dat_out=dat_out)
        utils.check_fets(fets, self.in_spc)

        self.net.fit(dat_in, dat_out)
        # Oftentimes, the training log statements do not end with a newline.
        print()
Example #2
 def _check_output_helper(self, out):
     """
     Convert the raw network output into classes. out must be a torch Tensor.
     """
     utils.assert_tensor(out=out)
     # Remove a trailing dimension of size 1.
     out = torch.reshape(out, (out.size()[0], ))
     # Transform the output to report classes -1 and 1.
     # out[torch.where(out < 0)] = -1
     # out[torch.where(out >= 0)] = 1
     return out
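
The two commented-out lines binarize the flattened output by sign into the classes -1 and 1. A minimal standalone sketch of that transform on a hypothetical raw output tensor, in case it is re-enabled:

import torch

# Hypothetical raw scalar outputs for a batch of three samples.
out = torch.tensor([[-0.3], [0.8], [0.0]])
# Remove the trailing dimension of size 1, as in the helper above.
out = torch.reshape(out, (out.size()[0], ))
# Sign-based binarization (the commented-out transform).
out[torch.where(out < 0)] = -1
out[torch.where(out >= 0)] = 1
print(out)  # tensor([-1.,  1.,  1.])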
Example #3
 def _check_output_helper(self, out):
     """
     Convert the raw network output into classes. out must be a torch Tensor.
     """
     utils.assert_tensor(out=out)
     # Assume a one-hot encoding of class probabilities. The class
     # is the index of the output entry with greatest value (i.e.,
     # highest probability). Set dim=1 because the first dimension
     # is the batch.
     size_out = out.size()
     assert size_out[1] == self.num_clss, \
         (f"Expecting one-hot encoding for {self.num_clss} classes, but "
          f"found size: {size_out}")
     return torch.argmax(out, dim=1)
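
The conversion above is a plain argmax over the class dimension. A minimal standalone sketch (outside the class, with a hypothetical batch of two samples and three classes) illustrates the behavior:

import torch

# Hypothetical class probabilities: batch of 2 samples, 3 classes.
out = torch.tensor([[0.1, 0.7, 0.2],
                    [0.8, 0.1, 0.1]])
# dim=1 picks the most probable class per sample; dim=0 is the batch.
print(torch.argmax(out, dim=1))  # tensor([1, 0])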
Example #4
    def __evaluate_sliding_window(self, preds, raw, fair, arr_times,
                                  rtt_estimates_us):
        """
        Returns the sliding window accuracy of predictions based
        on the rtt estimate and the window size. all arguments must be Torch
        tensors.

        preds: Prediction produced by the model
        raw: Raw values of the queue occupancy
        fair: Fair share of the flow
        arr_times: The arrival times of each sample.
        rtt_estimates_us: Rtt estimates computed on the receiver side
        """
        utils.assert_tensor(preds=preds,
                            raw=raw,
                            fair=fair,
                            arr_times=arr_times,
                            rtt_estimates_us=rtt_estimates_us)
        num_pkts = len(raw)
        assert len(preds) == num_pkts
        assert len(fair) == num_pkts
        assert len(arr_times) == num_pkts
        assert len(rtt_estimates_us) == num_pkts

        # Compute sliding window accuracy based on arrival time and RTT
        sliding_window_accuracy = [0] * num_pkts
        window_head = 0
        max_packet = 0
        for i in range(num_pkts):
            recv_time = arr_times[i]
            window_size_us = SLIDING_WINDOW_NUM_RTT * rtt_estimates_us[i]
            while (window_head < num_pkts
                   and recv_time - arr_times[window_head] >= window_size_us):
                window_head += 1

            max_packet = max(max_packet, i - window_head)
            queue_occupancy = torch.mean(raw[window_head:i + 1])
            label = torch.mean(preds[window_head:i + 1].type(torch.float))
            sliding_window_accuracy[i] = (
                int(label >= SMOOTHING_THRESHOLD)
                if queue_occupancy > fair[i]
                else int(label < SMOOTHING_THRESHOLD))
        return sum(sliding_window_accuracy) / len(sliding_window_accuracy)
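
The core of this method is the trailing window over the arrival times, sized as a multiple of the current RTT estimate. A minimal standalone sketch of just that windowing, assuming a value of 1 for SLIDING_WINDOW_NUM_RTT and synthetic arrival times:

import torch

SLIDING_WINDOW_NUM_RTT = 1  # assumed value; the real constant is defined elsewhere

def sliding_window_means(values, arr_times, rtt_estimates_us):
    """ Mean of values over a trailing window of SLIDING_WINDOW_NUM_RTT RTTs. """
    num_pkts = len(values)
    means = torch.empty(num_pkts)
    window_head = 0
    for i in range(num_pkts):
        window_size_us = SLIDING_WINDOW_NUM_RTT * rtt_estimates_us[i]
        # Advance the head until the window spans at most window_size_us.
        while (window_head < num_pkts
               and arr_times[i] - arr_times[window_head] >= window_size_us):
            window_head += 1
        means[i] = torch.mean(values[window_head:i + 1])
    return means

arr_times = torch.tensor([0., 10., 20., 100., 110.])
rtts_us = torch.tensor([50., 50., 50., 50., 50.])
vals = torch.tensor([1., 0., 1., 1., 0.])
print(sliding_window_means(vals, arr_times, rtts_us))
# tensor([1.0000, 0.5000, 0.6667, 1.0000, 0.5000])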
Example #5
    def train(self, fets, dat_in, dat_out):
        """ Fits this model to the provided dataset. """
        utils.assert_tensor(dat_in=dat_in, dat_out=dat_out)
        utils.check_fets(fets, self.in_spc)

        # Calculate a weight for each class. Note that the weights do not need
        # to sum to 1. Avoid very large numbers to prevent overflow. Avoid very
        # small numbers to prevent floating point errors.
        tots = utils.get_class_popularity(dat_out, self.get_classes())
        tot = sum(tots)
        # Each class's weight is 1 minus its popularity ratio.
        weights = torch.Tensor([1 - tot_cls / tot for tot_cls in tots])
        # Average each weight with a weight of 1, which serves to smooth the
        # weights.
        weights = (weights + 1) / 2
        sample_weights = torch.Tensor([weights[label] for label in dat_out])

        self.net.fit(dat_in, dat_out, sample_weight=sample_weights)
        # Oftentimes, the training log statements do not end with a newline.
        print()
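
The class-weighting scheme in this variant of train() can be checked on a toy label tensor. A minimal sketch, using torch.bincount as a stand-in for utils.get_class_popularity (an assumption about what that helper returns):

import torch

dat_out = torch.tensor([0, 0, 0, 1, 2, 2])   # hypothetical labels for 3 classes
tots = torch.bincount(dat_out, minlength=3)  # per-class counts: [3, 1, 2]
tot = tots.sum()
# Each class's weight is 1 minus its popularity ratio...
weights = 1 - tots.float() / tot             # [0.50, 0.83, 0.67]
# ...then averaged with a weight of 1 to smooth the weights.
weights = (weights + 1) / 2                  # [0.75, 0.92, 0.83]
sample_weights = weights[dat_out]            # one weight per sample
print(sample_weights)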
Example #6
 def check_output(self, out, target):
     """
     Returns the number of examples from out that were classified correctly,
     according to target. out and target must be Torch tensors.
     """
     utils.assert_tensor(out=out, target=target)
     size_out = out.size()
     size_target = target.size()
     assert size_target
     assert size_out[0] == size_target[0], \
         ("Output and target have different batch sizes (first dimension): "
          f"{size_out} != {size_target}")
     # Transform the output into classes.
     out = self._check_output_helper(out)
     size_out = out.size()
     assert size_out == size_target, \
         f"Output and target sizes do not match: {size_out} != {size_target}"
     # eq(): Compare the outputs to the labels.
     # type(): Cast the resulting bools to ints.
     # sum(): Sum them up to get the total number of correct predictions.
     return out.eq(target).type(torch.int).sum().item()
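
The final counting step of check_output is a plain element-wise comparison. A minimal sketch with hypothetical class predictions and targets:

import torch

out = torch.tensor([1, 0, 2, 1])     # predicted classes
target = torch.tensor([1, 0, 1, 1])  # ground truth
# eq(): compare; type(): cast bools to ints; sum(): count; item(): Python int.
print(out.eq(target).type(torch.int).sum().item())  # 3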
Example #7
    def test(self,
             fets,
             dat_in,
             dat_out_classes,
             dat_extra,
             graph_prms=copy.copy({
                 "sort_by_unfairness": True,
                 "dur_s": None
             })):
        """
        Tests this model on the provided dataset and returns the test accuracy
        (higher is better). Also, analyzes the model's feature coefficients and
        (if self.graph == True) visualizes various metrics. dat_in and
        dat_out_classes must be Torch tensors. dat_extra must be a Numpy array.

        fets: List of feature names.
        dat_in: Test data.
        dat_out_classes: Ground truth labels.
        dat_extra: Extra data for each sample.
        graph_prms: Graphing parameters. Used only if self.graph == True.
        """
        utils.assert_tensor(dat_in=dat_in, dat_out_classes=dat_out_classes)
        utils.check_fets(fets, self.in_spc)

        sort_by_unfairness = graph_prms["sort_by_unfairness"]
        dur_s = graph_prms["dur_s"]
        assert sort_by_unfairness or dur_s is not None, \
            ("If \"sort_by_unfairness\" is False, then \"dur_s\" must not be "
             "None.")

        # Run inference. Everything after the following line is just analysis.
        predictions = torch.tensor(self.net.predict(dat_in))

        # Compute the bandwidth fair share fraction. Convert from int to float
        # to avoid all values being rounded to 0.
        fair = dat_extra[features.make_win_metric(
            features.TPUT_FAIR_SHARE_BPS_FET, defaults.CHOSEN_WIN)]

        # Calculate the x-axis limits: the maximum unfairness if sorting by
        # unfairness, otherwise the duration.
        x_lim = (
            (0, dat_extra["raw"].max().item()) if sort_by_unfairness else
            (0, graph_prms["dur_s"]))

        if self.graph:
            # Analyze, for each number of flows, accuracy vs. unfairness.
            flws_accs = []
            nums_flws = np.unique(dat_extra["num_flws"]).tolist()
            for num_flws_selected in nums_flws:
                self.log(f"Evaluating model for {num_flws_selected} flows:")
                valid = (dat_extra["num_flws"] == num_flws_selected).nonzero()
                flws_accs.append(
                    self.__evaluate(
                        torch.tensor(self.net.predict(dat_in[valid])),
                        dat_out_classes[valid],
                        torch.tensor(dat_extra["raw"][valid]),
                        torch.tensor(fair[valid]),
                        sort_by_unfairness,
                        graph_prms={
                            "flp":
                            path.join(self.out_dir, (
                                f"accuracy_vs_unfairness_{num_flws_selected}flows_"
                                f"{self.name}.pdf")),
                            "x_lim":
                            x_lim
                        }))

            # Analyze accuracy vs. number of flows.
            x_vals = list(range(len(flws_accs)))
            plt.bar(x_vals, flws_accs, align="center")
            plt.xticks(x_vals, nums_flws)
            plt.ylim((0, 1.1))
            plt.xlabel("Total flows (1 unfair)")
            plt.ylabel("Classification accuracy")
            plt.tight_layout()
            plt.savefig(
                path.join(self.out_dir,
                          f"accuracy_vs_num-flows_{self.name}.pdf"))
            plt.close()

            # Plot queue occupancy.
            # self.__plot_queue_occ(
            #     torch.tensor(dat_extra["raw"]),
            #     torch.tensor(fair),
            #     path.join(
            #         self.out_dir, f"queue_occ_vs_fair_queue_occ_{self.name}.pdf"),
            #     x_lim)

            # # Plot throughput
            # self.__plot_throughput(
            #     dat_out_classes, torch.tensor(self.net.predict(dat_in)),
            #     torch.tensor(fair),
            #     path.join(self.out_dir, f"throughput_{self.name}.pdf"),
            #     dat_extra["btk_throughput"],
            #     torch.tensor(dat_extra[features.THR_ESTIMATE_FET].copy()),
            #     x_lim=None)

        # Evaluate Mathis model.
        self.log("Evaluting Mathis Model:")
        # Compute Mathis model predictions by dividing the Mathis model
        # throughput, computed at the same granularity as the ground truth, by
        # the fair throughput. Then convert these fairness ratios into labels.
        mathis_tput = dat_extra[features.make_win_metric(
            features.MATHIS_TPUT_FET, defaults.CHOSEN_WIN)]
        mathis_raw = mathis_tput / fair
        mathis_preds = self.convert_to_class(mathis_raw)[features.LABEL_FET]
        # Select only rows for which a prediction can be made (i.e., discard
        # rows with unknown predictions). Convert to tensors.
        mathis_valid = np.logical_and(mathis_tput != -1, fair != -1)
        mathis_raw = torch.tensor(mathis_raw[mathis_valid])
        mathis_preds = torch.tensor(mathis_preds[mathis_valid])
        mathis_dat_out_classes = dat_out_classes[mathis_valid]
        mathis_fair = torch.tensor(fair[mathis_valid])
        mathis_skipped = dat_out_classes.size()[0] - mathis_preds.size()[0]
        self.log(
            f"Warning: Mathis model could not be evaluated on {mathis_skipped} "
            f"({mathis_skipped / fair.shape[0] * 100:.2f}%) samples due to "
            "unknown values.")

        self.__evaluate(mathis_preds,
                        mathis_dat_out_classes,
                        mathis_raw,
                        mathis_fair,
                        sort_by_unfairness,
                        graph_prms={
                            "flp":
                            path.join(self.out_dir,
                                      "accuracy_vs_unfairness_mathis.pdf"),
                            "x_lim":
                            x_lim
                        })

        # Analyze overall accuracy for our model itself.
        self.log(f"Evaluating {self.name} model:")
        raw = torch.tensor(np.copy(dat_extra["raw"]))
        fair = torch.tensor(np.copy(fair))
        model_acc = self.__evaluate(
            predictions,
            dat_out_classes,
            raw,
            fair,
            sort_by_unfairness,
            graph_prms={
                "flp":
                path.join(self.out_dir,
                          f"accuracy_vs_unfairness_{self.name}.pdf"),
                "x_lim":
                x_lim
            })

        # # Analyze accuracy of a sliding window method
        # sliding_window_accuracy = self.__evaluate_sliding_window(
        #     predictions, torch.tensor(raw), torch.tensor(fair),
        #     torch.tensor(dat_extra[features.ARRIVAL_TIME_FET].copy()),
        #     torch.tensor(dat_extra[features.RTT_ESTIMATE_FET].copy()))

        return model_acc  # sliding_window_accuracy
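
The Mathis-model evaluation above reduces to forming a throughput ratio, converting it to labels, and masking out samples where either quantity is unknown (encoded as -1). A minimal standalone sketch of the masking step, with a hypothetical thresholding rule standing in for self.convert_to_class():

import numpy as np
import torch

mathis_tput = np.array([1e6, -1.0, 3e6, 2e6])
fair = np.array([2e6, 2e6, -1.0, 2e6])
mathis_raw = mathis_tput / fair
# Hypothetical labeling rule: class 1 if at or above the fair share, else 0.
mathis_preds = (mathis_raw >= 1).astype(int)
# Keep only the rows for which both quantities are known.
mathis_valid = np.logical_and(mathis_tput != -1, fair != -1)
mathis_raw = torch.tensor(mathis_raw[mathis_valid])
mathis_preds = torch.tensor(mathis_preds[mathis_valid])
print(mathis_preds)  # tensor([0, 1])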
Example #8
    def __evaluate(self,
                   preds,
                   labels,
                   raw,
                   fair,
                   sort_by_unfairness=False,
                   graph_prms=None):
        """
        Returns the accuracy of predictions compared to ground truth
        labels. If self.graph == True, then this function also graphs
        the accuracy. preds, labels, raw, and fair must be Torch tensors.
        """
        utils.assert_tensor(preds=preds, labels=labels, raw=raw, fair=fair)

        self.log("Test predictions:")
        utils.visualize_classes(self, preds)

        # Overall accuracy.
        acc = torch.sum(preds == labels) / preds.size()[0]
        self.log(f"Test accuracy: {acc * 100:.2f}%\n" +
                 "Classification report:\n" +
                 metrics.classification_report(labels, preds, digits=4))
        for cls in self.get_classes():
            # Break down the accuracy into false positives/negatives.
            labels_neg = labels != cls
            labels_pos = labels == cls
            preds_neg = preds != cls
            preds_pos = preds == cls

            false_pos_rate = (
                torch.sum(torch.logical_and(preds_pos, labels_neg)) /
                torch.sum(labels_neg))
            false_neg_rate = (
                torch.sum(torch.logical_and(preds_neg, labels_pos)) /
                torch.sum(labels_pos))

            self.log(f"Class {cls}:\n"
                     f"\tFalse negative rate: {false_neg_rate * 100:.2f}%\n"
                     f"\tFalse positive rate: {false_pos_rate * 100:.2f}%")

        if self.graph:
            assert graph_prms is not None, \
                "\"graph_prms\" must be a dict(), not None."
            assert "flp" in graph_prms, "\"flp\" not in \"graph_prms\"!"
            assert "x_lim" in graph_prms, "\"x_lim\" not in \"graph_prms\"!"

            # Compute the distance from fair, then divide by fair to
            # compute the relative unfairness.
            diffs = 1 - raw
            if sort_by_unfairness:
                # Sort based on unfairness.
                diffs, indices = torch.sort(diffs)
                preds = preds[indices]
                labels = labels[indices]
            # Bucketize and compute bucket accuracies.
            num_samples = preds.size()[0]
            num_buckets = min(20 * (1 if sort_by_unfairness else 4),
                              num_samples)
            num_per_bucket = math.floor(num_samples / num_buckets)
            assert num_per_bucket > 0, \
                ("There must be at least one sample per bucket, but there are "
                 f"{num_samples} samples and only {num_buckets} buckets!")
            # The resulting buckets are tuples of three values:
            #   (x-axis value for bucket, number predicted correctly, total)
            buckets = [
                (x, self.check_output(preds_, labels_), preds_.size()[0])
                for x, preds_, labels_ in [
                    # Each bucket is defined by a tuple of three values:
                    #   (x-axis value for bucket, predictions,
                    #    ground truth labels).
                    # The x-axis is the mean relative difference for this
                    # bucket. A few values at the end may be discarded.
                    (torch.mean(diffs[i:i + num_per_bucket]),
                     preds[i:i + num_per_bucket], labels[i:i + num_per_bucket])
                    for i in range(0, num_samples, num_per_bucket)
                ]
            ]

            # Plot each bucket's accuracy.
            plt.plot(([x for x, _, _ in buckets]
                      if sort_by_unfairness else list(range(len(buckets)))),
                     [c / t for _, c, t in buckets], "bo-")
            plt.ylim((-0.1, 1.1))
            x_lim = graph_prms["x_lim"]
            if x_lim is not None:
                plt.xlim(x_lim)
            plt.xlabel("Unfairness (fraction of fair)"
                       if sort_by_unfairness else "Time")
            plt.ylabel("Classification accuracy")
            plt.tight_layout()
            plt.savefig(graph_prms["flp"])
            plt.close()
        return acc
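
The bucketing step in __evaluate splits the (optionally sorted) samples into fixed-size chunks and plots per-chunk accuracy. A minimal sketch of that chunking, assuming pre-sorted unfairness values and per-sample correctness flags:

import math

import torch

diffs = torch.tensor([0.0, 0.1, 0.2, 0.5, 0.6, 0.9, 1.0])  # sorted unfairness
correct = torch.tensor([1, 1, 0, 1, 0, 0, 1])  # 1 if predicted correctly
num_samples = diffs.size()[0]
num_buckets = 3
num_per_bucket = math.floor(num_samples / num_buckets)  # 2
buckets = [
    # (x-axis value: mean unfairness, number correct, total in bucket)
    (torch.mean(diffs[i:i + num_per_bucket]),
     torch.sum(correct[i:i + num_per_bucket]).item(),
     correct[i:i + num_per_bucket].size()[0])
    for i in range(0, num_samples, num_per_bucket)
]
print([c / t for _, c, t in buckets])  # [1.0, 0.5, 0.0, 1.0]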