Example 1
def process_one(idx, total, exp_flp, out_dir, net, warmup_prc, scl_prms_flp,
                standardize):
    """ Evaluate a single experiment. """
    if not path.exists(out_dir):
        os.makedirs(out_dir)

    # Load and parse the experiment.
    temp_path, exp = train.process_exp(idx,
                                       total,
                                       net,
                                       exp_flp,
                                       out_dir,
                                       warmup_prc,
                                       keep_prc=100,
                                       cca="bbr",
                                       sequential=True)
    dat_in, dat_out, dat_extra, _ = utils.load_tmp_file(temp_path)

    # Load and apply the scaling parameters.
    with open(scl_prms_flp, "r") as fil:
        scl_prms = json.load(fil)
    dat_in = utils.scale_all(dat_in, scl_prms, 0, 1, standardize)

    # Visualize the ground truth data.
    utils.visualize_classes(net, dat_out)

    # Test the experiment.
    accuracy = net.test(dat_in.dtype.names,
                        *utils.Dataset(fets=dat_in.dtype.names,
                                       dat_in=utils.clean(dat_in),
                                       dat_out=utils.clean(dat_out),
                                       dat_extra=dat_extra).raw(),
                        graph_prms={
                            "analyze_features": False,
                            "out_dir": out_dir,
                            "sort_by_unfairness": False,
                            "dur_s": exp.dur_s
                        })

    print("Accuracy:", accuracy)
Example 2
def run_sklearn(args, out_dir, out_flp, ldrs):
    """
    Trains an sklearn model according to the supplied parameters. Returns the
    test error (lower is better).
    """
    # Unpack the dataloaders.
    ldr_trn, _, ldr_tst = ldrs
    # Construct the model.
    print("Building model...")
    net = models.MODELS[args["model"]](out_dir)
    net.log(f"\n\nArguments: {args}")

    if path.exists(out_flp):
        # The output file already exists with these parameters, so do not
        # retrain the model.
        print("Skipping training because a trained model already exists with "
              f"these parameters: {out_flp}")
        print(f"Loading model: {out_flp}")
        with open(out_flp, "rb") as fil:
            net.net = pickle.load(fil)
        tim_trn_s = 0
    else:
        net.new(**{param: args[param] for param in net.params})
        # Extract the training data from the training dataloader.
        print("Extracting training data...")
        dat_in, dat_out = list(ldr_trn)[0]
        print("Training data:")
        utils.visualize_classes(net, dat_out)

        # Training.
        print("Training...")
        tim_srt_s = time.time()
        net.train(ldr_trn.dataset.fets, dat_in, dat_out)
        tim_trn_s = time.time() - tim_srt_s
        print(f"Finished training - time: {tim_trn_s:.2f} seconds")
        # Save the model.
        print(f"Saving final model: {out_flp}")
        with open(out_flp, "wb") as fil:
            pickle.dump(net.net, fil)

    # Testing.
    #
    # Use .raw() instead of loading the dataloader because we need dat_extra.
    fets, dat_in, dat_out, dat_extra = ldr_tst.dataset.raw()
    print("Test data:")
    utils.visualize_classes(net, dat_out)

    print("Testing...")
    tim_srt_s = time.time()
    acc_tst = net.test(fets,
                       dat_in,
                       dat_out,
                       dat_extra,
                       graph_prms={
                           "out_dir": out_dir,
                           "sort_by_unfairness": True,
                           "dur_s": None
                       })
    print(f"Finished testing - time: {time.time() - tim_srt_s:.2f} seconds")

    # Optionally perform feature elimination.
    if args["analyze_features"]:
        utils.select_fets(
            utils.analyze_feature_correlation(net, out_dir, dat_in,
                                              args["clusters"]),
            utils.analyze_feature_importance(net, out_dir, dat_in, dat_out,
                                             args["fets_to_pick"],
                                             args["perm_imp_repeats"]))
    return acc_tst, tim_trn_s
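
A hedged sketch of a call site for run_sklearn. The model key, parameter values, and the ldrs tuple (train, validation, test dataloaders) are assumptions; a real args dict would also carry the chosen model's hyperparameters (the keys listed in net.params).

# Hypothetical invocation (values are examples only).
from os import path

args = {
    "model": "SvmSklearn",       # assumed key in models.MODELS
    "analyze_features": False,   # skip feature elimination in this sketch
    # ...plus the model-specific hyperparameters expected by net.params...
}
out_dir = "model_out"
out_flp = path.join(out_dir, "model.pickle")
acc_tst, tim_trn_s = run_sklearn(args, out_dir, out_flp, ldrs)
print(f"Test accuracy: {acc_tst}, training time: {tim_trn_s:.2f} s")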
Example 3
    def _test(self, args, epoch, disc, gen, test_loader, test_output_dir):
        mse_criterion = nn.MSELoss()
        cls_criterion = nn.CrossEntropyLoss()
        _loss_g, _loss_cls_gen, _loss_adv_gen = 0., 0., 0.
        _loss_d, _loss_cls, _loss_adv = 0., 0., 0.
        _loss_recon, _loss_mse = 0., 0.
        _loss = 0.
        ypred, ypred_gen = [], []
        ytrue, ytrue_gen = [], []
        cls_count = [0] * 10

        class_featmaps = np.zeros((10, 1000, 256 * 14 * 14))
        class_featmaps_gen = np.zeros((10, 1000, 256 * 14 * 14))
        class_idx = [0] * 10

        for i, (inputs, featmaps, targets, indexes) in enumerate(test_loader):
            inputs, featmaps, targets = inputs.to(args.device), featmaps.to(
                args.device), targets.to(args.device)

            feats, gen_targets = self._sample_vecs_index(inputs.shape[0])
            feats, gen_targets = feats.to(args.device), gen_targets.to(
                args.device)
            gen_image = gen(feats.unsqueeze(2).unsqueeze(3).detach())

            for j, target in enumerate(
                    targets.detach().cpu().numpy().astype(int)):
                class_featmaps[target, class_idx[target]] = featmaps[j].view(
                    256 * 14 * 14).detach().cpu().numpy()
                class_featmaps_gen[target,
                                   class_idx[target]] = gen_image[j].view(
                                       256 * 14 * 14).detach().cpu().numpy()
                class_idx[target] += 1

        print(class_idx)
        plt.figure(figsize=(12, 12))
        plt.imshow(
            pairwise_distances(np.vstack(class_featmaps[0:10]), metric='l2'))
        plt.colorbar()
        plt.savefig("self.png")
        plt.close()

        print(class_idx)
        plt.figure(figsize=(12, 12))
        plt.imshow(
            pairwise_distances(np.vstack(class_featmaps_gen[0:10]),
                               metric='l2'))
        plt.colorbar()
        plt.savefig("self_gen.png")
        plt.close()

        plt.figure(figsize=(12, 12))
        plt.imshow(
            pairwise_distances(np.vstack(class_featmaps[0:10]),
                               np.vstack(class_featmaps_gen[0:10]),
                               metric='l2'))
        plt.colorbar()
        plt.savefig("pair.png")
        plt.close()

        plt.figure(figsize=(12, 12))
        plt.imshow(
            pairwise_distances(np.vstack(class_featmaps[0:10]),
                               metric='cosine'))
        plt.colorbar()
        plt.savefig("self_cosine.png")
        plt.close()

        plt.figure(figsize=(12, 12))
        plt.imshow(
            pairwise_distances(np.vstack(class_featmaps_gen[0:10]),
                               metric='cosine'))
        plt.colorbar()
        plt.savefig("self_gen_cosine.png")
        plt.close()

        plt.figure(figsize=(12, 12))
        plt.imshow(
            pairwise_distances(np.vstack(class_featmaps[0:10]),
                               np.vstack(class_featmaps_gen[0:10]),
                               metric='cosine'))
        plt.colorbar()
        plt.savefig("pair_cosine.png")
        plt.close()

        for i, (images, featmaps, targets, indexes) in enumerate(test_loader):
            loss = 0
            images, featmaps, targets = images.to(args.device), featmaps.to(
                args.device), targets.to(args.device)
            if args.data == "image":
                inputs = (images * 2) - 1
            else:
                inputs = featmaps
            feats, logits_cls, logits_adv = disc(inputs)
            loss_cls = cls_criterion(logits_cls, targets.long())
            loss = loss_cls
            _loss_cls += loss_cls.item()

            preds = F.softmax(logits_cls,
                              dim=1).argmax(dim=1).cpu().numpy().tolist()
            ypred.extend(preds)
            # Move targets to the CPU so they can be compared with NumPy below.
            ytrue.extend(targets.detach().cpu().numpy().tolist())

            feats, gen_targets = self._sample_vecs_index(inputs.shape[0])
            feats, gen_targets = feats.to(args.device), gen_targets.to(
                args.device)
            gen_image = gen(feats.unsqueeze(2).unsqueeze(3).detach())
            feats_gen, logits_cls_gen, logits_adv_gen = disc(gen_image)
            loss_cls_gen = cls_criterion(logits_cls_gen, gen_targets.long())
            loss += args.cls_w * loss_cls_gen
            _loss_cls_gen += loss_cls_gen.item()

            if args.adv:
                loss_adv = (adversarial_loss(logits_adv,
                                             is_real=True,
                                             is_disc=True,
                                             type_=args.adv_type) +
                            adversarial_loss(logits_adv_gen,
                                             is_real=False,
                                             is_disc=True,
                                             type_=args.adv_type))
                _loss_adv += loss_adv.item()
                loss += args.adv_w * loss_adv.clone() / 2.

                loss_adv_gen = adversarial_loss(logits_adv_gen,
                                                is_real=True,
                                                is_disc=False,
                                                type_=args.adv_type)
                _loss_adv_gen += loss_adv_gen.item()
                loss += args.adv_w * loss_adv_gen.clone()
            if args.recon:
                loss_recon = (1 - nn.CosineSimilarity(dim=1, eps=1e-6)(
                    feats_gen, feats).mean())
                loss += args.adv_r * loss_recon.clone()
                _loss_recon += loss_recon.item()
            if args.mse:
                loss_mse = nn.MSELoss()(gen_image, inputs)
                loss += args.mse_w * loss_mse.clone()
                _loss_mse += args.mse_w * loss_mse.item()

            preds_gen = F.softmax(logits_cls_gen,
                                  dim=1).argmax(dim=1).cpu().numpy().tolist()
            ypred_gen.extend(preds_gen)
            ytrue_gen.extend(gen_targets.detach().cpu().numpy().tolist())
            _loss += loss.item()

            if i % 10 == 0:
                visualize(inputs[0],
                          gen_image[0],
                          out_dir=test_output_dir + str(epoch) + "_" + str(i) +
                          ".jpg",
                          featmap=(args.data == "featmap"))

            if sum(cls_count) < 50:
                cls_count = visualize_classes(inputs, gen_image, gen_targets,
                                              cls_count, test_output_dir,
                                              args.data == "featmap")

        acc = round((np.array(ypred) == np.array(ytrue)).sum() / len(ytrue), 4)
        acc_gen = round((np.array(ypred_gen) == np.array(ytrue_gen)).sum() /
                        len(ytrue_gen), 4)

        print("Test Set Epoch {}, Training Iteration {}".format(epoch, i))
        print("Accuracy: {}, Accuracy gen: {}".format(acc, acc_gen))
        print("Loss: {}, Loss_cls: {}, Loss_cls_gen: {}".format(
            _loss / (i + 1), _loss_cls / (i + 1), _loss_cls_gen / (i + 1)))
        if args.adv:
            print("Loss_adv: {}, Loss_adv_gen: {}".format(
                _loss_adv / (i + 1), _loss_adv_gen / (i + 1)))
        if args.mse:
            print("Loss_mse: {}".format(_loss_mse / (i + 1)))
        return return_statement(i, acc, acc_gen, _loss_cls, _loss_cls_gen,
                                _loss_adv, _loss_adv_gen, _loss_recon,
                                _loss_mse)
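
The six heatmap blocks above all follow the same pattern: flatten the real and generated feature maps, compute a pairwise-distance matrix, and save it as an image. Below is a self-contained sketch of that pattern on toy data (shapes and file name are arbitrary).

# Stand-alone illustration of the distance-heatmap pattern.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances

real_feats = np.random.rand(100, 64)   # stand-in for flattened real feature maps
gen_feats = np.random.rand(100, 64)    # stand-in for flattened generated feature maps

plt.figure(figsize=(12, 12))
plt.imshow(pairwise_distances(real_feats, gen_feats, metric="cosine"))
plt.colorbar()
plt.savefig("pair_cosine_demo.png")
plt.close()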
Example 4
def process_one(idx, total, sim_flp, out_dir, net, warmup_prc, scl_prms_flp,
                standardize, all_accuracy, all_bucketized_accuracy, bw_dict,
                rtt_dict, queue_dict):
    """ Evaluate a single simulation. """
    if not path.exists(out_dir):
        os.makedirs(out_dir)

    # Load and parse the simulation.
    temp_path, sim = (train.process_sim(idx,
                                        total,
                                        net=net,
                                        sim_flp=sim_flp,
                                        tmp_dir=out_dir,
                                        warmup_prc=warmup_prc,
                                        keep_prc=100,
                                        sequential=True))

    (dat_in, dat_out, dat_out_raw, dat_out_oracle,
     _) = (utils.load_tmp_file(temp_path))

    # Load and apply the scaling parameters.
    with open(scl_prms_flp, "r") as fil:
        scl_prms = json.load(fil)
    dat_in = utils.scale_all(dat_in, scl_prms, 0, 1, standardize)

    # Visualize the ground truth data.
    utils.visualize_classes(net, dat_out)

    # Test the simulation.
    accuracy, bucketized_accuracy = net.test(*utils.Dataset(
        fets=dat_in.dtype.names,
        dat_in=utils.clean(dat_in),
        dat_out=utils.clean(dat_out),
        dat_out_raw=utils.clean(dat_out_raw),
        dat_out_oracle=utils.clean(dat_out_oracle),
        num_flws=np.array([sim.unfair_flws + sim.fair_flws] * dat_in.shape[0],
                          dtype=float)).raw(),
                                             graph_prms={
                                                 "out_dir": out_dir,
                                                 "sort_by_unfairness": False,
                                                 "dur_s": sim.dur_s
                                             })

    all_accuracy.append(accuracy)
    mean_accuracy = mean(all_accuracy)

    all_bucketized_accuracy.append(bucketized_accuracy)
    mean_bucketized_accuracy = mean(all_bucketized_accuracy)

    for bw_Mbps in bw_dict.keys():
        if sim.bw_Mbps <= bw_Mbps:
            bw_dict[bw_Mbps].append(accuracy)
            break

    rtt_us = (sim.btl_delay_us + 2 * sim.edge_delays[0]) * 2
    for rtt_us_ in rtt_dict.keys():
        if rtt_us <= rtt_us_:
            rtt_dict[rtt_us_].append(accuracy)
            break

    bdp = sim.bw_Mbps * rtt_us / sim.payload_B / sim.queue_p

    for queue_bdp in queue_dict.keys():
        if bdp <= queue_bdp:
            queue_dict[queue_bdp].append(accuracy)
            break

    print(
        f"Finished processing {sim.name}\n"
        "----Average accuracy for all the processed simulations: "
        f"{mean_accuracy}\n"
        "----Average bucketized accuracy for all the processed simulations: "
        f"{mean_bucketized_accuracy}\n")

    for bw_Mbps in bw_dict.keys():
        if bw_dict[bw_Mbps]:
            bw_accuracy = mean(bw_dict[bw_Mbps])
            print(
                f"----Bandwidth less than {bw_Mbps}Mbps accuracy {bw_accuracy}"
            )

    for rtt_us_ in rtt_dict.keys():
        if rtt_dict[rtt_us_]:
            rtt_accuracy = mean(rtt_dict[rtt_us_])
            print(f"----Rtt less than {rtt_us_}ns accuracy {rtt_accuracy}")

    for queue_bdp in queue_dict.keys():
        if queue_dict[queue_bdp]:
            queue_accuracy = mean(queue_dict[queue_bdp])
            print(f"----Queue size less than {queue_bdp} BDP accuracy "
                  f"{queue_accuracy}")
Example 5
def run_trials(args):
    """
    Run args["conf_trials"] trials and survive args["max_attempts"] failed
    attempts.
    """
    print(f"Arguments: {args}")

    if args["no_rand"]:
        utils.set_rand_seed()

    out_dir = args["out_dir"]
    if not path.isdir(out_dir):
        print(f"Output directory does not exist. Creating it: {out_dir}")
        os.makedirs(out_dir)
    net_tmp = models.MODELS[args["model"]]()
    # Verify that the necessary supplemental parameters are present.
    for param in net_tmp.params:
        assert param in args, f"\"{param}\" not in args: {args}"
    # Assemble the output filepath.
    out_flp = path.join(
        args["out_dir"],
        (utils.args_to_str(args, order=sorted(defaults.DEFAULTS.keys()))) + (
            # Determine the proper extension based on the type of
            # model.
            ".pickle"
            if isinstance(net_tmp, models.SvmSklearnWrapper) else ".pth"))
    # If custom features are specified, then overwrite the model's
    # default features.
    fets = args["features"]
    if fets:
        net_tmp.in_spc = fets
    else:
        assert "arrival time us" not in args["features"]
        args["features"] = net_tmp.in_spc
    # If a trained model file already exists, then delete it.
    if path.exists(out_flp):
        os.remove(out_flp)

    # Load or generate training data.
    dat_flp = path.join(out_dir, "data.npz")
    scl_prms_flp = path.join(out_dir, "scale_params.json")
    # Check for the presence of both the data and the scaling
    # parameters because the resulting model is useless without the
    # proper scaling parameters.
    if (not args["regen_data"] and path.exists(dat_flp)
            and path.exists(scl_prms_flp)):
        print("Found existing data!")
        dat_in, dat_out, dat_out_raw, dat_out_oracle, num_flws = utils.load(
            dat_flp)
        dat_in_shape = dat_in.shape
        dat_out_shape = dat_out.shape
        assert dat_in_shape[0] == dat_out_shape[0], \
            f"Data has invalid shapes! in: {dat_in_shape}, out: {dat_out_shape}"
    else:
        print("Regenerating data...")
        dat_in, dat_out, dat_out_raw, dat_out_oracle, num_flws = (gen_data(
            net_tmp, args, dat_flp, scl_prms_flp))
    print(f"Number of input features: {len(dat_in.dtype.names)}")

    # Visualize the ground truth data.
    utils.visualize_classes(net_tmp, dat_out)

    # TODO: Parallelize attempts.
    trls = args["conf_trials"]
    apts = 0
    apts_max = args["max_attempts"]
    ress = []
    while trls > 0 and apts < apts_max:
        apts += 1
        res = (run_sklearn if isinstance(net_tmp, models.SvmSklearnWrapper)
               else run_torch)(args, dat_in, dat_out, dat_out_raw,
                               dat_out_oracle, num_flws, out_dir, out_flp)
        if res[0] == 100:
            print((
                f"Training failed (attempt {apts}/{apts_max}). Trying again!"))
        else:
            ress.append(res)
            trls -= 1
    if ress:
        print(("Resulting accuracies: "
               f"{', '.join([f'{acc:.2f}' for acc, _ in ress])}"))
        max_acc, tim_s = max(ress, key=lambda p: p[0])
        print(f"Maximum accuracy: {max_acc:.2f}")
        # Return the minimum error instead of the maximum accuracy.
        return 1 - max_acc, tim_s
    print(f"Model cannot be trained with args: {args}")
    return float("NaN"), float("NaN")
Example 6
    def __evaluate(self,
                   preds,
                   labels,
                   raw,
                   fair,
                   sort_by_unfairness=False,
                   graph_prms=None):
        """
        Returns the accuracy of predictions compared to ground truth
        labels. If self.graph == True, then this function also graphs
        the accuracy. preds, labels, raw, and fair must be Torch tensors.
        """
        utils.assert_tensor(preds=preds, labels=labels, raw=raw, fair=fair)

        self.log("Test predictions:")
        utils.visualize_classes(self, preds)

        # Overall accuracy.
        acc = torch.sum(preds == labels) / preds.size()[0]
        self.log(f"Test accuracy: {acc * 100:.2f}%\n" +
                 "Classification report:\n" +
                 metrics.classification_report(labels, preds, digits=4))
        for cls in self.get_classes():
            # Break down the accuracy into false positives/negatives.
            labels_neg = labels != cls
            labels_pos = labels == cls
            preds_neg = preds != cls
            preds_pos = preds == cls

            false_pos_rate = (
                torch.sum(torch.logical_and(preds_pos, labels_neg)) /
                torch.sum(labels_neg))
            false_neg_rate = (
                torch.sum(torch.logical_and(preds_neg, labels_pos)) /
                torch.sum(labels_pos))

            self.log(f"Class {cls}:\n"
                     f"\tFalse negative rate: {false_neg_rate * 100:.2f}%\n"
                     f"\tFalse positive rate: {false_pos_rate * 100:.2f}%")

        if self.graph:
            assert graph_prms is not None, \
                "\"graph_prms\" must be a dict(), not None."
            assert "flp" in graph_prms, "\"flp\" not in \"graph_prms\"!"
            assert "x_lim" in graph_prms, "\"x_lim\" not in \"graph_prms\"!"

            # raw is already expressed as a fraction of the fair value, so its
            # distance from 1 is the relative unfairness.
            diffs = 1 - raw
            if sort_by_unfairness:
                # Sort based on unfairness.
                diffs, indices = torch.sort(diffs)
                preds = preds[indices]
                labels = labels[indices]
            # Bucketize and compute bucket accuracies.
            num_samples = preds.size()[0]
            num_buckets = min(20 * (1 if sort_by_unfairness else 4),
                              num_samples)
            num_per_bucket = math.floor(num_samples / num_buckets)
            assert num_per_bucket > 0, \
                ("There must be at least one sample per bucket, but there are "
                 f"{num_samples} samples and only {num_buckets} buckets!")
            # The resulting buckets are tuples of three values:
            #   (x-axis value for bucket, number predicted correctly, total)
            buckets = [
                (x, self.check_output(preds_, labels_), preds_.size()[0])
                for x, preds_, labels_ in [
                    # Each bucket is defined by a tuple of three values:
                    #   (x-axis value for bucket, predictions,
                    #    ground truth labels).
                    # The x-axis is the mean relative difference for this
                    # bucket. A few values at the end may be discarded.
                    (torch.mean(diffs[i:i + num_per_bucket]),
                     preds[i:i + num_per_bucket], labels[i:i + num_per_bucket])
                    for i in range(0, num_samples, num_per_bucket)
                ]
            ]

            # Plot each bucket's accuracy.
            plt.plot(([x for x, _, _ in buckets]
                      if sort_by_unfairness else list(range(len(buckets)))),
                     [c / t for _, c, t in buckets], "bo-")
            plt.ylim((-0.1, 1.1))
            x_lim = graph_prms["x_lim"]
            if x_lim is not None:
                plt.xlim(x_lim)
            plt.xlabel("Unfairness (fraction of fair)"
                       if sort_by_unfairness else "Time")
            plt.ylabel("Classification accuracy")
            plt.tight_layout()
            plt.savefig(graph_prms["flp"])
            plt.close()
        return acc
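
The bucketization above splits the (optionally sorted) samples into fixed-size buckets and plots each bucket's accuracy. A stand-alone sketch of the same idea on toy tensors, using a plain equality check in place of self.check_output:

# Per-bucket accuracy on toy data.
import math
import torch

preds = torch.randint(0, 2, (100,))
labels = torch.randint(0, 2, (100,))
num_samples = preds.size()[0]
num_per_bucket = math.floor(num_samples / min(20, num_samples))
bucket_accs = [
    (preds[i:i + num_per_bucket] == labels[i:i + num_per_bucket])
    .float().mean().item()
    for i in range(0, num_samples, num_per_bucket)]
print(bucket_accs)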