def train_mlp_checkpoint(
    train_txt_path,
    train_embedding_path,
    test_txt_path,
    test_embedding_path,
    num_classes,
    seed_num,
    minibatch_size,
    num_epochs,
    train_size,
    criterion,
    checkpoint_folder,
):

    torch.manual_seed(seed_num)
    np.random.seed(seed_num)

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    train_x, ul_x, train_y, ul_y = train_test_split(train_x,
                                                    train_y,
                                                    train_size=train_size,
                                                    random_state=seed_num,
                                                    stratify=train_y)
    # test_x, test_y = utils_processing.get_x_y(test_txt_path, test_embedding_path)

    print(train_x.shape, train_y.shape, ul_x.shape, ul_y.shape)

    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(train_x, train_y)
    ul_y_predict = neigh.predict(ul_x)
    ul_acc = accuracy_score(ul_y, ul_y_predict)
    # print(f"{train_loss:.3f},{train_acc:.3f},{val_loss:.3f},{val_acc:.3f}\n")

    gc.collect()
    return ul_acc
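For reference, a minimal self-contained sketch of the same 1-NN pseudo-labeling check on synthetic data; utils_processing.get_x_y and the embedding files are project-specific, so random features stand in for them here:

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.RandomState(0)
x = rng.randn(200, 32)                 # stand-in sentence embeddings
y = (x[:, 0] > 0).astype(int)          # stand-in binary labels

# Keep a small labeled split; treat the rest as "unlabeled" with known labels.
train_x, ul_x, train_y, ul_y = train_test_split(
    x, y, train_size=0.1, random_state=0, stratify=y)

neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(train_x, train_y)
print("1-NN pseudo-label accuracy:", accuracy_score(ul_y, neigh.predict(ul_x)))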
Example #2
def evaluate_svm_baselines(
    train_txt_path,
    test_txt_path,
    train_embedding_path,
    test_embedding_path,
    insert_test_txt_path,
    insert_test_embedding_path,
    swap_test_txt_path,
    swap_test_embedding_path,
    delete_train_txt_path,
    delete_train_embedding_path,
    eda_train_txt_path,
    eda_train_embedding_path,
):

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    test_x, test_y = utils_processing.get_x_y(test_txt_path,
                                              test_embedding_path)
    insert_test_x, insert_test_y = utils_processing.get_x_y(
        insert_test_txt_path, insert_test_embedding_path)
    swap_test_x, swap_test_y = utils_processing.get_x_y(
        swap_test_txt_path, swap_test_embedding_path)
    delete_train_x, delete_train_y = utils_processing.get_x_y(
        delete_train_txt_path, delete_train_embedding_path)
    eda_train_x, eda_train_y = utils_processing.get_x_y(
        eda_train_txt_path, eda_train_embedding_path)

    train_eval_svm(train_x,
                   train_y,
                   test_x,
                   test_y,
                   insert_test_x,
                   insert_test_y,
                   swap_test_x,
                   swap_test_y,
                   "regular training",
                   n_reg_train_x=len(train_x))
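train_eval_svm is a project helper that is not shown on this page; a rough sketch of what such a fit-and-score routine might look like with scikit-learn's LinearSVC (the extra augmented test sets are omitted and all names here are assumptions):

from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC

def train_eval_svm_sketch(train_x, train_y, test_x, test_y, tag, n_reg_train_x=None):
    # Fit a linear SVM on the (possibly augmented) training embeddings and report test accuracy.
    clf = LinearSVC(C=1.0, max_iter=5000)
    clf.fit(train_x, train_y)
    acc = accuracy_score(test_y, clf.predict(test_x))
    print(f"{tag}: n_train={len(train_x)} (regular={n_reg_train_x}), test acc={acc:.4f}")
    return acc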
Example #3
def train_mlp_checkpoint(
    train_txt_path,
    train_embedding_path,
    test_txt_path,
    test_embedding_path,
    num_classes,
    train_subset,
    seed_num,
    minibatch_size,
    num_epochs,
    criterion,
    checkpoint_folder,
):

    torch.manual_seed(seed_num)
    np.random.seed(seed_num)

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    test_x, test_y = utils_processing.get_x_y(test_txt_path,
                                              test_embedding_path)
    if train_subset:
        train_x, ul_x, train_y, ul_y = train_test_split(
            train_x,
            train_y,
            train_size=train_subset,
            random_state=seed_num,
            stratify=train_y)
        test_x = ul_x
        test_y = ul_y

    # print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

    model = MLP(num_classes=num_classes)
    optimizer = optim.Adam(
        params=model.parameters(), lr=0.001,
        weight_decay=0.05)  # works even for large learning rates
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                 gamma=0.9)
    autograd_hacks.add_hooks(model)
    top_group_list, bottom_group_list = [], []

    num_minibatches_train = int(train_x.shape[0] / minibatch_size)
    val_acc_list = []
    conf_acc_list = []

    if checkpoint_folder:

        epoch_output_path = checkpoint_folder.joinpath("e0.pt")
        epoch_output_path.parent.mkdir(parents=True, exist_ok=True)

        torch.save(obj={
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "scheduler_state_dict": scheduler.state_dict(),
            "epoch": 0,
        },
                   f=str(epoch_output_path))

    for epoch in range(1, num_epochs + 1):

        ######## training ########
        model.train(mode=True)
        train_running_loss, train_running_corrects = 0.0, 0

        train_x, train_y = shuffle(train_x, train_y, random_state=seed_num)

        for minibatch_num in range(num_minibatches_train):

            start_idx = minibatch_num * minibatch_size
            end_idx = start_idx + minibatch_size
            train_inputs = torch.from_numpy(train_x[start_idx:end_idx].astype(
                np.float32))
            train_labels = torch.from_numpy(train_y[start_idx:end_idx].astype(
                np.int64))
            optimizer.zero_grad()

            # Forward and backpropagation.
            with torch.set_grad_enabled(mode=True):
                train_outputs = model(train_inputs)
                __, train_preds = torch.max(train_outputs, dim=1)
                train_loss = criterion(input=train_outputs,
                                       target=train_labels)
                train_loss.backward(retain_graph=True)
                autograd_hacks.compute_grad1(model)

                optimizer.step()
                autograd_hacks.clear_backprops(model)

            train_running_loss += train_loss.item() * train_inputs.size(0)
            train_running_corrects += int(
                torch.sum(train_preds == train_labels.data,
                          dtype=torch.double))

        train_loss = train_running_loss / (num_minibatches_train *
                                           minibatch_size)
        train_acc = train_running_corrects / (num_minibatches_train *
                                              minibatch_size)

        ######## validation ########
        model.train(mode=False)
        val_running_loss, val_running_corrects = 0.0, 0

        val_inputs = torch.from_numpy(test_x.astype(np.float32))
        val_labels = torch.from_numpy(test_y.astype(np.int64))

        # Feed forward.
        with torch.set_grad_enabled(mode=False):
            val_outputs = model(val_inputs)
            val_confs, val_preds = torch.max(val_outputs, dim=1)
            val_loss = criterion(input=val_outputs, target=val_labels)
            val_loss_print = val_loss / val_inputs.shape[0]
            val_acc = accuracy_score(test_y, val_preds)
            val_acc_list.append(val_acc)

            val_preds = val_preds.tolist()
            val_confs = val_confs.numpy().tolist()
            val_threshold_idx = int(len(val_confs) / 10)
            val_conf_threshold = list(sorted(val_confs))[-val_threshold_idx]

            confident_predicted_labels = [
                val_preds[i] for i in range(len(val_confs))
                if val_confs[i] >= val_conf_threshold
            ]
            confident_ul_y = [
                test_y[i]  # test_y equals ul_y when train_subset is set
                for i in range(len(val_confs))
                if val_confs[i] >= val_conf_threshold
            ]
            conf_acc = accuracy_score(confident_ul_y,
                                      confident_predicted_labels)
            conf_acc_list.append(conf_acc)

        # val_running_loss += val_loss.item() * val_inputs.size(0)
        # val_running_corrects += int(torch.sum(val_preds == val_labels.data, dtype=torch.double))

        # val_loss = val_running_loss / (num_minibatches_val * minibatch_size)
        # val_acc = val_running_corrects / (num_minibatches_val * minibatch_size)
        # val_acc_list.append(val_acc)

        # print(f"{train_loss:.3f},{train_acc:.3f},{val_loss_print:.3f},{val_acc:.3f}\n")

        if checkpoint_folder:

            epoch_output_path = checkpoint_folder.joinpath(
                f"e{epoch}_va{val_acc:.4f}.pt")
            epoch_output_path.parent.mkdir(parents=True, exist_ok=True)

            torch.save(obj={
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict(),
                "epoch": epoch,
            },
                       f=str(epoch_output_path))

    gc.collect()
    return mean(val_acc_list[-5:]), mean(conf_acc_list[-5:])
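The confidence-filtered accuracy above scores only the predictions above roughly the 90th-percentile confidence; a small stand-alone sketch of that thresholding with synthetic values:

import numpy as np
from sklearn.metrics import accuracy_score

rng = np.random.RandomState(0)
confs = rng.rand(100)                  # stand-in per-example confidences
preds = rng.randint(0, 2, size=100)    # stand-in predicted labels
gold = rng.randint(0, 2, size=100)     # stand-in true labels

# Keep roughly the top 10% most confident predictions and score only those.
threshold = np.quantile(confs, 0.9)
keep = confs >= threshold
print(f"confident-subset accuracy over {keep.sum()} examples:",
      accuracy_score(gold[keep], preds[keep]))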
def train_mlp_checkpoint(
    train_txt_path,
    train_embedding_path,
    test_txt_path,
    test_embedding_path,
    num_classes,
    train_subset,
    resume_checkpoint_path,
    num_epochs,
    seed_num,
    criterion,
    checkpoint_folder,
    train_minibatch_size=5,
    val_minibatch_size=256,
):

    torch.manual_seed(seed_num)
    np.random.seed(seed_num)

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    # test_x, test_y = utils_processing.get_x_y(test_txt_path, test_embedding_path)
    if train_subset:
        train_x, ul_x, train_y, ul_y = train_test_split(
            train_x,
            train_y,
            train_size=train_subset,
            random_state=seed_num,
            stratify=train_y)
        test_x = ul_x
        test_y = ul_y

    # print(train_x.shape, train_y.shape, ul_x.shape, ul_y.shape, test_x.shape, test_y.shape)

    model = MLP(num_classes=num_classes)
    optimizer = optim.Adam(
        params=model.parameters(), lr=0.001,
        weight_decay=0.05)  # works even for large learning rates
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                 gamma=0.9)

    autograd_hacks.add_hooks(model)

    train_inputs = torch.from_numpy(train_x.astype(np.float32))
    train_labels = torch.from_numpy(train_y.astype(np.int64))
    optimizer.zero_grad()

    ######## first train the model ########
    num_minibatches_train = int(train_x.shape[0] / train_minibatch_size)

    for epoch in range(1, num_epochs + 1):

        ######## training ########
        model.train(mode=True)

        train_x, train_y = shuffle(train_x, train_y, random_state=seed_num)

        for minibatch_num in range(num_minibatches_train):

            start_idx = minibatch_num * train_minibatch_size
            end_idx = start_idx + train_minibatch_size
            train_inputs_mb = torch.from_numpy(
                train_x[start_idx:end_idx].astype(np.float32))
            train_labels_mb = torch.from_numpy(
                train_y[start_idx:end_idx].astype(np.int64))
            optimizer.zero_grad()

            # Forward and backpropagation.
            with torch.set_grad_enabled(mode=True):
                train_outputs = model(train_inputs_mb)
                __, train_preds = torch.max(train_outputs, dim=1)
                train_loss = criterion(input=train_outputs,
                                       target=train_labels_mb)
                train_loss.backward(retain_graph=True)
                autograd_hacks.compute_grad1(model)

                optimizer.step()
                autograd_hacks.clear_backprops(model)

    ######## validation ########
    model.train(mode=False)

    val_inputs = torch.from_numpy(test_x.astype(np.float32))
    val_labels = torch.from_numpy(test_y.astype(np.int64))
    val_acc_list = []
    conf_acc_list = []

    # Feed forward.
    with torch.set_grad_enabled(mode=False):
        val_outputs = model(val_inputs)
        val_confs, val_preds = torch.max(val_outputs, dim=1)
        val_loss = criterion(input=val_outputs, target=val_labels)
        val_loss_print = val_loss / val_inputs.shape[0]
        val_acc = accuracy_score(test_y, val_preds)
        val_acc_list.append(val_acc)

        val_preds = val_preds.tolist()
        val_confs = val_confs.numpy().tolist()
        val_threshold_idx = int(len(val_confs) / 10)
        val_conf_threshold = list(sorted(val_confs))[-val_threshold_idx]

        confident_predicted_labels = [
            val_preds[i] for i in range(len(val_confs))
            if val_confs[i] >= val_conf_threshold
        ]
        confident_ul_y = [
            ul_y[i] for i in range(len(val_confs))
            if val_confs[i] >= val_conf_threshold
        ]
        conf_acc = accuracy_score(confident_ul_y, confident_predicted_labels)
        conf_acc_list.append(conf_acc)

        # print(f"trained model has val acc {val_acc:.4f}, {conf_acc:.4f}")

    print(val_outputs.shape)

    ######## get train gradients ########
    model.train(mode=True)
    train_label_to_grads = {label: [] for label in range(num_classes)}

    with torch.set_grad_enabled(mode=True):
        train_outputs = model(train_inputs)
        __, train_preds = torch.max(train_outputs, dim=1)
        train_loss = criterion(input=train_outputs, target=train_labels)
        train_loss.backward(retain_graph=True)
        autograd_hacks.compute_grad1(model)

        train_grad_np = utils_grad.get_grad_np(model, global_normalize=True)
        for i in range(train_grad_np.shape[0]):
            train_grad = train_grad_np[i]
            label = int(train_labels[i])
            train_label_to_grads[label].append(train_grad)

        # optimizer.step()
        autograd_hacks.clear_backprops(model)

    ######## get unlabeled gradients ########
    ul_inputs = torch.from_numpy(ul_x.astype(np.float32))
    ul_labels = torch.from_numpy(ul_y.astype(np.int64))
    optimizer.zero_grad()
    ul_grad_np_dict = {}

    for given_label in range(num_classes):

        with torch.set_grad_enabled(mode=True):
            ul_outputs = model(ul_inputs)
            __, ul_preds = torch.max(ul_outputs, dim=1)
            given_ul_labels = torch.from_numpy(
                (np.zeros(ul_labels.shape) + given_label).astype(np.int64))
            ul_loss = criterion(input=ul_outputs, target=given_ul_labels)
            ul_loss.backward(retain_graph=True)
            autograd_hacks.compute_grad1(model)

            grad_np = utils_grad.get_grad_np(model, global_normalize=True)
            ul_grad_np_dict[given_label] = grad_np

            # optimizer.step()
            autograd_hacks.clear_backprops(model)

    ### for a given unlabeled example,
    ### try both labels and see which gradient produced is closer to an existing gradient
    def get_grad_comparison(given_label_to_grad, train_label_to_grads):
        label_to_max_sim = {}
        for label, train_grads in train_label_to_grads.items():
            grad_from_given_label = given_label_to_grad[label]
            sim_list = [
                np.dot(grad_from_given_label, train_grad)
                for train_grad in train_grads
            ]
            sim_list_sorted = list(sorted(sim_list))
            max_sim = mean(sim_list_sorted)
            label_to_max_sim[label] = max_sim
        signed_sim_diff = label_to_max_sim[1] - label_to_max_sim[0]
        sorted_label_to_max_sim = list(
            sorted(label_to_max_sim.items(), key=lambda x: x[1]))
        label, max_sim = sorted_label_to_max_sim[-1]
        sim_diff = sorted_label_to_max_sim[-1][-1] - sorted_label_to_max_sim[
            0][-1]
        return label, max_sim, sim_diff, signed_sim_diff

    predicted_labels = []
    sim_diff_list = []

    val_conf_i_list = []
    signed_sim_diff_list = []

    for i in range(ul_inputs.shape[0]):
        given_label_to_grad = {k: v[i] for k, v in ul_grad_np_dict.items()}
        predicted_label, max_sim, sim_diff, signed_sim_diff = get_grad_comparison(
            given_label_to_grad, train_label_to_grads)
        predicted_labels.append(predicted_label)
        sim_diff_list.append(sim_diff)

        val_conf_i_list.append(float(val_outputs[i, 0]))
        signed_sim_diff_list.append(signed_sim_diff)

    utils_common.plot_jasons_scatterplot(val_conf_i_list, signed_sim_diff_list,
                                         'plots/conf_and_grad_sim_500.png',
                                         'Confidence', 'Grad Sim - Mean',
                                         'Confidence and Grad Sim - Mean')

    predicted_labels = np.asarray(predicted_labels)
    acc = accuracy_score(ul_y, predicted_labels)
    sim_diff_threshold_idx = int(len(sim_diff_list) / 10)
    sim_diff_threshold = list(sorted(sim_diff_list))[-sim_diff_threshold_idx]

    confident_predicted_labels = [
        predicted_labels[i] for i in range(len(sim_diff_list))
        if sim_diff_list[i] >= sim_diff_threshold
    ]
    confident_ul_y = [
        ul_y[i] for i in range(len(sim_diff_list))
        if sim_diff_list[i] >= sim_diff_threshold
    ]
    conf_acc = accuracy_score(confident_ul_y, confident_predicted_labels)

    gc.collect()
    return acc, conf_acc, mean(val_acc_list[-5:]), mean(conf_acc_list[-5:])
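The core idea above is to assign each unlabeled example the candidate label whose induced per-example gradient agrees best (by dot product) with gradients from labeled examples of that class. A toy sketch of that comparison for a tiny linear model; autograd_hacks and utils_grad are project-specific, so plain torch.autograd.grad over single examples stands in here:

import torch
import torch.nn as nn

torch.manual_seed(0)
model = nn.Linear(8, 2)
criterion = nn.CrossEntropyLoss()

def example_grad(x, y):
    # Flattened, L2-normalized gradient of the loss on a single example.
    loss = criterion(model(x.unsqueeze(0)), y.unsqueeze(0))
    grads = torch.autograd.grad(loss, list(model.parameters()))
    flat = torch.cat([g.reshape(-1) for g in grads])
    return flat / flat.norm()

# A few labeled examples per class, plus one "unlabeled" example.
labeled_x = torch.randn(6, 8)
labeled_y = torch.tensor([0, 0, 0, 1, 1, 1])
ul_x = torch.randn(8)

label_to_grads = {
    c: [example_grad(x, y) for x, y in zip(labeled_x, labeled_y) if int(y) == c]
    for c in (0, 1)
}

# Try each candidate label; keep the one whose gradient is most similar on average.
scores = {}
for candidate in (0, 1):
    g = example_grad(ul_x, torch.tensor(candidate))
    sims = [float(g @ t) for t in label_to_grads[candidate]]
    scores[candidate] = sum(sims) / len(sims)
print("gradient-similarity prediction:", max(scores, key=scores.get), scores)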
def train_mlp_checkpoint(
    train_txt_path,
    train_embedding_path,
    test_txt_path,
    test_embedding_path,
    num_classes,
    train_subset,
    resume_checkpoint_path,
    num_epochs,
    seed_num,
    criterion,
    checkpoint_folder,
    val_minibatch_size=256,
):

    torch.manual_seed(seed_num)
    np.random.seed(seed_num)

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    test_x, test_y = utils_processing.get_x_y(test_txt_path,
                                              test_embedding_path)
    if train_subset:
        train_x, ul_x, train_y, ul_y = train_test_split(
            train_x,
            train_y,
            train_size=train_subset,
            random_state=42,
            stratify=train_y)

    print(train_x.shape, train_y.shape, ul_x.shape, ul_y.shape, test_x.shape,
          test_y.shape)

    model = Net(num_classes=num_classes)
    optimizer = optim.Adam(
        params=model.parameters(), lr=0.001,
        weight_decay=0.05)  # works even for large learning rates
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                 gamma=0.9)

    # if resume_checkpoint_path:
    #     ckpt = torch.load(f=resume_checkpoint_path)
    #     model.load_state_dict(state_dict=ckpt["model_state_dict"])
    #     optimizer.load_state_dict(state_dict=ckpt["optimizer_state_dict"])
    #     scheduler.load_state_dict(state_dict=ckpt["scheduler_state_dict"])
    #     print(f"loaded from {resume_checkpoint_path}")

    autograd_hacks.add_hooks(model)
    model.train(mode=True)

    ######## training ########
    train_inputs = torch.from_numpy(train_x.astype(np.float32))
    train_labels = torch.from_numpy(train_y.astype(np.int64))
    optimizer.zero_grad()
    train_gradients = {
        gt_label: {given_label: []
                   for given_label in range(num_classes)}
        for gt_label in range(num_classes)
    }

    for _ in range(10):

        with torch.set_grad_enabled(mode=True):
            train_outputs = model(train_inputs)
            __, train_preds = torch.max(train_outputs, dim=1)
            train_loss = criterion(input=train_outputs, target=train_labels)
            train_loss.backward(retain_graph=True)
            autograd_hacks.compute_grad1(model)

            optimizer.step()

            idx_to_grad = utils_grad.get_idx_to_grad(model,
                                                     global_normalize=True)
            print(idx_to_grad[1][:5])
            for idx, gradient in idx_to_grad.items():
                gt_label = int(train_labels[idx])
                given_label = gt_label
                train_gradients[gt_label][given_label].append(gradient)

            autograd_hacks.clear_backprops(model)
            # print('\n', idx_to_grad[1][-5:])

        # ######## ul ########
        ul_inputs = torch.from_numpy(ul_x.astype(np.float32))
        ul_labels = torch.from_numpy(ul_y.astype(np.int64))
        optimizer.zero_grad()
        ul_gradients = {
            gt_label: {given_label: []
                       for given_label in range(num_classes)}
            for gt_label in range(num_classes)
        }

        for given_label in range(num_classes):

            # Forward and backpropagation.
            with torch.set_grad_enabled(mode=True):
                ul_outputs = model(ul_inputs)
                __, ul_preds = torch.max(ul_outputs, dim=1)
                given_ul_labels = torch.from_numpy(
                    (np.zeros(ul_labels.shape) + given_label).astype(np.int64))
                ul_loss = criterion(input=ul_outputs, target=given_ul_labels)
                ul_loss.backward(retain_graph=True)
                autograd_hacks.compute_grad1(model)

                idx_to_grad = utils_grad.get_idx_to_grad(model)
                print(idx_to_grad[1][:5])
                for idx, gradient in idx_to_grad.items():
                    gt_label = int(ul_labels[idx])
                    ul_gradients[gt_label][given_label].append(gradient)

                # optimizer.step()
                autograd_hacks.clear_backprops(model)

    # for given_label in range(num_classes):
    #     gt_grad_list = train_gradients[given_label][given_label]

    #     correct_gt_label = given_label
    #     correct_candidate_grad_list = ul_gradients[correct_gt_label][given_label]

    #     wrong_gt_label = 0 if given_label == 1 else 1
    #     wrong_candidate_grad_list = ul_gradients[wrong_gt_label][given_label]

    #     correct_agreement_list = utils_grad.get_agreement_list_avg(gt_grad_list, correct_candidate_grad_list)
    #     wrong_agreement_list = utils_grad.get_agreement_list_avg(gt_grad_list, wrong_candidate_grad_list)

    #     output_file = Path(f"plots/agreement_dist_avg_given={given_label}.png")
    #     utils_grad.plot_jasons_histogram(correct_agreement_list, wrong_agreement_list, output_file)
    #     print(gt_label, given_label, mean(correct_agreement_list), mean(wrong_agreement_list))

    ####### validation ########

        minibatch_size = 128
        num_minibatches_val = int(test_x.shape[0] / minibatch_size)
        model.train(mode=False)
        val_running_loss, val_running_corrects = 0.0, 0

        for minibatch_num in range(num_minibatches_val):

            start_idx = minibatch_num * minibatch_size
            end_idx = start_idx + minibatch_size
            val_inputs = torch.from_numpy(test_x[start_idx:end_idx].astype(
                np.float32))
            val_labels = torch.from_numpy(test_y[start_idx:end_idx].astype(
                np.int64))

            # Feed forward.
            with torch.set_grad_enabled(mode=False):
                val_outputs = model(val_inputs)
                _, val_preds = torch.max(val_outputs, dim=1)
                val_loss = criterion(input=val_outputs, target=val_labels)
            val_running_loss += val_loss.item() * val_inputs.size(0)
            val_running_corrects += int(
                torch.sum(val_preds == val_labels.data, dtype=torch.double))

        val_loss = val_running_loss / (num_minibatches_val * minibatch_size)
        val_acc = val_running_corrects / (num_minibatches_val * minibatch_size)

        print(f"{val_loss:.3f},{val_acc:.3f}\n")

    gc.collect()
Example #6
def train_mlp(
    train_txt_path,
    train_embedding_path,
    test_txt_path,
    test_embedding_path,
    num_classes,
    train_size,
    seed_num,
    minibatch_size,
    num_epochs,
):

    torch.manual_seed(seed_num)
    np.random.seed(seed_num)

    # get all the data
    train_x, train_y, _ = utils_processing.get_split_train_x_y(
        train_txt_path, train_size, seed_num)
    test_x, test_y = utils_processing.get_x_y(test_txt_path,
                                              test_embedding_path)
    # print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

    model = MLP(num_classes=num_classes)
    optimizer = optim.Adam(
        params=model.parameters(), lr=0.001,
        weight_decay=0.05)  # works even for large learning rates
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                 gamma=0.95)

    num_minibatches_train = int(train_x.shape[0] / minibatch_size)
    val_acc_list = []

    ######## training loop ########
    for epoch in range(1, num_epochs + 1):

        ######## training ########
        model.train(mode=True)

        train_x, train_y = shuffle(train_x, train_y, random_state=seed_num)

        for minibatch_num in range(num_minibatches_train):

            start_idx = minibatch_num * minibatch_size
            end_idx = start_idx + minibatch_size
            train_inputs = torch.from_numpy(train_x[start_idx:end_idx].astype(
                np.float32))
            train_labels = torch.from_numpy(train_y[start_idx:end_idx].astype(
                np.int64))

            # Forward and backpropagation.
            with torch.set_grad_enabled(mode=True):

                train_outputs = model(train_inputs)
                train_conf, train_preds = torch.max(train_outputs, dim=1)
                train_loss = nn.CrossEntropyLoss()(input=train_outputs,
                                                   target=train_labels)
                train_loss.backward()
                optimizer.step()
                optimizer.zero_grad()

        ######## validation ########
        model.train(mode=False)

        val_inputs = torch.from_numpy(test_x.astype(np.float32))
        val_labels = torch.from_numpy(test_y.astype(np.int64))

        # Feed forward.
        with torch.set_grad_enabled(mode=False):
            val_outputs = model(val_inputs)
            val_confs, val_preds = torch.max(val_outputs, dim=1)
            val_loss = nn.CrossEntropyLoss()(input=val_outputs,
                                             target=val_labels)
            val_loss_print = val_loss / val_inputs.shape[0]
            val_acc = accuracy_score(test_y, val_preds)
            val_acc_list.append(val_acc)

    gc.collect()
    return mean(val_acc_list[-5:])
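All of the snippets on this page build the same Adam-plus-ExponentialLR setup; a minimal runnable sketch of that combination on a throwaway model (note that scheduler.step() must be called once per epoch for the decay to actually apply):

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.05)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

x, y = torch.randn(16, 4), torch.randint(0, 2, (16,))
for epoch in range(3):
    optimizer.zero_grad()
    loss = nn.CrossEntropyLoss()(model(x), y)
    loss.backward()
    optimizer.step()
    scheduler.step()  # decay the learning rate by gamma after each epoch
    print(epoch, scheduler.get_last_lr())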
def train_mlp(
    train_txt_path,
    train_embedding_path,
    test_txt_path,
    test_embedding_path,
    flip_ratio,
    num_classes,
    top_k,
    annealling,
    seed_num,
    performance_writer,
    ranking_writer,
    minibatch_size,
    num_epochs,
    criterion,
):

    torch.manual_seed(seed_num)
    np.random.seed(seed_num)

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    test_x, test_y = utils_processing.get_x_y(test_txt_path,
                                              test_embedding_path)

    # print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

    model = Net(num_classes=num_classes)
    optimizer = optim.Adam(
        params=model.parameters(), lr=0.001,
        weight_decay=0.05)  # works even for large learning rates
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer,
                                                 gamma=0.9)
    autograd_hacks.add_hooks(model)
    top_group_list, bottom_group_list = [], []

    num_minibatches_train = int(train_x.shape[0] / minibatch_size)
    num_minibatches_val = int(test_x.shape[0] / minibatch_size)
    val_acc_list = []

    for epoch in tqdm(range(1, num_epochs + 1)):

        ######## training ########
        model.train(mode=True)
        train_running_loss, train_running_corrects = 0.0, 0

        # train_y_orig should hold the original (pre-flip) labels; label flipping
        # is not shown in this snippet, so a copy of train_y stands in for it.
        train_y_orig = np.copy(train_y)
        train_x, train_y, train_y_orig = shuffle(train_x,
                                                 train_y,
                                                 train_y_orig,
                                                 random_state=seed_num)

        for minibatch_num in range(num_minibatches_train):

            start_idx = minibatch_num * minibatch_size
            end_idx = start_idx + minibatch_size
            train_inputs = torch.from_numpy(train_x[start_idx:end_idx].astype(
                np.float32))
            train_labels = torch.from_numpy(train_y[start_idx:end_idx].astype(
                np.int64))
            idx_to_gt = utils_mlp_helper.get_idx_to_gt(train_y_orig, start_idx,
                                                       minibatch_size)
            flipped_indexes = utils_mlp_helper.get_flipped_indexes(
                train_y, train_y_orig, minibatch_num, minibatch_size)
            optimizer.zero_grad()

            # Forward and backpropagation.
            with torch.set_grad_enabled(mode=True):
                train_outputs = model(train_inputs)
                __, train_preds = torch.max(train_outputs, dim=1)
                train_loss = criterion(input=train_outputs,
                                       target=train_labels)
                train_loss.backward(retain_graph=True)
                autograd_hacks.compute_grad1(model)

                #update with weighted gradient
                if True:

                    idx_to_grad = utils_grad.get_idx_to_grad(model)
                    print(len(idx_to_grad))

                optimizer.step()
                autograd_hacks.clear_backprops(model)

            train_running_loss += train_loss.item() * train_inputs.size(0)
            train_running_corrects += int(
                torch.sum(train_preds == train_labels.data,
                          dtype=torch.double))

        train_loss = train_running_loss / (num_minibatches_train *
                                           minibatch_size)
        train_acc = train_running_corrects / (num_minibatches_train *
                                              minibatch_size)

        ######## validation ########
        model.train(mode=False)
        val_running_loss, val_running_corrects = 0.0, 0

        for minibatch_num in range(num_minibatches_val):

            start_idx = minibatch_num * minibatch_size
            end_idx = start_idx + minibatch_size
            val_inputs = torch.from_numpy(test_x[start_idx:end_idx].astype(
                np.float32))
            val_labels = torch.from_numpy(test_y[start_idx:end_idx].astype(
                np.int64))

            # Feed forward.
            with torch.set_grad_enabled(mode=False):
                val_outputs = model(val_inputs)
                _, val_preds = torch.max(val_outputs, dim=1)
                val_loss = criterion(input=val_outputs, target=val_labels)
            val_running_loss += val_loss.item() * val_inputs.size(0)
            val_running_corrects += int(
                torch.sum(val_preds == val_labels.data, dtype=torch.double))

        val_loss = val_running_loss / (num_minibatches_val * minibatch_size)
        val_acc = val_running_corrects / (num_minibatches_val * minibatch_size)
        val_acc_list.append(val_acc)

        # performance_writer.write(f"{train_loss:.3f},{train_acc:.3f},{val_loss:.3f},{val_acc:.3f}\n")
        print(
            f"{train_loss:.3f},{train_acc:.3f},{val_loss:.3f},{val_acc:.3f}\n")

    gc.collect()
    return mean(val_acc_list[-5:])
Example #8
def evaluate_svm_big_ablation(
    train_txt_path,
    test_txt_path,
    train_embedding_path,
    test_embedding_path,
    insert_train_txt_path,
    # insert_test_txt_path,
    insert_train_embedding_path,
    # insert_test_embedding_path,
    swap_train_txt_path,
    # swap_test_txt_path,
    swap_train_embedding_path,
    # swap_test_embedding_path,
):

    train_x, train_y = utils_processing.get_x_y(train_txt_path,
                                                train_embedding_path)
    test_x, test_y = utils_processing.get_x_y(test_txt_path,
                                              test_embedding_path)
    insert_train_x, insert_train_y = utils_processing.get_x_y(
        insert_train_txt_path, insert_train_embedding_path)
    # insert_test_x, insert_test_y = utils_processing.get_x_y(insert_test_txt_path, insert_test_embedding_path)
    swap_train_x, swap_train_y = utils_processing.get_x_y(
        swap_train_txt_path, swap_train_embedding_path)
    # swap_test_x, swap_test_y = utils_processing.get_x_y(swap_test_txt_path, swap_test_embedding_path)
    # insertswap_aug_train_x = np.concatenate((insert_train_x, swap_train_x), axis=0)
    # insertswap_aug_train_y = np.concatenate((insert_train_y, swap_train_y), axis=0)

    for aug_train_x, aug_train_y, aug_type in [
        (insert_train_x, insert_train_y, 'delete'),
        (swap_train_x, swap_train_y, 'eda'),
            # (insertswap_aug_train_x, insertswap_aug_train_y, 'insertswap'),
    ]:

        # train_eval_svm( aug_train_x, aug_train_y,
        #                 test_x, test_y,
        #                 insert_test_x, insert_test_y,
        #                 swap_test_x, swap_test_y,
        #                 f"only {aug_type}",
        #                 n_reg_train_x = len(train_x)
        #                 )

        for reg_ratio in [0.5]:  # [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

            combined_train_x, combined_train_y = combine_training_sets(
                train_x,
                train_y,
                aug_train_x,
                aug_train_y,
                reg_ratio=reg_ratio)
            train_eval_svm(
                combined_train_x,
                combined_train_y,
                test_x,
                test_y,
                # insert_test_x, insert_test_y,
                # swap_test_x, swap_test_y,
                f"reg {reg_ratio} + {aug_type}",
                n_reg_train_x=len(train_x))
            gc.collect()
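combine_training_sets is another project helper not shown here; a plausible stand-in that mixes a reg_ratio fraction of the regular set with the augmented set (the exact semantics in the project may differ):

import numpy as np

def combine_training_sets_sketch(train_x, train_y, aug_x, aug_y, reg_ratio=0.5, seed=0):
    # Subsample the regular set to reg_ratio of its size, then append the augmented examples.
    rng = np.random.RandomState(seed)
    keep = rng.choice(len(train_x), size=int(len(train_x) * reg_ratio), replace=False)
    combined_x = np.concatenate([train_x[keep], aug_x], axis=0)
    combined_y = np.concatenate([train_y[keep], aug_y], axis=0)
    return combined_x, combined_y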