def MakePerceptrons(train, test, label):
    """ Create and evaluate different types of perceptrons

    Arguments:
        train   - pandas.DataFrame containing the training data

        test    - pandas.DataFrame containing the testing data

        label   - Name of the label column in the DataFrames
    
    Returns:
        (List of Perceptron, List of AveragedPerceptron,
         List of KernelizedPerceptron) generated for the data
    """

    true = test[label].values
    pts = []
    apts = []
    kpts = []

    for i in [1,5,10,25,50]:
        print "i = " + str(i)
        print "PT working"
        ptron = pt.Perceptron(train, i)
        pred = ptron.predict(test, label)
        acc = an.accuracy(pred, true)
        pts.append((i, ptron, acc))
        print "PT " + str(acc)

        print "APT working"
        ap = apt.AveragedPerceptron(train, i)
        pred = ap.predict(test, label)
        acc = an.accuracy(pred, true)
        apts.append((i, ap, acc))
        print "APT " + str(acc)

        print "KPT working"
        kp = kpt.KernelizedPerceptron(train, label, i, 2)
        pred = kp.predict(test, label)
        acc = an.accuracy(pred, true)
        kpts.append((i, kp, acc))
        print "KPT " + str(acc)

    p.dump(pts, open('classifiers/perceptrons.p','wb'))
    p.dump(apts, open('classifiers/averaged_perceptrons.p','wb'))
    p.dump(kpts, open('classifiers/kernelized_perceptrons.p','wb'))

    return pts, apts, kpts
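
# A minimal usage sketch (not part of the original example), assuming the
# module aliases used above: pt/apt/kpt for the perceptron implementations,
# an for the analysis helpers, p for pickle, plus the l.CreateDataFrames
# loader shown in a later example. File paths and the 'Winner' label are
# illustrative only.
def DemoMakePerceptrons():
    train, test, tune = l.CreateDataFrames('data/Build229Data.txt', 'data/FeatureNames.txt')
    pts, apts, kpts = MakePerceptrons(train, test, 'Winner')
    # Each list holds (iterations, classifier, accuracy) tuples; report the best plain perceptron.
    best_iters, best_model, best_acc = max(pts, key=lambda entry: entry[2])
    print("Best plain perceptron: " + str(best_iters) + " iterations, accuracy " + str(best_acc))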
Example #2
    def __fit_grad_boost(self):
        print("\tFitting a gradient boosting machine")

        if self.optimize:
            print("\t\tFinding optimal hyperparameter values")

            grid = {"n_estimators": [250, 500, 750, 1000],
                    "learning_rate": [1, .1, .05, .01],
                    "max_depth": [3, 8, 12, 15],
                    }
            models = []
            for n in grid["n_estimators"]:
                for lr in grid["learning_rate"]:
                    for d in grid["max_depth"]:
                        grad_boost = GradientBoostingClassifier(n_estimators=n, learning_rate=lr, max_depth=d)
                        grad_boost.fit(self.scaler.transform(self.x_train), self.y_train)

                        y_pred = grad_boost.predict(self.scaler.transform(self.x_test))

                        models.append({"model": grad_boost,
                                       "accuracy": analysis.accuracy(y_pred, self.y_test),
                                       "hyperparameters": {"n_estimators": n, "learning_rate": lr, "max_depth": d}})

            best_model = max(models, key=lambda model: model["accuracy"])

            hyperparam_vals = best_model["hyperparameters"]
            print("\t\t\tNumber of estimators: " + str(hyperparam_vals["n_estimators"]))
            print("\t\t\tLearning rate: " + str(hyperparam_vals["learning_rate"]))
            print("\t\t\tMax depth: " + str(hyperparam_vals["max_depth"]))

            return best_model["model"]
        else:
            grad_boost = GradientBoostingClassifier(n_estimators=1000, learning_rate=.01, max_depth=8)
            grad_boost.fit(self.scaler.transform(self.x_train), self.y_train)

            return grad_boost
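
# A minimal sketch (not part of the original example) of the same exhaustive
# search using scikit-learn's GridSearchCV instead of hand-written loops.
# Note the difference: GridSearchCV scores candidates by cross-validation on
# the training data, whereas the method above scores them on the held-out
# test set, so the selected model can differ.
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV

def fit_grad_boost_cv(x_train_scaled, y_train):
    param_grid = {"n_estimators": [250, 500, 750, 1000],
                  "learning_rate": [1, .1, .05, .01],
                  "max_depth": [3, 8, 12, 15]}
    search = GridSearchCV(GradientBoostingClassifier(), param_grid, scoring="accuracy", cv=3)
    search.fit(x_train_scaled, y_train)
    print("Best hyperparameters:", search.best_params_)
    return search.best_estimator_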
Example #3
    def __fit_rand_forest(self):
        print("\tFitting a random forest algorithm")

        if self.optimize:
            print("\t\tFinding optimal hyperparameter values")

            grid = {"n_estimators": [250, 500, 750, 1000]}
            models = []
            for n in grid["n_estimators"]:
                rand_forest = RandomForestClassifier(n_estimators=n)
                rand_forest.fit(self.scaler.transform(self.x_train), self.y_train)

                y_pred = rand_forest.predict(self.scaler.transform(self.x_test))

                models.append({"model": rand_forest,
                               "accuracy": analysis.accuracy(y_pred, self.y_test),
                               "hyperparameters": {"n_estimators": n}})

            best_model = max(models, key=lambda model: model["accuracy"])

            print("\t\t\tNumber of estimators: " + str(best_model["hyperparameters"]["n_estimators"]))

            return best_model["model"]
        else:
            rand_forest = RandomForestClassifier(n_estimators=500)
            rand_forest.fit(self.scaler.transform(self.x_train), self.y_train)

            return rand_forest
Example #4
def run_epoch_simple(model, optimizer, loader, loss_meter, acc_meter,
                     criterion, args, is_training):
    """
    A -> Y: Predicting class labels using only attributes with MLP
    """
    if is_training:
        model.train()
    else:
        model.eval()
    for _, data in enumerate(loader):
        inputs, labels = data
        if isinstance(inputs, list):
            # inputs = [i.long() for i in inputs]
            inputs = torch.stack(inputs).t().float()
        inputs = torch.flatten(inputs, start_dim=1).float()
        inputs_var = torch.autograd.Variable(inputs)
        inputs_var = inputs_var.cuda() if torch.cuda.is_available(
        ) else inputs_var
        labels_var = torch.autograd.Variable(labels)
        labels_var = labels_var.cuda() if torch.cuda.is_available(
        ) else labels_var

        outputs = model(inputs_var)
        loss = criterion(outputs, labels_var)
        acc = accuracy(outputs, labels, topk=(1, ))
        loss_meter.update(loss.item(), inputs.size(0))
        acc_meter.update(acc[0], inputs.size(0))

        if is_training:
            optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()
            optimizer.step()  # optimizer step to update parameters
    return loss_meter, acc_meter
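
# A minimal driver sketch (not part of the original example) showing how
# run_epoch_simple might be called, assuming an AverageMeter class with an
# .avg attribute (as used by the eval() example later in this listing) and
# standard torch DataLoaders; all names here are illustrative.
def train_simple(model, train_loader, val_loader, criterion, args, n_epochs=10):
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    for epoch in range(n_epochs):
        train_loss, train_acc = AverageMeter(), AverageMeter()
        run_epoch_simple(model, optimizer, train_loader, train_loss, train_acc,
                         criterion, args, is_training=True)
        val_loss, val_acc = AverageMeter(), AverageMeter()
        run_epoch_simple(model, optimizer, val_loader, val_loss, val_acc,
                         criterion, args, is_training=False)
        print('Epoch %d: train loss %.4f, acc %.4f | val loss %.4f, acc %.4f'
              % (epoch, train_loss.avg, train_acc.avg, val_loss.avg, val_acc.avg))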
def LoadAndTreeEval():
    """ Load the data and create a decision tree
    
    Returns:
        DTree generated for the data
    """

    train, test, tune = l.CreateDataFrames('data/Build229Data.txt', 'data/FeatureNames.txt')
    tree = dt.get_tree(train, 'Winner', 10)
    p.dump(tree, open('classifiers/dtree03.p','wb'))
    pred = tree.predict(test)
    true = test['Winner'].values
    print(an.accuracy(pred, true))
    print(an.f1_score(pred, true))

    return tree
Example #6
    def __rolling_window_test(self, data, window_size, test_size, step=1):
        print("\t\tRolling Window Validation Results:")

        # TODO: Hide the STDOUT of pp.split() and __fit_model(), and prevent __fit_model() from saving a .pkl on each run

        windows = [data.loc[idx * step:(idx * step) + round(window_size * len(data))] for idx in range(int((len(data) - round(window_size * len(data))) / step))]
        decoupled_windows = [pp.split(window, test_size=test_size, balanced=False) for window in windows] # TODO: Do a nonrandom split to respect the temporal order of observations

        results = {"accuracy": [], "precision": [], "specificity": [], "sensitivity": []}
        for feature_set in decoupled_windows:
            self.x_train, self.x_test, self.y_train, self.y_test = feature_set

            self.scaler = StandardScaler()
            self.scaler.fit(self.x_train)

            self.__fit_model()

            self.y_pred = self.model.predict(self.scaler.transform(self.x_test))

            results["accuracy"].append(analysis.accuracy(self.y_pred, self.y_test))
            results["precision"].append(analysis.precision(self.y_pred, self.y_test))
            results["specificity"].append(analysis.specificity(self.y_pred, self.y_test))
            results["sensitivity"].append(analysis.sensitivity(self.y_pred, self.y_test))

        print("\t\t\tAccuracy: ", str(sum(results["accuracy"]) / float(len(results["accuracy"]))))
        print("\t\t\tPrecision: ", str(sum(results["precision"]) / float(len(results["precision"]))))
        print("\t\t\tSpecificity: ", str(sum(results["specificity"]) / float(len(results["specificity"]))))
        print("\t\t\tSensitivity: ", str(sum(results["sensitivity"]) / float(len(results["sensitivity"]))))
Example #7
    def __holdout_test(self):
        """Calculates the model's classification accuracy, sensitivity, precision, and specificity."""
        print("\t\tHoldout Validation Results:")

        print("\t\t\tAccuracy: ", analysis.accuracy(self.y_pred, self.y_test))
        print("\t\t\tPrecision: ", analysis.precision(self.y_pred, self.y_test))
        print("\t\t\tSpecificity: ", analysis.specificity(self.y_pred, self.y_test))
        print("\t\t\tSensitivity: ", analysis.sensitivity(self.y_pred, self.y_test))
Example #8
    def __fit_log_reg(self):
        print("\tFitting a logistic regression algorithm")

        if self.optimize:
            print("\t\tFinding optimal hyperparameter values")

            grid = {
                "penalty": ["l1", "l2"],
                "tol": [.00001, .0001, .001, .01, .1],
                "C": [.01, .1, 1.0, 10, 100, 1000],
                "max_iter": [100, 150, 175, 200, 300, 500]
            }
            models = []
            for p in grid["penalty"]:
                for t in grid["tol"]:
                    for c in grid["C"]:
                        for i in grid["max_iter"]:
                            log_reg = LogisticRegression(penalty=p,
                                                         tol=t,
                                                         C=c,
                                                         max_iter=i)
                            log_reg.fit(self.scaler.transform(self.x_train),
                                        self.y_train)

                            y_pred = log_reg.predict(
                                self.scaler.transform(self.x_test))

                            models.append({
                                "model":
                                log_reg,
                                "accuracy":
                                analysis.accuracy(y_pred, self.y_test),
                                "hyperparameters": {
                                    "penalty": p,
                                    "tol": t,
                                    "C": c,
                                    "max_iter": i
                                }
                            })

            best_model = max(models, key=lambda model: model["accuracy"])

            hyperparam_vals = best_model["hyperparameters"]
            print("\t\t\tPenalization norm: " + hyperparam_vals["penalty"])
            print("\t\t\tTolerance: " + str(hyperparam_vals["tol"]))
            print("\t\t\tRegularization: " + str(hyperparam_vals["C"]))
            print("\t\t\tMax iterations: " + str(hyperparam_vals["max_iter"]))

            return best_model["model"]
        else:
            log_reg = LogisticRegression(penalty="l2",
                                         tol=.01,
                                         C=10,
                                         max_iter=100)
            log_reg.fit(self.scaler.transform(self.x_train), self.y_train)

            return log_reg
    def __holdout_test(self):
        """Calculates the model's classification accuracy, recall, precision, and specificity."""
        print("\t\tHoldout Validation Results:")

        print("\t\t\tAccuracy: ", analysis.accuracy(self.y_test, self.y_pred))
        print("\t\t\tPrecision: ",
              analysis.precision(self.y_test, self.y_pred, weighted_avg=True))
        print("\t\t\tRecall: ",
              analysis.recall(self.y_test, self.y_pred, weighted_avg=True))
        print("\t\t\tF1: ", analysis.f1(self.y_test, self.y_pred))
def predict(dataset, nb_teachers, teacher_id):
    if dataset == 'mnist':
        train_data, train_labels, test_data, test_labels = Input.load_mnist()
    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)
    # Load the teacher model and evaluate it on the test data
    teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)
    precision = analysis.accuracy(teacher_preds, test_labels)
    print('Precision of teacher after training: ' + str(precision))
Example #11
def test_accuracy_with_partial_overlap():
    o_mask = np.zeros((25, 25, 1))
    o_mask[5:20, 5:20, 0] = 1

    i_mask = np.zeros((25, 25, 1))
    i_mask[10:15, 10:15, 0] = 1

    pred_i_mask = np.zeros((25, 25, 1))
    pred_i_mask[9:14, 10:15, 0] = 1

    score = analysis.accuracy(o_mask, i_mask, pred_i_mask)
    assert math.isclose(score, .955, rel_tol=1e-3, abs_tol=0.0)
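
# The expected value in this test is consistent with pixel-wise accuracy
# restricted to the region where o_mask == 1 (an assumption, since
# analysis.accuracy itself is not shown): the outer mask covers 15 * 15 = 225
# pixels, and pred_i_mask disagrees with i_mask on two rows of 5 pixels each
# (false positives at row 9, false negatives at row 14), giving
# (225 - 10) / 225 ≈ 0.9556. A sketch of such a masked accuracy:
import numpy as np

def masked_accuracy(outer_mask, true_mask, pred_mask):
    region = outer_mask == 1
    return float(np.mean(true_mask[region] == pred_mask[region]))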
Example #12
def test_accuracy_with_complete_overlap():
    o_mask = np.zeros((25, 25, 1))
    o_mask[5:20, 5:20, 0] = 1

    i_mask = np.zeros((25, 25, 1))
    i_mask[10:15, 10:15, 0] = 1

    pred_i_mask = np.zeros((25, 25, 1))
    pred_i_mask[i_mask == 1] = 1

    score = analysis.accuracy(o_mask, i_mask, pred_i_mask)
    assert score == 1.0
Example #13
def test_accuracy_with_no_overlap():
    o_mask = np.zeros((25, 25, 1))
    o_mask[5:20, 5:20, 0] = 1

    i_mask = np.zeros((25, 25, 1))
    i_mask[15:20, 15:20, 0] = 1

    pred_i_mask = np.ones((25, 25, 1))
    pred_i_mask[i_mask == 1] = 0

    score = analysis.accuracy(o_mask, i_mask, pred_i_mask)
    assert score == 0.0
    def __fit_support_vector_classifier(self):
        print("\tFitting a support vector classifier")

        if self.optimize:
            print("\t\tFinding optimal hyperparameter values")

            grid = {
                "kernel": ["linear", "poly", "rbf", "sigmoid", "precomputed"],
                "probability": [True, False],
                "tol": [1e-5, 1e-4, 1e-3],
            }
            models = []
            for ker in grid["kernel"]:
                for prob in grid["probability"]:
                    for tolerance in grid["tol"]:
                        svc = SupportVectorClassifier(kernel=ker,
                                                      probability=prob,
                                                      tol=tolerance)
                        svc.fit(self.x_train, self.y_train)

                        y_pred = svc.predict(self.x_test)

                        models.append({
                            "model":
                            svc,
                            "accuracy":
                            analysis.accuracy(y_pred, self.y_test),
                            "hyperparameters": {
                                "kernel": ker,
                                "probability": prob,
                                "tol": tolerance
                            }
                        })

            best_model = max(models, key=lambda model: model["accuracy"])

            hyperparam_vals = best_model["hyperparameters"]
            print("\t\t\tNumber of estimators: " +
                  str(hyperparam_vals["n_estimators"]))
            print("\t\t\tLearning rate: " +
                  str(hyperparam_vals["learning_rate"]))
            print("\t\t\tMax depth: " + str(hyperparam_vals["max_depth"]))

            return best_model["model"]
        else:
            svc = SupportVectorClassifier(kernel="poly",
                                          probability=True,
                                          tol=1e-5,
                                          verbose=1)
            svc.fit(self.x_train, self.y_train)

            return svc
    def __rolling_window_test(self, data, window_size, test_size, step=1):
        print("\t\tRolling Window Validation Results:")

        # TODO: Hide the STDOUT of pp.split() and __fit_model(), and prevent __fit_model() from saving a .pkl on each run

        windows = [
            data.loc[idx * step:(idx * step) + round(window_size * len(data))]
            for idx in range(
                int((len(data) - round(window_size * len(data))) / step))
        ]
        decoupled_windows = [
            pp.split(window, test_size=test_size, balanced=False)
            for window in windows
        ]

        results = {"accuracy": [], "precision": [], "f1": [], "recall": []}
        for feature_set in decoupled_windows:
            self.x_train, self.x_test, self.y_train, self.y_test = feature_set

            self.__fit_model()

            self.y_pred = self.model.predict(self.x_test)

            results["accuracy"].append(
                analysis.accuracy(self.y_test, self.y_pred))
            results["precision"].append(
                analysis.precision(self.y_test, self.y_pred,
                                   weighted_avg=True))
            results["recall"].append(
                analysis.recall(self.y_test, self.y_pred, weighted_avg=True))
            results["f1"].append(analysis.f1(self.y_test, self.y_pred))

        print("\t\t\tAccuracy: ",
              str(sum(results["accuracy"]) / float(len(results["accuracy"]))))
        print(
            "\t\t\tPrecision: ",
            str(sum(results["precision"]) / float(len(results["precision"]))))
        print("\t\t\tRecall: ",
              str(sum(results["recall"]) / float(len(results["recall"]))))
        print("\t\t\tF1: ",
              str(sum(results["f1"]) / float(len(results["f1"]))))
def main():
    parser = argparse.ArgumentParser(
        description='Evaluate a model on a test file.')
    parser.add_argument('--model', help='Path to the model file.')
    parser.add_argument('--test_file', help='Path to the test file.')
    args = parser.parse_args()

    # Load the test data.
    xtest, ytest = dataproc.load_data(args.test_file)
    ytest = dataproc.to_one_hot(ytest, int(1 + np.max(ytest[0, :])))

    # Load the mlp.
    nn = mlp.MLP.load_mlp(args.model)

    # Apply the model.
    yhat = nn.eval(xtest)

    # Print the stats.
    print('mse:  %f' % (analysis.mse(ytest, yhat)))
    print('mce:  %f' % (analysis.mce(ytest, yhat)))
    print('acc:  %f' % (analysis.accuracy(ytest, yhat) * 100))
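
# analysis.mse/mce/accuracy are not shown in this listing; a minimal sketch of
# an accuracy helper consistent with the (n_classes, n_examples) one-hot layout
# used here (an assumption about the actual implementation) compares arg-max
# class indices column by column:
import numpy as np

def one_hot_accuracy(y_true, y_hat):
    # y_true, y_hat: arrays of shape (n_classes, n_examples)
    return float(np.mean(np.argmax(y_true, axis=0) == np.argmax(y_hat, axis=0)))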
def train_teacher(dataset, nb_teachers, teacher_id):
    """
    Train the teacher model with the given ID.
    :param dataset: dataset name
    :param nb_teachers: number of teachers
    :param teacher_id: teacher ID
    :return:
    """
    # Create the required directories if they do not already exist
    assert Input.create_dir_if_needed(FLAGS.data_dir)
    assert Input.create_dir_if_needed(FLAGS.train_dir)
    # Load the requested dataset
    if dataset == 'mnist':
        train_data, train_labels, test_data, test_labels = Input.load_mnist()
    else:
        print("没有对应的数据集")
        return False

    # Assign this teacher its partition of the training data
    data, labels = Input.partition_dataset(train_data, train_labels,
                                           nb_teachers, teacher_id)
    print("Length of training data: " + str(len(labels)))

    filename = str(nb_teachers) + '_teachers_' + str(teacher_id) + '.ckpt'
    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + filename

    # Start training and save the trained model
    assert deep_cnn.train(data, labels, ckpt_path)

    # Build the path of the final trained checkpoint
    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

    # Load the teacher model and evaluate it on the test data
    teacher_preds = deep_cnn.softmax_preds(test_data, ckpt_path_final)
    # Compute the teacher model's accuracy
    precision = analysis.accuracy(teacher_preds, test_labels)
    print('Precision of teacher after training: ' + str(precision))

    return True
Example #18
def train_student(dataset, nb_teachers):

    assert Input.create_dir_if_needed(FLAGS.train_dir)

    # Prepare the student model's data
    student_dataset = prepare_student_data(dataset,nb_teachers,save=True)
    # Unpack the student data
    stdnt_data, stdnt_labels, stdnt_test_data, stdnt_test_labels = student_dataset

    ckpt_path = FLAGS.train_dir + '/' + str(dataset) + '_' + str(nb_teachers) + '_student.ckpt'
    # Train the student model
    assert deep_cnn.train(stdnt_data, stdnt_labels, ckpt_path)

    ckpt_path_final = ckpt_path + '-' + str(FLAGS.max_steps - 1)

    # Predict on the student test set
    student_preds = deep_cnn.softmax_preds(stdnt_test_data,ckpt_path_final)

    precision = analysis.accuracy(student_preds,stdnt_test_labels)
    print('Precision of student after training: ' + str(precision))

    return True
Example #19
def prepare_student_data(dataset, nb_teachers, save):
    """
    Prepare the student model's data; the aggregation step is modified here.
    """
    assert Input.create_dir_if_needed(FLAGS.train_dir)

    if dataset == 'mnist':
        test_data,test_labels = Input.load_mnist(test_only=True)
    else:
        return False

    assert FLAGS.stdnt_share < len(test_data)
    stdnt_data = test_data[:FLAGS.stdnt_share]
    # The result is indexed by teacher id and unlabeled sample, holding the probability of each label
    teacher_preds = ensemble_preds(dataset,nb_teachers,stdnt_data)
    # Aggregate the teachers' predictions; unreliable samples are labeled -1
    student_labels = Aggregation.noisy_max_plus(teacher_preds,FLAGS.lap_scale,reliability=0.1,gap=10)
    ans_labels = test_labels[:FLAGS.stdnt_share]
    indexs = [i for i in range(len(student_labels)) if student_labels[i] == -1]
    print("the -1 indexs are")
    print(indexs)
    # Remove the corresponding elements
    student_data = test_data[:FLAGS.stdnt_share]
    student_data = np.delete(student_data, indexs, axis=0)
    print("len of student_data is " + str(len(student_data)))
    ans_labels = np.delete(ans_labels, indexs)
    student_labels = np.delete(student_labels, indexs)
    print("len of student_labels is " + str(len(student_labels)))

    ac_ag_labels = analysis.accuracy(student_labels,ans_labels)
    print("Accuracy of the aggregated labels: " + str(ac_ag_labels))

    stdnt_test_data = test_data[FLAGS.stdnt_share:]
    stdnt_test_labels = test_labels[FLAGS.stdnt_share:]

    return student_data, student_labels, stdnt_test_data, stdnt_test_labels
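
# Aggregation.noisy_max_plus is not included in this listing; the following is
# an illustrative reconstruction (not the actual implementation) of PATE-style
# noisy-max aggregation with an abstention rule: add Laplace noise to the
# per-class vote counts and return -1 when the noisy margin between the top
# two classes is below `gap`. The `reliability` argument of the real function
# is not modeled in this sketch.
import numpy as np

def noisy_max_plus_sketch(teacher_preds, lap_scale, gap=10):
    # teacher_preds: array of shape (n_teachers, n_samples, n_classes) of per-class probabilities
    votes = np.argmax(teacher_preds, axis=2)          # each teacher's hard label per sample
    n_teachers, n_samples = votes.shape
    n_classes = teacher_preds.shape[2]
    labels = np.zeros(n_samples, dtype=int)
    for s in range(n_samples):
        counts = np.bincount(votes[:, s], minlength=n_classes).astype(float)
        counts += np.random.laplace(0.0, 1.0 / lap_scale, size=n_classes)
        top_two = np.sort(counts)[-2:]
        labels[s] = int(np.argmax(counts)) if (top_two[1] - top_two[0]) >= gap else -1
    return labels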
Example #20

def MakeKernelizedPerceptrons(train, test, label):
     """ Create and evaluate kernelized perceptrons

    Arguments:
        train   - pandas.DataFrame containing the training data

        test    - pandas.DataFrame containing the testing data

        label   - Name of the label column in the DataFrames
    
    Returns:
        (List of KernelizedPerceptron) generated for the data
    """

    true = test[label].values
    kpts = []

    for i in [2,3,5,10,25,50]:

        print "KPT working"
        kp = kpt.KernelizedPerceptron(train, label, 25, i)
        pred = kp.predict(test, label)
        acc = an.accuracy(pred, true)
        kpts.append((i, kp, acc))
        print "KPT " + str(acc)

    p.dump(kpts, open('classifiers/kernelized_perceptrons01.p','wb'))

    return kpts
Example #21
def run_epoch(model, optimizer, loader, loss_meter, acc_meter, criterion,
              attr_criterion, args, is_training):
    """
    For the rest of the networks (X -> A, cotraining, simple finetune)
    """
    if is_training:
        model.train()
    else:
        model.eval()

    for _, data in enumerate(loader):
        if attr_criterion is None:
            inputs, labels = data
            attr_labels, attr_labels_var = None, None
        else:  # JM: this branch executes
            inputs, labels, attr_labels = data
            if args.n_attributes > 1:
                attr_labels = [i.long() for i in attr_labels]
                attr_labels = torch.stack(attr_labels).t()  # .float() #N x 312
            else:
                if isinstance(attr_labels, list):
                    attr_labels = attr_labels[0]
                attr_labels = attr_labels.unsqueeze(1)
            attr_labels_var = torch.autograd.Variable(attr_labels).float()
            attr_labels_var = attr_labels_var.cuda(
            ) if torch.cuda.is_available() else attr_labels_var

        inputs_var = torch.autograd.Variable(inputs)
        inputs_var = inputs_var.cuda() if torch.cuda.is_available(
        ) else inputs_var
        labels_var = torch.autograd.Variable(labels)
        labels_var = labels_var.cuda() if torch.cuda.is_available(
        ) else labels_var

        if is_training and args.use_aux:  # JM: True and True
            losses = []
            out_start = 0

            if args.use_vae:
                # JM This is where we must update to use the VAE loss
                encoder_outputs, outputs, aux_outputs = model(inputs_var)
                mean, logvar = encoder_outputs
                # print('Mean, logvar shapes: ', mean.shape, logvar.shape)

                # Reparameterise, take single sample
                eps = torch.randn_like(logvar)
                eps = eps.cuda() if torch.cuda.is_available() else eps
                # print(eps.device, logvar.device, mean.device)
                z = eps * torch.exp(logvar * .5) + mean
                decoder_outputs = model.decoder(z)
                # print(decoder_outputs.size())
                batch_size, img_width = decoder_outputs.shape[
                    0], decoder_outputs.shape[2]
                # print(decoder_outputs[0][])
                # print('real', 0.5+2*inputs_var[0])
                # print('decoder output shape', decoder_outputs.shape, inputs_var.shape)
                logpx_z = criterion(
                    decoder_outputs, 0.5 + 2 * inputs_var
                )  # JM: scaling because inputs_var seems to be [-0.25, 0.25]

                KL = -0.5 * torch.sum(1 + logvar - mean.pow(2) - logvar.exp())
                KL /= batch_size * img_width * img_width
                # -.5 * ((sample - mean) ** 2. * torch.exp(-logvar) + logvar + log2pi),
                print(logpx_z, KL)
                loss_vae = logpx_z + KL
                losses.append(loss_vae)
                out_start = 1
                if attr_criterion is not None and args.attr_loss_weight > 0:  # X -> A, cotraining, end2end
                    # print(len(attr_criterion), mean[:,1].shape,type(mean[:,1]), attr_labels_var.shape)
                    for i in range(len(attr_criterion)):
                        loss = args.attr_loss_weight * (attr_criterion[i](
                            mean[:, i].squeeze().type(torch.cuda.FloatTensor),
                            attr_labels_var[:, i]))
                        losses.append(loss)
                    print(losses[1])
                    #     + 0.4 * attr_criterion[i](
                    # aux_outputs[i + out_start].squeeze().type(torch.cuda.FloatTensor), attr_labels_var[:, i])))

            else:  #JM: TODO: re-add the main label loss
                outputs, aux_outputs = model(inputs_var)
                if not args.bottleneck:  # loss main is for the main task label (always the first output)
                    loss_main = 1.0 * criterion(outputs[0],
                                                labels_var) + 0.4 * criterion(
                                                    aux_outputs[0], labels_var)
                    losses.append(loss_main)
                    out_start = 1
            if not args.use_vae and attr_criterion is not None and args.attr_loss_weight > 0:  # X -> A, cotraining, end2end
                for i in range(len(attr_criterion)):
                    losses.append(
                        args.attr_loss_weight *
                        (1.0 * attr_criterion[i]
                         (outputs[i + out_start].squeeze().type(
                             torch.cuda.FloatTensor), attr_labels_var[:, i]) +
                         0.4 * attr_criterion[i]
                         (aux_outputs[i + out_start].squeeze().type(
                             torch.cuda.FloatTensor), attr_labels_var[:, i])))
        else:  # testing or no aux logits
            outputs = model(inputs_var)
            losses = []
            out_start = 0
            if not args.bottleneck:
                loss_main = criterion(outputs[0], labels_var)
                losses.append(loss_main)
                out_start = 1
            if attr_criterion is not None and args.attr_loss_weight > 0:  # X -> A, cotraining, end2end
                for i in range(len(attr_criterion)):
                    losses.append(args.attr_loss_weight * attr_criterion[i](
                        outputs[i + out_start].squeeze().type(
                            torch.cuda.FloatTensor), attr_labels_var[:, i]))

        if args.use_vae:
            encoder_outputs, outputs, aux_outputs = model(inputs_var)
            mean, logvar = encoder_outputs
            eps = torch.randn_like(logvar)
            eps = eps.cuda() if torch.cuda.is_available() else eps
            z = eps * torch.exp(logvar * .5) + mean
            decoder_outputs = model.decoder(z)
            logpx_z = criterion(
                decoder_outputs, 0.5 + 2 * inputs_var
            )  # JM: scaling because inputs_var seems to be [-0.25, 0.25]
            acc = logpx_z
            acc_meter.update(acc, inputs.size(0))
        elif args.bottleneck:  # attribute accuracy
            sigmoid_outputs = torch.nn.Sigmoid()(torch.cat(outputs, dim=1))
            acc = binary_accuracy(sigmoid_outputs, attr_labels)
            acc_meter.update(acc.data.cpu().numpy(), inputs.size(0))
        else:
            acc = accuracy(
                outputs[0], labels,
                topk=(1, ))  # only care about class prediction accuracy
            acc_meter.update(acc[0], inputs.size(0))

        if attr_criterion is not None:  # JM: this is executed
            if args.bottleneck:  # JM: false
                total_loss = sum(losses) / args.n_attributes
            else:  # cotraining, loss by class prediction and loss by attribute prediction have the same weight
                total_loss = losses[0] + sum(losses[1:])
                if args.normalize_loss:
                    total_loss = total_loss / (
                        1 + args.attr_loss_weight * args.n_attributes)
                # print('cotraining, loss', total_loss)

        else:  # finetune
            total_loss = sum(losses)
        loss_meter.update(total_loss.item(), inputs.size(0))
        if is_training:
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
    return loss_meter, acc_meter
Example #22
def main():
    parser = argparse.ArgumentParser(
        description='Program to build and train a neural network.')
    parser.add_argument('--train_file',
                        default=None,
                        help='Path to the training data.')
    parser.add_argument('--dev_file',
                        default=None,
                        help='Path to the development data.')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='The number of epochs to train. (default 10)')
    parser.add_argument(
        '--learn_rate',
        type=float,
        default=1e-1,
        help='The learning rate to use for SGD (default 1e-1).')
    parser.add_argument('--hidden_units',
                        type=int,
                        default=30,
                        help='The number of hidden units to use. (default 30)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=25,
                        help='The batch size to use for SGD. (default 25)')
    args = parser.parse_args()

    if args.hidden_units == 0:
        print("Can't process with 0 hidden units.")
        exit()

    # Load training and development data and convert labels to 1-hot representation.
    xtrain, ytrain = dataproc.load_data(args.train_file)
    n_classes = int(1 + np.max(ytrain[0, :]))
    ytrain = dataproc.to_one_hot(ytrain, n_classes)
    if (args.dev_file is not None):
        xdev, ydev = dataproc.load_data(args.dev_file)
        ydev = dataproc.to_one_hot(ydev, n_classes)

    # Record dimensions and size of dataset.
    N = xtrain.shape[1]
    din = xtrain.shape[0]
    dout = ytrain.shape[0]

    batch_size = args.batch_size
    if (batch_size == 0):
        batch_size = N

    # Create an MLP object for training.
    nn = mlp.MLP(din, dout, args.hidden_units)

    # Evaluate MLP after initialization; yhat is matrix of dim (Dout x N).
    yhat = nn.eval(xtrain)

    best_train = (analysis.mse(ytrain, yhat), analysis.mce(ytrain, yhat),
                  analysis.accuracy(ytrain, yhat) * 100)
    print('Initial conditions~~~~~~~~~~~~~')
    print('mse(train):  %f' % (best_train[0]))
    print('mce(train):  %f' % (best_train[1]))
    print('acc(train):  %f' % (best_train[2]))
    print('')

    if (args.dev_file is not None):
        yhat_dev = nn.eval(xdev)
        best_dev = (analysis.mse(ydev, yhat_dev), analysis.mce(ydev, yhat_dev),
                    analysis.accuracy(ydev, yhat_dev) * 100)
        print('mse(dev):  %f' % (best_dev[0]))
        print('mce(dev):  %f' % (best_dev[1]))
        print('acc(dev):  %f' % (best_dev[2]))

    dev_acc = 0

    for epoch in range(args.epochs):
        if dev_acc >= 90:
            exit()
        for batch in range(int(N / batch_size)):
            ids = random.choices(list(range(N)), k=batch_size)
            xbatch = np.array([xtrain[:, n] for n in ids]).transpose()
            ybatch = np.array([ytrain[:, n] for n in ids]).transpose()
            nn.sgd_step(xbatch, ybatch, args.learn_rate)

        yhat = nn.eval(xtrain)
        train_ss = analysis.mse(ytrain, yhat)
        train_ce = analysis.mce(ytrain, yhat)
        train_acc = analysis.accuracy(ytrain, yhat) * 100
        best_train = (min(best_train[0],
                          train_ss), min(best_train[1], train_ce),
                      max(best_train[2], train_acc))

        if epoch % 500 == 0:
            print('After %d epochs ~~~~~~~~~~~~~' % (epoch + 1))
            print('mse(train):  %f  (best= %f)' % (train_ss, best_train[0]))
            print('mce(train):  %f  (best= %f)' % (train_ce, best_train[1]))
            print('acc(train):  %f  (best= %f)' % (train_acc, best_train[2]))

        if (args.dev_file is not None):
            yhat = nn.eval(xdev)
            dev_ss = analysis.mse(ydev, yhat)
            dev_ce = analysis.mce(ydev, yhat)
            dev_acc = analysis.accuracy(ydev, yhat) * 100
            best_dev = (min(best_dev[0],
                            dev_ss), min(best_dev[1],
                                         dev_ce), max(best_dev[2], dev_acc))
            if epoch % 500 == 0:
                print('mse(dev):  %f  (best= %f)' % (dev_ss, best_dev[0]))
                print('mce(dev):  %f  (best= %f)' % (dev_ce, best_dev[1]))
                print('acc(dev):  %f  (best= %f)' % (dev_acc, best_dev[2]))

        nn.save('modelFile')
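
# dataproc.load_data/to_one_hot are not shown in this listing; a minimal
# sketch of to_one_hot consistent with its use above (integer labels stored in
# row 0 of a (1, N) array, output in the (n_classes, N) layout the MLP
# expects) is given here as an assumption, not the actual helper:
import numpy as np

def to_one_hot(y, n_classes):
    labels = y[0, :].astype(int)
    one_hot = np.zeros((n_classes, labels.shape[0]))
    one_hot[labels, np.arange(labels.shape[0])] = 1.0
    return one_hot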
Example #23
def run_epoch(model, optimizer, loader, loss_meter, acc_meter, criterion,
              attr_criterion, args, is_training):
    """
    For the rest of the networks (X -> A, cotraining, simple finetune)
    """
    if is_training:
        model.train()
    else:
        model.eval()

    for _, data in enumerate(loader):
        if attr_criterion is None:
            inputs, labels = data
            attr_labels, attr_labels_var = None, None
        else:
            inputs, labels, attr_labels = data
            if args.n_attributes > 1:
                attr_labels = [i.long() for i in attr_labels]
                attr_labels = torch.stack(attr_labels).t()  #.float() #N x 312
            else:
                if isinstance(attr_labels, list):
                    attr_labels = attr_labels[0]
                attr_labels = attr_labels.unsqueeze(1)
            attr_labels_var = torch.autograd.Variable(attr_labels).float()
            attr_labels_var = attr_labels_var.cuda(
            ) if torch.cuda.is_available() else attr_labels_var

        inputs_var = torch.autograd.Variable(inputs)
        inputs_var = inputs_var.cuda() if torch.cuda.is_available(
        ) else inputs_var
        labels_var = torch.autograd.Variable(labels)
        labels_var = labels_var.cuda() if torch.cuda.is_available(
        ) else labels_var

        if is_training and args.use_aux:
            outputs, aux_outputs = model(inputs_var)
            losses = []
            out_start = 0
            if not args.bottleneck:  #loss main is for the main task label (always the first output)
                loss_main = 1.0 * criterion(outputs[0],
                                            labels_var) + 0.4 * criterion(
                                                aux_outputs[0], labels_var)
                losses.append(loss_main)
                out_start = 1
            if attr_criterion is not None and args.attr_loss_weight > 0:  #X -> A, cotraining, end2end
                for i in range(len(attr_criterion)):
                    losses.append(args.attr_loss_weight * (1.0 * attr_criterion[i](outputs[i+out_start].squeeze().type(torch.cuda.FloatTensor), attr_labels_var[:, i]) \
                                                            + 0.4 * attr_criterion[i](aux_outputs[i+out_start].squeeze().type(torch.cuda.FloatTensor), attr_labels_var[:, i])))
        else:  #testing or no aux logits
            outputs = model(inputs_var)
            losses = []
            out_start = 0
            if not args.bottleneck:
                loss_main = criterion(outputs[0], labels_var)
                losses.append(loss_main)
                out_start = 1
            if attr_criterion is not None and args.attr_loss_weight > 0:  #X -> A, cotraining, end2end
                for i in range(len(attr_criterion)):
                    losses.append(args.attr_loss_weight * attr_criterion[i](
                        outputs[i + out_start].squeeze().type(
                            torch.cuda.FloatTensor), attr_labels_var[:, i]))

        if args.bottleneck:  #attribute accuracy
            sigmoid_outputs = torch.nn.Sigmoid()(torch.cat(outputs, dim=1))
            acc = binary_accuracy(sigmoid_outputs, attr_labels)
            acc_meter.update(acc.data.cpu().numpy(), inputs.size(0))
        else:
            acc = accuracy(
                outputs[0], labels,
                topk=(1, ))  #only care about class prediction accuracy
            acc_meter.update(acc[0], inputs.size(0))

        if attr_criterion is not None:
            if args.bottleneck:
                total_loss = sum(losses) / args.n_attributes
            else:  #cotraining, loss by class prediction and loss by attribute prediction have the same weight
                total_loss = losses[0] + sum(losses[1:])
                if args.normalize_loss:
                    total_loss = total_loss / (
                        1 + args.attr_loss_weight * args.n_attributes)
        else:  #finetune
            total_loss = sum(losses)
        loss_meter.update(total_loss.item(), inputs.size(0))
        if is_training:
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
    return loss_meter, acc_meter
def eval(args):
    """
    Run inference using model (and model2 if bottleneck)
    Returns: (for notebook analysis)
    all_class_labels: flattened list of class labels for each image
    topk_class_outputs: array of top k class ids predicted for each image. Shape = size of test set * max(K)
    all_class_outputs: array of all logit outputs for class prediction, shape = N_TEST * N_CLASS
    all_attr_labels: flattened list of labels for each attribute for each image (length = N_ATTRIBUTES * N_TEST)
    all_attr_outputs: flattened list of attribute logits (after ReLU / Sigmoid respectively) predicted for each attribute for each image (length = N_ATTRIBUTES * N_TEST)
    all_attr_outputs_sigmoid: flattened list of attribute logits (after Sigmoid) predicted for each attribute for each image (length = N_ATTRIBUTES * N_TEST)
    wrong_idx: image ids where the model got the wrong class prediction (to compare with other models)
    """
    if args.model_dir:
        model = torch.load(args.model_dir)
    else:
        model = None

    if not hasattr(model, 'use_relu'):
        if args.use_relu:
            model.use_relu = True
        else:
            model.use_relu = False
    if not hasattr(model, 'use_sigmoid'):
        if args.use_sigmoid:
            model.use_sigmoid = True
        else:
            model.use_sigmoid = False
    if not hasattr(model, 'cy_fc'):
        model.cy_fc = None
    model.eval()

    if args.model_dir2:
        if 'rf' in args.model_dir2:
            model2 = joblib.load(args.model_dir2)
        else:
            model2 = torch.load(args.model_dir2)
        if not hasattr(model2, 'use_relu'):
            if args.use_relu:
                model2.use_relu = True
            else:
                model2.use_relu = False
        if not hasattr(model2, 'use_sigmoid'):
            if args.use_sigmoid:
                model2.use_sigmoid = True
            else:
                model2.use_sigmoid = False
        model2.eval()
    else:
        model2 = None

    if args.use_attr:
        attr_acc_meter = [AverageMeter()]
        if args.feature_group_results:  # compute acc for each feature individually in addition to the overall accuracy
            for _ in range(args.n_attributes):
                attr_acc_meter.append(AverageMeter())
    else:
        attr_acc_meter = None

    class_acc_meter = []
    for j in range(len(K)):
        class_acc_meter.append(AverageMeter())

    data_dir = os.path.join(BASE_DIR, args.data_dir, args.eval_data + '.pkl')
    loader = load_data([data_dir],
                       args.use_attr,
                       args.no_img,
                       args.batch_size,
                       image_dir=args.image_dir,
                       n_class_attr=args.n_class_attr)
    all_outputs, all_targets = [], []
    all_attr_labels, all_attr_outputs, all_attr_outputs_sigmoid, all_attr_outputs2 = [], [], [], []
    all_class_labels, all_class_outputs, all_class_logits = [], [], []
    topk_class_labels, topk_class_outputs = [], []

    for data_idx, data in enumerate(loader):
        if args.use_attr:
            if args.no_img:  # A -> Y
                inputs, labels = data
                if isinstance(inputs, list):
                    inputs = torch.stack(inputs).t().float()
                inputs = inputs.float()
                # inputs = torch.flatten(inputs, start_dim=1).float()
            else:
                inputs, labels, attr_labels = data
                attr_labels = torch.stack(attr_labels).t()  # N x 312
        else:  # simple finetune
            inputs, labels = data

        inputs_var = torch.autograd.Variable(inputs).cuda()
        labels_var = torch.autograd.Variable(labels).cuda()

        if args.attribute_group:
            outputs = []
            with open(args.attribute_group, 'r') as f:
                for line in f:
                    attr_model = torch.load(line.strip())
                    outputs.extend(attr_model(inputs_var))
        else:
            outputs = model(inputs_var)
        if args.use_attr:
            if args.no_img:  # A -> Y
                class_outputs = outputs
            else:
                if args.bottleneck:
                    if args.use_relu:
                        attr_outputs = [torch.nn.ReLU()(o) for o in outputs]
                        attr_outputs_sigmoid = [
                            torch.nn.Sigmoid()(o) for o in outputs
                        ]
                    elif args.use_sigmoid:
                        attr_outputs = [torch.nn.Sigmoid()(o) for o in outputs]
                        attr_outputs_sigmoid = attr_outputs
                    else:
                        attr_outputs = outputs
                        attr_outputs_sigmoid = [
                            torch.nn.Sigmoid()(o) for o in outputs
                        ]
                    if model2:
                        stage2_inputs = torch.cat(attr_outputs, dim=1)
                        class_outputs = model2(stage2_inputs)
                    else:  # for debugging bottleneck performance without running stage 2
                        class_outputs = torch.zeros(
                            [inputs.size(0), N_CLASSES],
                            dtype=torch.float64).cuda()  # ignore this
                else:  # cotraining, end2end
                    if args.use_relu:
                        attr_outputs = [
                            torch.nn.ReLU()(o) for o in outputs[1:]
                        ]
                        attr_outputs_sigmoid = [
                            torch.nn.Sigmoid()(o) for o in outputs[1:]
                        ]
                    elif args.use_sigmoid:
                        attr_outputs = [
                            torch.nn.Sigmoid()(o) for o in outputs[1:]
                        ]
                        attr_outputs_sigmoid = attr_outputs
                    else:
                        attr_outputs = outputs[1:]
                        attr_outputs_sigmoid = [
                            torch.nn.Sigmoid()(o) for o in outputs[1:]
                        ]

                    class_outputs = outputs[0]

                for i in range(args.n_attributes):
                    acc = binary_accuracy(attr_outputs_sigmoid[i].squeeze(),
                                          attr_labels[:, i])
                    acc = acc.data.cpu().numpy()
                    # acc = accuracy(attr_outputs_sigmoid[i], attr_labels[:, i], topk=(1,))
                    attr_acc_meter[0].update(acc, inputs.size(0))
                    if args.feature_group_results:  # keep track of accuracy of individual attributes
                        attr_acc_meter[i + 1].update(acc, inputs.size(0))

                attr_outputs = torch.cat(
                    [o.unsqueeze(1) for o in attr_outputs], dim=1)
                attr_outputs_sigmoid = torch.cat(
                    [o for o in attr_outputs_sigmoid], dim=1)
                all_attr_outputs.extend(
                    list(attr_outputs.flatten().data.cpu().numpy()))
                all_attr_outputs_sigmoid.extend(
                    list(attr_outputs_sigmoid.flatten().data.cpu().numpy()))
                all_attr_labels.extend(
                    list(attr_labels.flatten().data.cpu().numpy()))
        else:
            class_outputs = outputs[0]

        _, topk_preds = class_outputs.topk(max(K), 1, True, True)
        _, preds = class_outputs.topk(1, 1, True, True)
        all_class_outputs.extend(list(preds.detach().cpu().numpy().flatten()))
        all_class_labels.extend(list(labels.data.cpu().numpy()))
        all_class_logits.extend(class_outputs.detach().cpu().numpy())
        topk_class_outputs.extend(topk_preds.detach().cpu().numpy())
        topk_class_labels.extend(labels.view(-1, 1).expand_as(preds))

        np.set_printoptions(threshold=sys.maxsize)
        class_acc = accuracy(class_outputs, labels,
                             topk=K)  # only class prediction accuracy
        for m in range(len(class_acc_meter)):
            class_acc_meter[m].update(class_acc[m], inputs.size(0))

    all_class_logits = np.vstack(all_class_logits)
    topk_class_outputs = np.vstack(topk_class_outputs)
    topk_class_labels = np.vstack(topk_class_labels)
    wrong_idx = np.where(
        np.sum(topk_class_outputs == topk_class_labels, axis=1) == 0)[0]

    for j in range(len(K)):
        print('Average top %d class accuracy: %.5f' %
              (K[j], class_acc_meter[j].avg))

    if args.use_attr and not args.no_img:  # print some metrics for attribute prediction performance
        print('Average attribute accuracy: %.5f' % attr_acc_meter[0].avg)
        all_attr_outputs_int = np.array(all_attr_outputs_sigmoid) >= 0.5
        if args.feature_group_results:
            n = len(all_attr_labels)
            all_attr_acc, all_attr_f1 = [], []
            for i in range(args.n_attributes):
                acc_meter = attr_acc_meter[1 + i]
                attr_acc = float(acc_meter.avg)
                attr_preds = [
                    all_attr_outputs_int[j] for j in range(n)
                    if j % args.n_attributes == i
                ]
                attr_labels = [
                    all_attr_labels[j] for j in range(n)
                    if j % args.n_attributes == i
                ]
                attr_f1 = f1_score(attr_labels, attr_preds)
                all_attr_acc.append(attr_acc)
                all_attr_f1.append(attr_f1)
            '''
            fig, axs = plt.subplots(1, 2, figsize=(20,10))
            for plt_id, values in enumerate([all_attr_acc, all_attr_f1]):
                axs[plt_id].set_xticks(np.arange(0, 1.1, 0.1))
                if plt_id == 0:
                    axs[plt_id].hist(np.array(values)/100.0, bins=np.arange(0, 1.1, 0.1), rwidth=0.8)
                    axs[plt_id].set_title("Attribute accuracies distribution")
                else:
                    axs[plt_id].hist(values, bins=np.arange(0, 1.1, 0.1), rwidth=0.8)
                    axs[plt_id].set_title("Attribute F1 scores distribution")
            plt.savefig('/'.join(args.model_dir.split('/')[:-1]) + '.png')
            '''
            bins = np.arange(0, 1.01, 0.1)
            acc_bin_ids = np.digitize(np.array(all_attr_acc) / 100.0, bins)
            acc_counts_per_bin = [
                np.sum(acc_bin_ids == (i + 1)) for i in range(len(bins))
            ]
            f1_bin_ids = np.digitize(np.array(all_attr_f1), bins)
            f1_counts_per_bin = [
                np.sum(f1_bin_ids == (i + 1)) for i in range(len(bins))
            ]
            print("Accuracy bins:")
            print(acc_counts_per_bin)
            print("F1 bins:")
            print(f1_counts_per_bin)
            np.savetxt(os.path.join(args.log_dir, 'concepts.txt'),
                       f1_counts_per_bin)

        balanced_acc, report = multiclass_metric(all_attr_outputs_int,
                                                 all_attr_labels)
        f1 = f1_score(all_attr_labels, all_attr_outputs_int)
        print(
            "Total 1's predicted:",
            sum(np.array(all_attr_outputs_sigmoid) >= 0.5) /
            len(all_attr_outputs_sigmoid))
        print('Avg attribute balanced acc: %.5f' % (balanced_acc))
        print("Avg attribute F1 score: %.5f" % f1)
        print(report + '\n')
    return class_acc_meter, attr_acc_meter, all_class_labels, topk_class_outputs, all_class_logits, all_attr_labels, all_attr_outputs, all_attr_outputs_sigmoid, wrong_idx, all_attr_outputs2