Beispiel #1
0
def train_logisticregressoin(info: TrainInformation, split, fold):
    """Compare a logistic-regression baseline with a saved neural network.

    Fits a scikit-learn ``LogisticRegression`` on the training data of the
    given split, prints its AUC on the test data, loads a previously saved
    network checkpoint and plots both sets of test predictions with
    ``train_utils.plot_AUC_v2``.

    Args:
        info: Experiment settings; ``FILENAME``, ``NAME`` and
            ``USE_DATA_DROPOUT`` are read here.
        split: Split index, forwarded to ``Dataset`` and used in the name of
            the saved figure.
        fold: Fold index, forwarded to ``Dataset``.
    """
    import sklearn.linear_model

    filename = info.FILENAME
    print("Using File {}".format(filename))

    train_dataset = Dataset(split=split,
                            fold=fold,
                            phase="train",
                            filename=filename,
                            use_data_dropout=info.USE_DATA_DROPOUT)
    test_dataset = Dataset(split=split,
                           fold=fold,
                           phase="test",
                           filename=filename,
                           use_data_dropout=False)

    # Column 0 is used as the label and the remaining columns as features.
    train_input = train_dataset.train_data[:, 1:]
    # Take the labels from the same dataset object as the features (the
    # original read them from test_dataset), and as a 1-D vector, which is
    # the shape scikit-learn expects for ``y``.
    train_label = train_dataset.train_data[:, 0]
    test_input = test_dataset.data[:, 1:]
    test_label = test_dataset.data[:, :1]

    regressor = sklearn.linear_model.LogisticRegression()
    regressor.fit(train_input, train_label)
    # Column 1 of predict_proba is the probability of the second class.
    preds = regressor.predict_proba(test_input)[:, 1]
    auc = train_utils.compute_AUC(test_label, preds)
    print(auc)

    savepath = "/content/drive/My Drive/research/frontiers/checkpoints/logistic_regression/split_%02d.png" % split
    os.makedirs(os.path.dirname(savepath), exist_ok=True)

    savedir = "/content/drive/My Drive/research/frontiers/checkpoints/%s" % exp_name_of(info) if False else "/content/drive/My Drive/research/frontiers/checkpoints/%s" % info.NAME
    best_test_epoch = 25
    # NOTE(review): the file name says "fold" but is filled with the
    # dataset's split - confirm against how the checkpoints are written.
    loadpath = "%s/epoch_%04d_fold_%02d.pt" % (savedir, best_test_epoch,
                                               train_dataset.split)
    # torch.load unpickles the complete model object, so only checkpoints
    # from a trusted source should be loaded here.
    model = torch.load(loadpath)
    model.eval()

    test_preds = train_utils.get_preds(test_input, model)
    train_utils.plot_AUC_v2([('Deep Neural Network', test_preds),
                             ('Logistic Regression', preds)],
                            test_label,
                            savepath=savepath)
Beispiel #2
0
def _print_confusion_counts(tag, y_true, y_pred):
    """Print the TP, TN, FN and FP counts of a binary prediction."""
    # Assuming confusion_matrix is scikit-learn's: for a binary problem its
    # ravel() yields the counts in the order tn, fp, fn, tp.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    print("{} TP, TN, FN, FP : {}, {}, {}, {}".format(tag, tp, tn, fn, fp))


def train_ml_compare(info: TrainInformation, split, fold):
    """Compare classical classifiers with a saved neural network on one split.

    Fits logistic regression, a random forest, a linear SVM and a k-nearest
    neighbours classifier on the training data of the given split, prints the
    AUC and confusion counts of each on the test data, loads a previously
    saved network checkpoint and plots all test predictions together with
    ``train_utils.plot_AUC_v2``.

    Args:
        info: Experiment settings; ``FILENAME``, ``NAME`` and
            ``USE_DATA_DROPOUT`` are read here.
        split: Split index, forwarded to ``Dataset`` and used in the name of
            the saved figure.
        fold: Fold index, forwarded to ``Dataset``.
    """
    import sklearn.linear_model
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import LinearSVC

    filename = info.FILENAME
    print("Using File {}".format(filename))

    train_dataset = Dataset(split=split, fold=fold, phase="train", filename=filename, use_data_dropout=info.USE_DATA_DROPOUT)
    test_dataset = Dataset(split=split, fold=fold, phase="test", filename=filename, use_data_dropout=False)

    # Column 0 is used as the label and the remaining columns as features.
    train_input = train_dataset.train_data[:, 1:]
    # Take the labels from the same dataset object as the features (the
    # original read them from test_dataset), and as a 1-D vector, which is
    # the shape scikit-learn expects for ``y``.
    train_label = train_dataset.train_data[:, 0]
    test_input = test_dataset.data[:, 1:]
    test_label = test_dataset.data[:, :1]

    # Logistic regression.
    regressor = sklearn.linear_model.LogisticRegression()
    regressor.fit(train_input, train_label)
    preds_regressor = regressor.predict_proba(test_input)[:, 1]
    auc_regressor = train_utils.compute_AUC(test_label, preds_regressor)
    print(f'auc_regressor is {auc_regressor}')
    _print_confusion_counts("logistic regression", test_label, regressor.predict(test_input))

    # Random forest.
    forest = RandomForestClassifier()
    forest.fit(train_input, train_label)
    preds_forest = forest.predict_proba(test_input)[:, 1]
    auc_forest = train_utils.compute_AUC(test_label, preds_forest)
    print(f'auc_forest is {auc_forest}')
    _print_confusion_counts("random forest", test_label, forest.predict(test_input))

    # Linear SVM. It has no predict_proba, so the decision values are
    # min-max scaled to [0, 1] to serve as scores.
    svc = LinearSVC()
    svc.fit(train_input, train_label)
    decision = svc.decision_function(test_input)
    lowest = decision.min()
    span = decision.max() - lowest
    # Guard against 0/0 (NaN scores) when every decision value is equal.
    preds_svc = (decision - lowest) / span if span > 0 else decision - lowest
    auc_svc = train_utils.compute_AUC(test_label, preds_svc)
    print(f'auc_svc is {auc_svc}')
    _print_confusion_counts("svc", test_label, svc.predict(test_input))

    # K-nearest neighbours.
    kneighbors = KNeighborsClassifier()
    kneighbors.fit(train_input, train_label)
    preds_kneighbors = kneighbors.predict_proba(test_input)[:, 1]
    auc_kneighbors = train_utils.compute_AUC(test_label, preds_kneighbors)
    print(f'auc_kneighbors is {auc_kneighbors}')
    _print_confusion_counts("kneighbors", test_label, kneighbors.predict(test_input))

    savepath = "/content/drive/My Drive/research/frontiers/checkpoints/ml_compare/split_%02d.tiff" % split
    os.makedirs(os.path.dirname(savepath), exist_ok=True)

    savedir = "/content/drive/My Drive/research/frontiers/checkpoints/%s" % info.NAME
    best_test_epoch = 25
    # NOTE(review): the file name says "fold" but is filled with the
    # dataset's split - confirm against how the checkpoints are written.
    loadpath = "%s/epoch_%04d_fold_%02d.pt" % (savedir, best_test_epoch, train_dataset.split)
    # torch.load unpickles the complete model object, so only checkpoints
    # from a trusted source should be loaded here.
    model = torch.load(loadpath)
    model.eval()

    test_preds = train_utils.get_preds(test_input, model)
    # NOTE(review): the AUC values in these legend labels are hard-coded and
    # are not derived from the AUCs computed above - confirm they match the
    # split being plotted.
    curves = [
        ('Deep learning (AUC 0.870)', test_preds),
        ('Logistic regression (AUC 0.858)', preds_regressor),
        ('Linear SVM (AUC 0.849)', preds_svc),
        ('Random forest classifier (AUC 0.810)', preds_forest),
        ('K-nearest neighbors (AUC 0.740)', preds_kneighbors),
    ]
    train_utils.plot_AUC_v2(curves, test_label, savepath=savepath)