Ejemplo n.º 1
0
def run(filename):
    """Load the experiment settings and run the experiment they describe.

    Seeds NumPy and PyTorch from the loaded settings, calls ``train`` once
    per split, averages the per-split test AUC lists element-wise, and
    appends the best averaged AUC together with its index to ``result.txt``.

    Args:
        filename: Passed to ``TrainInformation`` to load the settings.

    Raises:
        ValueError: If the settings request no splits, so there is nothing
            to average.
    """
    info = TrainInformation(filename)
    np.random.seed(info.SEED)
    torch.manual_seed(info.SEED)
    fold = info.FOLD

    # One row per split; each row is that split's ``test_AUC_list``
    # (presumably one entry per epoch -- confirm against TrainResult).
    test_AUCs_by_split = []
    for split in range(fold):
        result = train(info, split, fold)
        test_AUCs_by_split.append([float(auc) for auc in result.test_AUC_list])

    if not test_AUCs_by_split:
        # Without this guard the averaging below degenerates to a scalar NaN
        # and fails later with an unhelpful "invalid index" error.
        raise ValueError("No splits were run (FOLD=%r); nothing to average" % (fold,))

    # Summarise before touching the output file so a failure here cannot
    # happen while the file is open.
    test_AUCs_by_epoch = np.array(test_AUCs_by_split).mean(axis=0)
    best_test_epoch = np.argmax(test_AUCs_by_epoch)
    best_test_AUC = test_AUCs_by_epoch[best_test_epoch]

    # Append so results from successive experiments accumulate in one file.
    with open("result.txt", "a") as f:
        f.write("Name: %s\n" % info.NAME)
        f.write("average test AUC: %f %d\n" % (best_test_AUC, best_test_epoch))
Ejemplo n.º 2
0
def train(info: TrainInformation, split, fold):
    """Run training and testing for the given split.

    Builds the train/test ``Dataset`` objects, the classifier and its
    optimizer from the settings in ``info``, calls ``train_step`` once per
    epoch, then reloads the checkpoint of ``train_result.best_test_epoch``
    to compute test AUC / PR-AUC, save an AUC plot and write the
    contributing variables next to the checkpoint. Finally the result is
    attached to ``info`` and saved through ``info.save_result()``.

    Args:
        info: Experiment settings; also receives ``split_index`` and
            ``result_dict`` before ``save_result()`` is called.
        split: Index of the split to train on.
        fold: Total number of splits, forwarded to ``Dataset``.

    Returns:
        The ``TrainResult`` instance that was passed to every ``train_step``.
    """
    # Module-level value reset before every epoch below.
    global prev_plot

    # Pull every hyper-parameter out of the settings object up front.
    batch_size = info.BS
    initial_lr = info.INIT_LR
    decay = info.LR_DECAY
    sgd_momentum = info.MOMENTUM
    l2_penalty = info.WEIGHT_DECAY
    optimizer_name = info.OPTIMIZER_METHOD
    n_epochs = info.EPOCH
    channels = info.NCHS
    data_file = info.FILENAME
    classifier_name = info.MODEL_NAME
    experiment = info.NAME

    print("Using File {}".format(data_file))

    # Data dropout follows the settings for training and is always off for testing.
    train_set = Dataset(
        split=split,
        fold=fold,
        phase="train",
        filename=data_file,
        use_data_dropout=info.USE_DATA_DROPOUT,
    )
    test_set = Dataset(
        split=split,
        fold=fold,
        phase="test",
        filename=data_file,
        use_data_dropout=False,
    )

    model = get_classifier_model(
        classifier_name, train_set.feature_size, channels, info.ACTIVATION
    )
    print(model)

    # Optimizer setup
    optimizer = set_optimizer(
        optimizer_name, model, initial_lr, l2_penalty, momentum=sgd_momentum
    )

    loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        drop_last=True,
    )

    criterion = torch.nn.BCEWithLogitsLoss().cuda()

    train_result = TrainResult()
    # The middle size is passed as 0 because no validation set is built here.
    train_result.set_sizes(len(train_set.data), 0, len(test_set.data))

    for ep in range(n_epochs):
        prev_plot = 0
        train_step(
            experiment,
            ep,
            model,
            train_set,
            test_set,
            optimizer,
            initial_lr,
            decay,
            loader,
            criterion,
            train_result,
        )

    # Reload the checkpoint belonging to the best test epoch.
    savedir = "/content/drive/My Drive/research/frontiers/checkpoints/%s" % experiment
    best_test_epoch = train_result.best_test_epoch
    epoch_and_fold = (best_test_epoch, train_set.split)
    savepath = "%s/epoch_%04d_fold_%02d.pt" % ((savedir,) + epoch_and_fold)
    # NOTE(review): torch.load unpickles the file and the result is used
    # directly as the model, so the checkpoint must hold a whole model object;
    # presumably it is written during train_step -- confirm.
    model = torch.load(savepath)
    model.eval()

    # Assumes column 0 of the data holds the label and the remaining columns
    # the input features -- TODO confirm against Dataset.
    test_preds = train_utils.get_preds(test_set.data[:, 1:], model)
    test_labels = test_set.data[:, :1]
    test_AUC = train_utils.compute_AUC(test_labels, test_preds)
    # The PR-AUC value is computed but not used further in this function.
    test_PRAUC = train_utils.compute_PRAUC(test_labels, test_preds)

    plot_path = savepath.replace(".pt", "_AUC.tiff")
    train_utils.plot_AUC(test_set, test_preds, test_AUC, savepath=plot_path)

    contributing_variables = compute_contributing_variables(model, test_set)
    report_name = "contributing_variables_epoch_%04d_fold_%02d.txt" % epoch_and_fold
    with open(os.path.join(savedir, report_name), "w") as report:
        for variable, score in contributing_variables:
            report.write("%s %f\n" % (variable, score))

    # Attach the outcome to the settings object and let it save the result.
    info.split_index = split
    info.result_dict = train_result
    info.save_result()
    return train_result