예제 #1 (Example #1)
def run_experiment(build_model, args):
    """Train a model through the ``learning_rates`` schedule and log test ROC-AUC.

    For each learning rate a fresh model is built; the first rate is trained
    from scratch via ``train`` and every later rate is warm-started via
    ``retrain`` (which receives the previous rate). After each stage the best
    checkpoint is reloaded, test-set predictions are saved, and the mean
    ROC-AUC is printed and appended to the results log.

    Parameters
    ----------
    build_model : callable
        Zero-argument factory returning a model wrapper exposing
        ``model_name``, ``train`` and ``retrain``.
    args : argparse.Namespace
        Parsed CLI options; ``args.d`` (dataset name) is used in output paths.
    """
    x_train, y_train, x_valid, y_valid, x_test, y_test = get_data(args)

    # ``with`` guarantees the results log is closed even if training raises.
    with open("../results/results.txt", "a") as output:
        start = time.time()

        for lr_index, lr in enumerate(learning_rates):
            base_model = build_model()

            if lr_index == 0:
                output.write("Start Training %s - %s \n" % (base_model.model_name, start))
                print('Train first learning rate')
            else:
                print('Train %s' % lr)

            weight_name = '../results/best_weights_%s_%s.hdf5' % (base_model.model_name, lr)
            if lr_index == 0:
                # First rate: train from scratch.
                model = base_model.train(x_train, y_train, x_valid, y_valid,
                                         epoch_size=100, lr=lr, weight_name=weight_name)
            else:
                # Later rates: warm-start from the previous rate's weights.
                model = base_model.retrain(x_train, y_train, x_valid, y_valid,
                                           epoch_size=100, lr=lr,
                                           lr_prev=learning_rates[lr_index - 1],
                                           weight_name=weight_name)

            print("Testing")
            # Evaluate the best checkpoint, not the last-epoch weights.
            model.load_weights(weight_name)
            x_pred = evaluator.predict(base_model, model, x_test)

            # Save predictions for later ensembling / analysis.
            np.save("../results/predictions_%s_%s_%s.npy" % (base_model.model_name, args.d, lr), x_pred)

            test_result = evaluator.mean_roc_auc(x_pred, y_test)
            print("Mean ROC-AUC: %s" % test_result)
            output.write("%s -  Mean ROC-AUC: %s, %s \n" % (lr, test_result, time.time()))

        output.write("End Training %s - %s" % (base_model.model_name, time.time()))
예제 #2 (Example #2)
def main(config):
    """Entry point: train or predict with an LSTM-based net chosen by ``config``.

    ``config.model`` selects the architecture (2: bidirectional LSTM; any
    other value, including the default 1, uses the plain LSTM) and
    ``config.phase`` selects "train" or "predict". Exceptions are logged
    rather than propagated.
    """
    logger = load_logger(config)
    try:
        np.random.seed(config.random_seed)  # for reproducibility
        data_original = Dataset(config)
        # The original code assigned Net_LSTM twice (default and model == 1);
        # a single conditional expresses the same selection.
        Net = Net_BidirectionalLSTM if config.model == 2 else Net_LSTM

        if config.phase == "train":
            print("The soothsayer will train")
            train_X, valid_X, train_Y, valid_Y = data_original.get_train_and_valid_data(
            )
            train(Net, config, logger, [train_X, train_Y, valid_X, valid_Y])

        if config.phase == "predict":
            print("The soothsayer will predict")
            test_X, test_Y = data_original.get_test_data(
                return_label_data=True)

            # predict() emits normalized (not yet de-normalized) values.
            pred_result = predict(Net, config, test_X)
            draw(config, data_original, logger, pred_result)
    except Exception:
        logger.error("Run Error", exc_info=True)
예제 #3 (Example #3)
def main(config):
    """Entry point: train or predict with linear regression per ``config``.

    In the "predict" phase the normalized model output is de-normalized with
    hard-coded training-set label statistics before being printed. Exceptions
    are logged rather than propagated.
    """
    logger = load_logger(config)
    try:
        np.random.seed(config.random_seed)  # for reproducibility
        data_original = Dataset(config)
        Net = LinearRegression

        if config.phase == "train":
            print("The soothsayer will train")
            train_X, valid_X, train_Y, valid_Y = data_original.get_train_and_valid_data(
            )
            train(Net, config, logger, [train_X, train_Y, valid_X, valid_Y])

        if config.phase == "predict":
            print("The soothsayer will predict")
            test_X, test_Y = data_original.get_test_data(
                return_label_data=True)

            # predict() emits normalized (not yet de-normalized) values.
            pred_result = predict(Net, config, test_X)
            print("pred_result:", pred_result)

            # De-normalize: result = result * std + mean.
            # NOTE(review): these constants are presumably the target column's
            # std/mean baked in at training time — confirm against Dataset's
            # normalization if the data ever changes.
            label_std, label_mean = 89.067, 283.0
            pred_result = pred_result * label_std + label_mean

            print("pred_result restore data:", pred_result)

    except Exception:
        logger.error("Run Error", exc_info=True)
예제 #4 (Example #4)
# Baseline evaluation script: load the public MTAT test split, run the
# pretrained SampleCNN39 on two GPUs, save predictions, print mean ROC-AUC.
batch_size = 25

parser = argparse.ArgumentParser()
parser.add_argument("-d", "-data", help="gtzan, mtat or msd")
parser.add_argument("-logging", help="Logs to csv file")
# BUG FIX: ``type=list`` splits the argument string into single characters
# (e.g. "0,1" -> ['0', ',', '1']); parse a comma-separated list instead.
parser.add_argument("-gpu", type=lambda s: s.split(','),
                    help="Run on gpu's, and which (comma-separated, e.g. 0,1)")
parser.add_argument("-local", help="Whether to run local or on server")

args = parser.parse_args()

base_path = "../data/mtat/"
# Close the path file promptly instead of leaking the handle.
with open(base_path + "test_path.txt") as path_file:
    x_test = [song.rstrip() for song in path_file]
y_test = np.load(base_path + "y_test_pub.npy")

# NOTE(review): 640512 is presumably the raw audio length; the input window
# is 3 * 3**9 = 59049 samples, mono — confirm against SampleCNN39's signature.
base_model = SampleCNN39(640512,
                         dim=(3 * 3**9, ),
                         n_channels=1,
                         batch_size=batch_size,
                         weight_name='../results/best_weights_%s_%s.hdf5',
                         args=args)

model = multi_gpu_model(base_model.model, gpus=2)

print("Testing")
x_pred = evaluator.predict(base_model, model, x_test, None)

# Save raw predictions for later ensembling / analysis.
np.save("../results/predictions_baseline.npy", x_pred)

test_result = evaluator.mean_roc_auc(x_pred, y_test)
print("Mean ROC-AUC: %s" % test_result)
예제 #5 (Example #5)
                                        optimizer, batch_size)
                epoch_loss += batch_loss

                if batch % 100 == 0:
                    print("Epoch {} Batch {} Loss {:.4f}".format(
                        epoch + 1, batch, batch_loss.numpy()))

            # Saving (checkpoint) every epochs
            checkpoint.save(file_prefix=checkpoint_prefix)

            print("Epoch {} Loss {:.4f}".format(epoch + 1,
                                                epoch_loss / steps_per_epoch))
            print("Time taken for 1 epoch {} sec\n".format(time.time() -
                                                           start))

    # Restoring the latest checkpoint in checkpoint_dir
    checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
    print("CHECKPOINT: ", tf.train.latest_checkpoint(checkpoint_dir))

    # Get length of tensors
    max_length_inp = max(len(t) for t in input_tensor)
    max_length_targ = max(len(t) for t in target_tensor)

    # Evaluate the trained model with user inputs
    while True:
        inp = input()
        predict(inp, encoder, decoder, input_tokenizer, target_tokenizer,
                max_length_inp, max_length_targ)
        if inp == "":
            break
예제 #6 (Example #6)
def main():
    '''
    Main logic for program. Trains the model and evaluates discovery and prediction performance.
    '''
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Training model on: {device}")
    writer = SummaryWriter(comment=f"_{Config.data_type}_selector_training")

    # Same seed for both calls keeps the synthetic generating process aligned
    # between train and test sets.
    x_train, y_train, g_train = generate_dataset(n=Config.train_no,
                                                 dim=Config.dim,
                                                 data_type=Config.data_type,
                                                 seed=0)
    x_test, y_test, g_test = generate_dataset(n=Config.test_no,
                                              dim=Config.dim,
                                              data_type=Config.data_type,
                                              seed=0)

    # random sampler with replacement for training
    train_dataset = torch.utils.data.TensorDataset(torch.Tensor(x_train),
                                                   torch.Tensor(y_train))
    rand_sampler = torch.utils.data.RandomSampler(
        train_dataset, num_samples=Config.batch_size, replacement=True)
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=Config.batch_size, sampler=rand_sampler)

    model_params = {
        'input_dim': x_train.shape[1],
        'selector_hdim': Config.selector_hdim,
        'predictor_hdim': Config.predictor_hdim,
        'output_dim': y_train.shape[1],
        'temp_anneal': Config.temp_anneal
    }
    net = invase_plus(model_params).to(device)

    # weight loss function by class support (inverse class frequency)
    class_weight = [
        1 / (np.sum(y_train[:, 0] / y_train.shape[0])),
        1 / (np.sum(y_train[:, 1] / y_train.shape[0]))
    ]
    loss_func = ce_loss_with_reg(Config.l2, Config.weight_decay, device,
                                 class_weight)
    optimiser = torch.optim.Adam(net.parameters(), lr=Config.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimiser,
                                                step_size=500,
                                                gamma=0.5)

    # BUG FIX: define the checkpoint path unconditionally. It was previously
    # created only when Config.save_model was true, yet read again below for
    # reloading, raising NameError whenever saving was disabled.
    save_model_dir = f"trained_models/{Config.data_type}_invase_net_plus.pt"

    best_loss = float("inf")
    early_stopping_iters = 0
    print("Initialising training")
    for iteration in range(Config.iterations):
        loss, acc = train_one_iter(net,
                                   train_dataloader,
                                   loss_func,
                                   optimiser,
                                   device,
                                   iteration,
                                   log_interval=100)
        scheduler.step()
        writer.add_scalar("Loss/train", loss, iteration)
        writer.add_scalar("Acc/train", acc, iteration)
        if loss < best_loss:
            best_loss = loss
            print(
                f"Loss/accuracy improved: {best_loss:.4f}/{acc:.2f}%, saving model..."
            )
            if Config.save_model:
                torch.save(net.state_dict(), save_model_dir)
            early_stopping_iters = 0
        else:
            early_stopping_iters += 1

        if early_stopping_iters == Config.patience:
            print(f"Early stopping after iteration {iteration}")
            break

    print("Training complete\n")

    # evaluate performance of feature importance
    if Config.save_model:
        # Reload the best checkpoint; when saving is disabled the net keeps
        # its final (last-iteration) weights for evaluation instead.
        net.load_state_dict(
            torch.load(save_model_dir, map_location=torch.device(device)))
    g_hat = feature_importance_score(net, x_test, device)
    importance_score = 1. * (g_hat > 0.5)
    mean_tpr, std_tpr, mean_fdr, std_fdr = feature_performance_metric(
        g_test, importance_score)
    print("Feature importance evaluation: ")
    print(
        f"TPR mean: {np.round(mean_tpr,1)}%, TPR std: {np.round(std_tpr,1)}%")
    print(
        f"FDR mean: {np.round(mean_fdr,1)}%, FDR std: {np.round(std_fdr,1)}%")

    # evaluate performance in prediction
    y_hat = predict(net, x_test, device)
    auc, apr, acc = prediction_performance_metric(y_test, y_hat)
    print("Prediction evaluation: ")
    print(
        f"AUC: {np.round(auc, 3)}%, APR: {np.round(apr, 3)}%, ACC: {np.round(acc, 3)}%"
    )
예제 #7 (Example #7)
def run_cross_experiment(build_model, args):
    """5-fold cross-validation variant of the learning-rate schedule experiment.

    For each fold ``i`` the pre-generated train/test split is loaded, a
    multilabel-stratified 10% validation split is carved out of the training
    data, and the model is trained through every rate in ``learning_rates``
    (the first from scratch, later rates warm-started via ``retrain``),
    logging the test mean ROC-AUC for each rate.

    Parameters
    ----------
    build_model : callable
        Zero-argument factory returning a model wrapper exposing
        ``model_name``, ``train`` and ``retrain``.
    args : argparse.Namespace
        Parsed CLI options; ``args.d`` (dataset name) is used in output paths.
    """
    base_path = "../data/mtat/cross/%s_%s"

    for i in range(0, 5):
        # ``with`` closes the path files instead of leaking the handles.
        with open(base_path % (i, "train.txt")) as f:
            train_ids = [song.rstrip() for song in f]
        Y_train = np.load(base_path % (i, "train.npy"))
        with open(base_path % (i, "test.txt")) as f:
            x_test = [song.rstrip() for song in f]
        y_test = np.load(base_path % (i, "test.npy"))

        # Split into train and validation set (n_splits=1, so the loop body
        # runs exactly once).
        mskf = MultilabelStratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=0)
        for train_idx, valid_idx in mskf.split(train_ids, Y_train):
            x_train = [train_ids[idx] for idx in train_idx]
            y_train = Y_train[train_idx]
            x_valid = [train_ids[idx] for idx in valid_idx]
            y_valid = Y_train[valid_idx]

        # ``with`` guarantees the results log is closed even if training raises.
        with open("../results/cross/results.txt", "a") as output:
            start = time.time()

            for lr_index, lr in enumerate(learning_rates):
                base_model = build_model()

                if lr_index == 0:
                    output.write("Start Cross %s Training %s - %s \n" % (i, base_model.model_name, start))
                    print('Train first learning rate for cross validation')
                else:
                    print('Train %s' % lr)

                weight_name = '../results/cross/%s_best_weights_%s_%s.hdf5' % (i, base_model.model_name, lr)
                if lr_index == 0:
                    # First rate: train from scratch.
                    model = base_model.train(x_train, y_train, x_valid, y_valid,
                                             epoch_size=100, lr=lr, weight_name=weight_name)
                else:
                    # Later rates: warm-start from the previous rate's weights.
                    model = base_model.retrain(x_train, y_train, x_valid, y_valid,
                                               epoch_size=100, lr=lr,
                                               lr_prev=learning_rates[lr_index - 1],
                                               weight_name=weight_name)

                print("Testing")
                # Evaluate the best checkpoint, not the last-epoch weights.
                model.load_weights(weight_name)
                x_pred = evaluator.predict(base_model, model, x_test)

                # Save predictions for later ensembling / analysis.
                np.save("../results/cross/%s_predictions_%s_%s_%s.npy" % (i, base_model.model_name, args.d, lr), x_pred)

                test_result = evaluator.mean_roc_auc(x_pred, y_test)
                print("Mean ROC-AUC: %s" % test_result)
                output.write("%s -  Mean ROC-AUC: %s, %s \n" % (lr, test_result, time.time()))

            output.write("End Training %s - %s" % (base_model.model_name, time.time()))