def test_random():
    # Sanity test: increasing the number of features should reduce the least-squares reconstruction error.
    num_points = 100
    num_data_per_point = 55
    learning_rate = 0.5
    x_in = np.random.normal(size=(num_data_per_point, num_points))
    for num_features in [1, 5, 10, 15, 20, 40, 70]:
        ae = AutoEncoder(x_in, num_features, random_seed=1234)
        w_in = np.random.normal(size=(num_data_per_point, num_features))
        z_out, least_squares_test = ae.psi(w_in)
        print(
            f"(# features : least squares error) = ({num_features} : {least_squares_test})"
        )
        print("Starting gradient descent...")
        loss_values = []  # Keep track of loss values over epochs
        for epoch in range(1000):
            z_grd, ls_grd, grd = ae.calc_g(w_in)  # Calculate Z, error, and gradient matrix
            w_in = w_in - learning_rate * grd  # Update W using the gradient
            loss_values.append(ls_grd)  # Log loss
            print(f"Epoch: {epoch}\t----------\tLoss: {ls_grd}")

        # print(loss_values)
        plotter.plot_loss(
            loss_values,
            f"Gradient Loss Over Epochs (test) (num_features: {num_features})")
Example #2
def test_mnist():
    # Genetic-algorithm (GA) test using MNIST
    (train_x, _), (_, _) = mnist.load_data()
    train_x = train_x / 255  # Normalizing images
    # plotter.plot_mnist(train_x, "original")                           # Show original mnist images

    num_img, img_dim, _ = train_x.shape  # Get number of images and # pixels per square img
    num_features = 500
    mnist_in = np.reshape(
        train_x, (img_dim * img_dim, num_img))  # Reshape images to match autoencoder input
    ga = Algorithm(x=mnist_in, num_features=num_features, debug=1, pop_size=20)
    w_out, best_cost, logs = ga.run()

    print(
        f"Average time/generation (sec): {sum(logs['times']) / len(logs['times'])}"
    )
    print(f"Total time to run GA (sec): {sum(logs['times'])}")

    ae = AutoEncoder(mnist_in, num_features, random_seed=1234, use_gpu=True)
    z, _ = ae.psi(w_out)
    phi_w_img = ae.phi(w_out)  # Calculate phi(W)
    new_mnist = z @ phi_w_img  # Recreate original images using Z and phi(W)
    new_imgs = np.reshape(new_mnist, train_x.shape)  # Reshape new images to the original shape
    plotter.plot_mnist(new_imgs, f"{num_features}_features_ga")  # Show new images

    # print(loss_values)
    plotter.plot_loss(logs['min'], "MNIST_Gradient_Loss_Over_Generations")
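The Algorithm class (the GA) is likewise not shown. From the way its results are used above, run() is assumed to return the best weight matrix, its cost, and a per-generation log with 'times' and 'min' entries; a small summary helper under that assumption could be:

def summarize_ga_run(w_out, best_cost, logs):
    """Print timing/cost statistics from the logs returned by Algorithm.run() (assumed keys)."""
    avg_time = sum(logs["times"]) / len(logs["times"])      # seconds per generation
    print(f"Generations: {len(logs['min'])}")
    print(f"Average time/generation (sec): {avg_time}")
    print(f"Total GA time (sec): {sum(logs['times'])}")
    print(f"Best cost: {best_cost} (final per-generation minimum: {logs['min'][-1]})")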
def test_mnist(num_epochs=None):
    # Gradient check using MNIST
    (train_x, _), (_, _) = mnist.load_data()
    train_x = train_x / 255  # Normalizing images
    # plotter.plot_mnist(train_x, "original")                           # Show original mnist images

    num_img, img_dim, _ = train_x.shape  # Get number of images and # pixels per square img
    learning_rate = 0.5
    num_features = 200
    loss_values = []  # Keep track of loss values over epochs
    loss_values_less = []
    loss_diffs = []

    w_in = np.random.normal(
        size=(img_dim * img_dim, num_features))  # Random initial W matrix to test
    mnist_in = np.reshape(
        train_x, (img_dim * img_dim, num_img))  # Reshape images to match autoencoder input
    ae = AutoEncoder(mnist_in, num_features, random_seed=1234, use_gpu=True)
    start_time = time.time()
    times = []
    if num_epochs:
        for epoch in range(num_epochs):
            w_in, z_grd = do_epoch(ae, w_in, learning_rate, loss_values, times,
                                   loss_values_less, loss_diffs, epoch,
                                   start_time)
    else:
        epoch_history_check = 5
        epoch = 0
        loss_avg = 1000
        tol = 0.03
        while loss_avg > tol:
            w_in, z_grd = do_epoch(ae, w_in, learning_rate, loss_values, times,
                                   loss_values_less, loss_diffs, epoch,
                                   start_time)
            loss_check = loss_diffs[-epoch_history_check:]
            loss_avg = sum(loss_check) / len(loss_check)
            epoch += 1

    print(
        f"Total time to run gradient descent (sec): {time.time() - start_time}")
    phi_w_img = ae.phi(w_in)  # Calculate phi(W)
    new_mnist = z_grd @ phi_w_img  # Recreate original images using Z and phi(W)
    new_imgs = np.reshape(new_mnist, train_x.shape)  # Reshape new images to the original shape
    plotter.plot_mnist(new_imgs, f"{num_features}_features_gradient")  # Show new images

    # print(loss_values)
    plotter.plot_loss(loss_values, "MNIST_Gradient_Loss_Over_Epochs")
    plotter.plot_loss(
        loss_values_less,
        "MNIST_Gradient_Loss_Over_Epochs_all_epochs_except_zero")
Example #4
def test_cifar10(num_epochs=None):
    # (train_x, _), (_, _) = cifar10.load_data()
    (_, _), (train_x, _) = cifar10.load_data()
    print(train_x.shape)
    plotter.plot_mnist(train_x, "original")
    train_x = rgb2gray(train_x)
    train_x = train_x / 255
    plotter.plot_mnist(train_x, "grayscale")
    num_img, img_h, img_w = train_x.shape
    print(train_x.shape)
    learning_rate = 0.5
    num_features = 768
    loss_values = []
    loss_values_less = []
    loss_diffs = []

    w_in = np.random.normal(size=(img_h * img_w, num_features))
    cifar_in = np.reshape(train_x, (img_h * img_w, num_img))
    # cifar_in = np.reshape(train_x, (img_h, img_w, num_img*img_ch))
    print(cifar_in.shape)

    ae = AutoEncoder(cifar_in, num_features, random_seed=1234, use_gpu=True)
    start_time = time.time()
    times = []
    if num_epochs:
        for epoch in range(num_epochs):
            w_in, z_grd = do_epoch(ae, w_in, learning_rate, loss_values, times, loss_values_less, loss_diffs, epoch,
                                   start_time)
    else:
        epoch_history_check = 5
        epoch = 0
        loss_avg = 1000
        tol = 0.03
        while loss_avg > tol:
            w_in, z_grd = do_epoch(ae, w_in, learning_rate, loss_values, times, loss_values_less, loss_diffs, epoch,
                                   start_time)
            loss_check = loss_diffs[-epoch_history_check:]
            loss_avg = sum(loss_check) / len(loss_check)
            epoch += 1

    print(f"Total time to run gradient decent (sec): {time.time() - start_time}")
    phi_w_img = ae.phi(w_in)  # Calculate phi(W)
    new_cifar = z_grd @ phi_w_img  # Recreate original images using Z and phi(W)
    print(new_cifar.shape)
    new_imgs = np.reshape(new_cifar, train_x.shape)  # Reshape new images to the original shape
    plotter.plot_mnist(new_imgs, f"{num_features}_features_gradient")  # Show new images

    # print(loss_values)
    plotter.plot_loss(loss_values, "CIFAR10_Gradient_Loss_Over_Epochs")
    plotter.plot_loss(loss_values_less, "CIFAR10_Gradient_Loss_Over_Epochs_all_epochs_except_zero")
    # return train_x
    return new_imgs
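rgb2gray is also not shown. Since the result is divided by 255 afterwards, it is presumably a small helper that collapses the channel axis with standard luminance weights while keeping the 0-255 range, roughly like this (an assumed implementation):

import numpy as np

def rgb2gray(imgs):
    """Convert a batch of RGB images (N, H, W, 3) to grayscale (N, H, W) (assumed helper).

    Uses the ITU-R BT.601 luminance weights and keeps the original 0-255 range,
    so the caller's division by 255 still normalises the result to [0, 1].
    """
    weights = np.array([0.299, 0.587, 0.114])
    return imgs @ weights  # contracts the trailing channel axis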
Example #5
def test_random():
    # Sanity test: increasing the number of features should reduce the least-squares reconstruction error.
    num_points = 100
    num_data_per_point = 55
    x_in = np.random.normal(size=(num_data_per_point, num_points))
    loss_values = []
    for num_features in [1, 5, 10, 15, 20, 40, 70]:
        ga = Algorithm(x=x_in, num_features=num_features, debug=1)
        # w_in = np.random.normal(size=(num_data_per_point, num_features))
        w_out, best_cost, logs = ga.run()
        loss_values.append(best_cost)

    plotter.plot_loss(loss_values, "Random_Test_with_Features",
                      "Num features from list [1, 5, 10, 15, 20, 40, 70]")
def test_random():
    # Sanity test: increasing the number of features should reduce the least-squares reconstruction error.
    num_points = 100
    num_data_per_point = 55
    x_in = np.random.normal(size=(num_data_per_point, num_points))
    loss_values = []
    for num_features in [1, 5, 10, 15, 20, 40, 70]:
        ae = AutoEncoder(x_in, num_features, random_seed=1234)
        w_in = np.random.normal(size=(num_data_per_point, num_features))
        z_out, least_squares_test = ae.psi(w_in)
        loss_values.append(least_squares_test)
        print(f"(# features : Least squares error = ({num_features} : {least_squares_test})")

    plotter.plot_loss(loss_values, "Random_Test_with_Features", "Num features from list [1, 5, 10, 15, 20, 40, 70]")
Example #7
def main(is_train, prediction, plotting, scaling, selected_model):
    if len(sys.argv) < 3:
        print(LINESPLIT)
        print("Usage: python3 {} <path_to_ssn_datafile> <path_to_aa_datafile>".
              format(os.path.basename(__file__)))
        data_file = "data/SILSO/TSN/SN_m_tot_V2.0.txt"
        aa_file = "data/ISGI/aa_1869-01-01_2020-12-19_D.dat"
    else:
        data_file = sys.argv[1]
        aa_file = sys.argv[2]

    print(LINESPLIT)
    print("Code running on device: {}".format(device))

    ssn_data = datasets.SSN(data_file)
    aa_data = datasets.AA(aa_file)

    print(LINESPLIT)
    print('''Data loaded from file locations :
    SSN - {}
    AA - {}'''.format(os.path.abspath(data_file), os.path.abspath(aa_file)))

    if plotting:
        plotter.plot_all("combined_data1.jpg")

    cycle_data = ut.get_cycles(ssn_data)

    print(LINESPLIT)
    print("Solar cycle data loaded/saved as: cycle_data.pickle")
    print(LINESPLIT)
    ut.print_cycles(cycle_data)

    train_samples = datasets.Features(ssn_data, aa_data, cycle_data, normalize=scaling,
                                      start_cycle=13, end_cycle=22)
    valid_samples = datasets.Features(ssn_data, aa_data, cycle_data, normalize=scaling,
                                      start_cycle=23, end_cycle=23)
    valid_timestamps, _ = ut.gen_samples(ssn_data, aa_data, cycle_data, cycle=23,
                                         normalize=scaling, tf=cycle_data["length"][23])
    predn_timestamps, predn_samples = ut.gen_samples(ssn_data, aa_data, cycle_data,
                                                     cycle=24, normalize=scaling)

    print(LINESPLIT)
    print('''Selected data:
    Training: SC 13 to 22
    Validation: SC 23
    Prediction: SC 24''')

    ############ FFNN/RNN/LSTM (model chosen by user) ############

    model = getattr(models, selected_model)(inp_dim=6).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="min",
                                                           factor=0.9,
                                                           verbose=True)

    print(LINESPLIT)
    print('''Selected model: {}
    Training mode: {}
    Prediction mode: {}'''.format(model, is_train, prediction))

    print(LINESPLIT)
    print("Selected optimizer: {}".format(optimizer))

    print(LINESPLIT)
    print('''Selected scheduler: {}(
    {})'''.format(scheduler.__class__.__name__, scheduler.state_dict()))

    pre_trained = load_model(model)

    if not pre_trained:
        if not is_train and prediction:
            print(LINESPLIT)
            print(
                "Warning: Prediction is ON with training OFF and no pretrained models available"
            )

    train_loader = DataLoader(dataset=train_samples,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_samples,
                              batch_size=1,
                              shuffle=False)

    ### Training ###

    if is_train:
        if not pre_trained:
            model.train()
            print(LINESPLIT)
            print("Training model with solar cycle {} to {} data with: num_epochs={}".\
            format(datasets.START_CYCLE, datasets.END_CYCLE - 2, epochs))

            loss = train(model, train_loader, optimizer, scheduler, epochs)
            torch.save(
                model.state_dict(),
                "{}_{}_{}.pth".format(modelfolder, model.__class__.__name__,
                                      MAX_EPOCHS))

            plotter.plot_loss("Average Training Loss", range(len(loss)), loss, "tr_{}.png".\
            format(model.__class__.__name__))

            print(LINESPLIT)
            print('''Training finished successfully.
            Saved model checkpoints can be found in: {}
            Saved data/loss graphs can be found in: {}'''.format(
                modelfolder, graphfolder))

        else:
            print(LINESPLIT)
            print(
                "Skipping training, using pre-trained model for validation and prediction"
            )

    ### Validating ###

        model.eval()
        print(LINESPLIT)
        print("Validating model for solar cycle {} data".format(
            datasets.END_CYCLE - 1))

        valid_predictions, valid_loss = validate(model, valid_loader,
                                                 valid_timestamps)

        plotter.plot_predictions("SC{} Prediction".format(datasets.END_CYCLE - 1),\
        valid_timestamps, valid_predictions, "SC 23 Validation.png", compare=True)
        plotter.plot_loss("Validation Loss", range(len(valid_loss)), valid_loss, "val_{}.png".\
        format(model.__class__.__name__))

        print(LINESPLIT)
        print('''Validation finished successfully.\n
        Saved prediction/loss graphs can be found in: {}'''.format(
            graphfolder))

    ### Predicting ###

    if prediction:
        model.eval()
        print(LINESPLIT)
        print("Predicting SC {} using the above trained model".format(
            datasets.END_CYCLE))

        predn_predictions = predict(model, predn_samples, predn_timestamps)

        plotter.plot_predictions("SC{} Prediction".format(datasets.END_CYCLE),\
        predn_timestamps, predn_predictions, "SC 24 Prediction.png", compare=True)
Example #8
    json.dump(validation_list, f, indent=4)

train_data_loader = DataLoader(train_dataset,
                               batch_size=args.batch_size,
                               shuffle=True)
test_data_loader = DataLoader(test_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)

predictor = HiddenLabelPredictorModel(bert, 768, args.num_predictors)

trainer = UI2VecTrainer(predictor, train_data_loader, test_data_loader, vocab,
                        len(vocab_list), args.rate, args.num_predictors, 768,
                        args.loss)

test_loss_data = []
train_loss_data = []
for epoch in tqdm.tqdm(range(args.epochs)):
    print(epoch)
    train_loss = trainer.train(epoch)
    print(train_loss)
    train_loss_data.append(train_loss)
    if test_data_loader is not None:
        test_loss = trainer.test(epoch)
        print(test_loss)
        test_loss_data.append(test_loss)
    if (epoch % 20) == 0:
        trainer.save(epoch, args.output_path)
trainer.save(args.epochs, args.output_path)
plot_loss(train_loss_data, test_loss_data)
                     2: hyper_params[2]['name'],
                     3: hyper_params[3]['name'],
                     4: hyper_params[4]['name']})

print(df)
print()

# Loop through features and train with given hyperparameters
for params in hyper_params:
    column_name = params['name']
    if column_name == 'Cancer' or column_name == 'Heart Disease' or column_name == 'Alsheimers' or column_name == 'Total':
        continue

    weight, bias, error, epoch_data = trainer.train_model(
        feature=df[column_name],
        label=df[label_name],
        learning_rate=params['learning_rate'],
        number_epochs=params['epochs'],
        batch_size=params['batch_size'])
    print(f'bias={bias}, weight={weight}')
    print()

    plotter.plot_model(title='Causes of Death',
                       feature_title=column_name,
                       label_title=label_name,
                       weight=weight,
                       bias=bias,
                       feature_data=df[column_name],
                       label_data=df[label_name])
    plotter.plot_loss(epoch_data=epoch_data, root_mean_squared_error=error)
Example #10
nn.add(Dense(512, activation='relu'))
nn.add(Dropout(0.2))
nn.add(Dense(256, activation='relu'))
nn.add(Dropout(0.2))
nn.add(Dense(256, activation='relu'))
nn.add(Dropout(0.2))
nn.add(Dense(nb_breeds, activation='softmax'))

# Compile the NN
nn.compile(optimizer='sgd',
           loss='categorical_crossentropy',
           metrics=['accuracy'])
# Train the NN
history = nn.fit(x_train,
                 y_train,
                 batch_size=batch_size,
                 epochs=nb_epochs,
                 validation_split=0.2)

# Evaluate the model with test set
score = nn.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Plots
plt.plot_accuracy(history)
plt.plot_loss(history)

# Confusion matrix and classification report
plt.get_statistics(nn, x_test, y_test)
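In this last example, plt is the example's own plotting helper rather than matplotlib.pyplot. plot_loss and plot_accuracy presumably read the Keras History.history dictionary; a sketch under that assumption (not the actual module):

import matplotlib.pyplot as pyplot

def plot_loss(history):
    """Plot training vs. validation loss from a Keras History object (assumed helper)."""
    pyplot.figure()
    pyplot.plot(history.history["loss"], label="train loss")
    pyplot.plot(history.history["val_loss"], label="validation loss")
    pyplot.xlabel("Epoch")
    pyplot.ylabel("Categorical cross-entropy")
    pyplot.legend()
    pyplot.show()

def plot_accuracy(history):
    """Plot training vs. validation accuracy from a Keras History object (assumed helper)."""
    pyplot.figure()
    pyplot.plot(history.history["accuracy"], label="train accuracy")
    pyplot.plot(history.history["val_accuracy"], label="validation accuracy")
    pyplot.xlabel("Epoch")
    pyplot.ylabel("Accuracy")
    pyplot.legend()
    pyplot.show()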