Example no. 1
def sgd_ons():
    # Compare projected SGD against Online Newton Step (ONS) on one split.
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    results = []
    alpha = 0.1
    gamma = 0.1
    radius = 100
    T = 10000
    # Projected SGD baseline.
    _, logger = train_sgd_proj(
        a=x_train,
        b=y_train,
        a_test=x_test,
        b_test=y_test,
        T=T,
        radius=radius,
        alpha=alpha,
    )
    results.append(logger)
    # Online Newton Step (ONS) run with the same radius.
    _, logger = train_ons(
        a=x_train,
        b=y_train,
        a_test=x_test,
        b_test=y_test,
        T=T,
        gamma=gamma,
        radius=radius,
        alpha=alpha,
    )
    results.append(logger)
    plot_results_(results)
    quit()  # halt the script after plotting
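# plot_results_ is not defined in these snippets. A minimal sketch of what it
# plausibly does, assuming each logger carries a `label` and a per-iteration
# `test_errors` trace (both attribute names are assumptions, not confirmed):
import matplotlib.pyplot as plt


def plot_results_sketch(results, add_to_title=""):
    for logger in results:
        plt.plot(logger.test_errors, label=logger.label)
    plt.xlabel("iteration")
    plt.ylabel("test error")
    plt.title("Test error per iteration" + add_to_title)
    plt.legend()
    plt.show()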
Example no. 2
def gd_gd_proj_z():
    # Sweep the projection radius and compare plain GD with projected GD.
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    results = []
    for radius in [0.5, 1.0, 10, 100, 500]:
        alpha = 0.33
        # gamma = 0.1
        # radius = 100
        T = 1000
        _, logger = train_gd(
            a=x_train,
            b=y_train,
            a_test=x_test,
            b_test=y_test,
            T=T,
            # gamma = gamma,
            # radius=radius,
            alpha=alpha,
        )
        results.append(logger)
        _, logger = train_gd_proj(
            a=x_train,
            b=y_train,
            a_test=x_test,
            b_test=y_test,
            T=T,
            # gamma = gamma,
            radius=radius,
            alpha=alpha,
        )
        results.append(logger)
    plot_results_(results)
Example no. 3
def plot_hogwild():
    # Average Hogwild! runs over several seeds and plot them for various K.
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    # --- 2. plot hogwild for various values of K
    n_runs = 3
    n_workers = 8
    T = 1000000
    alpha = 0.33
    beta = 0.37
    theta = 0.2
    results = [
        AvgLogger([
            train_hogwild(a=x_train,
                          b=y_train,
                          a_test=x_test,
                          b_test=y_test,
                          T=T,
                          alpha=alpha,
                          beta=beta,
                          K=K,
                          theta=theta,
                          n_processes=n_workers,
                          sequential=False,
                          seed=s)[1] for s in range(n_runs)
        ]) for K in [3, 10, 50]
    ]

    # --- 1. plot comparison between SGD and hogwild, fixed K
    # n_runs = 3
    # n_workers = 8
    # T = 1000000
    # alpha = 0.33
    # beta = 0.37
    # theta = 0.2
    # results = [AvgLogger([
    #     train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test, T=T, alpha=alpha, beta=beta,
    #                   K=K, theta=theta, n_processes=n_workers, sequential=False, seed=s)[1]
    #     for s in range(n_runs)
    # ]) for K in [3]]
    # results.append(AvgLogger([
    #     train_hogwild(a=x_train, b=y_train, a_test=x_test, b_test=y_test, T=T, alpha=alpha, beta=beta,
    #                   K=3, theta=theta, n_processes=n_workers, sequential=True, seed=s)[1]
    #     for s in range(n_runs)
    # ]))
    # results.append(AvgLogger([
    #     train_sgd(a=x_train, b=y_train, a_test=x_test, b_test=y_test, T=T, alpha=alpha, return_avg=True, seed=s)[1]
    #     for s in range(n_runs)
    # ]))

    plot_results(
        results,
        add_to_title=rf" ($\alpha={alpha}, \beta={beta}, \theta={theta}$, n_runs={n_runs})")
Example no. 4
def sgd_ons_var():
    # Average ONS and projected SGD over several seeds to compare variance.
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    results = []

    n_runs = 5
    T = 1000
    alpha = 0.1
    radius = 100
    gamma = 0.1
    results.append(
        AvgLogger([
            train_ons(
                a=x_train,
                b=y_train,
                a_test=x_test,
                b_test=y_test,
                T=T,
                gamma=gamma,
                radius=radius,
                seed=s,
                alpha=alpha,
            )[1] for s in range(n_runs)
        ]))
    results.append(
        AvgLogger([
            train_sgd_proj(
                a=x_train,
                b=y_train,
                a_test=x_test,
                b_test=y_test,
                T=T,
                alpha=alpha,
                radius=radius,
                seed=s,
            )[1] for s in range(n_runs)
        ]))

    plot_results_(results,
                  add_to_title=rf" - $\alpha={alpha}$, n_runs={n_runs}")
Example no. 5
import os
from datetime import datetime
from pathlib import Path

import pandas as pd

from algos import train_hogwild, train_sgd
from data_utils import load_processed_data

if __name__ == "__main__":
    dir_data = Path(__file__).resolve().parents[1].joinpath("data/")
    save_folder = dir_data.joinpath("../csv_results")
    try:
        os.mkdir(save_folder)
    except FileExistsError:
        pass
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    n_runs = 1
    T_config = 2017521
    alpha = 0.33
    beta = 0.37
    theta = 0.2
    K = 3
    results = {
        "algo": [],
        "time": [],
        "test_error": [],
        "T": [],
        "n_workers": [],
        "K": [],
    }
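    # The snippet ends at the empty results dict, but datetime and pandas are
    # imported and unused above, which suggests a timing loop and a CSV dump
    # follow. A plausible continuation; the loop structure and the logger's
    # `test_errors` attribute are assumptions, not the author's code:
    import time

    for algo_name, n_workers in [("sgd", 1), ("hogwild", 8)]:
        for _ in range(n_runs):
            start = time.time()
            if algo_name == "hogwild":
                _, logger = train_hogwild(a=x_train, b=y_train, a_test=x_test,
                                          b_test=y_test, T=T_config,
                                          alpha=alpha, beta=beta, K=K,
                                          theta=theta, n_processes=n_workers,
                                          seed=0)
            else:
                _, logger = train_sgd(a=x_train, b=y_train, a_test=x_test,
                                      b_test=y_test, T=T_config, alpha=alpha,
                                      seed=0)
            results["algo"].append(algo_name)
            results["time"].append(time.time() - start)
            results["test_error"].append(logger.test_errors[-1])  # assumed
            results["T"].append(T_config)
            results["n_workers"].append(n_workers)
            results["K"].append(K)

    # One timestamped CSV per invocation.
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    pd.DataFrame(results).to_csv(save_folder.joinpath(f"results_{stamp}.csv"),
                                 index=False)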
Example no. 6
    202313, 520268, 628267, 760933, 761105, 761274, 767884, 767948, 768051,
    778196, 781774, 790989, 791094, 913179, 1073703, 1132513, 1132676, 1140226,
    1141794, 1237426, 1241905, 1387080, 1388043, 1570724, 1585962, 1586097
]  # all errors in training

START = 761200
WINDOW = 200

r_filename = "training_data.csv"
p_filename = "training_data_ps.csv"

if not PREPROCESSED:
    array, labels = load_raw_data(r_filename, START + WINDOW)

else:
    array, labels = load_processed_data(p_filename, START + WINDOW)

# Shift the series one step: row t of array2 holds row t-1 of array, pairing
# each sample with the previous timestep.
array2 = np.zeros(shape=array.shape)
array2[1:, :] = np.copy(array[:-1, :])

# Keep only the samples from the window of interest onward.
array3 = array[START:, :]

if not os.path.exists("images"):
    os.makedirs("images")

# Min-max normalise each column to [0, 1]; use names that do not shadow the
# built-in min/max.
col_min = array3.min(axis=0)
col_max = array3.max(axis=0)
array4 = (array3 - col_min) / (col_max - col_min)

# scipy.misc.toimage was removed in SciPy 1.2; PIL reproduces its behavior
# (cmin=-1.0, cmax=1.0 rescales that range to 0..255 grey levels).
from PIL import Image

img = np.clip((array4 + 1.0) / 2.0, 0.0, 1.0)
Image.fromarray((img * 255).astype(np.uint8)).save('images/data1.jpg')
Example no. 7
def main(argv):
    if len(argv) < 3:
        print("Usage: <model> <data_file> [-i (interactive)]")
        exit()
    model_file = argv[1]
    weights_file = argv[1] + ".h5"
    data_file = argv[2]
    if not (os.path.exists(model_file) and os.path.exists(weights_file)
            and os.path.exists(data_file)):
        print("One of the specified files {}, {}, {} doesn't exist".format(
            model_file, weights_file, data_file))
        exit()

    print("# Loading data from files {}".format(data_file))
    (X, y_test) = load_processed_data(data_file, TEST_ROWS)

    print("### Loaded {} test rows".format(X.shape[0]))
    print("## X_test shape: ", X.shape)
    print("## Y_test shape: ", y_test.shape)

    # y_train = np.random.choice([0, 1], size=y_train.shape, p=[0.99, 0.01])

    # Modifying labels to time series prediction

    print("### Modifying labels")
    nonzero_test = np.count_nonzero(y_test)
    print("# Number of non-error labels: {}".format(y_test.shape[0] -
                                                    nonzero_test))
    print("# Number of error labels: {}".format(nonzero_test))

    y_test = warp_labels(y_test, PREDICTION_LENGTH, WINDOW_SIZE)

    nonzero_test = np.count_nonzero(y_test)
    print("## Labels modified")
    print("# Number of non-error labels: {}".format(y_test.shape[0] -
                                                    nonzero_test))
    print("# Number of error labels: {}".format(nonzero_test))

    print("### Modified labels a to signal errors in the next {} samples.".
          format(PREDICTION_LENGTH))

    # Modifying x's to be 3D vectors

    X = make_timeseries_instances(X, WINDOW_SIZE)

    print("### Modified data to tensors with height {}".format(WINDOW_SIZE))

    # Add a trailing channel axis: Conv2D expects (samples, height, width, channels)

    X = np.expand_dims(X, axis=3)

    y_test = y_test[:X.shape[0]]

    print("### Loading the model from file {}".format(model_file))
    with open(model_file, 'r') as json_file:
        model_json = json_file.read()
    model = model_from_json(model_json)
    print("### Loading weights from file {}".format(weights_file))
    model.load_weights(weights_file)
    print("### Loaded model from disk")

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    print("### Evaluating the model")

    if len(argv) < 4:
        score = model.evaluate(X, y_test, verbose=1)
        print("#### Results ####")
        print('Test loss:', score[0])
        print('Test accuracy:', score[1])
    elif "-i" in argv:
        print("Enter sample numbers for prediction:")
        while True:
            line = sys.stdin.readline()
            i = int(line) - PREDICTION_LENGTH
            if 0 < i < X.shape[0]:
                prediction = model.predict(X[i, ].reshape(
                    1, WINDOW_SIZE, PCA_TARGET_SIZE, 1))
                value = 0 if math.isnan(
                    np.sum(prediction)) else np.sum(prediction)
                if value > 0.0001:
                    print("Will fail in {}".format(PREDICTION_LENGTH))
                else:
                    print("Will not fail in {}".format(PREDICTION_LENGTH))
Example no. 8
PREPROCESSED = True

training_filename = 'training_data.csv'
test_filename = 'test_data.csv'

prep_training_filename = 'training_data_ps.csv'
prep_test_filename = 'test_data_ps.csv'

import sys

from sklearn import decomposition

# Usually we will use pre-processed data, this is for a special case
if PREPROCESSED:
    # Normal turn of events
    print("# Loading prepared data from files {} and {}".format(prep_training_filename, prep_test_filename))
    (x_train, y_train), (x_test, y_test) = load_processed_data(prep_training_filename, TRAIN_ROWS), \
                                           load_processed_data(prep_test_filename, TEST_ROWS)
else:
    # Loading raw unprocessed data
    print("# Loading raw data from files {} and {}".format(training_filename, test_filename))

    (x_train, y_train), (x_test, y_test) = load_raw_data(training_filename, TRAIN_ROWS), \
                                           load_raw_data(test_filename, TEST_ROWS)
    # PCA dimensionality reduction: fit on the training set only, then apply
    # the same projection to the test set (refitting on test data would leak).
    pca = decomposition.PCA(n_components=PCA_TARGET_SIZE)
    pca.fit(x_train)
    x_train = pca.transform(x_train)
    x_test = pca.transform(x_test)
    print("# Reduced data to {} dimensions".format(PCA_TARGET_SIZE))
Example no. 9
                            sampler=sampler.SubsetRandomSampler(
                                range(NUM_VAL)))

    optimizer = optim.Adadelta(model.parameters())

    train_model(model, optimizer, loader_train, loader_val, epoches=2)


# Upload data to memory
#data_utils.proccess_data()
(regular_train_images, regular_test_images, regular_val_images,
 regular_train_labels, regular_test_labels, regular_val_labels,
 inverted_train_images, inverted_test_images, inverted_val_images,
 inverted_train_labels, inverted_test_labels, inverted_val_labels,
 train_one_group_images, test_one_group_images, val_one_group_images,
 train_one_group_labels, test_one_group_labels,
 val_one_group_labels) = data_utils.load_processed_data()

# Check for GPU availability:
device = my_models.device_gpu_cpu()
print('using device:', device)

dtype = torch.float32  # we will be using float

# Constant to control how frequently we print train loss
print_every = 100

# Create models:
model = my_models.model_2()
my_models.test_model_size(model, dtype)  # test model size output:

scene_1()
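# my_models.device_gpu_cpu() is not shown either; such helpers typically
# reduce to a one-liner. A sketch, assuming the intent is simply to prefer
# CUDA when it is available:
import torch


def device_gpu_cpu_sketch():
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')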