import numpy

from theano import tensor as T

# The project-specific helpers used below (Dataset, PatchBasedMLP, PostMLP,
# CSVM, NeuralActivations, pre_training, train_prmlp, test_prmlp,
# normalize_data, get_binary_labels) are assumed to be importable from the
# surrounding codebase.


def incremental_data_experiment(prmlp,
                                train_datasets,
                                test_datasets,
                                no_of_patches=64,
                                patch_size=(8, 8),
                                **kwargs):
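    """
    Incrementally train `prmlp` on each dataset in `train_datasets`,
    evaluating it on a single fixed test dataset after every stage, so that
    the test scores trace how performance evolves as more data is seen.
    """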

    ds_train = Dataset()
    ds_test = Dataset()

    costs = []
    test_scores = []

    # `data_dir` and `file_suffix` are assumed to be module-level settings.
    test_ds_name = data_dir + test_datasets[0] + file_suffix
    print "Loading the test dataset"
    ds_test.setup_pretraining_dataset(data_path=test_ds_name,
                                      train_split_scale=0.5,
                                      patch_size=patch_size,
                                      normalize_inputs=False)

    """
    Perform the test on test dataset for each learnt training dataset.
    """
    for x_t_idx in xrange(len(train_datasets)):
        train_ds_name = data_dir + train_datasets[x_t_idx] + file_suffix
        print "Loading the dataset %s" % train_ds_name

        ds_train.setup_pretraining_dataset(data_path=train_ds_name,
                                           train_split_scale=1,
                                           patch_size=patch_size,
                                           normalize_inputs=False)
        print "Training on the dataset %d" % x_t_idx
        cost = train_prmlp(prmlp, ds_train.Xtrain_patches,
                           ds_train.Xtrain_presences, **kwargs["train_args"])
        costs.append(cost)

        print "Testing on the test dataset."
        test_scores_per_ds = test_prmlp(prmlp, ds_test.Xtest_patches,
                                        ds_test.Xtest_presences, no_of_patches,
                                        **kwargs["test_args"])
        test_score_patch_based = prmlp.obj_patch_error_percent
        test_scores.append(test_score_patch_based)

    all_data_dict = {
        "test_scores": test_scores,
        "costs": costs
    }

    numpy.save("/RQusagers/gulcehre/codes/python/experiments/arcade_ds_exps/pretrained_mlp/prmlp_multi_datasets/out/multi_hidden_mlp_240k_lrate_0.025_3hidden_8x8_1epoch.npy", all_data_dict)
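    # Follow-up step: build a fresh dataset and model pair on the 60k
    # dataset and run the joint pre-training of patch-MLP and post-MLP.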
    ds = Dataset()
    data_new =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_20k_64patches_seed_112168712_64patches.npy"

    data_new_40k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_40k_64patches_seed_975168712_64patches.npy"

    data_new_60k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_60k_64patches_seed_975168712_64patches.npy"

    data_new_100k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_100k_seed_98313722_64patches.npy"

    patch_size = (8, 8)

    ds.setup_pretraining_dataset(data_path=data_new_60k,
                                 patch_size=patch_size,
                                 train_split_scale=0.8,
                                 normalize_inputs=False)
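    # train_split_scale=0.8 presumably reserves 80% of the examples for the
    # training split, with the remainder held out for testing.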

    x = T.matrix('x')
    n_hiddens = [2048]
    no_of_patches = 64
    no_of_classes = 11

    prmlp = PatchBasedMLP(x,
                          n_in=patch_size[0] * patch_size[1],
                          n_hiddens=n_hiddens,
                          n_out=no_of_classes,
                          no_of_patches=no_of_patches,
                          output=0,
                          activation=NeuralActivations.Rectifier,
                          use_adagrad=False)

    post_mlp = PostMLP(x,
                       n_in=no_of_classes,
                       n_hiddens=[64],
                       activation=NeuralActivations.Rectifier,
                       n_out=1,
                       use_adagrad=False)
    pre_training(patch_mlp=prmlp, post_mlp=post_mlp, ds=ds)
# Example #3
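# Fragment: `pre_test_test_probs`, `test_set_labels` and `post_cs_args` are
# assumed to be defined earlier in the example this snippet was taken from.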
    print "starting post-testing on the  dataset"
    post_mlp.test(data=pre_test_test_probs,
                  labels=test_set_labels,
                  **post_cs_args["test_args"])


if __name__ == "__main__":
    print "Loading the dataset"

    ds = Dataset()
    data_path_40k = "/RQusagers/gulcehre/dataset/pentomino/pieces/pento64x64_40k_seed_39112222.npy"
    data_path = "/RQusagers/gulcehre/dataset/pentomino/experiment_data/pento64x64_80k_seed_39112222.npy"

    patch_size = (8, 8)
    ds.setup_pretraining_dataset(data_path=data_path_40k,
                                 patch_size=patch_size,
                                 normalize_inputs=False)

    x = T.matrix('x')
    n_hiddens = [1024, 768]
    no_of_patches = 64
    no_of_classes = 11

    prmlp = PatchBasedMLP(x,
                          n_in=patch_size[0] * patch_size[1],
                          n_hiddens=n_hiddens,
                          n_out=no_of_classes,
                          no_of_patches=no_of_patches,
                          activation=NeuralActivations.Rectifier,
                          use_adagrad=False)
def pre_training_on_multi_datasets(patch_mlp=None, post_mlp=None, train_ds=None, test_ds=None):
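    """
    Pre-train the patch-MLP on each dataset in `train_ds` in turn, then
    train and test `post_mlp` on the normalized patch-MLP output
    probabilities, evaluating against a fixed test split drawn from the
    first dataset in `test_ds`.
    """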

    ds = Dataset()
    print "Loading the dataset"

    train_set_patches, train_set_pre, train_set_labels = \
        ds.Xtrain_patches, ds.Xtrain_presences, ds.Ytrain

    test_set_patches, test_set_pre, test_set_labels = \
        ds.Xtest_patches, ds.Xtest_presences, ds.Ytest

    cs_args = {
        "train_args": {
            "L1_reg": 1e-6,
            "learning_rate": 0.75,
            "L2_reg": 1e-5,
            "nepochs": 2,
            "cost_type": "crossentropy",
            "save_exp_data": False,
            "batch_size": 250,
            "normalize_weights": False,
        },
        "test_args": {"save_exp_data": False, "batch_size": 250},
    }

    post_cs_args = {
        "train_args": {
            "L1_reg": 1e-6,
            "learning_rate": 0.08,
            "L2_reg": 1e-5,
            "nepochs": 10,
            "cost_type": "crossentropy",
            "save_exp_data": False,
            "batch_size": 250,
            "normalize_weights": False,
        },
        "test_args": {"save_exp_data": False, "batch_size": 250},
    }

    test_dataset = test_ds[0]

    ds.setup_pretraining_dataset(data_path=test_dataset,
                                 train_split_scale=0.4,
                                 patch_size=(8, 8))
    test_patches, test_pre, test_labels = ds.Xtest_patches, ds.Xtest_presences, ds.Ytest
    # Binarize the test labels once, outside the training loop.
    test_labels = get_binary_labels(test_labels)

    for i in xrange(len(train_ds)):

        print "Current training dataset is %s \n" % (train_ds[i])
        ds.setup_pretraining_dataset(data_path=train_ds[i], train_split_scale=1.0, patch_size=(8, 8))

        train_patches, train_pre, train_labels = ds.Xtrain_patches, ds.Xtrain_presences, ds.Ytrain

        train_labels = get_binary_labels(train_labels)

        print "Starting the pretraining phase."
        (costs, pre_train_probs) = patch_mlp.train(train_patches, train_pre,
                                                   **cs_args["train_args"])
        patch_mlp.save_data()

        print "Testing on the training dataset."
        (costs, pre_test_train_probs) = patch_mlp.test(train_patches, train_pre,
                                                       **cs_args["test_args"])

        print "Testing on the test dataset."
        (costs, pre_test_test_probs) = patch_mlp.test(test_patches, test_pre,
                                                      **cs_args["test_args"])

        print "Normalizing patch-mlp's outputs"
        pre_train_probs = normalize_data(pre_train_probs)
        pre_test_train_probs = normalize_data(pre_test_train_probs)
        pre_test_test_probs = normalize_data(pre_test_test_probs)
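        # The post-MLP consumes the normalized patch-MLP output
        # probabilities as its input features.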

        print "Training post-mlp"
        post_mlp.train(data=pre_train_probs, labels=train_labels, **post_cs_args["train_args"])

        print "starting post-testing on training dataset"
        post_mlp.test(data=pre_test_train_probs, labels=train_labels, **post_cs_args["test_args"])

        print "starting post-testing on the  dataset"
        post_mlp.test(data=pre_test_test_probs, labels=test_labels, **post_cs_args["test_args"])
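
# A minimal usage sketch for the function above; the dataset paths are
# placeholders and the models are assumed to be built as in the other
# examples:
#
#     pre_training_on_multi_datasets(patch_mlp=prmlp,
#                                    post_mlp=post_mlp,
#                                    train_ds=["/path/to/train_20k.npy"],
#                                    test_ds=["/path/to/test_40k.npy"])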
# Example #5
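# Fragment: module-level dataset setup; `ds` is assumed to be a Dataset
# instance created earlier in the example this snippet was taken from.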
    data_new =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_20k_64patches_seed_112168712_64patches.npy"

    data_new_40k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_40k_64patches_seed_975168712_64patches.npy"

    data_new_60k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_60k_64patches_seed_975168712_64patches.npy"

    data_new_100k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_100k_seed_98313722_64patches.npy"

    patch_size = (8, 8)

    ds.setup_pretraining_dataset(data_path=data_new_100k,
                                 patch_size=patch_size,
                                 train_split_scale=0.8,
                                 normalize_inputs=False)

    x = T.matrix('x')
    n_hiddens = [2048]
    no_of_patches = 64
    no_of_classes = 11

    prmlp = PatchBasedMLP(x,
                          n_in=patch_size[0] * patch_size[1],
                          n_hiddens=n_hiddens,
                          n_out=no_of_classes,
                          no_of_patches=no_of_patches,
                          activation=NeuralActivations.Rectifier,
                          use_adagrad=False)
# Example #6
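# Fragment: `csvm`, `pre_test_test_probs`, `test_set_labels` and
# `post_cs_args` are assumed to be defined earlier in the original example.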
    print "starting post-testing on the  dataset"
    test_error = csvm.test(pre_test_test_probs, test_set_labels, **post_cs_args["test_args"])
    print "For testing %s" %(test_error)


if __name__=="__main__":
    print "Task has just started."
    print "Loading the dataset"
    ds = Dataset()
    patch_size=(8,8)

    ds_path = \
    "/RQusagers/gulcehre/dataset/pentomino/experiment_data/pento64x64_80k_seed_39112222.npy"
    data_new =\
    "/RQusagers/gulcehre/dataset/pentomino/rnd_pieces/pento64x64_5k_seed_43112222_64patches_rnd.npy"

    data_new_40k =\
    "/RQexec/gulcehre/datasets/pentomino/pento_64x64_8x8patches/pento64x64_40k_64patches_seed_975168712_64patches.npy"

    ds.setup_pretraining_dataset(data_path=data_new_40k,
                                 patch_size=patch_size,
                                 normalize_inputs=False)
    pre_input = T.matrix('pre_input')
    n_hiddens = [2048]

    prmlp = PatchBasedMLP(pre_input,
                          n_in=8 * 8,
                          n_hiddens=n_hiddens,
                          n_out=11,
                          no_of_patches=64,
                          activation=NeuralActivations.Rectifier,
                          use_adagrad=False)
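    # A C-SVM (presumably an SVM classifier) replaces the post-MLP used in
    # the other examples as the second-stage classifier.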

    csvm = CSVM()
    pre_training(prmlp, csvm, ds)