Example #1
def runAutoencoder():
    ds = StockPrice()
    #print ds.train[0][0]
    data = np.random.randn(10, 5).astype(config.floatX)
    #print data
    print BinomialCorruptor(.2)
    ae = DenoisingAutoencoder(BinomialCorruptor(corruption_level=.2), 1000, 100, act_enc='sigmoid', act_dec='linear',
                     tied_weights=False)
    trainer = sgd.SGD(learning_rate=.005,
                      batch_size=5,
                      termination_criterion=EpochCounter(3),
                      cost=cost_ae.MeanSquaredReconstructionError(),
                      monitoring_batches=5,
                      monitoring_dataset=ds)
    trainer.setup(ae, ds)
    while True:
        trainer.train(dataset=ds)
        ae.monitor()
        ae.monitor.report_epoch()
        if not trainer.continue_learning(ae):
            break
    #print ds.train[0][0]
    #print ae.reconstruct(ds.train[0][0])

    w = ae.weights.get_value()
    #ae.hidbias.set_value(np.random.randn(1000).astype(config.floatX))
    hb = ae.hidbias.get_value()
    #ae.visbias.set_value(np.random.randn(100).astype(config.floatX))
    vb = ae.visbias.get_value()
    d = tensor.matrix()
    result = np.dot(1. / (1 + np.exp(-hb - np.dot(ds.train[0][0],  w))), w.T) + vb
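    # Hedged addition (not part of the original snippet): the symbolic input d
    # defined above can be used to compile the model's own reconstruction and
    # compare it against the manual numpy `result`. With tied_weights=False the
    # decoder has its own w_prime, and DenoisingAutoencoder.reconstruct also
    # corrupts its input, so the two are not expected to agree exactly.
    reconstruct_fn = theano.function([d], ae.reconstruct(d))
    #print reconstruct_fn(ds.train[0][0])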
Example #2
    def __init__(self,
                 corruptor=BinomialCorruptor(0.5),
                 num_corruptions=2,
                 nvis=0,
                 nhid=10,
                 act_enc='sigmoid',
                 act_dec='sigmoid',
                 tied_weights=False,
                 irange=1e-3,
                 rng=9001,
                 dataset_adaptor=VectorDataset(),
                 trainer=SGDTrainer()):
        self.configs = {
            'corruptor': corruptor,
            'num_corruptions': num_corruptions,
            'nvis': nvis,
            'nhid': nhid,
            'act_enc': act_enc,
            'act_dec': act_dec,
            'tied_weights': tied_weights,
            'irange': irange,
            'rng': rng,
        }

        self.dataset_adaptor = dataset_adaptor
        self.trainer = trainer
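
    # Hedged illustration (not part of the original class): the stored configs
    # mirror the constructor arguments of HigherOrderContractiveAutoencoder, so
    # a hypothetical builder method could simply unpack them once the input
    # dimensionality is known.
    def build_model(self, nvis):
        configs = dict(self.configs, nvis=nvis)
        return HigherOrderContractiveAutoencoder(**configs)
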
def create_denoising_autoencoder(structure, corruption=0.1, act='tanh'):
    n_input, n_output = structure
    corruptor = LoggingCorruptor(
        BinomialCorruptor(corruption_level=corruption),
        name='{}'.format(structure))

    irange = numpy.sqrt(6. / (n_input + n_output))
    if act == theano.tensor.nnet.sigmoid or act == 'sigmoid':
        irange *= 4


#     log.debug('initial weight range: {}'.format(irange));

    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': act,
        'act_dec': act,
        'irange': irange,  #0.001,
    }
    log.debug('creating denoising autoencoder {}'.format(config))

    da = AdaptableDenoisingAutoencoder(**config)
    return da
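
# Hedged usage sketch (illustrative dimensions, not from the original file):
# a single 784 -> 500 denoising layer with 20% binomial corruption.
da_layer = create_denoising_autoencoder([784, 500], corruption=0.2, act='sigmoid')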
Example #4
    def new_model(self, model_params, dataset):
        corruptor = BinomialCorruptor(
            corruption_level=model_params['noise_level'])
        model = DenoisingAutoencoder(nvis=dataset.X.shape[1],
                                     nhid=model_params['hidden_outputs'],
                                     irange=model_params['irange'],
                                     corruptor=corruptor,
                                     act_enc='tanh',
                                     act_dec=None)
        return model
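
    # Hedged usage sketch (hypothetical parameter values, not from the original
    # code):
    # model_params = {'noise_level': 0.3, 'hidden_outputs': 200, 'irange': 0.05}
    # dae = self.new_model(model_params, dataset)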
Example #5
def get_denoising_autoencoder(structure):
    n_input, n_output = structure
    corruptor = BinomialCorruptor(corruption_level=0.5)
    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': 'sigmoid',
        'act_dec': 'sigmoid',
        'irange': 0.001,
    }
    return DenoisingAutoencoder(**config)
def get_denoising_autoencoder(structure,corr_val):
    n_input, n_output = structure
    corruptor = BinomialCorruptor(corruption_level=corr_val)
    #corruptor =  GaussianCorruptor(stdev=0.25)
    config = {
        'corruptor': corruptor,
        'nhid': n_output,
        'nvis': n_input,
        'tied_weights': True,
        'act_enc': 'sigmoid',
        'act_dec': 'sigmoid',
        'irange': 4*np.sqrt(6. / (n_input + n_output)),
    }
    return DenoisingAutoencoder(**config)
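
# Hedged usage sketch (illustrative dimensions only): a 3072 -> 1000 layer
# with 30% binomial corruption.
dae_layer = get_denoising_autoencoder([3072, 1000], corr_val=0.3)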
    def create_layer_one(self):
        
        which_set = "train"
        one_hot = True
        start = 0
        # Creating 5 random patch layers based on 8,000 samples (the saturation
        # point where the objective no longer improves).
        stop = 800
        # GridPatchCIFAR10 randomly selects five 16x16 patches from each image,
        # and we do this 5 times. This helps increase training time and captures
        # more information, similar to how the neurons in the eye are attached
        # to a specific region in the image.
        dataset = GridPatchCIFAR10(which_set=which_set, one_hot=one_hot, start=start, stop=stop)

        # Denoising autoencoder model hyper-parameters
        nvis = 768
        nhid = 512 
        irange = 0.05
        corruption_lvl = 0.2
        corruptor = BinomialCorruptor(corruption_level=corruption_lvl)
        activation_encoder = "tanh"
        # Linear activation
        activation_decoder = None 

        # Creating the denoising autoencoder
        model = DenoisingAutoencoder(nvis=nvis, nhid=nhid, irange=irange, corruptor=corruptor, act_enc=activation_encoder, act_dec=activation_decoder)

        # Parameters for SGD learning algorithm instantiated below
        learning_rate = 0.001
        batch_size = 100
        monitoring_batches = 5
        monitoring_dataset = dataset
        cost = MeanSquaredReconstructionError()
        max_epochs = 10
        termination_criterion = EpochCounter(max_epochs=max_epochs)
        

        # SGD Learning algorithm
        algorithm = SGD(learning_rate=learning_rate, batch_size=batch_size, monitoring_batches=monitoring_batches, monitoring_dataset=dataset, cost=cost, termination_criterion=termination_criterion)


        processes = []
        for i in range(0,5):
            print "Training DAE Sub-Layer: ", i
            save_path = self.save_path+str(i)+".pkl"
            save_freq = 1
            train = Train(dataset=dataset,model=model,algorithm=algorithm, save_path=save_path, save_freq=save_freq)
            p = Process(target=train.main_loop, args=())
            p.start()
            processes.append(p)

        for process in processes:
            process.join()
def test_high_order_autoencoder_init():
    """
    Just test that model initialize and return
    the penalty without error.
    """
    corruptor = BinomialCorruptor(corruption_level=0.5)
    model = HigherOrderContractiveAutoencoder(corruptor=corruptor,
                                              num_corruptions=5,
                                              nvis=20,
                                              nhid=30,
                                              act_enc='sigmoid',
                                              act_dec='sigmoid')

    X = tensor.matrix()
    data = np.random.randn(50, 20).astype(config.floatX)
    ff = theano.function([X], model.higher_order_penalty(X))
    assert type(ff(data)) == np.ndarray
    def combine_sublayers(self):
        print "Combining sub-layers"

        # Create a large 2560-unit DAE. The model is considered trained once the
        # concatenation of its 512-unit sub-layers is in place: the 2560 hidden
        # units' weights will be initialized from them.
        nvis = 3072
        nhid = 2560
        irange = 0.05
        corruption = 0.2
        corruptor = BinomialCorruptor(corruption_level=corruption)
        activation_encoder = "tanh"
        activation_decoder = None

        # By default, the DAE initializes the weights at random
        # Since we're using our own already pre-trained weights
        # We will instead change those values.
        large_dae = DenoisingAutoencoder(nvis=nvis, nhid=nhid, corruptor=corruptor, irange=irange, act_enc=activation_encoder, act_dec=activation_decoder)

        # No need to change the hidden or visible biases;
        # they are effectively static here.
        large_dae._params = [
            large_dae.visbias,
            large_dae.hidbias,
            # Here is where we change the weights.
            large_dae.weights
        ]

        # Collect the weight matrices of the five sub-layers here.
        large_dae_weights = []
        # Load sub-layer models and get their weights.
        for i in range (0,5):
            fo = open(self.save_path+str(i)+".pkl", 'rb')

            # 768 Vis, 512 hidden unit DAE.
            small_dae = cPickle.load(fo)
            fo.close()
            
            # TODO: Create numpy array of proper values to set the large_dae. 
            # Get the weights from the small_dae's
            # so that they can be appended together.                                      
            weights = small_dae.weights.get_value()
            large_dae_weights.append(weights)
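
        # Hedged sketch (not in the original): stack the five 768x512 sub-layer
        # weight matrices along the hidden axis, giving a 768x2560 block.
        combined = np.concatenate(large_dae_weights, axis=1)
        # How the 768-dim patch inputs map onto large_dae's 3072-dim image
        # input is left open by the TODO above, so the final assignment (e.g.
        # large_dae.weights.set_value(...)) is not attempted here.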

        print "Successfully combined sub-layers"
def test_sdae():
    """
    Tests that StackedDenoisingAutoencoder works correctly
    """
    data = np.random.randn(10, 5).astype(config.floatX) * 100
    ae = Autoencoder(5, 7, act_enc='tanh', act_dec='cos', tied_weights=False)
    corruptor = BinomialCorruptor(corruption_level=0.5)
    model = StackedDenoisingAutoencoder([ae], corruptor)
    model._ensure_extensions()

    w = ae.weights.get_value()
    w_prime = ae.w_prime.get_value()
    ae.hidbias.set_value(np.random.randn(7).astype(config.floatX))
    hb = ae.hidbias.get_value()
    ae.visbias.set_value(np.random.randn(5).astype(config.floatX))
    vb = ae.visbias.get_value()
    d = tensor.matrix()
    result = np.cos(np.dot(np.tanh(hb + np.dot(data, w)), w_prime) + vb)
    ff = theano.function([d], model.reconstruct(d))
    assert not _allclose(ff(data), result)
Example #11
def construct_ae(structure):
    # some settings
    irange = 0.1

    layers = []
    for vsize, hsize in zip(structure[:-1], structure[1:]):
        # DenoisingAutoencoder / ContractiveAutoencoder / HigherOrderContractiveAutoencoder
        layers.append(
            autoencoder.DenoisingAutoencoder(
                nvis=vsize,
                nhid=hsize,
                tied_weights=True,
                act_enc='sigmoid',
                act_dec='sigmoid',
                irange=irange,
                # for DenoisingAutoencoder / HigherOrderContractiveAutoencoder:
                corruptor=BinomialCorruptor(0.5),
                # for HigherOrderContractiveAutoencoder:
                # num_corruptions=6
            ))
    return StackedBlocks(layers)
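
# Hedged usage sketch (illustrative layer sizes): a 784 -> 400 -> 100 stack of
# tied-weight sigmoid denoising autoencoders.
ae_stack = construct_ae([784, 400, 100])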
Example #12
def main():

    # Only the trainset is processed by this function.
    print 'getting preprocessed data to train model'
    pp_trainset, testset = get_processed_dataset()
    # remember to change here when changing datasets
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    dmat = trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=1000,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None

    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='testrun.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)

    trainer.main_loop()
Example #13
def main():

    # the data isn't going to be fully processed, so we may have to
    # do some stuff to the testset still to make it work.
    trainset, testset = get_processed_dataset()

    # Creating the patch-pairs:
    design_matrix = trainset.get_design_matrix()
    processed_patch_size = design_matrix.shape[1]

    num_images = train_size

    examples_per_image = patches_per_image * (patches_per_image - 1)
    num_examples = examples_per_image * num_images

    stamps = trainset.stamps
    max_stamp = input_width - patch_width
    d_size = (2 * max_stamp + 1)**input_dim

    patch_pairs = np.zeros((num_examples, 2 * processed_patch_size))
    distances = np.zeros((num_examples, input_dim))
    distances_onehot = np.zeros((num_examples, d_size))
    examples = np.zeros((num_examples, 2 * processed_patch_size + d_size))

    nvis = 2 * processed_patch_size + d_size

    def flatten_encoding(encoding, max_stamp):
        dims = len(encoding)
        flat_encoding = 0
        for i in xrange(dims - 1):
            flat_encoding += encoding[i]
            flat_encoding *= max_stamp
        flat_encoding += encoding[-1]
        return flat_encoding
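
    # Worked example for the helper above (hedged note, not a change to the
    # code): for a 2D offset [a, b], flatten_encoding returns a * max_stamp + b.
    # Since each component of distance_encoding ranges over 0..2*max_stamp, a
    # radix of 2 * max_stamp + 1 would match d_size exactly.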

    # Can be done without (or with fewer) for loops?
    print 'begin for loop'
    for i in xrange(num_images):
        if (i % 1000 == 0):
            print i, '-th outer loop...'
        for j in xrange(patches_per_image):
            patch1_num = i * patches_per_image + j
            patch1_pos = stamps[patch1_num, :]
            for k in xrange(patches_per_image):
                example_num = i * examples_per_image + j * (patches_per_image -
                                                            1) + k
                if (k > j):
                    example_num -= 1
                if (k != j):
                    patch2_num = i * patches_per_image + k
                    patch2_pos = stamps[patch2_num, :]
                    distance = patch1_pos - patch2_pos
                    distances[example_num] = distance
                    distance_encoding = distance + max_stamp
                    distance_encoding = flatten_encoding(
                        distance_encoding, max_stamp)
                    distances_onehot[example_num, distance_encoding] = 1
                    p1 = design_matrix[patch1_num]
                    p2 = design_matrix[patch2_num]
                    patch_pairs[example_num] = np.hstack((p1, p2))
                    examples[example_num] = np.hstack(
                        (patch_pairs[example_num],
                         distances_onehot[example_num]))
    print 'end for loop'

    trainset.set_design_matrix(examples)

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None

    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='run.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=trainset)

    trainer.main_loop()
Example #14
patch_width = 4
# assume 2D for next line
patch_shape = (patch_width, patch_width)
patches_per_image = 3

num_components = num_channels * input_width**input_dim
#num_components =
# keep_var_fraction currently doesn't work properly because of a bug in pca.py
keep_var_fraction = .9

train_size = 4900

nhid = 40

# Not sure why this would need to go here...
corruptor = BinomialCorruptor(corruption_level=0.5)

# Loads the preprocessed dataset, or does the preprocessing and saves the dataset.
# Currently using CIFAR10


def get_processed_dataset():

    train_path = 'pp_cifar10_train.pkl'
    test_path = 'pp_cifar10_test.pkl'

    if os.path.exists(train_path) and \
            os.path.exists(test_path):
        print 'loading preprocessed data'
        trainset = serial.load(train_path)
        testset = serial.load(test_path)
Example #15
def train_sda(params):

    input_trainset, trainset_yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__),
                     'train_sda_dataset_template.yaml'),
        params=params,
    )

    log.info('... building the model')

    # build layers
    layer_dims = [params.input_length]
    layer_dims.extend(params.hidden_layers_sizes)

    layers = []
    for i in xrange(1, len(layer_dims)):
        layer_params = {
            'name': 'da' + str(i),
            'n_inputs': layer_dims[i - 1],
            'n_outputs': layer_dims[i],
            'corruption_level': params.pretrain.corruption_levels[i - 1],
            'input_range':
            numpy.sqrt(6. / (layer_dims[i - 1] + layer_dims[i])),
            'random_seed': params.random_seed,
        }

        layers.append(
            load_yaml_file(
                os.path.join(os.path.dirname(__file__),
                             'train_sda_layer_template.yaml'),
                params=layer_params,
            )[0])

    # unsupervised pre-training
    log.info('... pre-training the model')
    start_time = time.clock()

    for i in xrange(len(layers)):
        # reset corruption to make sure input is not corrupted
        for layer in layers:
            layer.set_corruption_level(0)

        if i == 0:
            trainset = input_trainset
        elif i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(
                                              layers[0:i]))

        # set corruption for layer to train
        layers[i].set_corruption_level(params.pretrain.corruption_levels[i])

        # FIXME: this is not so nice but we have to do it this way as YAML is not flexible enough
        trainer = get_layer_trainer_sgd_autoencoder(
            layers[i],
            trainset,
            learning_rate=params.pretrain.learning_rate,
            max_epochs=params.pretrain.epochs,
            batch_size=params.pretrain.batch_size,
            name='pre-train' + str(i))

        log.info('unsupervised training layer %d, %s ' %
                 (i, layers[i].__class__))
        trainer.main_loop()

#         theano.printing.pydotprint_variables(
#                                      layer_trainer.algorithm.sgd_update.maker.fgraph.outputs[0],
#                                      outfile='pylearn2-sgd_update.png',
#                                      var_with_name_simple=True);

    end_time = time.clock()
    log.info('pre-training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    if params.untie_weights:
        # now untie the decoder weights
        log.info('untying decoder weights')
        for layer in layers:
            layer.untie_weights()

    # construct multi-layer training functions

    # unsupervised training
    log.info('... training the model')

    sdae = None
    for depth in xrange(1, len(layers) + 1):
        first_layer_i = len(layers) - depth
        log.debug('training layers {}..{}'.format(first_layer_i,
                                                  len(layers) - 1))

        group = layers[first_layer_i:len(layers)]
        #         log.debug(group);

        # reset corruption
        for layer in layers:
            layer.set_corruption_level(0)

        if first_layer_i == 0:
            trainset = input_trainset
        elif first_layer_i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(
                                              layers[0:first_layer_i]))

        # set corruption for input layer of stack to train


#         layers[first_layer_i].set_corruption_level(stage2_corruption_levels[first_layer_i]);

        corruptor = LoggingCorruptor(
            BinomialCorruptor(corruption_level=params.pretrain_finetune.
                              corruption_levels[first_layer_i]),
            name='depth {}'.format(depth))
        sdae = StackedDenoisingAutoencoder(group, corruptor)

        trainer = get_layer_trainer_sgd_autoencoder(
            sdae,
            trainset,
            learning_rate=params.pretrain_finetune.learning_rate,
            max_epochs=params.pretrain_finetune.epochs,
            batch_size=params.pretrain_finetune.batch_size,
            name='multi-train' + str(depth))

        log.info('unsupervised multi-layer training %d' % (depth))
        trainer.main_loop()

    end_time = time.clock()
    log.info('full training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # save the model
    model_file = os.path.join(params.experiment_root, 'sda', 'sda_all.pkl')
    with log_timing(log, 'saving SDA model to {}'.format(model_file)):
        serial.save(model_file, sdae)

    if params.untie_weights:
        # save individual layers for later (with untied weights)
        for i, layer in enumerate(sdae.autoencoders):
            layer_file = os.path.join(params.experiment_root, 'sda',
                                      'sda_layer{}_untied.pkl'.format(i))
            with log_timing(
                    log,
                    'saving SDA layer {} model to {}'.format(i, layer_file)):
                serial.save(layer_file, layer)

    # save individual layers for later (with tied weights)
    for i, layer in enumerate(sdae.autoencoders):
        if params.untie_weights:
            layer.tie_weights()
        layer_file = os.path.join(params.experiment_root, 'sda',
                                  'sda_layer{}_tied.pkl'.format(i))
        with log_timing(
                log, 'saving SDA layer {} model to {}'.format(i, layer_file)):
            serial.save(layer_file, layer)

    log.info('done')

    return sdae
Example #16
def main():

    # Only the trainset is processed by this function.
    print 'getting preprocessed data for training model'
    pp_trainset, testset = get_processed_dataset()
    # remember to change here when changing datasets
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    dmat = pp_trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.3),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=learning_rate,
        cost=MeanSquaredReconstructionError(),
        batch_size=100,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None

    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='run.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)

    trainer.main_loop()

    ####################
    # Plot and Save:

    # choose random patch-pairs to plot
    stamps = pp_trainset.stamps
    num_examples = stamps.shape[0]
    to_plot = np.random.randint(0, num_examples, num2plot)

    # use to_plot indices to extract data
    stamps_data = stamps[to_plot]
    image_numbers = stamps[to_plot, 0].astype(int)
    X = trainset.X
    images_data = trainset.get_topological_view(X[image_numbers])
    p1x = stamps_data[:, 1]
    p1y = stamps_data[:, 2]
    p2x = stamps_data[:, 3]
    p2y = stamps_data[:, 4]

    # For input ppd's, once we've identified the patches, we just outline them and draw an arrow for d
    # This might mess with original trainset (I dunno), in which case, we should make a copy
    add_outlines(images_data, p1x, p1y, patch_width)
    add_outlines(images_data, p2x, p2y, patch_width)

    ##################################################
    # translating outputs back into things we can plot
    dataset = pp_trainset
    Xout = dataset.X.astype('float32')
    max_stamp = input_width - patch_width
    d_size = (2 * max_stamp + 1)**input_dim
    # displacement
    d_enc = Xout[:, -d_size:]
    d_out_flat = np.argmax(d_enc, axis=1)
    d_shape = [2 * max_stamp + 1, 2 * max_stamp + 1]  # assumed 2D
    d_out = flat_to_2D(d_out_flat, d_shape)
    d_out[to_plot, ]
    # patches
    vc = dataset.view_converter
    p_enc = Xout[:, :len(Xout.T) - d_size]
    p_size = p_enc.shape[1] / 2
    p1_enc = p_enc[:, :p_size]
    p2_enc = p_enc[:, p_size:]
    p1_enc = vc.design_mat_to_topo_view(p1_enc)
    p2_enc = vc.design_mat_to_topo_view(p2_enc)
    pp = dataset.preprocessor
    gcn = pp.items[1]
    means = gcn.means
    normalizers = gcn.normalizers
    toshape = (num_examples, )
    for i in range(input_dim):
        toshape += (1, )
    if num_channels != 1:
        toshape += (1, )
    # When the number of patches and patch-pairs differs, this breaks.
    # I need to match up normalizers/means with their corresponding patches
    # undoing the PCA might be breaking too, but without errors...
    normalizers1 = expand_p1(normalizers)
    normalizers2 = expand_p2(normalizers)
    means1 = expand_p1(means)
    means2 = expand_p2(means)

    p1_enc *= normalizers1.reshape(toshape)
    p1_enc += means1.reshape(toshape)
    p2_enc *= normalizers2.reshape(toshape)
    p2_enc += means2.reshape(toshape)
    # Now, we pull off the same examples from the data to compare to dAE inputs in plots
    outputs = copy.deepcopy(images_data)
    insertpatches(outputs, p1_enc[to_plot], p1x, p1y, patch_width)
    insertpatches(outputs, p2_enc[to_plot], p2x, p2y, patch_width)

    plt.figure()

    for i in range(num2plot):
        # Inputs
        plt.subplot(num2plot, 2, 2 * i + 1)
        plt.imshow(images_data[i], cmap=cm.Greys_r)
        print stamps_data[i]
        a = (stamps_data[i, 2] + patch_width / 2,
             stamps_data[i, 1] + patch_width / 2, stamps_data[i, 6],
             stamps_data[i, 5])
        plt.arrow(a[0], a[1], a[2], a[3], head_width=1.0, head_length=0.6)
        # Outputs
        plt.subplot(num2plot, 2, 2 * i + 2)
        plt.imshow(outputs[i], cmap=cm.Greys_r)
        plt.arrow(a[0],
                  a[1],
                  d_out[to_plot[i], 1],
                  d_out[to_plot[i], 0],
                  head_width=1.0,
                  head_length=0.6)

    plt.show()

    savestr = 'cifar_ppd.png'
    plt.savefig(savestr)
def train_SdA(config, dataset):
    ## load config
    hidden_layers_sizes = config.get('hidden_layers_sizes', [10, 10])
    corruption_levels = config.get('corruption_levels', [0.1, 0.2])
    stage2_corruption_levels = config.get('stage2_corruption_levels',
                                          [0.1, 0.1])

    pretrain_epochs = config.get('pretrain_epochs', 10)
    pretrain_lr = config.get('pretrain_learning_rate', 0.001)

    finetune_epochs = config.get('finetune_epochs', 10)
    finetune_lr = config.get('finetune_learning_rate', 0.01)

    batch_size = config.get('batch_size', 10)
    monitoring_batches = config.get('monitoring_batches', 5)

    output_path = config.get('output_path', './')

    input_trainset = dataset
    design_matrix = input_trainset.get_design_matrix()
    #     print design_matrix.shape;
    n_input = design_matrix.shape[1]
    log.info('done')

    log.debug('input dimensions : {0}'.format(n_input))
    log.debug('training examples: {0}'.format(design_matrix.shape[0]))

    # numpy random generator
    #     numpy_rng = numpy.random.RandomState(89677)

    log.info('... building the model')

    # build layers
    layer_dims = [n_input]
    layer_dims.extend(hidden_layers_sizes)

    layers = []
    for i in xrange(1, len(layer_dims)):
        structure = [layer_dims[i - 1], layer_dims[i]]
        layers.append(
            create_denoising_autoencoder(structure,
                                         corruption=corruption_levels[i - 1]))

    # unsupervised pre-training
    log.info('... pre-training the model')
    start_time = time.clock()

    for i in xrange(len(layers)):
        # reset corruption to make sure input is not corrupted
        for layer in layers:
            layer.set_corruption_level(0)

        if i == 0:
            trainset = input_trainset
        elif i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(
                                              layers[0:i]))

        # set corruption for layer to train
        layers[i].set_corruption_level(corruption_levels[i])

        trainer = get_layer_trainer_sgd_autoencoder(
            layers[i],
            trainset,
            learning_rate=pretrain_lr,
            max_epochs=pretrain_epochs,
            batch_size=batch_size,
            monitoring_batches=monitoring_batches,
            name='pre-train' + str(i))

        log.info('unsupervised training layer %d, %s ' %
                 (i, layers[i].__class__))
        trainer.main_loop()

#         theano.printing.pydotprint_variables(
#                                      layer_trainer.algorithm.sgd_update.maker.fgraph.outputs[0],
#                                      outfile='pylearn2-sgd_update.png',
#                                      var_with_name_simple=True);

    end_time = time.clock()
    log.info('pre-training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # now untie the decoder weights
    log.info('untying decoder weights')
    for layer in layers:
        layer.untie_weights()

    # construct multi-layer training functions

    # unsupervised training
    log.info('... training the model')

    sdae = None
    for depth in xrange(1, len(layers) + 1):
        first_layer_i = len(layers) - depth
        log.debug('training layers {}..{}'.format(first_layer_i,
                                                  len(layers) - 1))

        group = layers[first_layer_i:len(layers)]
        #         log.debug(group);

        # reset corruption
        for layer in layers:
            layer.set_corruption_level(0)

        if first_layer_i == 0:
            trainset = input_trainset
        elif first_layer_i == 1:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=layers[0])
        else:
            trainset = TransformerDataset(raw=input_trainset,
                                          transformer=StackedBlocks(
                                              layers[0:first_layer_i]))

        # set corruption for input layer of stack to train
#         layers[first_layer_i].set_corruption_level(stage2_corruption_levels[first_layer_i]);

        corruptor = LoggingCorruptor(BinomialCorruptor(
            corruption_level=stage2_corruption_levels[first_layer_i]),
                                     name='depth {}'.format(depth))
        sdae = StackedDenoisingAutoencoder(group, corruptor)

        trainer = get_layer_trainer_sgd_autoencoder(
            sdae,
            trainset,
            learning_rate=finetune_lr,
            max_epochs=finetune_epochs,
            batch_size=batch_size,
            monitoring_batches=monitoring_batches,
            name='multi-train' + str(depth))

        log.info('unsupervised multi-layer training %d' % (depth))
        trainer.main_loop()

    end_time = time.clock()
    log.info('full training code ran for {0:.2f}m'.format(
        (end_time - start_time) / 60.))

    # save the model
    model_file = os.path.join(output_path, 'sdae-model.pkl')
    with log_timing(log, 'saving SDA model to {}'.format(model_file)):
        serial.save(model_file, sdae)

    # TODO: pylearn2.train_extensions.best_params.KeepBestParams(model, cost, monitoring_dataset, batch_size)
    # pylearn2.train_extensions.best_params.MonitorBasedSaveBest

    log.info('done')

    return sdae
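
# Hedged usage sketch (hypothetical configuration; any dict that supports
# .get() with these keys will work, and `trainset` is assumed to be a
# pylearn2 dataset defined elsewhere):
#config = {'hidden_layers_sizes': [300, 100],
#          'corruption_levels': [0.1, 0.2],
#          'stage2_corruption_levels': [0.1, 0.1],
#          'output_path': './sda/'}
#sdae = train_SdA(config, trainset)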
Example #18
def main_train(work_dir="../results/avicenna/",
               corruption_level=0.3,
               nvis=75,
               nhid=600,
               tied_weights=True,
               act_enc="sigmoid",
               act_dec=None,
               max_epochs=2,
               learning_rate=0.001,
               batch_size=20,
               monitoring_batches=5,
               save_freq=1,
               n_components_trans_pca=7):

    conf = {
        'corruption_level': corruption_level,
        'nvis': nvis,
        'nhid': nhid,
        'tied_weights': tied_weights,
        'act_enc': act_enc,
        'act_dec': act_dec,
        'max_epochs': max_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'monitoring_batches': monitoring_batches,
        'save_freq': save_freq,
        'n_components_trans_pca': n_components_trans_pca
    }

    start = time.clock()

    ###############   TRAIN THE DAE
    train_file = work_dir + "train_pca" + str(conf['nvis']) + ".npy"
    save_path = work_dir + "train_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + "_model.pkl"

    trainset = NpyDataset(file=train_file)
    trainset.yaml_src = 'script'
    corruptor = BinomialCorruptor(corruption_level=conf['corruption_level'])
    dae = DenoisingAutoencoder(nvis=conf['nvis'],
                               nhid=conf['nhid'],
                               tied_weights=conf['tied_weights'],
                               corruptor=corruptor,
                               act_enc=conf['act_enc'],
                               act_dec=conf['act_dec'])
    cost = MeanSquaredReconstructionError()
    termination_criterion = EpochCounter(max_epochs=conf['max_epochs'])
    algorithm = UnsupervisedExhaustiveSGD(
        learning_rate=conf['learning_rate'],
        batch_size=conf['batch_size'],
        monitoring_batches=conf['monitoring_batches'],
        monitoring_dataset=trainset,
        cost=cost,
        termination_criterion=termination_criterion)

    train_obj = Train(dataset=trainset,
                      model=dae,
                      algorithm=algorithm,
                      save_freq=conf['save_freq'],
                      save_path=save_path)
    train_obj.main_loop()

    ###############   APPLY THE MODEL ON THE TRAIN DATASET
    print("Applying the model on the train dataset...")
    model = load(save_path)
    save_train_path = work_dir + "train_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model,
                           dataset=trainset,
                           path=save_train_path)
    dump_obj.main_loop()

    ###############   APPLY THE MODEL ON THE VALID DATASET
    print("Applying the model on the valid dataset...")
    valid_file = work_dir + "valid_pca" + str(conf['nvis']) + ".npy"

    validset = NpyDataset(file=valid_file)
    validset.yaml_src = 'script'
    save_valid_path = work_dir + "valid_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model,
                           dataset=validset,
                           path=save_valid_path)
    dump_obj.main_loop()

    ###############   APPLY THE MODEL ON THE TEST DATASET
    print("Applying the model on the test dataset...")
    test_file = work_dir + "test_pca" + str(conf['nvis']) + ".npy"

    testset = NpyDataset(file=test_file)
    testset.yaml_src = 'script'
    save_test_path = work_dir + "test_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + ".npy"
    dump_obj = FeatureDump(encoder=model, dataset=testset, path=save_test_path)
    dump_obj.main_loop()

    ###############   COMPUTE THE ALC SCORE ON VALIDATION SET
    valid_data = ift6266h12.load_npy(save_valid_path)
    label_data = ift6266h12.load_npy(
        '/data/lisa/data/UTLC/numpy_data/avicenna_valid_y.npy')
    alc_1 = score(valid_data, label_data)

    ###############   APPLY THE TRANSDUCTIVE PCA
    test_data = ift6266h12.load_npy(save_test_path)
    trans_pca = PCA(n_components=conf['n_components_trans_pca'])
    final_valid = trans_pca.fit_transform(valid_data)
    final_test = trans_pca.fit_transform(test_data)

    save_valid_path = work_dir + "valid_pca" + str(
        conf['nvis']) + "_dae" + str(conf['nhid']) + "_tpca" + str(
            conf['n_components_trans_pca']) + ".npy"
    save_test_path = work_dir + "test_pca" + str(conf['nvis']) + "_dae" + str(
        conf['nhid']) + "_tpca" + str(conf['n_components_trans_pca']) + ".npy"

    np.save(save_valid_path, final_valid)
    np.save(save_test_path, final_test)

    ###############   COMPUTE THE NEW ALC SCORE ON VALIDATION SET
    alc_2 = score(final_valid, label_data)

    ###############   OUTPUT AND RETURN THE RESULTS
    timeSpent = ((time.clock() - start) / 60.)
    print 'FINAL RESULTS (PCA-' + str(conf['nvis']) + ' DAE-' + str(conf['nhid']) + ' TransPCA-' + str(conf['n_components_trans_pca']) + ') ALC after DAE: ', alc_1, ' FINAL ALC: ', alc_2, \
            ' Computed in %5.2f min' % (timeSpent)

    return timeSpent, alc_1, alc_2
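
# Hedged usage sketch: run the pipeline with mostly default hyper-parameters
# (assumes the PCA-reduced .npy files already exist under work_dir).
#time_spent, alc_dae, alc_final = main_train(corruption_level=0.3, nhid=600)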