Example #1
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
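The snippets in this collection omit their import statements. As a rough guide, most of them draw on a common set of pylearn2 modules; the sketch below is an assumption about a typical 0.1-era pylearn2 checkout, not something taken from the original sources (Example #1 additionally needs the Blocks wrapper classes BlocksCost and BlocksModel, which are not covered here).

# Assumed shared imports for the pylearn2 examples below.
import numpy as np
import theano

from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD, MonitorBasedLRAdjuster
from pylearn2.training_algorithms.bgd import BGD
from pylearn2.termination_criteria import EpochCounter, MonitorBased
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.mlp import MLP, Softmax, Linear
from pylearn2.space import VectorSpace, CompositeSpace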
Example #2
def finish_one_layer(X_img_train, X_txt_train, y_train, X_img_test, X_txt_test, y_test, h_units, epochs, lr=0.1, model_type='FullModal', alpha=0.8, layer_num='1', prefix='', suffix='', save_path=''):
    """预备+训练+测试完整的一层"""
    #1.构造数据集
    dsit_train, dsit_test = make_dataset(X_img_train=X_img_train, X_txt_train=X_txt_train, y_train=y_train, 
                                                        X_img_test=X_img_test, X_txt_test=X_txt_test, y_test=y_test)

    # 2. Train the single-layer model
    monitoring_dataset = {'train': dsit_train, 'test': dsit_test}
	
    ae_model = MyMultimodalAutoEncoder(model_type=model_type, alpha=alpha, n_vis_img=X_img_train.shape[1], n_vis_txt=X_txt_train.shape[1], n_hid_img=h_units, n_hid_txt=h_units, dec_f_img=True, dec_f_txt=True)
    alg = SGD(learning_rate=lr, cost=None, batch_size=20, init_momentum=None, monitoring_dataset=monitoring_dataset, termination_criterion=EpochCounter(max_epochs=epochs))
    
    train = Train(dataset=dsit_train, model=ae_model, algorithm=alg, save_path='multi_ae_save_layer' + layer_num + '.pkl', save_freq=10)
    
    t0 = time.clock()
    train.main_loop()
    print 'training time for layer%s: %f' % (layer_num, time.clock() - t0)
    
    # 3. Compute the design matrices obtained by propagating the data through the trained model
    X_img_propup_train, X_txt_propup_train, X_img_propup_test, X_txt_propup_test, X_propup_train, X_propup_test = propup_design_matrix(X_train=dsit_train.X, X_test=dsit_test.X, ae_model=ae_model)
    
    # 4. Evaluate the classification performance of the trained model
    print '!!!evaluate model on dataset+++++++++++++++++++++++++++++++++++++++++++++++++++++++'
    model_evaluate(X_img_train=X_img_propup_train, X_txt_train=X_txt_propup_train, y_train=y_train, X_img_test= X_img_propup_test, X_txt_test=X_txt_propup_test, y_test=y_test, layer_num=layer_num, prefix=prefix, suffix=suffix, save_path=save_path)
    
    return X_img_propup_train, X_txt_propup_train, X_img_propup_test, X_txt_propup_test
Example #3
def test_training_a_model():
    """
    tests whether SparseDataset can be trained
    with a dummy model.
    """

    dim = 3
    m = 10
    rng = np.random.RandomState([22, 4, 2014])

    X = rng.randn(m, dim)
    ds = csr_matrix(X)
    dataset = SparseDataset(from_scipy_sparse_dataset=ds)

    model = SoftmaxModel(dim)
    learning_rate = 1e-1
    batch_size = 5

    epoch_num = 2
    termination_criterion = EpochCounter(epoch_num)

    cost = DummyCost()

    algorithm = SGD(learning_rate, cost, batch_size=batch_size,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=None)

    train.main_loop()
Example #4
def test_multiple_inputs():
    """
    Create a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP which takes both inputs for 1 epoch.
    """
    mlp = MLP(layers=[
        FlattenerLayer(
            CompositeLayer('composite',
                           [Linear(10, 'h0', 0.1),
                            Linear(10, 'h1', 0.1)], {
                                0: [1],
                                1: [0]
                            })),
        Softmax(5, 'softmax', 0.1)
    ],
              input_space=CompositeSpace([VectorSpace(15),
                                          VectorSpace(20)]),
              input_source=('features0', 'features1'))
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace(
            [VectorSpace(20), VectorSpace(15),
             VectorSpace(5)]), ('features1', 'features0', 'targets')))
    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
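On top of the shared imports sketched under Example #1, this example (and Example #8, which repeats it) needs the composite layer and dataset classes; the module paths below are assumptions, not copied from the original test.

# Assumed imports for the composite-input MLP examples.
from pylearn2.models.mlp import FlattenerLayer, CompositeLayer
from pylearn2.datasets.vector_spaces_dataset import VectorSpacesDataset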
Example #5
def test_train_ae():
    GC = GaussianCorruptor

    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000],
        activation_funcs=["sigmoid", "tanh"],
        pre_corruptors=[None, GC(1.0)],
        post_corruptors=[SaltPepperCorruptor(0.5), GC(1.0)],
        layer_samplers=[BinomialSampler(), None],
        tied=False
    )

    # average MBCE over example rather than sum it
    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]

    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10
   )

    trainer = Train(ds, gsn, algorithm=alg, save_path="gsn_ae_example.pkl",
                    save_freq=5)
    trainer.main_loop()
    print "done training"
Example #6
def train():
    LEARNING_RATE = 1e-4
    MOMENTUM = 0.25

    MAX_EPOCHS = 500
    BATCHES_PER_EPOCH = 100
    BATCH_SIZE = 1000

    dataset = FunnelDistribution()
    cost = FunnelGSNCost([(0, 1.0, MSR())], walkback=1)

    gc = GaussianCorruptor(0.75)
    dc = DropoutCorruptor(.5)
    gsn = GSN.new([10, 200, 10],
                  [None, "tanh", "tanh"], # activation
                  [None] * 3, # pre corruption
                  [None] * 3, # post corruption
                  [None] * 3, # layer samplers
                  tied=False)
    gsn._bias_switch = False

    alg = SGD(LEARNING_RATE, init_momentum=MOMENTUM, cost=cost,
              termination_criterion=EpochCounter(MAX_EPOCHS),
              batches_per_iter=BATCHES_PER_EPOCH, batch_size=BATCH_SIZE,
              monitoring_batches=100,
              monitoring_dataset=dataset)

    trainer = Train(dataset, gsn, algorithm=alg, save_path="funnel_gsn.pkl",
                    extensions=[MonitorBasedLRAdjuster()],
                    save_freq=50)

    trainer.main_loop()
    print "done training"
Example #7
def test_flattener_layer_state_separation_for_softmax():
    """
    Creates a CompositeLayer wrapping two Softmax layers
    and ensures that state gets correctly picked apart.
    """
    mlp = MLP(
        layers=[
            FlattenerLayer(
                CompositeLayer(
                    'composite',
                    [Softmax(5, 'sf1', 0.1),
                     Softmax(5, 'sf2', 0.1)]
                )
            )
        ],
        nvis=2
        )

    dataset = DenseDesignMatrix(
        X=np.random.rand(20, 2).astype(theano.config.floatX),
        y=np.random.rand(20, 10).astype(theano.config.floatX))

    train = Train(dataset, mlp,
                  SGD(0.1, batch_size=5, monitoring_dataset=dataset))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
Example #8
def test_multiple_inputs():
    """
    Create a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP which takes both inputs for 1 epoch.
    """
    mlp = MLP(
        layers=[
            FlattenerLayer(
                CompositeLayer(
                    'composite',
                    [Linear(10, 'h0', 0.1),
                     Linear(10, 'h1', 0.1)],
                    {
                        0: [1],
                        1: [0]
                    }
                )
            ),
            Softmax(5, 'softmax', 0.1)
        ],
        input_space=CompositeSpace([VectorSpace(15), VectorSpace(20)]),
        input_source=('features0', 'features1')
    )
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace([
            VectorSpace(20),
            VectorSpace(15),
            VectorSpace(5)]),
         ('features1', 'features0', 'targets')))
    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
Example #9
def test_execution_order():

    # ensure save is called directly after monitoring by checking
    # parameter values in `on_monitor` and `on_save`.

    model = MLP(layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                nvis=3)

    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))

    epoch_counter = EpochCounter(max_epochs=1)

    algorithm = SGD(batch_size=2,
                    learning_rate=0.1,
                    termination_criterion=epoch_counter)

    extension = ParamMonitor()

    train = Train(dataset=dataset,
                  model=model,
                  algorithm=algorithm,
                  extensions=[extension],
                  save_freq=1,
                  save_path="save.pkl")

    # mock save
    train.save = MethodType(only_run_extensions, train)

    train.main_loop()
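ParamMonitor and only_run_extensions are helpers defined in the original test module; a minimal sketch of what they plausibly look like (an assumption, not copied from pylearn2) is:

from types import MethodType

import numpy as np
from pylearn2.train_extensions import TrainExtension


def only_run_extensions(self):
    # Stand-in for Train.save: skip pickling and just fire the on_save hooks.
    for extension in self.extensions:
        extension.on_save(self.model, self.dataset, self.algorithm)


class ParamMonitor(TrainExtension):
    """Record parameters at monitoring time and check that they are
    unchanged when save runs immediately afterwards."""

    def on_monitor(self, model, dataset, algorithm):
        self.params_at_monitor = [p.get_value() for p in model.get_params()]

    def on_save(self, model, dataset, algorithm):
        params_at_save = [p.get_value() for p in model.get_params()]
        for saved, monitored in zip(params_at_save, self.params_at_monitor):
            assert np.array_equal(saved, monitored)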
Example #10
def test_train_ae():
    GC = GaussianCorruptor

    gsn = GSN.new(layer_sizes=[ds.X.shape[1], 1000],
                  activation_funcs=["sigmoid", "tanh"],
                  pre_corruptors=[None, GC(1.0)],
                  post_corruptors=[SaltPepperCorruptor(0.5),
                                   GC(1.0)],
                  layer_samplers=[BinomialSampler(), None],
                  tied=False)

    # average MBCE over example rather than sum it
    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]

    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(LEARNING_RATE,
              init_momentum=MOMENTUM,
              cost=c,
              termination_criterion=EpochCounter(MAX_EPOCHS),
              batches_per_iter=BATCHES_PER_EPOCH,
              batch_size=BATCH_SIZE,
              monitoring_dataset=ds,
              monitoring_batches=10)

    trainer = Train(ds,
                    gsn,
                    algorithm=alg,
                    save_path="gsn_ae_example.pkl",
                    save_freq=5)
    trainer.main_loop()
    print "done training"
Example #11
def train_layer5(supervised=True):
    global unsup_dataset, sup_dataset
    
    # Process unsupervised layer 5
    unsup_dataset = TransformerDataset(raw=unsup_dataset, transformer=serial.load(layer4_unsup_model))
    model = DenoisingAutoencoder(BinomialCorruptor(corruption_level=0.002), nvis=nhid4, nhid=nhid5, act_enc='tanh', act_dec=None,  irange=0.5)
    training_alg = SGD(cost=MeanSquaredReconstructionError(), learning_rate=1e-4, batch_size= batch_size, monitoring_dataset=unsup_dataset, termination_criterion=EpochCounter(max_epochs=max_epochs))
    extensions = [MonitorBasedLRAdjuster()]
    experiment = Train(dataset=unsup_dataset, model=model, algorithm=training_alg, save_path=layer5_unsup_model, save_freq=50, allow_overwrite=True, extensions=extensions)
    experiment.main_loop()
    
    if supervised:
        # Process supervised layer 5, this will be the final classifier
        layers = [PretrainedLayer(layer_name='h1', layer_content=serial.load(layer1_unsup_model), freeze_params=False),
                  PretrainedLayer(layer_name='h2', layer_content=serial.load(layer2_unsup_model), freeze_params=False),
                  PretrainedLayer(layer_name='h3', layer_content=serial.load(layer3_unsup_model), freeze_params=False),
                  PretrainedLayer(layer_name='h4', layer_content=serial.load(layer4_unsup_model), freeze_params=False), 
                  PretrainedLayer(layer_name='h5', layer_content=serial.load(layer5_unsup_model), freeze_params=False),
                  Softmax(n_classes=class_number, layer_name='y', irange=0.5)]
        model = MLP(layers=layers, batch_size=sup_dataset.y.shape[0], nvis=nvis, layer_name=None)
        training_alg = SGD(learning_rate=1e-3, monitoring_dataset=sup_dataset, termination_criterion=EpochCounter(max_epochs=10000))
        experiment = Train(dataset=sup_dataset, model=model, algorithm=training_alg, save_path=mlp_model, save_freq=50, allow_overwrite=True, extensions=extensions)
        experiment.main_loop()
        serial.save(layer1_unsup_model, model.layers[0].layer_content)
        serial.save(layer2_unsup_model, model.layers[1].layer_content)
        serial.save(layer3_unsup_model, model.layers[2].layer_content)
    serial.save(layer4_unsup_model, model.layers[3].layer_content)
Example #12
def finish_one_layer(X_train, y_train, X_test, y_test, img_units, txt_units, h_units, epochs, lr=0.1, model_type='FullModal', alpha=0.5, beta=0.5, layer_num='1', prefix='', suffix='', save_path=''):
    """
    Prepare, train, and evaluate one complete layer.
    For now the single modality is assumed to be an image, which is split evenly into two halves.
    """
    # 0. Parameter checks
    print 'img_units=', img_units
    print 'txt_units=', txt_units
    print 'X_train.shape[1]=', X_train.shape[1]
    assert img_units + txt_units == X_train.shape[1]
    assert img_units + txt_units == X_test.shape[1]
    # 1. Build the datasets
    dsit_train, dsit_test = make_dataset_single_modal(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

    # 2. Train the single-layer model
    monitoring_dataset = {'train': dsit_train, 'test': dsit_test}
    print 'in finish_one_layer, alpha=%f, beta=%f' % (alpha, beta)	
    ae_model = AdjustableMultimodalAutoEncoder(model_type=model_type, alpha=alpha, beta=beta, n_vis_img=img_units, n_vis_txt=txt_units, n_hid_img=h_units, n_hid_txt=h_units, dec_f_img=True, dec_f_txt=True)
    alg = SGD(learning_rate=lr, cost=None, batch_size=20, init_momentum=None, monitoring_dataset=monitoring_dataset, termination_criterion=EpochCounter(max_epochs=epochs))  # cost=None so the cost returned by the model's get_default_cost() is used
    
    train = Train(dataset=dsit_train, model=ae_model, algorithm=alg, save_path='multi_ae_save_layer' + layer_num + '.pkl', save_freq=10)
    
    t0 = time.clock()
    train.main_loop()
    print 'training time for layer%s: %f' % (layer_num, time.clock() - t0)
    
    # 3. Compute the design matrices obtained by propagating the data through the trained model
    X_img_propup_train, X_txt_propup_train, X_img_propup_test, X_txt_propup_test, X_propup_train, X_propup_test = propup_design_matrix(X_train=dsit_train.X, X_test=dsit_test.X, ae_model=ae_model)
    
    # 4. Evaluate the classification performance of the trained model
    print '!!!evaluate model on dataset+++++++++++++++++++++++++++++++++++++++++++++++++++++++'
    model_evaluate(X_img_train=X_img_propup_train, X_txt_train=X_txt_propup_train, y_train=y_train, X_img_test= X_img_propup_test, X_txt_test=X_txt_propup_test, y_test=y_test, layer_num=layer_num, prefix=prefix, suffix=suffix, save_path=save_path)
    
    return X_propup_train, X_propup_test
Example #13
def test_train_ae():
    ds = MNIST(which_set='train',one_hot=True,all_labelled=ALL_LABELLED,supervised=SUPERVISED)

    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], HIDDEN_SIZE,ds.X.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(GAUSSIAN_NOISE)] * 3,
        post_corruptors=[SaltPepperCorruptor(SALT_PEPPER_NOISE), None,SmoothOneHotCorruptor(GAUSSIAN_NOISE)],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False
    )

    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]

    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=MONITORING_BATCHES
   )

    trainer = Train(ds, gsn, algorithm=alg, save_path="./results/gsn_ae_trained.pkl",
                    save_freq=5, extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    print "done training"
Example #14
def test_sgd_sup():

    # tests that we can run the sgd algorithm
    # on a supervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m, ))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    dataset = DenseDesignMatrix(X=X, y=Y)

    m = 15
    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    # Including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_dataset = DenseDesignMatrix(X=X, y=Y)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    cost = SupervisedDummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #15
class RBMTraining:
	def __init__(self, data_path="./datasets/", save_path="training.pkl", simulation_data = None, identifier = 0):
		self.id = identifier
		self.data_path = data_path
		self.save_path = save_path
		if simulation_data != None:
			self.sim_data = simulation_data
			self.save_data_loaded()
		else:
			self.sim_data = SimulationData(data_path)
			self.load_data()
		
	def load_data(self):
		self.sim_data.load_data()
		self.sim_data.preprocessor() 

		tmp = self.sim_data.split_train_test()
		self.datasets = {'train' : tmp[0], 'test' : tmp[1]}

		self.num_simulations = self.sim_data.num_simulations
		self.input_values = self.sim_data.input_values
		self.output_values = self.sim_data.output_values

	def set_structure(self, num_layers = 4, shape = 'linear'):
		self.vis = self.input_values
		self.hid = self.output_values
		return [self.vis, self.hid]
		
		   
	def get_model(self):
		self.model = RBM(nvis=self.vis, nhid=self.hid, irange=.05)
		return self.model
	   
	def set_training_criteria(self, 
							learning_rate=0.05,
							batch_size=10, 
							max_epochs=10):
		
		self.training_alg = DefaultTrainingAlgorithm(batch_size = batch_size, 
													monitoring_dataset = self.datasets, 
													termination_criterion = EpochCounter(max_epochs))
	
	def set_extensions(self, extensions=None):
		self.extensions = None #[MonitorBasedSaveBest(channel_name='objective',
												#save_path = './training/training_monitor_best.pkl')]
		
	def set_attributes(self, attributes):
		self.attributes = attributes

	def define_training_experiment(self, save_freq = 10):
		self.experiment = Train(dataset=self.datasets['train'], 
								model=self.model, 
								algorithm=self.training_alg, 
								save_path=self.save_path , 
								save_freq=save_freq, 
								allow_overwrite=True, 
								extensions=self.extensions)

	def train_experiment(self):
		self.experiment.main_loop()
Example #16
def test_execution_order():

    # ensure save is called directly after monitoring by checking 
    # parameter values in `on_monitor` and `on_save`.

    model = MLP(layers=[Softmax(layer_name='y',
                                n_classes=2,
                                irange=0.)],
                nvis=3)

    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))

    epoch_counter = EpochCounter(max_epochs=1)

    algorithm = SGD(batch_size=2, learning_rate=0.1,
                    termination_criterion=epoch_counter)

    extension = ParamMonitor()

    train = Train(dataset=dataset,
                  model=model,
                  algorithm=algorithm,
                  extensions=[extension],
                  save_freq=1,
                  save_path="save.pkl")

    # mock save
    train.save = MethodType(only_run_extensions, train)

    train.main_loop()
Example #17
def train_model():
    global ninput, noutput
    simdata = SimulationData(
        sim_path="../../javaDataCenter/generarDadesV1/CA_SDN_topo1/")
    simdata.load_data()
    simdata.preprocessor()
    dataset = simdata.get_matrix()

    structure = get_structure()
    layers = []
    for pair in structure:
        layers.append(get_autoencoder(pair))

    model = DeepComposedAutoencoder(layers)
    training_alg = SGD(learning_rate=1e-3,
                       cost=MeanSquaredReconstructionError(),
                       batch_size=1296,
                       monitoring_dataset=dataset,
                       termination_criterion=EpochCounter(max_epochs=50))
    extensions = [MonitorBasedLRAdjuster()]
    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=training_alg,
                       save_path='training2.pkl',
                       save_freq=10,
                       allow_overwrite=True,
                       extensions=extensions)
    experiment.main_loop()
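get_structure and get_autoencoder are helpers from the original script that are not shown; plausible stand-ins (assumptions based on how they are used above, not the originals) could be:

# Hypothetical helpers: each (nvis, nhid) pair becomes one autoencoder layer.
from pylearn2.models.autoencoder import Autoencoder

def get_structure():
    # Arbitrary example layer sizes.
    return [[1296, 600], [600, 300], [300, 100]]

def get_autoencoder(pair):
    nvis, nhid = pair
    return Autoencoder(nvis=nvis, nhid=nhid, act_enc='tanh', act_dec=None,
                       irange=0.05)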
Example #18
def test_empty_monitoring_datasets():
    """
    Test that handling of the monitoring datasets dictionary
    does not fail when it is empty.
    """

    learning_rate = 1e-3
    batch_size = 5

    dim = 3

    rng = np.random.RandomState([25, 9, 2012])

    train_dataset = DenseDesignMatrix(X=rng.randn(10, dim))

    model = SoftmaxModel(dim)

    cost = DummyCost()

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_dataset={},
                    termination_criterion=EpochCounter(2))

    train = Train(train_dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #19
def test_serialization_guard():

    # tests that Train refuses to serialize the dataset

    dim = 2
    m = 11

    rng = np.random.RandomState([28,9,2012])
    X = rng.randn(m, dim)
    dataset = DenseDesignMatrix(X=X)

    model = DummyModel(dim)
    # make the dataset part of the model, so it will get
    # serialized
    model.dataset = dataset

    Monitor.get_monitor(model)

    algorithm = DummyAlgorithm()

    train = Train(dataset, model, algorithm, save_path='_tmp_unit_test.pkl',
                 save_freq=1, extensions=None)

    try:
        train.main_loop()
    except RuntimeError:
        return
    assert False # train did not complain, this is a bug
Example #20
def my_train():
    trainset = CIN_FEATURE2(which_set='train')
    validset = CIN_FEATURE2(which_set='valid')
    layers = []
    layers1 = []
    h1 = Linear(layer_name='h1', dim=850, irange=0.05)
    h2 = Linear(layer_name='h2', dim=556, irange=0.05)
    layers1.append(h1)
    layers1.append(h2)
    l1 = CompositeLayerWithSource(layer_name='c', layers=layers1)
    l2 = Linear(layer_name='o', dim=2, irange=0.05)
    layers.append(l1)
    layers.append(l2)

    input_space = CompositeSpace(components=[VectorSpace(dim=850), VectorSpace(dim=556)])
    input_source = ['feature850', 'feature556']
    model = MLPWithSource(batch_size=1140, layers=layers,
                          input_space=input_space, input_source=input_source)

    algorithm = BGD(conjugate=1,
                    # batch_size=1140,
                    line_search_mode='exhaustive',
                    cost=Default(),
                    termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS))

    train = Train(dataset=trainset, model=model, algorithm=algorithm)
    train.main_loop()
Example #21
def test_serialization_guard():

    # tests that Train refuses to serialize the dataset

    dim = 2
    m = 11

    rng = np.random.RandomState([28, 9, 2012])
    X = rng.randn(m, dim)
    dataset = DenseDesignMatrix(X=X)

    model = DummyModel(dim)
    # make the dataset part of the model, so it will get
    # serialized
    model.dataset = dataset

    Monitor.get_monitor(model)

    algorithm = DummyAlgorithm()

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path='_tmp_unit_test.pkl',
                  save_freq=1,
                  callbacks=None)

    try:
        train.main_loop()
    except RuntimeError:
        return
    assert False  # train did not complain, this is a bug
Example #22
    def train_with_monitoring_datasets(train_dataset,
                                       monitoring_datasets,
                                       model_force_batch_size,
                                       train_iteration_mode,
                                       monitor_iteration_mode):

        model = SoftmaxModel(dim)
        if model_force_batch_size:
            model.force_batch_size = model_force_batch_size

        cost = DummyCost()

        algorithm = SGD(learning_rate, cost,
                        batch_size=batch_size,
                        train_iteration_mode=train_iteration_mode,
                        monitor_iteration_mode=monitor_iteration_mode,
                        monitoring_dataset=monitoring_datasets,
                        termination_criterion=EpochCounter(2))

        train = Train(train_dataset,
                      model,
                      algorithm,
                      save_path=None,
                      save_freq=0,
                      extensions=None)

        train.main_loop()
Example #23
def cnn_run_dropout_maxout(data_path, num_rows, num_cols, num_channels,
                           input_path, pred_path):
    t = time.time()
    sub_window = gen_center_sub_window(76, num_cols)
    trn = SarDataset(ds[0][0], ds[0][1], sub_window)
    vld = SarDataset(ds[1][0], ds[1][1], sub_window)
    tst = SarDataset(ds[2][0], ds[2][1], sub_window)
    print 'Take {}s to read data'.format(time.time() - t)
    t = time.time()
    batch_size = 100
    h1 = maxout.Maxout(layer_name='h2', num_units=1, num_pieces=100, irange=.1)
    hidden_layer = mlp.ConvRectifiedLinear(layer_name='h2',
                                           output_channels=8,
                                           irange=0.05,
                                           kernel_shape=[5, 5],
                                           pool_shape=[2, 2],
                                           pool_stride=[2, 2],
                                           max_kernel_norm=1.9365)
    hidden_layer2 = mlp.ConvRectifiedLinear(layer_name='h3',
                                            output_channels=8,
                                            irange=0.05,
                                            kernel_shape=[5, 5],
                                            pool_shape=[2, 2],
                                            pool_stride=[2, 2],
                                            max_kernel_norm=1.9365)
    #output_layer = mlp.Softplus(dim=1,layer_name='output',irange=0.1)
    output_layer = mlp.Linear(dim=1, layer_name='output', irange=0.05)
    trainer = sgd.SGD(learning_rate=0.001,
                      batch_size=100,
                      termination_criterion=EpochCounter(2000),
                      cost=dropout.Dropout(),
                      train_iteration_mode='even_shuffled_sequential',
                      monitor_iteration_mode='even_shuffled_sequential',
                      monitoring_dataset={
                          'test': tst,
                          'valid': vld,
                          'train': trn
                      })
    layers = [hidden_layer, hidden_layer2, output_layer]
    input_space = space.Conv2DSpace(shape=[num_rows, num_cols],
                                    num_channels=num_channels)

    ann = mlp.MLP(layers, input_space=input_space, batch_size=batch_size)
    watcher = best_params.MonitorBasedSaveBest(channel_name='valid_objective',
                                               save_path='sar_cnn_mlp.pkl')
    experiment = Train(dataset=trn,
                       model=ann,
                       algorithm=trainer,
                       extensions=[watcher])
    print 'Take {}s to compile code'.format(time.time() - t)
    t = time.time()
    experiment.main_loop()
    print 'Training time: {}s'.format(time.time() - t)
    serial.save('cnn_hhv_{0}_{1}.pkl'.format(num_rows, num_cols),
                ann,
                on_overwrite='backup')

    #read hh and hv into a 3D numpy
    image = read_hhv(input_path)
    return ann, sar_predict(ann, image, pred_path)
Example #24
def train_example(dataset=None):
    model = GaussianBinaryRBM(nvis=1296,
                              nhid=61,
                              irange=0.5,
                              energy_function_class=grbm_type_1(),
                              learn_sigma=True,
                              init_sigma=.4,
                              init_bias_hid=2.,
                              mean_vis=False,
                              sigma_lr_scale=1e-3)
    cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    algorithm = SGD(learning_rate=.1,
                    batch_size=5,
                    monitoring_batches=20,
                    monitoring_dataset=dataset,
                    cost=cost,
                    termination_criterion=MonitorBased(prop_decrease=0.01,
                                                       N=1))
    train = Train(dataset=dataset,
                  model=model,
                  save_path="./experiment/training.pkl",
                  save_freq=10,
                  algorithm=algorithm,
                  extensions=[])
    train.main_loop()
Example #25
def test_batch_size_specialization():

    # Tests that using a batch size of 1 for training and a batch size
    # other than 1 for monitoring does not result in a crash.
    # This catches a bug reported on the pylearn-dev mailing list in the
    # e-mail "[pylearn-dev] monitor assertion error: channel_X.type != X.type"
    # The training data was specialized to a row matrix (theano tensor with
    # first dim broadcastable) and the monitor ended up with expressions
    # mixing the specialized and non-specialized version of the expression.

    m = 2
    rng = np.random.RandomState([25,9,2012])
    X = np.zeros((m,1))
    dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(1)

    learning_rate = 1e-3

    cost = DummyCost()

    algorithm = SGD(learning_rate, cost, batch_size=1,
                 monitoring_batches=1, monitoring_dataset=dataset,
                 termination_criterion=EpochCounter(max_epochs=1),
                 update_callbacks=None,
                 set_batch_size = False)

    train = Train(dataset, model, algorithm, save_path=None,
                 save_freq=0, extensions=None)

    train.main_loop()
Example #26
def MultiPIECV():
  # Learning rate, nr pieces
  parms = [(0.1, 2), (0.1, 3), (0.01, 2), (0.01, 3)]

  accuracies = []

  for i in xrange(len(parms)):
    h0 = maxout.Maxout(layer_name='h0', num_units=1500, num_pieces=parms[i][1], W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
    h1 = maxout.Maxout(layer_name='h1', num_units=1500, num_pieces=parms[i][1], W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
    h2 = maxout.Maxout(layer_name='h2', num_units=1500, num_pieces=parms[i][1], W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
    outlayer = mlp.Softmax(layer_name='y', n_classes=6, irange=0)

    layers = [h0, h1, h2, outlayer]

    model = mlp.MLP(layers, nvis=1200)

    trainIndices, validationIndices, testIndices = getMultiPIEindices()
    train = MultiPIE('train', indices=trainIndices)
    valid = MultiPIE('valid', indices=validationIndices)
    test = MultiPIE('test',   indices=testIndices)

    monitoring = dict(valid=valid)
    termination = MonitorBased(channel_name="valid_y_misclass", N=100)
    extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y_misclass",
                                                   save_path="/data/mcr10/train_best.pkl")]

    algorithm = sgd.SGD(parms[i][0], batch_size=100, cost=Dropout(),
                        monitoring_dataset=monitoring, termination_criterion=termination)

    save_path = "/data/mcr10/train_best.pkl"

    if not args.train and os.path.exists(save_path):
        model = serial.load(save_path)
    else:
      print 'Running training'
      train_job = Train(train, model, algorithm, extensions=extensions, save_path="/data/mcr10/trainpie.pkl", save_freq=1)
      train_job.main_loop()

    X = model.get_input_space().make_batch_theano()
    Y = model.fprop(X)

    y = T.argmax(Y, axis=1)

    f = function(inputs=[X], outputs=y, allow_input_downcast=True)
    yhat = f(test.X)

    print sum(yhat)
    print yhat.shape

    y = np.argmax(np.squeeze(test.get_targets()), axis=1)

    accuracy = (y == yhat).sum() / float(y.size)
    accuracies += [accuracy]

  # TODO: some confusion matrix?
  for i in xrange(len(parms)):
    print "for parameter" + str(i)
    print "the correct rate was " + str(accuracies[i])
Example #27
def model1():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    valid_set = MNIST(which_set='test', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)

    #import pdb
    #pdb.set_trace()
    #print train_set.X.shape[1]

    # =====<Create the MLP Model>=====

    h2_layer = NoisyRELU(layer_name='h1',
                         sparse_init=15,
                         noise_factor=5,
                         dim=1000,
                         desired_active_rate=0.2,
                         bias_factor=20,
                         max_col_norm=1)
    #h2_layer = RectifiedLinear(layer_name='h2', dim=100, sparse_init=15, max_col_norm=1)
    #print h1_layer.get_params()
    #h2 = RectifiedLinear(layer_name='h2', dim=500, sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0., max_col_norm=1)

    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.001, N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    #import pdb
    #pdb.set_trace()
    train_obj.main_loop()

    # =====<Run the training>=====
Example #28
 def fit(self, X, y=None):  # TODO
     dataset = None  # TODO: build a pylearn2 Dataset from X, y
     self.set_train_params(train_params={
         'dataset': dataset,
         'algorithm': algorithm,
         'extensions': extension})  # these are influenced by X, y
         # real data or symbol?
     train = Train(model=self.model, **self.train_params)
     train.main_loop()
Example #29
def test_bgd_unsup():

    # tests that we can run the bgd algorithm
    # on an unsupervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)

    # including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    class DummyCost(Cost):
        def expr(self, model, data):
            self.get_data_specs(model)[0].validate(data)
            X = data
            return T.square(model(X) - X).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = BGD(cost,
                    batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #30
 def run(self, start_config_id = None):
     self.db = DatabaseHandler()
     print 'running'
     while True:
         if start_config_id is None:
             (config_id, model_id, ext_id, train_id,
                 dataset_id, random_seed, batch_size) \
                  = self.select_next_config(self.experiment_id)
         else:
             (config_id, model_id, ext_id, train_id,
                 dataset_id, random_seed, batch_size) \
                  = self.select_config(start_config_id)
         start_config_id = None
         
         (dataset_desc, input_space_id) = self.select_dataset(dataset_id)
         input_space = self.get_space(input_space_id)
         
         # build model
         model = self.get_model(model_id, 
                                random_seed, 
                                batch_size, 
                                input_space)
         
         # extensions
         extensions = self.get_extensions(ext_id)
         
         # prepare monitor
         self.prep_valtest_monitor(model, batch_size)
         
         # monitor based save best
         if self.mbsb_channel_name is not None:
             save_path = self.save_prefix+str(config_id)+"_best.pkl"
             extensions.append(MonitorBasedSaveBest(
                     channel_name = self.mbsb_channel_name,
                     save_path = save_path,
                     cost = False \
                 )
             )
         
         # HPS Logger
         extensions.append(
             HPSLog(self.log_channel_names, self.db, config_id)
         )
         
         # training algorithm
         algorithm = self.get_trainingAlgorithm(train_id, batch_size)
         
         print 'sgd complete'
         learner = Train(dataset=self.train_ddm,
                         model=model,
                         algorithm=algorithm,
                         extensions=extensions)
         print 'learning'     
         learner.main_loop()
         
         self.set_end_time(config_id)
Example #31
 def __call__(self):
     dataset = DenseDesignMatrix(X=self.X)
     self.cnmf.termination_criterion = self.termination_criterion
     self.cnmf.set_W(self.W)
     train = Train(dataset, self.cnmf)
     train.main_loop()
     self.cnmf.monitor = Monitor(self.cnmf)
     H = self.cnmf.H.get_value()
     results = {"W": self.cnmf.W.get_value(), "H": H}
     return numpy.argmax(H, axis=1), results
Example #32
def test_kmeans():
    X = np.random.random(size=(100, 10))
    Y = np.random.randint(5, size=(100, 1))

    dataset = DenseDesignMatrix(X, y=Y)

    model = KMeans(k=5, nvis=10)

    train = Train(model=model, dataset=dataset)
    train.main_loop()
Example #33
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    # tests that the class MonitorBasedLRAdjuster in sgd.py can properly use
    # the specified dataset_name in the constructor when multiple datasets
    # exist.

    dim = 3
    m = 10

    rng = np.random.RandomState([06, 02, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # including a monitoring datasets lets us test that
    # the monitor works with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    monitoring_dataset = {'train': monitoring_train, 'test': monitoring_test}

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    dataset_name = monitoring_dataset.keys()[0]
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    train.main_loop()
Example #34
def test_sgd_topo():

    # tests that we can run the sgd algorithm
    # on data with topology
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    rows = 3
    cols = 4
    channels = 2
    dim = rows * cols * channels
    m = 10

    rng = np.random.RandomState([25,9,2012])

    X = rng.randn(m, rows, cols, channels)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m,dim))
    for i in xrange(m):
        Y[i,idx[i]] = 1

    dataset = DenseDesignMatrix(topo_view=X, y=Y)

    m = 15
    X = rng.randn(m, rows, cols, channels)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m,dim))
    for i in xrange(m):
        Y[i,idx[i]] = 1

    # including a monitoring datasets lets us test that
    # the monitor works with supervised data
    monitoring_dataset = DenseDesignMatrix(topo_view=X, y=Y)

    model = TopoSoftmaxModel(rows, cols, channels)

    learning_rate = 1e-3
    batch_size = 5

    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                 monitoring_batches=3, monitoring_dataset= monitoring_dataset,
                 termination_criterion=termination_criterion, update_callbacks=None,
                 init_momentum = None, set_batch_size = False)

    train = Train(dataset, model, algorithm, save_path=None,
                 save_freq=0, extensions=None)

    train.main_loop()
Example #35
def test_train_supervised():
    """
    Train a supervised GSN.
    """
    # initialize the GSN
    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(0.5)] * 3,
        post_corruptors=[
            SaltPepperCorruptor(.3), None,
            SmoothOneHotCorruptor(.5)
        ],
        layer_samplers=[BinomialSampler(), None,
                        MultinomialSampler()],
        tied=False)

    # average over costs rather than summing
    _rcost = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1]

    _ccost = MeanBinaryCrossEntropy()
    classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1]

    # combine costs into GSNCost object
    c = GSNCost(
        [
            # reconstruction on layer 0 with weight 1.0
            (0, 1.0, reconstruction_cost),

            # classification on layer 2 with weight 2.0
            (2, 2.0, classification_cost)
        ],
        walkback=WALKBACK,
        mode="supervised")

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    trainer = Train(ds,
                    gsn,
                    algorithm=alg,
                    save_path="gsn_sup_example.pkl",
                    save_freq=10,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    print("done training")
Example #36
def train_example(dataset = None):
    model = GaussianBinaryRBM(nvis=1296, nhid=61, irange=0.5,
                            energy_function_class=grbm_type_1(), learn_sigma=True,
                            init_sigma=.4, init_bias_hid=2., mean_vis=False,
                            sigma_lr_scale=1e-3)
    cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    algorithm = SGD(learning_rate=.1, batch_size=5, monitoring_batches=20,
                    monitoring_dataset=dataset, cost=cost,
                    termination_criterion=MonitorBased(prop_decrease=0.01, N=1))
    train = Train(dataset=dataset,model=model,save_path="./experiment/training.pkl", save_freq=10, algorithm=algorithm, extensions=[])
    train.main_loop()
Example #37
def model2():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====

    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    #print h1_layer.get_params()
    h2_layer = RectifiedLinear(layer_name='h2',
                               dim=1000,
                               sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=SumOfCosts(costs=[
                  MethodCost('cost_from_X'),
                  WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])
              ]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.0001, N=5))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create Training Object>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    train_obj.main_loop()
Example #38
 def test(layers):
     from pylearn2.datasets.iris import Iris
     ddm = Iris()
     from pylearn2.models.mlp import MLP
     mlp = MLP(layers=layers, nvis=4, batch_size=10)
     from pylearn2.costs.mlp import Default
     cost = Default()
     from pylearn2.training_algorithms.sgd import SGD
     sgd = SGD(learning_rate=0.01, cost=cost, monitoring_dataset=ddm)
     from pylearn2.train import Train
     trainer = Train(dataset=ddm, model=mlp, algorithm=sgd)
     trainer.main_loop()
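A possible invocation, assuming the standard pylearn2 MLP layers (the hidden size is arbitrary; Iris has 4 input features and 3 classes). Note that the SGD above is built without a termination criterion, so the loop may not stop on its own.

from pylearn2.models.mlp import Sigmoid, Softmax

test([Sigmoid(layer_name='h0', dim=10, irange=0.05),
      Softmax(layer_name='y', n_classes=3, irange=0.05)])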
Example #39
def model3():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = SVHN_On_Memory(which_set='train')
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = SVHN_On_Memory(which_set='test')

    # =====<Create the MLP Model>=====

    h1_layer = NoisyRELU(layer_name='h1',
                         dim=2000,
                         threshold=5,
                         sparse_init=15,
                         max_col_norm=1)
    #print h1_layer.get_params()
    #h2_layer = NoisyRELU(layer_name='h2', dim=100, threshold=15, sparse_init=15, max_col_norm=1)

    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.001, N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=10)
    #train_obj.setup_extensions()

    train_obj.main_loop()
def main():
    start_time = time.clock()

    # optin parser
    parser = OptionParser()
    parser.add_option("-p", dest="plot_prediction",
                      action="store_true", default=False,
                      help="plot model prediction transitions")
    parser.add_option("-f", "--file", dest="out_filename", default=None,
                      help="write animation to FILE (require -a option)", metavar="FILE")
    (options, args) = parser.parse_args()

    # make Detaset
    ds = sinDataset(SIZE_DATA)

    # make layers
    hidden_layer1 = mlp.Tanh(layer_name='hidden1', dim=20, irange=0.5, init_bias=1.0)
    hidden_layer2 = mlp.Tanh(layer_name='hidden2', dim=4, irange=0.5, init_bias=1.0)
    output_layer = mlp.Linear(layer_name='out', dim=1, irange=0.5, init_bias=1)

    # set layers
    layers = [hidden_layer1, hidden_layer2, output_layer]
    model = mlp.MLP(layers, nvis=1)

    # set training rule and extensions
    algorithm = sgd.SGD(
        learning_rate = 0.01,
        batch_size = 1,
        monitoring_batch_size = 1,
        monitoring_batches =  1,
        monitoring_dataset = ds,
        termination_criterion = EpochCounter(MAX_EPOCHS)
    )
    extensions = [sgd.MonitorBasedLRAdjuster()]
    if options.plot_prediction:
        plotEx = PlotPredictionOnMonitor()
        extensions.append(plotEx)

    trainer = Train(model = model,
                    algorithm = algorithm,
                    dataset = ds,
                    extensions = extensions,
                    save_path='./funcmodel.pkl',
                    save_freq=500)

    # training loop
    trainer.main_loop()

    end_time = time.clock()
    print("tortal_seconds_this_learning : %f s(%f min)" % (end_time - start_time, (end_time - start_time)/60))
    if options.plot_prediction:
        plotEx.plot(out_filename=options.out_filename)
Example #41
def test_bgd_unsup():

    # tests that we can run the bgd algorithm
    # on an unsupervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    dim = 3
    m = 10

    rng = np.random.RandomState([25,9,2012])

    X = rng.randn(m, dim)

    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)


    # including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    class DummyCost(Cost):

        def expr(self, model, data):
            self.get_data_specs(model)[0].validate(data)
            X = data
            return T.square(model(X) - X).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = BGD(cost, batch_size=5,
                monitoring_batches=2, monitoring_dataset= monitoring_dataset,
                termination_criterion = termination_criterion)

    train = Train(dataset, model, algorithm, save_path=None,
                 save_freq=0, extensions=None)

    train.main_loop()
Example #42
def test_kmeans():
    X = np.random.random(size=(100, 10))
    Y = np.random.randint(5, size=(100, 1))

    dataset = DenseDesignMatrix(X, y=Y)

    model = KMeans(
        k=5,
        nvis=10
    )

    train = Train(model=model, dataset=dataset)
    train.main_loop()
Example #43
def MultiPIEmain():
  # h0 = maxout.Maxout(layer_name='h0', num_units=500, num_pieces=3, W_lr_scale=1.0, max_col_norm = 1.0,irange=0.005, b_lr_scale=1.0)
  # h1 = maxout.Maxout(layer_name='h1', num_units=500, num_pieces=3, W_lr_scale=1.0, max_col_norm = 1.0,irange=0.005, b_lr_scale=1.0)
  # h2 = maxout.Maxout(layer_name='h2', num_units=500, num_pieces=3, W_lr_scale=1.0, max_col_norm = 1.0,irange=0.005, b_lr_scale=1.0)
  h0 = maxout.Maxout(layer_name='h0', num_units=1000, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
  h1 = maxout.Maxout(layer_name='h1', num_units=1000, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)
  h2 = maxout.Maxout(layer_name='h2', num_units=1000, num_pieces=2, W_lr_scale=1.0, irange=0.005, b_lr_scale=1.0)

  outlayer = mlp.Softmax(layer_name='y', n_classes=6, irange=0)

  layers = [h0, h1, h2, outlayer]

  model = mlp.MLP(layers, nvis=1200)

  trainIndices, validationIndices, testIndices = getMultiPIEindices()
  train = MultiPIE('train', indices=trainIndices)
  valid = MultiPIE('valid', indices=validationIndices)
  test = MultiPIE('test',   indices=testIndices)

  monitoring = dict(valid=valid)
  termination = MonitorBased(channel_name="valid_y_misclass", N=100)
  extensions = [best_params.MonitorBasedSaveBest(channel_name="valid_y_misclass",
                                                 save_path="/data/mcr10/train_best.pkl"),
                MomentumAdjustor(final_momentum=0.7, start=1, saturate=250)]

  algorithm = sgd.SGD(0.05, batch_size=20, cost=Dropout(), learning_rule=Momentum(0.5),
                      monitoring_dataset=monitoring, termination_criterion=termination)

  save_path = "/data/mcr10/train_best.pkl"

  if not args.train and os.path.exists(save_path):
      model = serial.load(save_path)
  else:
    print 'Running training'
    train_job = Train(train, model, algorithm, extensions=extensions, save_path="/data/mcr10/trainpie.pkl", save_freq=50)
    train_job.main_loop()

  X = model.get_input_space().make_batch_theano()
  Y = model.fprop(X)

  y = T.argmax(Y, axis=1)

  f = function(inputs=[X], outputs=y, allow_input_downcast=True)
  yhat = f(test.X)

  print sum(yhat)
  print yhat.shape

  y = np.argmax(np.squeeze(test.get_targets()), axis=1)

  print 'accuracy', (y == yhat).sum() / float(y.size)
Example #44
def test_train_supervised():
    """
    Train a supervised GSN.
    """
    # initialize the GSN
    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(0.5)] * 3,
        post_corruptors=[SaltPepperCorruptor(.3), None, SmoothOneHotCorruptor(.5)],
        layer_samplers=[BinomialSampler(), None, MultinomialSampler()],
        tied=False
    )

    # average over costs rather than summing
    _rcost = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1]

    _ccost = MeanBinaryCrossEntropy()
    classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1]

    # combine costs into GSNCost object
    c = GSNCost(
        [
            # reconstruction on layer 0 with weight 1.0
            (0, 1.0, reconstruction_cost),

            # classification on layer 2 with weight 2.0
            (2, 2.0, classification_cost)
        ],
        walkback=WALKBACK,
        mode="supervised"
    )

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    trainer = Train(ds, gsn, algorithm=alg,
                    save_path="gsn_sup_example.pkl", save_freq=10,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    print "done training"
Example #45
0
class AutoEncoderTrainer(BaseTrainer):
    def __init__(self,
                 runner,
                 model_params,
                 resume=False,
                 resume_data=None,
                 s3_data=None,
                 **kwargs):
        dataset = create_dense_design_matrix(x=runner.dp.train_set_x)

        if resume:
            model, model_params = self.resume_model(model_params, resume_data)
        else:
            model = self.new_model(model_params, dataset=dataset)

        termination_criterion = MaxEpochNumber(model_params['maxnum_iter'])
        algorithm = SGD(learning_rate=model_params['learning_rate']['init'],
                        monitoring_dataset=dataset,
                        cost=MeanSquaredReconstructionError(),
                        termination_criterion=termination_criterion,
                        batch_size=model_params['batch_size'])
        ext = AutoEncoderStatReporter(runner,
                                      resume=resume,
                                      resume_data=resume_data,
                                      save_freq=model_params['save_freq'])
        self.train_obj = Train(dataset=dataset,
                               model=model,
                               algorithm=algorithm,
                               extensions=[ext])

    def train(self):
        self.train_obj.main_loop()

    def resume_model(self, model_params, resume_data):
        model = resume_data['model']
        #TODO: FIX IT
        model = pylearn2.monitor.push_monitor(model, 'monitor_validation',
                                              True)
        return model, model_params

    def new_model(self, model_params, dataset):
        corruptor = BinomialCorruptor(
            corruption_level=model_params['noise_level'])
        model = DenoisingAutoencoder(nvis=dataset.X.shape[1],
                                     nhid=model_params['hidden_outputs'],
                                     irange=model_params['irange'],
                                     corruptor=corruptor,
                                     act_enc='tanh',
                                     act_dec=None)
        return model
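
A hedged usage sketch for the class above (not from the original source): the runner object and its dp.train_set_x design matrix are assumptions, and the dict values are placeholders; the keys are exactly the ones the constructor and new_model read.

# Hypothetical usage; `runner` must expose dp.train_set_x as the class assumes.
model_params = {
    'maxnum_iter': 50,
    'learning_rate': {'init': 0.01},
    'batch_size': 100,
    'save_freq': 5,
    'noise_level': 0.3,
    'hidden_outputs': 500,
    'irange': 0.05,
}
trainer = AutoEncoderTrainer(runner, model_params)  # runner is hypothetical
trainer.train()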
Example #46
0
def test_sgd_topo():
    # tests that we can run the sgd algorithm
    # on data with topology
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    rows = 3
    cols = 4
    channels = 2
    dim = rows * cols * channels
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    dataset = get_topological_dataset(rng, rows, cols, channels, m)

    # including a monitoring datasets lets us test that
    # the monitor works with supervised data
    m = 15
    monitoring_dataset = get_topological_dataset(rng, rows, cols, channels, m)

    model = TopoSoftmaxModel(rows, cols, channels)

    learning_rate = 1e-3
    batch_size = 5

    cost = CrossEntropy()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=5,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #47
0
def testing_multiple_datasets_in_monitor_based_lr():
    # Tests that MonitorBasedLRAdjuster (in sgd.py) refuses to work with multiple
    # monitoring datasets when more than one channel ending in '_objective' exists.
    # This happens when the user has specified neither channel_name nor
    # dataset_name in the constructor to disambiguate.

    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # including a monitoring datasets lets us test that
    # the monitor works with supervised data
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    algorithm = SGD(learning_rate, cost, batch_size=5,
                    monitoring_batches=2,
                    monitoring_dataset={'train': monitoring_train,
                                        'test': monitoring_test},
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None, set_batch_size=False)

    monitor_lr = MonitorBasedLRAdjuster()

    train = Train(dataset, model, algorithm, save_path=None,
                  save_freq=0, extensions=[monitor_lr])

    try:
        train.main_loop()
    except ValueError:
        return

    raise AssertionError("MonitorBasedLRAdjuster accepted multiple monitoring "
                         "datasets even though more than one \"objective\" channel "
                         "exists and neither channel_name nor dataset_name was "
                         "specified in the constructor to disambiguate.")
Example #48
0
def test_kmeans():
    """
    Tests kmeans.KMeans by using it as a model in a Train object.
    """

    X = np.random.random(size=(100, 10))
    Y = np.random.randint(5, size=(100, 1))

    dataset = DenseDesignMatrix(X, y=Y)

    model = KMeans(
        k=5,
        nvis=10
    )

    train = Train(model=model, dataset=dataset)
    train.main_loop()
Example #49
0
def test_flattener_layer_state_separation_for_conv():
    """
    Creates a CompositeLayer wrapping two Conv layers
    and ensures that state gets correctly picked apart.
    """
    conv1 = ConvElemwise(8, [2, 2], 'sf1', SigmoidConvNonlinearity(), .1)
    conv2 = ConvElemwise(8, [2, 2], 'sf2', SigmoidConvNonlinearity(), .1)
    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('comp', [conv1, conv2]))],
              input_space=Conv2DSpace(shape=[5, 5], num_channels=2))

    topo_view = np.random.rand(10, 5, 5, 2).astype(theano.config.floatX)
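    # Each ConvElemwise above maps the 5x5 input to 8 channels of 4x4 feature maps
    # (valid 2x2 convolution, no pooling), so the flattened composite target below
    # needs 2 * 8 * 4 * 4 = 256 columns.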
    y = np.random.rand(10, 256).astype(theano.config.floatX)
    dataset = DenseDesignMatrix(topo_view=topo_view, y=y)

    train = Train(dataset, mlp,
                  SGD(0.1, batch_size=5, monitoring_dataset=dataset))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
Example #50
0
def test_flattener_layer_state_separation_for_softmax():
    """
    Creates a CompositeLayer wrapping two Softmax layers
    and ensures that state gets correctly picked apart.
    """
    soft1 = Softmax(5, 'sf1', .1)
    soft2 = Softmax(5, 'sf2', .1)
    mlp = MLP(layers=[FlattenerLayer(CompositeLayer('comp', [soft1, soft2]))],
              nvis=2)

    X = np.random.rand(20, 2).astype(theano.config.floatX)
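    # Each 5-way Softmax needs a 5-column target; the FlattenerLayer concatenates
    # them, so y below has 5 + 5 = 10 columns.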
    y = np.random.rand(20, 10).astype(theano.config.floatX)
    dataset = DenseDesignMatrix(X=X, y=y)

    train = Train(dataset, mlp,
                  SGD(0.1, batch_size=5, monitoring_dataset=dataset))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
Example #51
0
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10

    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7,
                                 N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                 min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
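
A hedged aside on the ExponentialDecay callback used above: as documented, it divides the learning rate by decay_factor after every minibatch and never goes below min_lr. A tiny sketch of that schedule (the helper is hypothetical, not pylearn2 code):

def effective_lr(base_lr, decay_factor, min_lr, t):
    # learning rate after t minibatches under ExponentialDecay's schedule
    return max(base_lr / decay_factor ** t, min_lr)

effective_lr(1e-1, 1.00001, 1e-10, 10000)  # roughly 0.0905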
Example #52
0
def test_training_a_model():
    """
    tests whether SparseDataset can be trained
    with a dummy model.
    """

    dim = 3
    m = 10
    rng = np.random.RandomState([22, 4, 2014])

    X = rng.randn(m, dim)
    ds = csr_matrix(X)
    dataset = SparseDataset(from_scipy_sparse_dataset=ds)

    model = SoftmaxModel(dim)
    learning_rate = 1e-1
    batch_size = 5

    epoch_num = 2
    termination_criterion = EpochCounter(epoch_num)

    cost = DummyCost()

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #53
0
def main():

    # Only the trainset is processed by this function.
    print 'getting preprocessed data to train model'
    pp_trainset, testset = get_processed_dataset()
    # remember to change here when changing datasets
    print 'loading unprocessed data for input displays'
    trainset = cifar10.CIFAR10(which_set="train")

    dmat = trainset.get_design_matrix()
    nvis = dmat.shape[1]

    model = DenoisingAutoencoder(
        corruptor=BinomialCorruptor(corruption_level=0.5),
        nhid=nhid,
        nvis=nvis,
        act_enc='sigmoid',
        act_dec='sigmoid',
        irange=.01)

    algorithm = SGD(
        learning_rate=0.1,
        cost=MeanSquaredReconstructionError(),
        batch_size=1000,
        monitoring_batches=10,
        monitoring_dataset=pp_trainset,
        termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS_UNSUPERVISED),
        update_callbacks=None)

    extensions = None

    trainer = Train(model=model,
                    algorithm=algorithm,
                    save_path='testrun.pkl',
                    save_freq=1,
                    extensions=extensions,
                    dataset=pp_trainset)

    trainer.main_loop()
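
A hedged follow-up (not part of the original script): after main_loop finishes, the trained denoising autoencoder can reconstruct inputs through a compiled theano function. This sketch assumes pylearn2's Autoencoder.reconstruct method and that pp_trainset exposes get_design_matrix() like a DenseDesignMatrix; it would be appended at the end of main().

    # Sketch: compile a reconstruction function for the trained model.
    import theano
    import theano.tensor as T

    X_sym = T.matrix('X')
    reconstruct = theano.function([X_sym], model.reconstruct(X_sym),
                                  allow_input_downcast=True)
    recons = reconstruct(pp_trainset.get_design_matrix()[:100])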
Example #54
0
def test_batch_size_specialization():

    # Tests that using a batch size of 1 for training and a batch size
    # other than 1 for monitoring does not result in a crash.
    # This catches a bug reported on the pylearn-dev mailing list in the
    # e-mail "[pylearn-dev] monitor assertion error: channel_X.type != X.type"
    # The training data was specialized to a row matrix (theano tensor with
    # first dim broadcastable) and the monitor ended up with expressions
    # mixing the specialized and non-specialized version of the expression.

    m = 2
    rng = np.random.RandomState([25, 9, 2012])
    X = np.zeros((m, 1))
    dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(1)

    learning_rate = 1e-3

    cost = DummyCost()

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=1,
                    monitoring_batches=1,
                    monitoring_dataset=dataset,
                    termination_criterion=EpochCounter(max_epochs=1),
                    update_callbacks=None,
                    set_batch_size=False)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #55
0
def test_afew2ft_train():
    n_classes = 7
    dataset = AFEW2FaceTubes(which_set='Train')
    monitoring_dataset = {
        'train': dataset,
        'valid': AFEW2FaceTubes(which_set='Val')}
    model = DummyModel(n_classes=n_classes,
                       input_space=dataset.get_data_specs()[0].components[0])
    cost = DummyCost()
    termination_criterion = EpochCounter(10)

    learning_rate = 1e-6
    batch_size = 1
    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=batch_size,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)
    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None)
    train.main_loop()
Example #56
0
def main():

    # creating layers
    # 2 convolutional rectified layers, border mode valid
    batch_size = 48
    lr = 1.0 #0.1/4
    finMomentum = 0.9
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    #save_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'.joblib'
    #best_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'best.joblib'
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'

    #numBatches = 400000/batch_size

    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop =40000)
    ddmValid = EmotiwKeypoints(start=40000, stop = 44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name = 'train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name = 'val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name = 'test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size = 1000)
    GCN.apply(ddmTrain, can_fit =True, name = 'train')
    GCN.apply(ddmValid, can_fit =False, name = 'val')
    GCN.apply(ddmTest, can_fit = False, name = 'test')
    return
    '''

    ddmTrain = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='train')
    ddmValid = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='valid')
    #ddmSmallTrain = ComboDatasetPyTable('/Tmp/zumerjer/all_', which_set='small_train')

    layer1 = ConvRectifiedLinear(layer_name = 'convRect1',
                     output_channels = 64,
                     irange = .05,
                     kernel_shape = [5, 5],
                     pool_shape = [4, 4],
                     pool_stride = [2, 2],
                     W_lr_scale = 0.1,
                     max_kernel_norm = lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name = 'convRect2',
                     output_channels = 128,
                     irange = .05,
                     kernel_shape = [5, 5],
                     pool_shape = [3, 3],
                     pool_stride = [2, 2],
                     W_lr_scale = 0.1,
                     max_kernel_norm = lay2_reg)

        # Rectified linear units
    #layer3 = RectifiedLinear(dim = 3000,
    #                         sparse_init = 15,
    #                 layer_name = 'RectLin3')

    #Maxout layer
    maxout = Maxout(layer_name= 'maxout',
                    irange= .005,
                    num_units= maxout_units,
                    num_pieces= num_pcs,
                    W_lr_scale = 0.1,
                    max_col_norm= maxout_reg)

    #multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,irange = 0.05, n_classes=n_classes, layer_name= layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size = batch_size,
                 input_space = Conv2DSpace(shape = [96, 96],
                 num_channels = 3, axes=('b', 0, 1, 'c')),
                 layers = [ layer1, layer2, maxout, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                            missing_target_value=missing_target_value )
    mlp_cost.setup_dropout(input_include_probs= { 'convRect1' : 1.0 }, input_scales= { 'convRect1': 1. })

    #dropout_cost = Dropout(input_include_probs= { 'convRect1' : .8 },
    #                      input_scales= { 'convRect1': 1. })

    #algorithm
    monitoring_dataset = {'validation':ddmValid}#, 'mini-train':ddmSmallTrain}

    term_crit  = MonitorBased(prop_decrease = 1e-7, N = 100, channel_name = 'validation_objective')

    kp_ada = KeypointADADELTA(decay_factor=0.95,
                              # init_momentum=0.5,
                              monitoring_dataset=monitoring_dataset,
                              batch_size=batch_size,
                              termination_criterion=term_crit,
                              cost=mlp_cost)

    #train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor = 0.998, min_lr_scale = 0.001)
    #train_ext = LinearDecayOverEpoch(start= 1,saturate= 250,decay_factor= .01)
    #train_ext = ADADELTA(0.95)

    #train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kp_ada,
                  extensions=[  # train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path=best_path),
                      # MomentumAdjustor(start=1,
                      #                  saturate=25,
                      #                  final_momentum=finMomentum),
                  ])
    train.main_loop()
    train.save()
Example #57
0
    def run_sgd(mode):
        # Must be seeded the same both times run_sgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches * batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m, 1))
            y[:, 0] = np.dot(X, w) > 0.

            rval = DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = ""  # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2

        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """
            def __init__(self):
                self.W1 = [
                    sharedX(rng.randn(num_features, chunk_width))
                    for i in xrange(num_chunks)
                ]
                disturb_mem.disturb_mem()
                self.W2 = [
                    sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)
                ]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()

        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            supervised = True

            def expr(self, model, data, **kwargs):
                self.get_data_specs(model)[0].validate(data)
                X, Y = data
                disturb_mem.disturb_mem()

                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(
                    mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred - Y[:, 0]).sum()

            def get_data_specs(self, model):
                data = CompositeSpace(
                    (model.get_input_space(), model.get_output_space()))
                source = (model.get_input_source(), model.get_target_source())
                return (data, source)

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        algorithm = SGD(
            cost=cost,
            batch_size=batch_size,
            init_momentum=.5,
            learning_rate=1e-3,
            monitoring_dataset={
                'train': train,
                'valid': valid
            },
            update_callbacks=[ExponentialDecay(decay_factor=2., min_lr=.0001)],
            termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(dataset=train,
                             model=model,
                             algorithm=algorithm,
                             extensions=[
                                 PolyakAveraging(start=0),
                                 MomentumAdjustor(final_momentum=.9,
                                                  start=1,
                                                  saturate=5),
                             ],
                             save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()