def predict(self, X, var=False, max_rows=1000):
    """Predict the target values.

    Parameters
    ----------

    X : array_like
        Array of shape ``(n_samples, n_features)``.

    var : boolean
        Flag indicating whether the variance of the predictions should
        be returned as well. In this case, the complexity rises from
        O(n) to O(n^2).

    max_rows : integer
        Maximum number of predictions to do in one step; a lower number
        might help performance.

    Returns
    -------

    mean : array_like
        Array of shape ``(n_samples, 1)`` containing the mean of the
        predictions.

    variance : array_like
        Only if ``var == True``. Array of shape ``(n_samples, 1)``
        containing the variance of the predictions.
    """
    if self.f_predict is None or self.f_predict_var is None:
        self.f_predict, self.f_predict_var = self._make_predict_functions(
            self.stored_X, self.stored_Z)

    # Work through the input in chunks of at most ``max_rows`` rows.
    n_steps, rest = divmod(X.shape[0], max_rows)
    if rest != 0:
        n_steps += 1
    steps = [(i * max_rows, (i + 1) * max_rows) for i in range(n_steps)]

    X = (X - self.mean_x) / self.std_x

    if var:
        Y, = theano_floatx(np.empty((X.shape[0], 1)))
        Y_var, = theano_floatx(np.empty((X.shape[0], 1)))
        for start, stop in steps:
            this_x = X[start:stop]
            m, s = self.f_predict_var(this_x)
            Y[start:stop] = m
            Y_var[start:stop] = s
        # Undo the target normalization; the variance scales with the
        # square of the standard deviation.
        Y = (Y * self.std_z) + self.mean_z
        Y_var = Y_var * self.std_z ** 2
        return Y, Y_var
    else:
        Y, = theano_floatx(np.empty((X.shape[0], 1)))
        for start, stop in steps:
            this_x = X[start:stop]
            Y[start:stop] = self.f_predict(this_x)
        Y = (Y * self.std_z) + self.mean_z
        return Y
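# Hedged usage sketch for the predict() API documented above, assuming a
# GaussianProcess fitted as in the linear-kernel tests below; the helper
# name example_gp_predict is hypothetical. With var=True, predict returns
# both the mean and the variance, each of shape (n_samples, 1), at O(n^2)
# instead of O(n) cost.
def example_gp_predict(gp, X):
    Y = gp.predict(X)                   # mean only
    Y, Y_var = gp.predict(X, var=True)  # mean and variance
    return Y, Y_var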
def _make_start_exprs(self):
    exprs = {
        'inpt': T.tensor3('inpt')
    }
    exprs['inpt'].tag.test_value, = theano_floatx(
        np.ones((3, 2, self.n_inpt)))

    if self.imp_weight:
        exprs['imp_weight'] = T.tensor3('imp_weight')
        exprs['imp_weight'].tag.test_value, = theano_floatx(
            np.ones((3, 2, 1)))

    return exprs
def _make_start_exprs(self):
    inpt = T.tensor3('inpt')
    inpt.tag.test_value, = theano_floatx(np.ones((4, 3, self.n_inpt)))

    if self.use_imp_weight:
        imp_weight = T.tensor3('imp_weight')
        imp_weight.tag.test_value, = theano_floatx(np.ones((4, 3, 1)))
    else:
        imp_weight = None

    return inpt, imp_weight
def _make_start_exprs(self):
    inpt = T.matrix('inpt')
    inpt.tag.test_value, = theano_floatx(np.ones((3, self.n_inpt)))

    if self.use_imp_weight:
        imp_weight = T.matrix('imp_weight')
        imp_weight.tag.test_value, = theano_floatx(np.ones((3, 1)))
    else:
        imp_weight = None

    return inpt, imp_weight
def test_gp_fit_linear():
    X = np.arange(-2, 2, .01)[:, np.newaxis].astype(theano.config.floatX)
    X, = theano_floatx(X)
    idxs = range(X.shape[0])
    idxs = random.sample(idxs, 200)
    X = X[idxs]
    Z = np.sin(X)
    X, Z = theano_floatx(X, Z)

    gp = GaussianProcess(1, max_iter=10, kernel='linear')
    gp.fit(X, Z)
def test_gp_fit_linear():
    X = np.arange(-2, 2, .1)[:, np.newaxis].astype(theano.config.floatX)
    X, = theano_floatx(X)
    idxs = range(X.shape[0])
    idxs = random.sample(idxs, 20)
    X = X[idxs]
    Z = np.sin(X)
    X, Z = theano_floatx(X, Z)

    gp = GaussianProcess(1, max_iter=10, kernel='linear')
    gp.fit(X, Z)
def _make_start_exprs(self):
    inpt = T.tensor4('inpt')
    inpt.tag.test_value, = theano_floatx(np.ones(
        (3, self.n_channel, self.image_height, self.image_width)))

    if self.use_imp_weight:
        imp_weight = T.tensor4('imp_weight')
        imp_weight.tag.test_value, = theano_floatx(np.ones(
            (3, self.n_channel, self.image_height, self.image_width)))
    else:
        imp_weight = None

    return inpt, imp_weight
def test_dmlp_predict():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    mlp = DropoutMlp(2, [10], 1, ['tanh'], 'identity', 'squared',
                     max_iter=10, p_dropout_inpt=.2,
                     p_dropout_hiddens=[0.5])
    mlp.predict(X)
def test_sparse_coding_fit():
    raise SkipTest()
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    sf = SparseCoding(2, 7, max_iter=10)
    sf.fit(X)
def test_rim_iter_fit():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    rim = Rim(2, 10, 0.1, max_iter=10)
    for i, info in enumerate(rim.iter_fit(X)):
        if i >= 10:
            break
def test_storn():
    theano.config.compute_test_value = 'raise'
    X = np.random.random((3, 5, 2))
    X, = theano_floatx(X)

    class Assmptn(sgvb.DiagGaussLatentAssumption,
                  sgvb.DiagGaussVisibleAssumption):
        pass

    m = sgvb.StochasticRnn(
        2, [5], 4, [5],
        ['tanh'] * 1,
        ['rectifier'] * 1,
        assumptions=Assmptn(),
        optimizer='rprop',
        batch_size=None,
        max_iter=3)

    print 'init pars and expressions'
    m._init_pars()
    m._init_exprs()
    print 'fitting'
    m.fit(X)
    print 'scoring'
    m.score(X)
    print 'transforming'
    m.transform(X)
    print 'sampling map'
    m.sample(5, visible_map=True)
    print 'sampling'
    m.sample(5, visible_map=False)
def test_cnn_predict():
    X = np.random.standard_normal((10, 2 * 100 * 50))
    X, = theano_floatx(X)
    m = Cnn(100 * 50, [10, 15], [20, 12], 1,
            ['sigmoid', 'sigmoid'], ['rectifier', 'rectifier'], 'sigmoid',
            'cat_ce', 100, 50, 2,
            optimizer=('rmsprop', {'step_rate': 1e-4, 'decay': 0.9}),
            batch_size=2,
            max_iter=10,
            pool_shapes=[(2, 2), (2, 2)],
            filter_shapes=[(4, 4), (3, 3)])
    m.predict(X)
def test_cnn_iter_fit():
    X = np.random.standard_normal((10, 2 * 100 * 50))
    Z = np.random.random((10, 1)) > 0.5
    X, Z = theano_floatx(X, Z)
    m = Cnn(100 * 50, [10, 15], [20, 12], 1,
            ['sigmoid', 'sigmoid'], ['rectifier', 'rectifier'], 'sigmoid',
            'cat_ce', 100, 50, 2,
            optimizer=('rmsprop', {'step_rate': 1e-4, 'decay': 0.9}),
            batch_size=2,
            max_iter=10,
            pool_shapes=[(2, 2), (2, 2)],
            filter_shapes=[(4, 4), (3, 3)])
    for i, info in enumerate(m.iter_fit(X, Z)):
        if i >= 10:
            break
def test_storn():
    X = np.random.random((3, 3, 2))
    X, = theano_floatx(X)
    kwargs = {
        'n_inpt': X.shape[2],
        'n_hiddens_recog': [5],
        'n_latent': 11,
        'n_hiddens_gen': [7],
        'recog_transfers': ['tanh'],
        'gen_transfers': ['rectifier'],
        'p_dropout_inpt': .1,
        'p_dropout_hiddens': [.1],
        'p_dropout_shortcut': [.1],
        'p_dropout_hidden_to_out': .1,
        'use_imp_weight': False,
        'optimizer': 'adam',
        'batch_size': None,
        'verbose': False,
        'max_iter': 3,
    }
    m = MyStorn(**kwargs)

    print 'fitting'
    m.fit(X)
    print 'scoring'
    m.score(X)
    print 'initializing'
    m.initialize()
def test_lenet():
    image_height, image_width = 16, 16
    X = np.random.standard_normal((11, 1, image_height, image_width))
    Z = np.random.random((11, 1)) > 0.5
    X, Z = theano_floatx(X, Z)

    n_hiddens_conv = [5, 2]
    filter_shapes = [(2, 2), (2, 2)]
    pool_shapes = [(2, 2), (2, 2)]
    n_hiddens_full = [20]
    transfers_conv = ['tanh', 'tanh']
    transfers_full = ['rectifier']
    n_channel = 1
    n_output = 1
    out_transfer = 'identity'
    loss = 'squared'

    m = Lenet(image_height, image_width, n_channel,
              n_hiddens_conv, filter_shapes, pool_shapes,
              n_hiddens_full, n_output,
              transfers_conv, transfers_full,
              out_transfer, loss)

    f_predict = m.function(['inpt'], 'output', mode='FAST_COMPILE')
    f_predict(X)
    m.fit(X, Z)
def test_sparse_coding_iter_fit():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    sf = SparseCoding(2, 7, max_iter=10)
    for i, info in enumerate(sf.iter_fit(X)):
        if i >= 10:
            break
def test_tsne():
    X = np.random.random((100, 3)).astype(theano.config.floatX)
    X, = theano_floatx(X)
    tsne = Tsne(n_inpt=3, n_lowdim=2, perplexity=40,
                early_exaggeration=50, max_iter=10)
    E = tsne.fit_transform(X)
def test_rica_reconstruct():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    rica = Rica(2, 10, code_transfer='softabs', hidden_transfer='identity',
                loss='squared', c_ica=0.5, max_iter=10)
    rica.reconstruct(X)
def test_minibatch_score_trainer():
    X = np.random.random((100, 10))
    Z = np.random.random((100, 2))
    X, Z = theano_floatx(X, Z)
    data = {'train': (X, Z), 'val': (X, Z), 'test': (X, Z)}

    cut_size = 10

    class MyModel(mlp.Mlp):
        def score(self, X, Z):
            assert X.shape[0] <= cut_size
            return super(MyModel, self).score(X, Z)

    m = MyModel(10, [100], 2, ['tanh'], 'identity', 'squared', max_iter=10)

    score = MinibatchScore(cut_size, [0, 0])
    trainer = Trainer(m, data, score=score,
                      pause=lambda info: True, stop=lambda info: False)
    trainer.val_key = 'val'

    for _ in trainer.iter_fit(X, Z):
        break
def test_training_continuation():
    # Make model and data for the test.
    X = np.random.random((10, 2))
    X, = theano_floatx(X)
    optimizer = 'gd'
    m = autoencoder.AutoEncoder(2, [2], ['tanh'], 'identity', 'squared',
                                tied_weights=True, max_iter=10,
                                optimizer=optimizer)

    # Train the model with a trainer for 2 epochs.
    stopper = climin.stops.OnSignal()
    print stopper.sig
    stops = climin.stops.Any([stopper, climin.stops.AfterNIterations(5)])
    t = Trainer(m, stop=stops, pause=climin.stops.always)
    t.val_key = 'val'
    t.eval_data = {'val': (X,)}

    killed = False
    for info in t.iter_fit(X):
        os.kill(os.getpid(), stopper.sig)

    assert info['n_iter'] == 1
def test_fd_predict():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    # The loss only compares the leading columns of the prediction
    # (matching the target's width) against the target.
    loss = lambda target, prediction: squared(
        target, prediction[:, :target.shape[1]])
    mlp = FastDropoutNetwork(
        2, [10], 1, ['rectifier'], 'identity', loss, max_iter=10)
    mlp.predict(X)
def test_training_continuation():
    if sys.platform == 'win32':
        raise SkipTest()

    # Make model and data for the test.
    X = np.random.random((100, 10))
    Z = np.random.random((100, 2))
    X, Z = theano_floatx(X, Z)
    data = {'train': (X, Z), 'val': (X, Z), 'test': (X, Z)}
    optimizer = 'rmsprop', {'step_rate': 0.0001}
    m = mlp.Mlp(10, [2], 2, ['tanh'], 'identity', 'squared', max_iter=10,
                optimizer=optimizer)

    # Train the model with a trainer for 2 epochs.
    stopper = climin.stops.OnSignal()
    stops = climin.stops.Any([stopper, climin.stops.AfterNIterations(5)])
    t = Trainer(m, data, stop=stops, pause=climin.stops.always)
    t.val_key = 'val'

    for info in t.iter_fit(X, Z):
        os.kill(os.getpid(), stopper.sig)

    assert info['n_iter'] == 1
def test_minibatch_score_trainer():
    X = np.random.random((100, 10))
    X, = theano_floatx(X)
    cut_size = 10

    class MyAutoEncoder(autoencoder.AutoEncoder):
        def score(self, X):
            assert X.shape[0] <= cut_size
            return super(MyAutoEncoder, self).score(X)

    m = MyAutoEncoder(10, [100], ['tanh'], 'identity', 'squared',
                      tied_weights=True, max_iter=10)

    score = MinibatchScore(cut_size, [0])
    trainer = Trainer(m, score=score,
                      pause=lambda info: True, stop=lambda info: False)
    trainer.eval_data = {'val': (X,)}
    trainer.val_key = 'val'

    for _ in trainer.iter_fit(X):
        break
def test_lde():
    X = np.eye(2)
    X, = theano_floatx(X)
    lde = LinearDenoiser(0.5)
    lde.fit(X)
    assert np.allclose(lde.weights,
                       [[0.499995, -0.499985], [-0.499985, 0.499995]])
    assert np.allclose(lde.bias, [0.499995, 0.499995])
def test_uslstm_iter_fit():
    raise SkipTest()
    X = np.random.standard_normal((10, 5, 2)).astype(theano.config.floatX)
    X, = theano_floatx(X)
    rnn = UnsupervisedLstm(2, [10], 3, loss=lambda x: T.log(x), max_iter=10)
    for i, info in enumerate(rnn.iter_fit(X)):
        if i >= 10:
            break
def test_slstm_iter_fit():
    X = np.random.standard_normal((10, 5, 2)).astype(theano.config.floatX)
    Z = np.random.standard_normal((10, 5, 3)).astype(theano.config.floatX)
    X, Z = theano_floatx(X, Z)
    rnn = SupervisedLstmRnn(2, [10], 3, hidden_transfers=['sigmoid'],
                            max_iter=10)
    for i, info in enumerate(rnn.iter_fit(X, Z)):
        if i >= 10:
            break
def test_sparse_filtering_iter_fit():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    sf = SparseFiltering(2, 10, max_iter=10)
    for i, info in enumerate(sf.iter_fit(X)):
        if i >= 10:
            break
def test_slstm_predict():
    X = np.random.standard_normal((10, 5, 2)).astype(theano.config.floatX)
    X, = theano_floatx(X)
    rnn = SupervisedLstmRnn(2, [10], 3, hidden_transfers=['sigmoid'],
                            max_iter=10)
    rnn.predict(X)
def test_autoencoder():
    X = np.random.random((10, 10))
    X, = theano_floatx(X)
    m = autoencoder.AutoEncoder(10, [100], ['tanh'], 'identity', 'squared',
                                tied_weights=True, max_iter=10)
    m.fit(X)
    m.score(X)
    m.transform(X)
def test_sparse_coding_iter_fit():
    raise SkipTest()
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    sf = SparseCoding(2, 7, max_iter=10)
    for i, info in enumerate(sf.iter_fit(X)):
        if i >= 10:
            break
def test_mlp_fit():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    X, Z = theano_floatx(X, Z)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    mlp.fit(X, Z)
def test_srnn_predict():
    X = np.random.standard_normal((10, 5, 2)).astype(theano.config.floatX)
    X, = theano_floatx(X)
    rnn = SupervisedRnn(2, [10], 3, hidden_transfers=['tanh'], max_iter=10)
    rnn.predict(X)
    rnn = SupervisedRnn(2, [10], 3, hidden_transfers=['tanh'],
                        skip_to_out=True, max_iter=10)
    rnn.predict(X)
def test_mlp_iter_fit():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    X, Z = theano_floatx(X, Z)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10)
    for i, info in enumerate(mlp.iter_fit(X, Z)):
        if i >= 10:
            break
def test_mlp_fit_with_imp_weight():
    X = np.random.standard_normal((10, 2))
    Z = np.random.standard_normal((10, 1))
    W = np.random.random((10, 1)) > 0.5
    X, Z, W = theano_floatx(X, Z, W)
    mlp = Mlp(2, [10], 1, ['tanh'], 'identity', 'squared', max_iter=10,
              imp_weight=True)
    mlp.fit(X, Z, W)
def test_checkpoint_trainer():
    # Make model and data for the test.
    X = np.random.random((100, 10))
    Z = np.random.random((100, 2))
    X, Z = theano_floatx(X, Z)
    data = {'train': (X, Z), 'val': (X, Z), 'test': (X, Z)}
    optimizer = 'rmsprop', {'step_rate': 0.0001}
    m = mlp.Mlp(10, [2], 2, ['tanh'], 'identity', 'squared', max_iter=10,
                optimizer=optimizer)

    # Train the model with a trainer for 2 epochs.
    t = Trainer(m, data, stop=climin.stops.AfterNIterations(2),
                pause=climin.stops.always)
    t.val_key = 'val'
    t.fit()

    # Make a copy of the trainer.
    t2 = copy.deepcopy(t)
    t2.data = t.data
    intermediate_pars = t2.model.parameters.data.copy()
    intermediate_info = t2.current_info.copy()

    # Train the original for 2 more epochs.
    t.stop = climin.stops.AfterNIterations(4)
    t.fit()

    # Check that the snapshot has not changed.
    assert np.all(t2.model.parameters.data == intermediate_pars)

    final_pars = t.model.parameters.data.copy()
    final_info = t.current_info.copy()
    check_infos(intermediate_info, t2.current_info)

    # Resume the snapshot; it should arrive at the same result.
    t2.stop = climin.stops.AfterNIterations(4)
    t2.fit()
    check_infos(final_info, t2.current_info)
    assert np.allclose(final_pars, t2.model.parameters.data)

    # The trainer also survives a pickle round trip; the data is not
    # pickled and has to be reattached.
    t_pickled = cPickle.dumps(t2)
    t_unpickled = cPickle.loads(t_pickled)
    t_unpickled.data = data
    t.stop = climin.stops.AfterNIterations(4)
    t_unpickled.fit()
    assert np.allclose(final_pars, t_unpickled.model.parameters.data,
                       atol=5.e-3)
def test_srnn_lstm_fit():
    X = np.random.standard_normal((13, 5, 4)).astype(theano.config.floatX)
    Z = np.random.standard_normal((13, 5, 3)).astype(theano.config.floatX)
    W = np.random.standard_normal((13, 5, 3)).astype(theano.config.floatX)
    X, Z, W = theano_floatx(X, Z, W)
    rnn = SupervisedRnn(4, [10], 3, hidden_transfers=['lstm'], max_iter=2)
    rnn.fit(X, Z)
def test_rica_iter_fit():
    X = np.random.standard_normal((10, 2))
    X, = theano_floatx(X)
    rica = Rica(2, 10, code_transfer='softabs', hidden_transfer='identity',
                loss='squared', c_ica=0.5, max_iter=10)
    for i, info in enumerate(rica.iter_fit(X)):
        if i >= 10:
            break
def test_deep_fdvae():
    X = np.random.random((2, 10))
    X, = theano_floatx(X)
    m = MyFDVAE(95, [20, 30], 4, [15, 25],
                ['rectifier'] * 2, ['rectifier'] * 2,
                optimizer='rprop', batch_size=None, max_iter=3)