Example #1
    def End2end_Early_stopping(self, numpy_rng, dataset, n_validate, data_name,
                               batch_size, end2end_lr, algo, norm, patience,
                               validation):

        train_X, test_X, actual = dataset
        valid_x = train_X.get_value()[:n_validate]
        train_x = train_X.get_value()[n_validate:]
        #train_x = train_x[:100]

        # Compute tm and vm before the optimization process
        t = theano.shared(numpy.asarray(train_x, dtype=theano.config.floatX),
                          borrow=True)
        v = theano.shared(numpy.asarray(valid_x, dtype=theano.config.floatX),
                          borrow=True)

        # Use downhill to train the network

        opt = downhill.build(algo=algo,
                             params=self.params,
                             loss=self.end2end_cost,
                             inputs=[self.x])

        train = downhill.Dataset(train_x, batch_size=batch_size, rng=numpy_rng)
        valid = downhill.Dataset(valid_x,
                                 batch_size=len(valid_x),
                                 rng=numpy_rng)

        # Monitoring before the optimization process
        stop_ep = 0

        for tm1, vm1 in opt.iterate(
                train,
                valid,
                patience=patience,
                validate_every=validation,
                min_improvement=1e-3,
                #learning_rate =  end2end_lr,
                momentum=0.0,
                nesterov=False):

            stop_ep = stop_ep + 1
            # "***** Classification results after end-to-end training *****"
            # if (stop_ep % 1 == 0) and (stop_ep > 0):
            #     lof, cen, dis, kde, svm05, svm01, ae = self.Compute_AUC_Hidden(
            #         train_X, test_X, actual, norm, data_name)
            #     a = [stop_ep, lof, cen, dis, kde, svm05, svm01, ae]
            #     monitor = np.append(monitor, a)

            if (stop_ep >= 1000):
                break

        # Plot AUC and save to a CSV file


#        monitor = np.reshape(monitor, (-1,8))
#        Plotting_Monitor(monitor, 0.4, 1.0, data_name, path)
#        np.savetxt(path + data_name + "_monitor_auc.csv", monitor, delimiter=",", fmt='%f' )

        return [stop_ep, vm1['loss'], tm1['loss']]
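The build/Dataset/iterate pattern used in Examples #1 and #2 can be reduced to a few lines. Below is a minimal, self-contained sketch on a toy least-squares loss; the variable names, shapes, and hyperparameters are illustrative and not part of the original code.

# A minimal, hedged sketch of the same build/Dataset/iterate loop on a toy
# least-squares loss (all names and numbers here are illustrative).
import numpy as np
import theano
import theano.tensor as TT
import downhill

x = TT.matrix('x')
w = theano.shared(np.zeros((2, 1), dtype=theano.config.floatX), name='w')
loss = TT.sqr(x.dot(w) - 1).mean()  # push x @ w towards 1

data = np.random.randn(100, 2).astype(theano.config.floatX)
train = downhill.Dataset(data[:80], batch_size=16)
valid = downhill.Dataset(data[80:], batch_size=20)

opt = downhill.build('sgd', loss=loss, params=[w], inputs=[x])
for epoch, (tm, vm) in enumerate(opt.iterate(train, valid,
                                             patience=5,
                                             validate_every=2,
                                             min_improvement=1e-3,
                                             learning_rate=0.1), 1):
    print(epoch, 'train loss', tm['loss'], 'valid loss', vm['loss'])
    if epoch >= 100:  # hard cap, mirroring the break in the examples above
        break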
Example #2
    def End2end_Early_stopping(self, numpy_rng, dataset, n_validate, data_name,
                               batch_size, end2end_lr, algo, norm, patience,
                               validation):

        train_X, test_X, actual = dataset
        valid_x = train_X.get_value()[:n_validate]
        train_x = train_X.get_value()[n_validate:]
        # Compute tm and vm before the optimization process

        # Train the network with downhill
        # Available algos: 'adadelta', 'adagrad' (default 0.01), 'adam', 'esgd', 'nag', 'rmsprop', 'rprop', 'sgd'
        opt = downhill.build(algo=algo,
                             params=self.params,
                             loss=self.end2end_cost,
                             inputs=[self.x])
        train = downhill.Dataset(train_x, batch_size=batch_size, rng=numpy_rng)
        valid = downhill.Dataset(valid_x,
                                 batch_size=len(valid_x),
                                 rng=numpy_rng)

        # ***** Monitor before optimization *****
        stop_ep = 0
        RE = np.empty([0, 3])

        for tm, vm in opt.iterate(
                train,  # 5, 5, 1e-2, 0.9
                valid,
                patience=patience,  # 10
                validate_every=validation,  # 5
                min_improvement=1e-3,  # 1e-3
                #learning_rate =  end2end_lr,  # 1e-4
                momentum=0.0,
                nesterov=False):
            stop_ep = stop_ep + 1

            re = np.column_stack([stop_ep, vm['loss'], tm['loss']])
            RE = np.append(RE, re)

            if (stop_ep >= 1000):
                break

        RE = np.reshape(RE, (-1, 3))
        Plotting_End2End_RE(RE, stop_ep, 0.0, 0.4, data_name, path)
        np.savetxt(path + data_name + "_training_error1.csv",
                   RE,
                   delimiter=",",
                   fmt='%f')

        np.set_printoptions(precision=6, suppress=True)
        print("\n ", RE[stop_ep - 1])

        return RE[stop_ep - 1]
Example #3
 def test_batch_size(self):
     ds = downhill.Dataset([np.random.randn(40, 2)], batch_size=10, rng=4)
     assert len(ds._batches) == 4
     assert ds._batches[0][0].shape == (10, 2)
     assert ds._batches[1][0].shape == (10, 2)
     assert ds._batches[2][0].shape == (10, 2)
     assert ds._batches[3][0].shape == (10, 2)
     ds = downhill.Dataset([np.random.randn(40, 2)], batch_size=11, rng=4)
     assert len(ds._batches) == 4
     assert ds._batches[0][0].shape == (11, 2)
     assert ds._batches[1][0].shape == (11, 2)
     assert ds._batches[2][0].shape == (7, 2)
     assert ds._batches[3][0].shape == (11, 2)
Example #4
 def test_batch_size(self):
     ds = downhill.Dataset([np.random.randn(40, 2)], batch_size=10, rng=4)
     assert len(ds._slices) == 4
     assert_size(ds, 0, 10)
     assert_size(ds, 1, 10)
     assert_size(ds, 2, 10)
     assert_size(ds, 3, 10)
     ds = downhill.Dataset([np.random.randn(40, 2)], batch_size=11, rng=4)
     assert len(ds._slices) == 4
     assert_size(ds, 0, 11)
     assert_size(ds, 1, 11)
     assert_size(ds, 2, 7)
     assert_size(ds, 3, 11)
Example #5
    def pretrain_Early_stopping(self, numpy_rng, train_set, n_validate,
                                data_name, batch_size, pre_lr, corruptions):

        RE = np.empty([10000, self.n_layers])
        stop_epoch = np.empty([self.n_layers])

        for i in range(self.n_layers):
            cost, updates = self.dA_layers[i].get_cost_updates(
                corruptions[i], pre_lr)
            if (i == 0):
                train_x1 = train_set.get_value()
            else:
                train_x1 = self.get_hidden_i(train_set, i - 1)

            valid_x = train_x1[:n_validate]
            train_x = train_x1[n_validate:]
            # Available algos: 'adadelta', 'adagrad' (default 0.01), 'adam', 'esgd', 'nag', 'rmsprop', 'rprop', 'sgd'
            opt = downhill.build(algo='sgd',
                                 params=self.dA_layers[i].params,
                                 loss=cost)
            train = downhill.Dataset(train_x,
                                     batch_size=batch_size,
                                     rng=numpy_rng)
            valid = downhill.Dataset(valid_x,
                                     batch_size=len(valid_x),
                                     rng=numpy_rng)

            epoch = 0
            re = np.empty([10000])
            for tm1, vm1 in opt.iterate(
                    train,
                    valid,
                    patience=100,  #100
                    validate_every=5,  #5
                    min_improvement=1e-3,  #4
                    learning_rate=pre_lr,  #1e-2
                    momentum=0.0,
                    nesterov=False):
                re[epoch] = tm1['loss']
                epoch = epoch + 1
                if (epoch == 200):
                    break

            RE[:, i] = re
            stop_epoch[i] = epoch

        print(' + Stopping epoch:', stop_epoch)
        Plotting_Pre_RE1(RE, stop_epoch, self.n_layers, 0.0, 0.1, batch_size,
                         data_name, path)
Example #6
    def fit(self, train, entities, relations, param):

        self.n, self.m, self.l, self.k = entities, relations, entities, param.k
        self.setup(param)

        train, inputs = self.minibatch(train, param)
        opt = downhill.build(param.sgd,
                             loss=self.loss_opt,
                             inputs=inputs,
                             monitor_gradients=True)

        train = downhill.Dataset(train, name='train')

        it = 0
        for _ in opt.iterate(train,
                             None,
                             max_updates=param.epoch,
                             validate_every=10,
                             patience=5,
                             max_gradient_norm=1,
                             learning_rate=param.lr):

            it += 1
            if it >= param.epoch:
                break
Example #7
	def fit(self, train_triples, valid_triples, hparams, n=0, m=0, l=0, scorer=None):

		#Set input dimensions:
		if n == 0: #No dimensions given; useful for transparent prediction of entities/rels not seen in train
			self.set_dims(train_triples, hparams)
		else:
			self.n, self.m, self.l, self.k = n, m, l, hparams.embedding_size

		#Define the downhill loss corresponding to the input dimensions
		self.setup_params_for_train(train_triples, valid_triples, hparams)
		
		#get the loss inputs:
		train_vals, train_symbs, valid_vals = self.get_loss_args_and_symb_vars(train_triples, valid_triples, hparams)

		opt = downhill.build(hparams.learning_rate_policy, loss=self.loss_to_opt, inputs=train_symbs, monitor_gradients=True)

		train_vals = downhill.Dataset(train_vals, name = 'train')


		#Main SGD loop
		it = 0
		best_valid_mrr = -1
		best_valid_ap = -1
		for tm, vm in opt.iterate(train_vals, None,
				max_updates=hparams.max_iter,
				validate_every=9999999,  # I handle validation myself, using validation metrics instead of the loss
				patience=9999999,  # Number of tolerated improvements of the validation loss below min_improvement
				max_gradient_norm=1,          			# Prevent gradient explosion!
				learning_rate=hparams.learning_rate):


			if it % hparams.valid_scores_every == 0 and scorer is not None:
				if valid_triples is not None:
					logger.info("Validation metrics:")
					res = scorer.compute_scores(self, self.name, hparams, valid_triples)
					cv_res = CV_Results()
					cv_res.add_res(res, self.name, hparams.embedding_size, hparams.lmbda, self.nb_params)


					if scorer.compute_ranking_scores:
						metrics = cv_res.print_MRR_and_hits()
						
						#Early stopping on filtered MRR
						if best_valid_mrr >= metrics[self.name][2]:
							logger.info("Validation filtered MRR decreased, stopping here.")
							break
						else:
							best_valid_mrr = metrics[self.name][2]
					else:
						logger.info("Validation AP: " + str(res.ap))
						#Early stopping on Average Precision
						if best_valid_ap >= res.ap:
							logger.info("Validation AP decreased, stopping here.")
							break
						else:
							best_valid_ap = res.ap

			it += 1
			if it >= hparams.max_iter: #Avoid downhill resetting the parameters when max_iter is reached
				break
Example #8
    def test_iteration_size(self):
        def batches_unchanged(previous):
            return all(
                np.allclose(a, b) for a, b in zip(ds._batches, previous))

        ds = downhill.Dataset([np.random.randn(40, 2)],
                              batch_size=5,
                              iteration_size=3)

        previous = list(ds._batches)
        c = sum(1 for _ in ds)
        assert c == 3, 'got {}'.format(c)
        assert ds._index == 3, 'got {}'.format(ds._index)
        assert batches_unchanged(previous)

        previous = list(ds._batches)
        c = sum(1 for _ in ds)
        assert c == 3
        assert ds._index == 6, 'got {}'.format(ds._index)
        assert batches_unchanged(previous)

        previous = list(ds._batches)
        c = sum(1 for _ in ds)
        assert c == 3
        assert ds._index == 1, 'got {}'.format(ds._index)
        assert not batches_unchanged(previous)
Example #9
    def create_dataset(self, data, **kwargs):
        '''Create a dataset for this experiment.

        Parameters
        ----------
        data : sequence of ndarray or callable
            The values that you provide for data will be encapsulated inside a
            :class:`Dataset <downhill.Dataset>` instance; see that class for
            documentation on the types of things it needs. In particular, you
            can currently pass in either a list/array/etc. of data, or a
            callable that generates data dynamically.

        Returns
        -------
        data : :class:`Dataset <downhill.Dataset>`
            A dataset capable of providing mini-batches of data to a training
            algorithm.
        '''
        default_axis = 0
        if not callable(data) and not callable(data[0]) and len(
                data[0].shape) == 3:
            default_axis = 1
        name = kwargs.get('name', 'dataset')
        b, i, s = 'batch_size', 'iteration_size', '{}_batches'.format(name)
        return downhill.Dataset(data,
                                name=name,
                                batch_size=kwargs.get(b, 32),
                                iteration_size=kwargs.get(i, kwargs.get(s)),
                                axis=kwargs.get('axis', default_axis))
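A hedged usage sketch for create_dataset; the experiment object expt and the array shapes below are assumptions for illustration, not part of the original code.

# Hypothetical usage of create_dataset (expt stands in for the object that
# defines the method above).
import numpy as np

batches = np.random.randn(200, 10).astype('float32')      # 2-D data -> axis 0
ds = expt.create_dataset([batches], name='train',
                         batch_size=20, train_batches=8)  # 8 batches per pass
seqs = np.random.randn(50, 16, 10).astype('float32')      # 3-D data -> axis 1
ds_seq = expt.create_dataset([seqs], name='valid')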
def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):

    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Get layer sizes
    K = []
    for i in range(len(W1)):
        K.append([W1[i].shape[0], W2[i].shape[0]])
    K.append([M.T.shape[1], M.T.shape[1]])

    # We have weights to discover, init = 2/(Nin+Nout)
    H = theano.shared(
        sqrt(2. / (K[0][0] + K[0][1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0][0] + K[0][1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0][0] + K[0][1]), input_var=H)

    # Split in two pathways, one for each source's autoencoder
    H1 = (len(W1) + 1) * [None]
    H2 = (len(W1) + 1) * [None]
    H1[0] = SliceLayer(fI, indices=slice(0, K[0][0]), axis=1)
    H2[0] = SliceLayer(fI, indices=slice(K[0][0], K[0][0] + K[0][1]), axis=1)

    # Put the subsequent layers
    for i in range(len(W1)):
        H1[i + 1] = DenseLayer(H1[i],
                               num_units=K[i + 1][0],
                               W=W1[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)
        H2[i + 1] = DenseLayer(H2[i],
                               num_units=K[i + 1][1],
                               W=W2[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum)
    for i in range(len(H1) - 1):
        cost += sp * Th.mean(abs(get_output(H1[i]))) + sp * Th.mean(
            abs(get_output(H2[i])))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(H1[-1], dum, array([0]).astype(float64)).T
    _r2 = nget(H2[-1], dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
def dnn_model(M,
              K=[20, 20],
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rprop'):

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # Setup the layers
    L = K + [M.T.shape[1]]
    H = len(L) * [None]
    Hd = len(L) * [None]

    # First layer
    H[0] = DenseLayer(I, num_units=K[0], nonlinearity=act, b=None)

    # All the rest
    for k in range(1, len(L)):
        # Optional dropout
        Hd[k - 1] = DropoutLayer(H[k - 1], d)

        # Next layer
        H[k] = DenseLayer(Hd[k - 1], num_units=L[k], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(H[-1]) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro)
    for k in range(len(L) - 1):
        cost += wsp * Th.mean(abs(H[k].W)) + hsp * Th.mean(get_output(H[k]))

    # Train it using Lasagne
    opt = downhill.build(al,
                         loss=cost,
                         inputs=[_M],
                         params=get_all_params(H[-1]))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    h = [nget(H[k], _M, M.T.astype(float64)).T for k in range(len(L))]
    w = [H[k].W.get_value() for k in range(len(L))]

    return h, w, er
def downhill_models(M,
                    P,
                    FE,
                    z,
                    K=20,
                    hh=.001,
                    ep=5000,
                    dp=0,
                    wsp=.001,
                    plt=False):
    from paris.signal import bss_eval

    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Shared variables to use
    x = Th.matrix('x')
    y = theano.shared(M.astype(theano.config.floatX))
    d = theano.shared(float32(dp))

    # Network weights
    W0 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(K, M.shape[0]).astype(theano.config.floatX))
    W1 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(M.shape[0], K).astype(theano.config.floatX))

    # First layer is the transform to a non-negative subspace
    h = psoftplus(W0.dot(x), 3.)

    # Dropout
    if dp > 0:
        h *= (1. / (1. - d) * (rng.uniform(size=h.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    # Second layer reconstructs the input
    r = psoftplus(W1.dot(h), 3.)

    # Approximate input using KL-like distance
    cost = Th.mean(y * (Th.log(y + eps) - Th.log(r + eps)) - y +
                   r) + wsp * Th.mean(abs(W1))

    # Make an optimizer and define the training input
    opt = downhill.build('rprop', loss=cost, inputs=[x], params=[W0, W1])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    downhill_train(opt, train, hh, ep, None)

    # Get approximation
    d = 0
    _, _r = theano.function(inputs=[x], outputs=[h, r],
                            updates=[])(M.astype(theano.config.floatX))
    o = FE.ife(_r, P)
    sxr = bss_eval(o, 0, array([z]))

    return W1.get_value(), sxr
def build_model(algo):
    loss_value = []
    W1.set_value(W1_val)
    b1.set_value(b1_val)
    W2.set_value(W2_val)
    b2.set_value(b2_val)
    opt = downhill.build(algo, loss=loss)
    train = downhill.Dataset([train_X[:-1000], train_y_onehot[:-1000]],
                             batch_size=1,
                             iteration_size=1)
    valid = downhill.Dataset([train_X[-1000:], train_y_onehot[-1000:]])
    iterations = 0
    for tm, vm in opt.iterate(train, valid, patience=1000):
        iterations += 1
        loss_value.append(vm['loss'])
        if iterations > 1000:
            break
    return loss_value
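A brief, hypothetical follow-up showing how build_model might be used to compare optimizers; it assumes the shared variables, their initial values, the loss, and the data defined elsewhere in this script are in scope.

# Hypothetical comparison of optimizers using build_model (assumes the globals
# W1, b1, W2, b2, their *_val initial values, loss, train_X and train_y_onehot
# defined elsewhere in this script).
algos = ['sgd', 'nag', 'rmsprop', 'adam']
loss_curves = {algo: build_model(algo) for algo in algos}
for algo in algos:
    print(algo, 'final validation loss:', loss_curves[algo][-1])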
def cnn_model(M,
              K=20,
              T=1,
              hh=.0001,
              ep=5000,
              d=0,
              hsp=0.0001,
              wsp=0,
              spb=3,
              bt=0,
              al='rprop'):
    # Choose reasonable cuDNN convolution algorithms
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = Th.tensor3('_M')

    # Input and forward transform
    I = InputLayer(shape=M3.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H = Conv1DLayer(I,
                    filter_size=T,
                    num_filters=K,
                    pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Upper layer is the synthesizer
    R = Conv1DLayer(H,
                    filter_size=T,
                    num_filters=M.shape[0],
                    pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean( _M*(Th.log( _M+eps) - Th.log( Ro)) - _M + Ro) \
      + hsp*Th.mean( get_output( H))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M3, batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation and hidden state
    _r = squeeze(nget(R, _M, M3))
    _h = squeeze(nget(H, _M, M3))

    return _r, R.W.get_value(), er, _h
Example #15
 def create_dataset(data, **kwargs):
     name = kwargs.get('name', 'dataset')
     s = '{}_batches'.format(name)
     return downhill.Dataset(data,
                             name=name,
                             batch_size=kwargs.get('batch_size', 32),
                             iteration_size=kwargs.get(
                                 'iteration_size', kwargs.get(s)),
                             axis=kwargs.get('axis', 0),
                             rng=kwargs['rng'])
Example #16
 def test_shared(self):
     x = theano.shared(np.random.randn(40, 2))
     ds = downhill.Dataset([x], batch_size=10, rng=4)
     assert len(ds._slices) == 4
     assert_size(ds, 0, 10)
     assert_size(ds, 1, 10)
     assert_size(ds, 2, 10)
     assert_size(ds, 3, 10)
     f = list(ds)[0][0]
     assert isinstance(f, TT.TensorVariable), type(f)
Example #17
 def test_sparse_csr(self):
     import scipy.sparse as ss
     x = ss.csr_matrix(np.random.randn(40, 2))
     ds = downhill.Dataset([x], batch_size=10, rng=4)
     assert len(ds._slices) == 4
     assert_size(ds, 0, 10)
     assert_size(ds, 1, 10)
     assert_size(ds, 2, 10)
     assert_size(ds, 3, 10)
     f = list(ds)[0][0]
     assert isinstance(f, ss.csr.csr_matrix), type(f)
Example #18
 def test_pandas(self):
     import pandas as pd
     x = pd.DataFrame(np.random.randn(40, 2))
     ds = downhill.Dataset([x], batch_size=10, rng=4)
     assert len(ds._slices) == 4
     assert_size(ds, 0, 10)
     assert_size(ds, 1, 10)
     assert_size(ds, 2, 10)
     assert_size(ds, 3, 10)
     f = list(ds)[0][0]
     assert isinstance(f, pd.DataFrame), type(f)
def lasagne_models(M,
                   P,
                   FE,
                   z,
                   K=20,
                   hh=.0001,
                   ep=5000,
                   d=0,
                   wsp=0.0001,
                   plt=True):
    from paris.signal import bss_eval

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=M.T.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I,
                    num_units=K,
                    nonlinearity=lambda x: psoftplus(x, 3.),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute source modulator
    R = DenseLayer(H,
                   num_units=M.T.shape[1],
                   nonlinearity=lambda x: psoftplus(x, 3.),
                   b=None)

    # Cost function
    cost = (_M*(Th.log(_M+eps) - Th.log( get_output( R)+eps)) - _M + get_output( R)).mean() \
       + wsp*Th.mean( abs( R.W))

    # Train it using Lasagne
    opt = downhill.build('rprop',
                         loss=cost,
                         inputs=[_M],
                         params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T
    o = FE.ife(_r, P)
    sxr = bss_eval(o, 0, array([z]))

    return R, sxr
Example #20
    def test_callable_length(self):
        class Batches:
            called = 0

            def __call__(self):
                self.called += 1
                return 'hello'

            def __len__(self):
                return 10

        batches = Batches()
        ds = downhill.Dataset(batches, iteration_size=10)
        assert list(ds) == ['hello'] * 10
        assert batches.called == 10
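Since a callable data source is yielded verbatim (as the test above shows), a callable used for real training should return arrays matching the loss inputs. A hedged sketch with illustrative names and shapes:

# Sketch of a callable data source that draws a fresh mini-batch on each call
# (function name, shapes and dtype are illustrative).
import numpy as np
import downhill

def sample_batch():
    return [np.random.randn(32, 2).astype('float32')]

ds = downhill.Dataset(sample_batch, iteration_size=10)  # 10 calls per pass
for batch in ds:
    assert batch[0].shape == (32, 2)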
Example #21
 def test_minimize(self):
     x = theano.shared(-3 + np.zeros((2, ), 'f'), name='x')
     data = downhill.Dataset(np.zeros((1, 1), 'f'), batch_size=1)
     data._slices = [[]]
     downhill.minimize(
         (100 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2).sum(),
         data,
         algo='nag',
         learning_rate=0.001,
         momentum=0.9,
         patience=1,
         min_improvement=0.1,
         max_gradient_norm=1,
     )
     assert np.allclose(x.get_value(), [1, 1]), x.get_value()
def rnn_model(M,
              K=20,
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rmsprop',
              t=5):
    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I,
                    num_units=K,
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = RecurrentLayer(H,
                       num_units=M.T.shape[1],
                       nonlinearity=lambda x: psoftplus(x, spb),
                       gradient_steps=t,
                       b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean( _M*(Th.log( _M+eps) - Th.log( Ro)) - _M + Ro)  \
      + hsp*Th.mean( get_output( H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T

    return _r, (R.W_in_to_hid.get_value(), R.W_hid_to_hid.get_value()), er, _h
def nn_model(M,
             K=20,
             hh=.0001,
             ep=5000,
             d=0,
             wsp=0.0001,
             hsp=0,
             spb=3,
             bt=0,
             al='rprop'):

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K, nonlinearity=act, b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = DenseLayer(H, num_units=M.T.shape[1], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean( _M*(Th.log( _M+eps) - Th.log( Ro)) - _M + Ro)  \
      + wsp*Th.mean( abs( R.W[0])) + hsp*Th.mean( get_output( H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float64)).T
    _h = nget(H, _M, M.T.astype(float64)).T

    return _r, R.W.get_value(), er, _h
y_hat = T.nnet.softmax(z2)
loss_reg = 1. / batch_size * reg_lambda / 2 * (T.sum(T.sqr(W1)) +
                                               T.sum(T.sqr(W2)))
loss = T.nnet.categorical_crossentropy(y_hat, y).mean() + loss_reg
prediction = T.argmax(y_hat, axis=1)
predict = theano.function([X], prediction)

# Store the training and validation loss
train_loss = []
validation_loss = []
opt = downhill.build('sgd', loss=loss)
# Set up training and validation dataset splits; use only one example per batch
# and only one batch per step/epoch
# Use everything except the last 1000 examples for training
train = downhill.Dataset([train_X[:-1000], train_y_onehot[:-1000]],
                         batch_size=batch_size,
                         iteration_size=1)
# Use the last 1000 examples for validation
valid = downhill.Dataset([train_X[-1000:], train_y_onehot[-1000:]])
#SGD
iterations = 0
for tm, vm in opt.iterate(train, valid, patience=10000):
    iterations += 1
    # Record the training and validation loss
    train_loss.append(tm['loss'])
    validation_loss.append(vm['loss'])
    if iterations > 1000:
        break


Example #25
def lasagne_separate(M,
                     P,
                     FE,
                     W1,
                     W2,
                     z1,
                     z2,
                     hh=.0001,
                     ep=5000,
                     d=0,
                     wsp=.0001,
                     plt=True):
    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.astype(float32))

    # Input is the learned dictionary set
    lW = hstack((W1.T, W2.T)).astype(float32)
    _lW = Th.matrix('_lW')
    fI = InputLayer(shape=lW.shape, input_var=_lW)

    # Split in two paths
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, d)
    dfW2 = DropoutLayer(fW2, d)

    # Compute source modulators. The LSTM variant below was left commented out
    # in the original; dense modulator layers are assumed so that R1/R2 (and
    # the R1.W / R2.W terms in the cost) are defined.
    # R1 = LSTMLayer(dfW1, N_sequence); R2 = LSTMLayer(dfW2, N_sequence)
    N_sequence = 10
    R1 = DenseLayer(dfW1, num_units=M.shape[1],
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)
    R2 = DenseLayer(dfW2, num_units=M.shape[1],
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    cost = (
        _M * (Th.log(_M + eps) - Th.log(get_output(R) + eps)) - _M +
        get_output(R)).mean() + wsp * (Th.mean(abs(R1.W)) + Th.mean(abs(R2.W)))

    # Train it using Lasagne
    opt = downhill.build('rprop',
                         loss=cost,
                         inputs=[_lW],
                         params=get_all_params(R))
    train = downhill.Dataset(lW, batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get outputs
    _r = nget(R, _lW, lW) + eps
    _r1 = nget(R1, _lW, lW)
    _r2 = nget(R2, _lW, lW)
    o1 = FE.ife(_r1 * (M / _r), P)
    o2 = FE.ife(_r2 * (M / _r), P)
    sxr = bss_eval(o1, 0, vstack((z1, z2))) + bss_eval(o2, 1, vstack((z1, z2)))

    return o1, o2, (array(sxr[:3]) + array(sxr[3:])) / 2.
def rnn_sep(M,
            W1,
            W2,
            hh=.0001,
            ep=5000,
            d=0,
            sp=.0001,
            spb=3,
            al='rmsprop',
            t=5):
    # Get dictionary shapes
    K = [W1[0].shape[0], W2[0].shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = RecurrentLayer(dfW1,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W1[0].astype(float32),
                        W_hid_to_hid=W1[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)
    R2 = RecurrentLayer(dfW2,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W2[0].astype(float32),
                        W_hid_to_hid=W2[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=5)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = (_M*(Th.log(_M+eps) - Th.log( Ro+eps)) - _M + Ro).mean() \
       + sp*Th.mean( abs( H)) + 0*Th.mean( dum)

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T

    return _r, _r1, _r2, er
Example #27
 def test_name(self):
     ds = downhill.Dataset([np.random.randn(40, 2)], name='foo')
     assert ds.name == 'foo'
     ds = downhill.Dataset([np.random.randn(40, 2)])
     assert ds.name.startswith('dataset')
     assert ds.name[7:].isdigit()
def cnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    # Choose reasonable cuDNN convolution algorithms
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = theano.shared(M3.astype(float32))

    # Get dictionary shapes
    K = [W1.shape[1], W2.shape[1]]
    T = W1.shape[2]

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(1, K[0] + K[1], M.T.shape[0]).astype(float32))
    fI = InputLayer(shape=(1, K[0] + K[1], M.T.shape[0]), input_var=H)

    # Split in two pathways
    H1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    H2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Compute source modulators using previously learned convolutional dictionaries
    R1 = Conv1DLayer(H1,
                     filter_size=T,
                     W=W1,
                     num_filters=M.shape[0],
                     pad='same',
                     nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)
    R2 = Conv1DLayer(H2,
                     filter_size=T,
                     W=W2,
                     num_filters=M.shape[0],
                     pad='same',
                     nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    dum = Th.vector('dum')
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum) + sp * Th.mean(abs(H))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([0]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = squeeze(nget(R, dum, array([0]).astype(float32))) + eps
    _r1 = squeeze(nget(R1, dum, array([0]).astype(float32)))
    _r2 = squeeze(nget(R2, dum, array([0]).astype(float32)))

    return _r, _r1, _r2, er
Example #29
    def test_callable(self):
        def batches():
            return 'hello'

        ds = downhill.Dataset(batches, iteration_size=10)
        assert list(ds) == ['hello'] * 10
Example #30
 def test_rng(self):
     ds = downhill.Dataset([np.random.randn(40, 2)], rng=4)
     assert ds.rng.randint(10) == 7
     ds = downhill.Dataset([np.random.randn(40, 2)],
                           rng=np.random.RandomState(4))
     assert ds.rng.randint(10) == 7