Example #1
	def fit(self, train_triples, valid_triples, hparams, n=0,m=0,l=0, scorer = None):

		#Set input_dimensions:
		if n == 0: #No given dimensions, can be useful for transparent prediction of entities/rels not seen in train
			self.set_dims(train_triples, hparams)
		else:
			self.n, self.m, self.l, self.k = n, m, l, hparams.embedding_size

		#Define the downhill loss corresponding to the input dimensions
		self.setup_params_for_train(train_triples, valid_triples, hparams)
		
		#get the loss inputs:
		train_vals, train_symbs, valid_vals = self.get_loss_args_and_symb_vars(train_triples, valid_triples, hparams)

		opt = downhill.build(hparams.learning_rate_policy, loss=self.loss_to_opt, inputs=train_symbs, monitor_gradients=True)

		train_vals = downhill.Dataset(train_vals, name = 'train')


		#Main SGD loop
		it = 0
		best_valid_mrr = -1
		best_valid_ap = -1
		for tm, vm in opt.iterate(train_vals, None,
				max_updates=hparams.max_iter,
				validate_every=9999999, 				#I handle validation myself, using validation metrics instead of the loss
				patience=9999999,						#Number of tolerated validation-loss improvements smaller than min_improvement
				max_gradient_norm=1,          			# Prevent gradient explosion!
				learning_rate=hparams.learning_rate):


			if it % hparams.valid_scores_every == 0 and scorer is not None:
				if valid_triples is not None:
					logger.info("Validation metrics:")
					res = scorer.compute_scores(self, self.name, hparams, valid_triples)
					cv_res = CV_Results()
					cv_res.add_res(res, self.name, hparams.embedding_size, hparams.lmbda, self.nb_params)


					if scorer.compute_ranking_scores:
						metrics = cv_res.print_MRR_and_hits()
						
						#Early stopping on filtered MRR
						if best_valid_mrr >= metrics[self.name][2]:
							logger.info("Validation filtered MRR decreased, stopping here.")
							break
						else:
							best_valid_mrr = metrics[self.name][2]
					else:
						logger.info("Validation AP: " + str(res.ap))
						#Early stopping on Average Precision
						if best_valid_ap >= res.ap:
							logger.info("Validation AP decreased, stopping here.")
							break
						else:
							best_valid_ap = res.ap

			it += 1
			if it >= hparams.max_iter: #Avoid downhill resetting the parameters when max_iter is reached
				break
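A minimal sketch of the same manual early-stopping pattern on a toy quadratic loss (illustrative only; it assumes numpy, theano and downhill are importable, and uses a plain shared parameter plus a training monitor as a stand-in for the embedding model and validation metrics above):

import numpy as np
import theano
import theano.tensor as TT
import downhill

w = theano.shared(np.array([5.0, -3.0], 'f'), name='w')
loss = TT.sqr(w).sum()  # toy loss, minimum at w = 0

opt = downhill.build('sgd', loss=loss, params=[w],
                     monitors=[('wnorm', TT.sqrt(TT.sqr(w).sum()))])

best_metric = np.inf
it = 0
for tm, _ in opt.iterate([[]],
                         validate_every=9999999,  # validation is handled by hand below
                         patience=9999999,
                         learning_rate=0.1):
    if it % 10 == 0:
        metric = tm['wnorm']        # stand-in for a real validation metric
        if metric >= best_metric:   # stop as soon as it no longer improves
            break
        best_metric = metric
    it += 1
    if it >= 500:
        break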
Example #3
    def fit(self, train, entities, relations, param):

        self.n, self.m, self.l, self.k = entities, relations, entities, param.k
        self.setup(param)

        train, inputs = self.minibatch(train, param)
        opt = downhill.build(param.sgd,
                             loss=self.loss_opt,
                             inputs=inputs,
                             monitor_gradients=True)

        train = downhill.Dataset(train, name='train')

        it = 0
        for _ in opt.iterate(train,
                             None,
                             max_updates=param.epoch,
                             validate_every=10,
                             patience=5,
                             max_gradient_norm=1,
                             learning_rate=param.lr):

            it += 1
            if it >= param.epoch:
                break
def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):

    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Get layer sizes
    K = []
    for i in range(len(W1)):
        K.append([W1[i].shape[0], W2[i].shape[0]])
    K.append([M.T.shape[1], M.T.shape[1]])

    # We have weights to discover, init = 2/(Nin+Nout)
    H = theano.shared(
        sqrt(2. / (K[0][0] + K[0][1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0][0] + K[0][1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0][0] + K[0][1]), input_var=H)

    # Split in two pathways, one for each source's autoencoder
    H1 = (len(W1) + 1) * [None]
    H2 = (len(W1) + 1) * [None]
    H1[0] = SliceLayer(fI, indices=slice(0, K[0][0]), axis=1)
    H2[0] = SliceLayer(fI, indices=slice(K[0][0], K[0][0] + K[0][1]), axis=1)

    # Put the subsequent layers
    for i in range(len(W1)):
        H1[i + 1] = DenseLayer(H1[i],
                               num_units=K[i + 1][0],
                               W=W1[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)
        H2[i + 1] = DenseLayer(H2[i],
                               num_units=K[i + 1][1],
                               W=W2[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum)
    for i in range(len(H1) - 1):
        cost += sp * Th.mean(abs(get_output(H1[i]))) + sp * Th.mean(
            abs(get_output(H2[i])))
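    # (For reference: the first term is the generalized KL / I-divergence
    #  mean(_M*(log _M - log Ro) - _M + Ro) between the input magnitudes and
    #  the summed reconstruction, the "0 * Th.mean(dum)" term only ties the
    #  unused dummy input into the graph, and the sp terms add L1 sparsity
    #  penalties on the hidden-layer activations.)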

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(H1[-1], dum, array([0]).astype(float64)).T
    _r2 = nget(H2[-1], dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
Example #5
    def End2end_Early_stopping(self, numpy_rng, dataset, n_validate, data_name,
                               batch_size, end2end_lr, algo, norm, patience,
                               validation):

        train_X, test_X, actual = dataset
        valid_x = train_X.get_value()[:n_validate]
        train_x = train_X.get_value()[n_validate:]
        #train_x = train_x[:100]

        "for compute tm and vm before optimization process"
        t = theano.shared(numpy.asarray(train_x, dtype=theano.config.floatX),
                          borrow=True)
        v = theano.shared(numpy.asarray(valid_x, dtype=theano.config.floatX),
                          borrow=True)

        "Use downhill for training network"

        opt = downhill.build(algo=algo,
                             params=self.params,
                             loss=self.end2end_cost,
                             inputs=[self.x])

        train = downhill.Dataset(train_x, batch_size=batch_size, rng=numpy_rng)
        valid = downhill.Dataset(valid_x,
                                 batch_size=len(valid_x),
                                 rng=numpy_rng)

        "for monitoring before optimization process"
        stop_ep = 0
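        # (Downhill's built-in early stopping: the validation loss is evaluated
        # every `validate_every` iterations, and training stops once `patience`
        # consecutive validations improve it by less than `min_improvement`.)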

        for tm1, vm1 in opt.iterate(
                train,
                valid,
                patience=patience,
                validate_every=validation,
                min_improvement=1e-3,
                #learning_rate =  end2end_lr,
                momentum=0.0,
                nesterov=False):

            stop_ep = stop_ep + 1
            #
            ##            "******* Classification Results after End to End training ******"
            #            if ((stop_ep%1 == 0) and (stop_ep > 0)):
            #                lof,cen,dis,kde,svm05,svm01,ae = self.Compute_AUC_Hidden(train_X, test_X, actual, norm, data_name)
            #                a = [stop_ep, lof, cen, dis, kde, svm05, svm01, ae]
            #            monitor = np.append(monitor, a)

            if (stop_ep >= 1000):
                break

        #Plotting AUC and save to csv file


#        monitor = np.reshape(monitor, (-1,8))
#        Plotting_Monitor(monitor, 0.4, 1.0, data_name, path)
#        np.savetxt(path + data_name + "_monitor_auc.csv", monitor, delimiter=",", fmt='%f' )

        return [stop_ep, vm1['loss'], tm1['loss']]
def dnn_model(M,
              K=[20, 20],
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rprop'):

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # Setup the layers
    L = K + [M.T.shape[1]]
    H = len(L) * [None]
    Hd = len(L) * [None]

    # First layer
    H[0] = DenseLayer(I, num_units=K[0], nonlinearity=act, b=None)

    # All the rest
    for k in range(1, len(L)):
        # Optional dropout
        Hd[k - 1] = DropoutLayer(H[k - 1], d)

        # Next layer
        H[k] = DenseLayer(Hd[k - 1], num_units=L[k], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(H[-1]) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro)
    for k in range(len(L) - 1):
        cost += wsp * Th.mean(abs(H[k].W)) + hsp * Th.mean(get_output(H[k]))

    # Train it using Lasagne
    opt = downhill.build(al,
                         loss=cost,
                         inputs=[_M],
                         params=get_all_params(H[-1]))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    h = [nget(H[k], _M, M.T.astype(float64)).T for k in range(len(L))]
    w = [H[k].W.get_value() for k in range(len(L))]

    return h, w, er
Example #7
def build_rosen(algo, name=True, monitor_gradients=False):
    x = theano.shared(-3 + np.zeros((2, ), 'f'), name='x' if name else None)
    return downhill.build(
        algo,
        loss=(100 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2).sum(),
        monitors=[('x', x[:-1].sum()), ('y', x[1:].sum())],
        monitor_gradients=monitor_gradients,
    ), [[]]
def downhill_models(M,
                    P,
                    FE,
                    z,
                    K=20,
                    hh=.001,
                    ep=5000,
                    dp=0,
                    wsp=.001,
                    plt=False):
    from paris.signal import bss_eval

    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Shared variables to use
    x = Th.matrix('x')
    y = theano.shared(M.astype(theano.config.floatX))
    d = theano.shared(float32(dp))

    # Network weights
    W0 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(K, M.shape[0]).astype(theano.config.floatX))
    W1 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(M.shape[0], K).astype(theano.config.floatX))

    # First layer is the transform to a non-negative subspace
    h = psoftplus(W0.dot(x), 3.)

    # Dropout
    if dp > 0:
        h *= (1. / (1. - d) * (rng.uniform(size=h.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    # Second layer reconstructs the input
    r = psoftplus(W1.dot(h), 3.)

    # Approximate input using KL-like distance
    cost = Th.mean(y * (Th.log(y + eps) - Th.log(r + eps)) - y +
                   r) + wsp * Th.mean(abs(W1))

    # Make an optimizer and define the training input
    opt = downhill.build('rprop', loss=cost, inputs=[x], params=[W0, W1])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    downhill_train(opt, train, hh, ep, None)

    # Get approximation
    d = 0
    _, _r = theano.function(inputs=[x], outputs=[h, r],
                            updates=[])(M.astype(theano.config.floatX))
    o = FE.ife(_r, P)
    sxr = bss_eval(o, 0, array([z]))

    return W1.get_value(), sxr
Example #10
def build_rosen(algo):
    x = theano.shared(-3 + np.zeros((2, ), 'f'), name='x')
    return downhill.build(
        algo,
        loss=(100 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2).sum(),
        params=[x],
        inputs=[],
        updates=(),
        monitors=[('x', x[:-1].sum()), ('y', x[1:].sum())]), [[]]
def cnn_model(M,
              K=20,
              T=1,
              hh=.0001,
              ep=5000,
              d=0,
              hsp=0.0001,
              wsp=0,
              spb=3,
              bt=0,
              al='rprop'):
    # Facilitate reasonable convolutions core
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = Th.tensor3('_M')

    # Input and forward transform
    I = InputLayer(shape=M3.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H = Conv1DLayer(I,
                    filter_size=T,
                    num_filters=K,
                    pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Upper layer is the synthesizer
    R = Conv1DLayer(H,
                    filter_size=T,
                    num_filters=M.shape[0],
                    pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean( _M*(Th.log( _M+eps) - Th.log( Ro)) - _M + Ro) \
      + hsp*Th.mean( get_output( H))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M3, batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation and hidden state
    _r = squeeze(nget(R, _M, M3))
    _h = squeeze(nget(H, _M, M3))

    return _r, R.W.get_value(), er, _h
Example #12
    def End2end_Early_stopping(self, numpy_rng, dataset, n_validate, data_name,
                               batch_size, end2end_lr, algo, norm, patience,
                               validation):

        train_X, test_X, actual = dataset
        valid_x = train_X.get_value()[:n_validate]
        train_x = train_X.get_value()[n_validate:]
        "for compute tm and vm before optimization process"

        "Training network by downhill"
        #'adadelta' 'adagrad (default 0.01)' 'adam''esgd' 'nag''rmsprop' 'rprop' 'sgd'
        opt = downhill.build(algo=algo,
                             params=self.params,
                             loss=self.end2end_cost,
                             inputs=[self.x])
        train = downhill.Dataset(train_x, batch_size=batch_size, rng=numpy_rng)
        valid = downhill.Dataset(valid_x,
                                 batch_size=len(valid_x),
                                 rng=numpy_rng)

        "***** Monitor before optimization *****"
        stop_ep = 0
        RE = np.empty([0, 3])

        for tm, vm in opt.iterate(
                train,  # 5, 5, 1e-2, 0.9
                valid,
                patience=patience,  # 10
                validate_every=validation,  # 5
                min_improvement=1e-3,  # 1e-3
                #learning_rate =  end2end_lr,  # 1e-4
                momentum=0.0,
                nesterov=False):
            stop_ep = stop_ep + 1

            re = np.column_stack([stop_ep, vm['loss'], tm['loss']])
            RE = np.append(RE, re)

            if (stop_ep >= 1000):
                break

        RE = np.reshape(RE, (-1, 3))
        Plotting_End2End_RE(RE, stop_ep, 0.0, 0.4, data_name, path)
        np.savetxt(path + data_name + "_training_error1.csv",
                   RE,
                   delimiter=",",
                   fmt='%f')

        np.set_printoptions(precision=6, suppress=True)
        print("\n ", RE[stop_ep - 1])

        return RE[stop_ep - 1]
def lasagne_models(M,
                   P,
                   FE,
                   z,
                   K=20,
                   hh=.0001,
                   ep=5000,
                   d=0,
                   wsp=0.0001,
                   plt=True):
    from paris.signal import bss_eval

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=M.T.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I,
                    num_units=K,
                    nonlinearity=lambda x: psoftplus(x, 3.),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute source modulator
    R = DenseLayer(H,
                   num_units=M.T.shape[1],
                   nonlinearity=lambda x: psoftplus(x, 3.),
                   b=None)

    # Cost function
    cost = (_M*(Th.log(_M+eps) - Th.log( get_output( R)+eps)) - _M + get_output( R)).mean() \
       + wsp*Th.mean( abs( R.W))

    # Train it using Lasagne
    opt = downhill.build('rprop',
                         loss=cost,
                         inputs=[_M],
                         params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T
    o = FE.ife(_r, P)
    sxr = bss_eval(o, 0, array([z]))

    return R, sxr
Example #14
    def pretrain_Early_stopping(self, numpy_rng, train_set, n_validate,
                                data_name, batch_size, pre_lr, corruptions):

        RE = np.empty([10000, self.n_layers])
        stop_epoch = np.empty([self.n_layers])

        for i in range(self.n_layers):
            cost, updates = self.dA_layers[i].get_cost_updates(
                corruptions[i], pre_lr)
            if (i == 0):
                train_x1 = train_set.get_value()
            else:
                train_x1 = self.get_hidden_i(train_set, i - 1)

            valid_x = train_x1[:n_validate]
            train_x = train_x1[n_validate:]
            # Algorithms: 'adadelta', 'adagrad' (default 0.01), 'adam', 'esgd', 'nag', 'rmsprop', 'rprop', 'sgd'
            opt = downhill.build(algo='sgd',
                                 params=self.dA_layers[i].params,
                                 loss=cost)
            train = downhill.Dataset(train_x,
                                     batch_size=batch_size,
                                     rng=numpy_rng)
            valid = downhill.Dataset(valid_x,
                                     batch_size=len(valid_x),
                                     rng=numpy_rng)

            epoch = 0
            re = np.empty([10000])
            for tm1, vm1 in opt.iterate(
                    train,
                    valid,
                    patience=100,  #100
                    validate_every=5,  #5
                    min_improvement=1e-3,  #4
                    learning_rate=pre_lr,  #1e-2
                    momentum=0.0,
                    nesterov=False):
                re[epoch] = tm1['loss']
                epoch = epoch + 1
                if (epoch == 200):
                    break

            RE[:, i] = re
            stop_epoch[i] = epoch

        print(' + Stopping epoch:', stop_epoch)
        Plotting_Pre_RE1(RE, stop_epoch, self.n_layers, 0.0, 0.1, batch_size,
                         data_name, path)
def nn_model(M,
             K=20,
             hh=.0001,
             ep=5000,
             d=0,
             wsp=0.0001,
             hsp=0,
             spb=3,
             bt=0,
             al='rprop'):

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K, nonlinearity=act, b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = DenseLayer(H, num_units=M.T.shape[1], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean( _M*(Th.log( _M+eps) - Th.log( Ro)) - _M + Ro)  \
      + wsp*Th.mean( abs( R.W[0])) + hsp*Th.mean( get_output( H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float64)).T
    _h = nget(H, _M, M.T.astype(float64)).T

    return _r, R.W.get_value(), er, _h
def rnn_model(M,
              K=20,
              hh=.0001,
              ep=5000,
              d=0,
              wsp=0.0001,
              hsp=0,
              spb=3,
              bt=0,
              al='rmsprop',
              t=5):
    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I,
                    num_units=K,
                    nonlinearity=lambda x: psoftplus(x, spb),
                    b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = RecurrentLayer(H,
                       num_units=M.T.shape[1],
                       nonlinearity=lambda x: psoftplus(x, spb),
                       gradient_steps=t,
                       b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean( _M*(Th.log( _M+eps) - Th.log( Ro)) - _M + Ro)  \
      + hsp*Th.mean( get_output( H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T

    return _r, (R.W_in_to_hid.get_value(), R.W_hid_to_hid.get_value()), er, _h
Example #17
def build_factor(algo):
    a = np.arange(1000).reshape((100, 10)).astype('f')
    b = 0.1 + np.zeros((10, 100), 'f')

    x = TT.matrix('x')
    u = theano.shared(a, name='u')
    v = theano.shared(0.1 + b, name='v')
    return downhill.build(
        algo,
        loss=TT.sum(TT.sqr(x - TT.dot(u, v))),
        monitors=[
            ('u<1', (u < 1).mean()),
            ('u<-1', (u < -1).mean()),
            ('v<1', (v < 1).mean()),
            ('v<-1', (v < -1).mean()),
        ]), [[np.dot(a, b) + np.random.randn(100, 100).astype('f')]
             for _ in range(10)]
def build_model(algo):
    loss_value = []
    W1.set_value(W1_val)
    b1.set_value(b1_val)
    W2.set_value(W2_val)
    b2.set_value(b2_val)
    opt = downhill.build(algo, loss=loss)
    train = downhill.Dataset([train_X[:-1000], train_y_onehot[:-1000]],
                             batch_size=1,
                             iteration_size=1)
    valid = downhill.Dataset([train_X[-1000:], train_y_onehot[-1000:]])
    iterations = 0
    for tm, vm in opt.iterate(train, valid, patience=1000):
        iterations += 1
        loss_value.append(vm['loss'])
        if iterations > 1000:
            break
    return loss_value
Example #21
def build(algo, init):
    '''Build and return an optimizer for the rosenbrock function.

    In downhill, an optimizer can be constructed using the build() top-level
    function. This function requires several Theano quantities such as the loss
    being optimized and the parameters to update during optimization.
    '''
    x = theano.shared(np.array(init, 'f'), name='x')
    n = 0.1 * RandomStreams().normal((len(init) - 1, ))
    monitors = []
    if len(init) == 2:
        # this gives us access to the x and y locations during optimization.
        monitors.extend([('x', x[:-1].sum()), ('y', x[1:].sum())])
    return downhill.build(
        algo,
        loss=(n + 100 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2).sum(),
        params=[x],
        monitors=monitors,
        monitor_gradients=True)
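A brief usage sketch of this helper (illustrative only, mirroring the NAG example that appears later in this collection):

opt = build('nag', [-1.0, 0.0])
losses = []
for tm, _ in opt.iterate([[]],
                         learning_rate=0.001,
                         momentum=0.95,
                         max_gradient_norm=100):
    losses.append(tm['loss'])
    if len(losses) == 300:
        break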
Example #22
def build(algo, init):
    '''Build and return an optimizer for the rosenbrock function.

    In downhill, an optimizer can be constructed using the build() top-level
    function. This function requires several Theano quantities such as the loss
    being optimized and the parameters to update during optimization.
    '''
    x = theano.shared(np.array(init, 'f'), name='x')
    monitors = []
    if len(init) == 2:
        # this gives us access to the x and y locations during optimization.
        monitors.extend([('x', x[:-1].sum()), ('y', x[1:].sum())])
    return downhill.build(
        algo,
        loss=(100 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2).sum(),
        params=[x],
        inputs=[],
        monitors=monitors,
        monitor_gradients=True)
Example #23
    def itertrain(self, train, valid=None, **kwargs):
        '''Train a model using a training and validation set.

        This method yields a series of monitor values to the caller. After every
        iteration, a pair of monitor dictionaries is generated: one evaluated on
        the training dataset, and another evaluated on the validation dataset.
        The validation monitors might not be updated during every training
        iteration; in this case, the most recent validation monitors will be
        yielded along with the training monitors.

        Parameters
        ----------
        train : :class:`Dataset <theanets.dataset.Dataset>`
            A set of training data for computing updates to model parameters.
        valid : :class:`Dataset <theanets.dataset.Dataset>`
            A set of validation data for computing monitor values and
            determining when the loss has stopped improving.

        Yields
        ------
        training : dict
            A dictionary mapping monitor names to values, evaluated on the
            training dataset.
        validation : dict
            A dictionary containing monitor values evaluated on the validation
            dataset.
        '''
        for monitors in downhill.build(
                algo=self.algo,
                loss=self.network.loss(**kwargs),
                updates=self.network.updates(**kwargs),
                monitors=self.network.monitors(**kwargs),
                inputs=self.network.variables,
                params=self.network.params,
                monitor_gradients=kwargs.get('monitor_gradients', False),
        ).iterate(train, valid=valid, **kwargs):
            yield monitors
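A hedged sketch of how a caller might consume itertrain (the `trainer`, `train_data`, and `valid_data` names are placeholders rather than objects from the original code; both monitor dictionaries are assumed to expose a 'loss' entry, as downhill's monitors do):

best_loss = float('inf')
for train_monitors, valid_monitors in trainer.itertrain(train_data, valid=valid_data):
    print('train loss {:.4f}, valid loss {:.4f}'.format(
        train_monitors['loss'], valid_monitors['loss']))
    if valid_monitors['loss'] < best_loss:
        best_loss = valid_monitors['loss']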
Example #25
import downhill
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import theano

x = theano.shared(np.array([-1, 0], 'f'), name='x')

opt = downhill.build('nag',
                     loss=(100 * (x[1:] - x[:-1]**2)**2 +
                           (1 - x[:-1])**2).sum(),
                     params=[x],
                     inputs=[],
                     monitors=[('x', x[:-1].sum()), ('y', x[1:].sum())],
                     monitor_gradients=True)

xs, ys, loss = [], [], []
for tm, _ in opt.iterate([[]],
                         learning_rate=0.001,
                         momentum=0.95,
                         max_gradient_norm=100):
    xs.append(tm['x'])
    ys.append(tm['y'])
    loss.append(tm['loss'])
    if len(loss) == 300:
        break

ax = plt.axes(projection='3d')

c = '#d62728'
ax.plot(xs, ys, loss, color=c)
def downhill_separate(M,
                      P,
                      FE,
                      W1,
                      W2,
                      z1,
                      z2,
                      hh=.001,
                      ep=5000,
                      d=0,
                      wsp=.0001,
                      plt=True):
    from paris.signal import bss_eval

    # Get dictionary sizes
    K = [W1.shape[1], W2.shape[1]]

    # Cache some things
    y = Th.matrix('y')
    w1 = theano.shared(W1.astype(theano.config.floatX), 'w1')
    w2 = theano.shared(W2.astype(theano.config.floatX), 'w2')

    # Activations to learn
    h1 = theano.shared(
        sqrt(2. / (K[0] + M.shape[1])) *
        random.randn(K[0], M.shape[1]).astype(theano.config.floatX))
    h2 = theano.shared(
        sqrt(2. / (K[1] + M.shape[1])) *
        random.randn(K[1], M.shape[1]).astype(theano.config.floatX))

    # Dropout
    if d > 0:
        dw1 = w1 * 1. / (1. - d) * (rng.uniform(size=w1.shape) > d).astype(
            theano.config.floatX)
        dw2 = w2 * 1. / (1. - d) * (rng.uniform(size=w2.shape) > d).astype(
            theano.config.floatX)
    else:
        dw1 = w1
        dw2 = w2

    # Approximate input
    r1 = psoftplus(dw1.dot(h1), 3.)
    r2 = psoftplus(dw2.dot(h2), 3.)
    r = r1 + r2

    # KL-distance to input
    cost = Th.mean( y * (Th.log( y+eps) - Th.log( r+eps)) - y + r) \
       + wsp*(Th.mean( abs( h1)) + Th.mean( abs( h2)))

    # Make it callable and derive updates
    ffwd_f = theano.function(inputs=[], outputs=[r1, r2, h1, h2], updates=[])

    # Make an optimizer and define the inputs
    opt = downhill.build('rprop', loss=cost, inputs=[y], params=[h1, h2])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    cst = downhill_train(opt, train, hh, ep, None)

    # So what happened?
    d = 0
    _r1, _r2, _h1, _h2 = ffwd_f()
    _r = _r1 + _r2 + eps
    o1 = FE.ife(_r1 * (M / _r), P)
    o2 = FE.ife(_r2 * (M / _r), P)
    sxr = bss_eval(o1, 0, vstack((z1, z2))) + bss_eval(o2, 1, vstack((z1, z2)))

    # Return things of note
    return o1, o2, (array(sxr[:3]) + array(sxr[3:])) / 2.
batch_size = 1
z1 = X.dot(W1) + b1
a1 = T.tanh(z1)
z2 = a1.dot(W2) + b2
y_hat = T.nnet.softmax(z2)
loss_reg = 1. / batch_size * reg_lambda / 2 * (T.sum(T.sqr(W1)) +
                                               T.sum(T.sqr(W2)))
loss = T.nnet.categorical_crossentropy(y_hat, y).mean() + loss_reg

prediction = T.argmax(y_hat, axis=1)
predict = theano.function([X], prediction)

train_loss = []
validation_loss = []

opt = downhill.build('adadelta', loss=loss)
train = downhill.Dataset([train_X[:-1000], train_y_onehot[:-1000]],
                         batch_size=batch_size,
                         iteration_size=1)
valid = downhill.Dataset([train_X[-1000:], train_y_onehot[-1000:]])
iterations = 0
for tm, vm in opt.iterate(train, valid, patience=1000):
    iterations += 1
    train_loss.append(tm['loss'])
    validation_loss.append(vm['loss'])
    if iterations > 5000:
        break

x_min, x_max = train_X[:, 0].min() - 0.5, train_X[:, 0].max() + 0.5
y_min, y_max = train_X[:, 1].min() - 0.5, train_X[:, 1].max() + 0.5
x_mesh, y_mesh = numpy.meshgrid(numpy.arange(x_min, x_max, 0.01),
                                numpy.arange(y_min, y_max, 0.01))
batch_size = 1
#Our Loss function
z1 = X.dot(W1) + b1
a1 = T.tanh(z1)
z2 = a1.dot(W2) + b2
y_hat = T.nnet.softmax(z2)
loss_reg = 1./batch_size * reg_lambda/2 * (T.sum(T.sqr(W1)) + T.sum(T.sqr(W2)))
loss = T.nnet.categorical_crossentropy(y_hat, y).mean() + loss_reg
prediction = T.argmax(y_hat, axis=1)
predict = theano.function([X], prediction)


#Store the training and validation loss
train_loss = []
validation_loss = []
opt = downhill.build('sgd', loss=loss)
#Set up training and validation dataset splits, use only one example in a batch 
#and use only one batch per step/epoch
#Use everything except last 1000 examples for training
train = downhill.Dataset([train_X[:-1000], train_y_onehot[:-1000]],
                         batch_size=batch_size,
                         iteration_size=1)
#Use last 1000 examples for validation
valid = downhill.Dataset([train_X[-1000:], train_y_onehot[-1000:]])
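#(With batch_size=1 and iteration_size=1, each tm/vm pair yielded below
# corresponds to a single gradient update computed on a single training example.)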
#SGD
iterations = 0
for tm, vm in opt.iterate(train, valid, patience=10000):
    iterations += 1
    # Record the training and validation loss
    train_loss.append(tm['loss'])
    validation_loss.append(vm['loss'])
    if iterations > 1000:
        break
def cnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    # Facilitate reasonable convolutions core
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = theano.shared(M3.astype(float32))

    # Get dictionary shapes
    K = [W1.shape[1], W2.shape[1]]
    T = W1.shape[2]

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(1, K[0] + K[1], M.T.shape[0]).astype(float32))
    fI = InputLayer(shape=(1, K[0] + K[1], M.T.shape[0]), input_var=H)

    # Split in two pathways
    H1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    H2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Compute source modulators using previously learned convolutional dictionaries
    R1 = Conv1DLayer(H1,
                     filter_size=T,
                     W=W1,
                     num_filters=M.shape[0],
                     pad='same',
                     nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)
    R2 = Conv1DLayer(H2,
                     filter_size=T,
                     W=W2,
                     num_filters=M.shape[0],
                     pad='same',
                     nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    dum = Th.vector('dum')
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M +
                   Ro) + 0 * Th.mean(dum) + sp * Th.mean(abs(H))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([0]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = squeeze(nget(R, dum, array([0]).astype(float32))) + eps
    _r1 = squeeze(nget(R1, dum, array([0]).astype(float32)))
    _r2 = squeeze(nget(R2, dum, array([0]).astype(float32)))

    return _r, _r1, _r2, er
Example #30
def build_rosen(algo):
    x = theano.shared(-3 + np.zeros((2, ), 'f'), name='x')
    return downhill.build(
        algo,
        loss=(100 * (x[1:] - x[:-1]**2)**2 + (1 - x[:-1])**2).sum(),
        monitors=[('x', x[:-1].sum()), ('y', x[1:].sum())]), [[]]
def nn_model_ae(x, Kx, learning_rate=.001, ep=5000, dp=0.0, spb=3, al='rprop'):
    # Train NSAE for Source 1
    # Define NMF network

    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Latent dimensions

    def pl():
        clf()
        gcf().set_size_inches(6, 2)
        semilogy(cst)
        grid('on')
        title('Cost: %f, Epoch: %d' % (cst[-1], len(cst)))
        drawnow()

    # Dropout parameters
    d = theano.shared(float64(dp))

    # I/O container
    X = theano.tensor.matrix('X')

    # Weight matrices
    W1x = theano.shared(random.rand(Kx, x.shape[0]).astype(float64))
    W2x = theano.shared(random.rand(x.shape[0], Kx).astype(float64))

    # Get latent variables
    Hx = psoftplus(W1x.dot(X), spb)
    # Hx = act( W1x.dot( X))

    # Dropout
    if dp > 0:
        Hx *= (1. / (1. - d) * (rng.uniform(size=Hx.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    # Get approximation
    Zx = psoftplus(W2x.dot(Hx), spb)
    # Zx = act( W2x.dot( Hx))

    # Low rank reconstruction should match smoothed amplitudes, use sparse W1
    cost = theano.tensor.mean( X * (theano.tensor.log( X+eps) - theano.tensor.log( Zx+eps)) - X + Zx) \
           + 1*theano.tensor.mean( abs( W2x)**2) +0.01*theano.tensor.mean( abs( Hx))

    # Make an optimizer and define the inputs
    opt = downhill.build(al, loss=cost, params=[W1x, W2x], inputs=[X])
    train = downhill.Dataset(x.astype(float64), batch_size=x.shape[0])

    # Train and show me the progress
    cst = []
    lt = time.time()
    for tm, _ in opt.iterate(train,
                             learning_rate=.001,
                             max_updates=ep,
                             patience=ep):
        cst.append(tm['loss'])
        if time.time() - lt > 4:
            pl()
            lt = time.time()
    pl()

    # Show me
    nn_nmf = theano.function(inputs=[X], outputs=[Zx, Hx, W2x], updates=[])
    z, h, w = nn_nmf(x.astype(float64))

    subplot(2, 1, 1)
    imagesc(x**.4)
    title('Input 1')
    subplot(2, 1, 2)
    imagesc(z**.4)
    title('Approximation')
    subplot(2, 2, 3)
    plot(W2x.get_value())
    title('NN bases')
    subplot(2, 2, 4)
    plot(h.T)
    title('Latent representation')
    tight_layout()
    return w, z
Example #32
import climate
import downhill
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import theano

climate.enable_default_logging()

x = theano.shared(np.array([-1, 0], 'f'), name='x')

opt = downhill.build(
    'nag',
    loss=(100 * (x[1:] - x[:-1] ** 2) ** 2 + (1 - x[:-1]) ** 2).sum(),
    params=[x],
    inputs=[],
    monitors=[('x', x[:-1].sum()), ('y', x[1:].sum())],
    monitor_gradients=True)

xs, ys, loss = [], [], []
for tm, _ in opt.iterate([[]],
                         learning_rate=0.001,
                         momentum=0.95,
                         max_gradient_norm=100):
    xs.append(tm['x'])
    ys.append(tm['y'])
    loss.append(tm['loss'])
    if len(loss) == 300:
        break

ax = plt.axes(projection='3d')
Example #33
def train(data_dir='data/smrt/',
          dim_proj=64,
          dim_att=32,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=100,
          test_freq=100,
          saveto_file='params.npz',
          tmsaveto_file='timeparams.npz',
          weight_decay=0.0005,
          sigmasqr = 1,
          tdim = 1.,
          reload_model=True,
          train=True):
    """
    MRSRMTPP model training.
    tdim: factor by which timestamps are scaled down
    """
    options = locals().copy()
    #savedstep = '0'
    saveto = data_dir + saveto_file
    tmsaveto = data_dir + tmsaveto_file

    # for earlystopping
    best_map = 0
    prev_map = 0.001

    # loads graph
    Gp, Gs, Gi, node_index = data_utilsSMRT.load_graph(data_dir)
    #print nx.info(G)
    options['n_events'] = len(node_index)

    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    timeparams = init_timeparams(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(tmsaveto, timeparams)
    timetparams = init_tparams(timeparams)

    # builds MRSRMTPP model
    print 'Building model...'
    model = tpgruSMRT_model.build_model(tparams, timetparams, options)

    print 'Loading test data...'
    test_examples = data_utilsSMRT.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             Gp=Gp,
                                             Gs=Gs,
                                             Gi=Gi)
    test_loader = data_utilsSMRT.Loader(test_examples, options=options)
    print 'Loaded %d test examples' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utilsSMRT.load_examples(data_dir,
                                                  dataset='train',
                                                  keep_ratio=options[
                                                      'keep_ratio'],
                                                  node_index=node_index,
                                                  maxlen=maxlen,
                                                  Gp=Gp,
                                                  Gs=Gs,
                                                  Gi=Gi)
        train_loader = data_utilsSMRT.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        print 'Loading valid data...'
        valid_examples = data_utilsSMRT.load_examples(data_dir,
                                                  dataset='valid',
                                                  keep_ratio=options[
                                                      'keep_ratio'],
                                                  node_index=node_index,
                                                  maxlen=maxlen,
                                                  Gp=Gp)
        valid_loader = data_utilsSMRT.Loader(valid_examples, options=options)
        print 'Loaded %d validation examples.' % len(valid_examples)

        # compiles updates.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])

        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)

        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))
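        # (Note: here downhill is used only to derive the update rules;
        # get_updates() yields (shared variable, update expression) pairs that
        # are compiled into a plain Theano function, instead of driving the
        # training loop through opt.iterate() as in the other examples.)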

        toptimizer = downhill.build(algo='adam',
                                   loss=model['timecost'],
                                   params=timetparams.values(),
                                   inputs=model['timedata'])

        tupdates = toptimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=0.0001)

        f_t_update = theano.function(model['timedata'],
                                   model['timecost'],
                                   updates=list(tupdates))

        # training loop.
        start_time = timeit.default_timer()

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        #cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                batch_data = train_loader()
                cost = f_update(*(batch_data[:-3]+(batch_data[-2],)))
                #cost_history += [cost]
                timecost = f_t_update(*(batch_data[:-2]+(batch_data[-1],)))

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)
                    print 'timecost: %f' % (timecost)

                # dump model parameters.
                if global_step % save_freq == 0:
                    eva_map = evaluate_eval(model['f_prob'], valid_loader, model['f_tprob'], options['tdim'])
                    if (eva_map > best_map):
                        best_map = eva_map
                        params = unzip(tparams)
                        np.savez(data_dir + saveto_file, **params)
                        pickle.dump(options, open('%s.pkl' % (data_dir + saveto_file), 'wb'), -1)
                        timeparams = unzip(timetparams)
                        np.savez(data_dir + tmsaveto_file, **timeparams)
                    if (abs(eva_map - prev_map) / prev_map < 0.001):
                        scores = evaluate(model['f_prob'], test_loader, model['f_tprob'], options['tdim'])
                        pprint.pprint(scores)
                        return 0
                    else:
                        prev_map = eva_map

                global_step += 1
Example #34
def lasagne_separate(M,
                     P,
                     FE,
                     W1,
                     W2,
                     z1,
                     z2,
                     hh=.0001,
                     ep=5000,
                     d=0,
                     wsp=.0001,
                     plt=True):
    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.astype(float32))

    # Input is the learned dictionary set
    lW = hstack((W1.T, W2.T)).astype(float32)
    _lW = Th.matrix('_lW')
    fI = InputLayer(shape=lW.shape, input_var=_lW)

    # Split in two paths
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, d)
    dfW2 = DropoutLayer(fW2, d)

    N_sequence = 10
    # # Compute source modulators
    # R1 = LSTMLayer(dfW1, N_sequence)
    # R2 = LSTMLayer(dfW2, N_sequence)

    # Bring to standard orientation
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    cost = (
        _M * (Th.log(_M + eps) - Th.log(get_output(R) + eps)) - _M +
        get_output(R)).mean() + wsp * (Th.mean(abs(R1.W)) + Th.mean(abs(R2.W)))

    # Train it using Lasagne
    opt = downhill.build('rprop',
                         loss=cost,
                         inputs=[_lW],
                         params=get_all_params(R))
    train = downhill.Dataset(lW, batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get outputs
    _r = nget(R, _lW, lW) + eps
    _r1 = nget(R1, _lW, lW)
    _r2 = nget(R2, _lW, lW)
    o1 = FE.ife(_r1 * (M / _r), P)
    o2 = FE.ife(_r2 * (M / _r), P)
    sxr = bss_eval(o1, 0, vstack((z1, z2))) + bss_eval(o2, 1, vstack((z1, z2)))

    return o1, o2, (array(sxr[:3]) + array(sxr[3:])) / 2.
batch_size = 1
#Our Loss function
z1 = X.dot(W1) + b1
a1 = T.tanh(z1)
z2 = a1.dot(W2) + b2
y_hat = T.nnet.softmax(z2)
loss_reg = 1. / batch_size * reg_lambda / 2 * (T.sum(T.sqr(W1)) +
                                               T.sum(T.sqr(W2)))
loss = T.nnet.categorical_crossentropy(y_hat, y).mean() + loss_reg
prediction = T.argmax(y_hat, axis=1)
predict = theano.function([X], prediction)

#Store the training and validation loss
train_loss = []
validation_loss = []
opt = downhill.build('sgd', loss=loss)
#Set up training and validation dataset splits, use only one example in a batch
#and use only one batch per step/epoch
#Use everything except last 1000 examples for training
train = downhill.Dataset([train_X[:-1000], train_y_onehot[:-1000]],
                         batch_size=batch_size,
                         iteration_size=1)
#Use last 1000 examples for validation
valid = downhill.Dataset([train_X[-1000:], train_y_onehot[-1000:]])
#SGD
iterations = 0
for tm, vm in opt.iterate(train, valid, patience=10000):
    iterations += 1
    # Record the training and validation loss
    train_loss.append(tm['loss'])
    validation_loss.append(vm['loss'])
def nn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = DenseLayer(dfW1,
                    num_units=M.T.shape[1],
                    W=W1.astype(float64),
                    nonlinearity=act,
                    b=None)
    R2 = DenseLayer(dfW2,
                    num_units=M.T.shape[1],
                    W=W2.astype(float64),
                    nonlinearity=act,
                    b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = (_M*(Th.log(_M+eps) - Th.log( Ro+eps)) - _M + Ro).mean() \
       + sp*Th.mean( abs( H)) + 0*Th.mean( dum)

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    #train = downhill.Dataset( array( [0]).astype(float32), batch_size=0)
    if isinstance(d, list):
        train = downhill.Dataset(array([d[0]]).astype(float64), batch_size=0)
        er = downhill_train(opt, train, hh, ep / 2, None)
        train = downhill.Dataset(array([d[1]]).astype(float64), batch_size=0)
        er += downhill_train(opt, train, hh, ep / 2, None)
    else:
        train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
        er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float64)).T
    _r2 = nget(R2, dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
def nn_sep_ae(m, w1, w2, hh=.001, ep=5000, sp=.1, dp=0.0, spb=3, al='rprop'):
    from numpy import random
    import theano
    # from matplotlib.pyplot import gcf, clf, semilogy, grid, title, show
    from deep_sep_expr3 import downhill_train
    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Dropout parameters
    d = theano.shared(float32(dp))

    # Plot to make while training
    def pl():
        clf()
        gcf().set_size_inches(6, 2)
        semilogy(cst)
        grid('on')
        title('Cost: %f, Epoch: %d' % (cst[-1], len(cst)))
        drawnow()

    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    w_cat = hstack((w1, w2))
    K = [w1.shape[1], w2.shape[1]]
    # W2m = theano.shared(w_cat.astype(float64))

    W1m = theano.shared(
        random.rand(w_cat.shape[1], w_cat.shape[0]).astype(float64))
    #     W1z = theano.shared((linalg.pinv(w_cat)).astype(float64))

    M = theano.tensor.matrix('M')
    Hm = psoftplus(W1m.dot(M), spb)
    # Dropout
    if dp > 0:
        Hm *= (1. / (1. - d) * (rng.uniform(size=Hm.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    W2s1 = theano.shared(
        hstack((w_cat[:, 0:K[0]], zeros(w2.shape))).astype(float64))
    W2s2 = theano.shared(
        hstack((zeros(w1.shape), w_cat[:, K[0]:K[0] + K[1]])).astype(float64))

    M1 = psoftplus(W2s1.dot(Hm), spb)
    M2 = psoftplus(W2s2.dot(Hm), spb)
    M_out = M1 + M2

    # -------------or----------------

    # M_out = psoftplus((W2s1 + W2s2).dot( Hm),spb);
    # M2 = psoftplus(M_out - psoftplus(W2s1.dot(Hm),spb),spb);
    # M1 = psoftplus(M_out - psoftplus(W2s2.dot(Hm),spb),spb);

    # Cost: generalized KL divergence KL(M_out || M), plus L1 penalties on the
    # encoder weights W1m and the latent code Hm
    cost = theano.tensor.mean(M_out * (theano.tensor.log(M_out + eps) - theano.tensor.log(M + eps)) - M_out + M) \
           + 0.01 * theano.tensor.mean(abs(W1m)) + 0.01 * theano.tensor.mean(abs(Hm))
    # Alternative: KL(M || M_out), regularizing a joint decoder W2m instead
    # cost = theano.tensor.mean(M * (theano.tensor.log(M + eps) - theano.tensor.log(M_out + eps)) - M + M_out) \
    #        + 0.1 * theano.tensor.mean(abs(Hm)) + 1 * theano.tensor.mean(abs(W2m) ** 2)
    opt = downhill.build(al, loss=cost, params=[W1m], inputs=[M])
    # params = W1m
    train = downhill.Dataset(m.astype(float64), batch_size=m.shape[0])
    # batch_size = m.shape[0]
    cst = []
    lt = time.time()
    for tm, _ in opt.iterate(train,
                             learning_rate=hh,
                             max_updates=ep,
                             patience=ep):
        cst.append(tm['loss'])
        if time.time() - lt > 2:
            pl()
            lt = time.time()

    pl()
    # W2s1 = theano.shared(hstack((w1,zeros(w2.shape))).astype(float64));
    # W2s2 = theano.shared(hstack((zeros(w1.shape),w2)).astype(float64));

    #W2s1 = theano.shared(hstack((W2m.eval()[:,0:K[0]],zeros(w2.shape))).astype(float64));
    #W2s2 = theano.shared(hstack((zeros(w1.shape),W2m.eval()[:,K[0]:K[0]+K[1]])).astype(float64));
    #M1 = psoftplus(W2s1.dot(Hm),spb);
    #M2 = psoftplus(W2s2.dot(Hm),spb);

    nn_nmf_sep = theano.function(inputs=[M],
                                 outputs=[Hm, M1, M2, M_out],
                                 updates=[])
    h1m, m1, m2, m_out = nn_nmf_sep(m.astype(float64))

    subplot(2, 1, 1)
    imagesc(m1**.4)
    title('Source 1')
    subplot(2, 1, 2)
    imagesc(m2**.4)
    title('Source 2')
    # subplot( 2, 2, 3); plot( h1z[0:Kx].T); title( 'Latent representation for Source 1');
    # subplot( 2, 2, 4); plot( h1z[Kx:Kx+Ky].T); title( 'Latent representation for Source 2');
    tight_layout()

    return m_out, m1, m2, cst
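
# psoftplus() is used throughout these examples but is not defined in this
# excerpt. A common parameterized softplus is log(1 + exp(b * x)) / b, which
# approaches a ReLU as b grows; the sketch below is only an assumption about
# its definition, not the original implementation:
import theano.tensor as TT

def psoftplus(x, b=3.):
    """Parameterized softplus (assumed form); larger b gives a sharper curve."""
    return TT.nnet.softplus(b * x) / b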
Example No. 38
0
def train(data_dir='data/memes/',
          dim_proj=512,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=1000,
          test_freq=1000,
          saveto_file='params.npz',
          weight_decay=0.0005,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.
    """
    options = locals().copy()
    saveto = data_dir + saveto_file

    # loads graph
    G, node_index = data_utils.load_graph(data_dir)
    print nx.info(G)
    options['n_words'] = len(node_index)

    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    # builds Topo-LSTM model
    print 'Building model...'
    model = tprnn_model.build_model(tparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             G=G)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            G=G)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])

        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)

        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))

        # training loop.
        start_time = timeit.default_timer()

        # downhill.minimize(
        #     loss=cost,
        #     algo='adam',
        #     train=train_loader,
        #     # inputs=input_list + [labels],
        #     # params=tparams.values(),
        #     # patience=0,
        #     max_gradient_clip=1,
        #     # max_gradient_norm=1,
        #     learning_rate=learning_rate,
        #     monitors=[('cost', cost)],
        #     monitor_gradients=False)

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                cost = f_update(*train_loader())
                cost_history += [cost]

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)

                # dump model parameters.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(saveto, **params)
                    pickle.dump(options, open('%s.pkl' % saveto, 'wb'), -1)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader)
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    scores = evaluate(model['f_prob'], test_loader)
    pprint.pprint(scores)
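
# The loop above compiles downhill's parameter updates into a plain Theano
# function rather than calling opt.iterate(). A minimal self-contained sketch
# of the same pattern on a toy quadratic loss (assuming Optimizer.get_updates
# accepts the same keyword arguments used above; all names here are
# illustrative, not part of the original module):
import numpy as np
import theano
import theano.tensor as TT
import downhill

x = TT.vector('x')
w = theano.shared(np.zeros(3, dtype=theano.config.floatX), name='w')
toy_loss = TT.sqr(w - x).sum()

toy_opt = downhill.build(algo='adam', loss=toy_loss, params=[w], inputs=[x])
toy_updates = toy_opt.get_updates(max_gradient_elem=5., learning_rate=0.01)
toy_step = theano.function([x], toy_loss, updates=list(toy_updates))

target = np.ones(3, dtype=theano.config.floatX)
for _ in range(100):
    toy_step(target)  # each call performs one Adam update on w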
def rnn_sep(M,
            W1,
            W2,
            hh=.0001,
            ep=5000,
            d=0,
            sp=.0001,
            spb=3,
            al='rmsprop',
            t=5):
    # Get dictionary shapes
    K = [W1[0].shape[0], W2[0].shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = RecurrentLayer(dfW1,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W1[0].astype(float32),
                        W_hid_to_hid=W1[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=t)  # truncated backprop length
    R2 = RecurrentLayer(dfW2,
                        num_units=M.T.shape[1],
                        b=None,
                        W_in_to_hid=W2[0].astype(float32),
                        W_hid_to_hid=W2[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=t)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost: generalized KL divergence between _M and the reconstruction Ro, plus
    # an L1 sparsity penalty on H; the 0*Th.mean(dum) term keeps dum in the graph
    Ro = get_output(R) + eps
    cost = (_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro).mean() \
       + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using downhill
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T

    return _r, _r1, _r2, er
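
# nget() is used above to read layer outputs but is not defined in this
# excerpt. A plausible helper, assuming it simply compiles the layer's output
# for the given input variable and evaluates it (an assumed form, not the
# original implementation):
import theano
from lasagne.layers import get_output

def nget(layer, input_var, value):
    """Evaluate `layer`'s output with `value` bound to `input_var`."""
    f = theano.function([input_var], get_output(layer),
                        allow_input_downcast=True)
    return f(value)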