Example 1
def test_vector_clf_curve():
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    curve = tmetrics.classification._vector_clf_curve(yt, yp)
    f = theano.function([yt, yp], curve, allow_input_downcast=True)
    true, predicted = np.random.binomial(n=1, p=.5, size=10).astype('float32'), np.random.random(10).astype('float32')
    fps, tps, _ = f(true, predicted)
    s_fps, s_tps, s_ = sklearn.metrics.ranking._binary_clf_curve(true, predicted)
    np.set_printoptions(suppress=True)
    print('true')
    print(true)
    print('predicted')
    print(predicted)
    print('fps')
    print(fps)
    print('sklearn fps')
    print(s_fps)
    print('tps')
    print(tps)
    print('sklearn tps')
    print(s_tps)
    print('threshold values')
    print(_)
    print('sklearn threshold values')
    print(s_)
    assert np.allclose(fps, s_fps)
    assert np.allclose(tps, s_tps)
    assert np.allclose(_, s_)
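
For reference, the curve this test checks can be reproduced in a few lines of NumPy; the following is a minimal sketch of the same sort-and-cumsum logic that sklearn's `_binary_clf_curve` uses (illustrative only, not tmetrics' actual implementation):

import numpy as np

def binary_clf_curve(y_true, y_score):
    order = np.argsort(y_score)[::-1]           # sort scores descending
    y_true, y_score = y_true[order], y_score[order]
    distinct = np.where(np.diff(y_score))[0]    # indices where the sorted score changes
    threshold_idxs = np.r_[distinct, y_true.size - 1]
    tps = np.cumsum(y_true)[threshold_idxs]     # true positives at each threshold
    fps = 1 + threshold_idxs - tps              # false positives at each threshold
    return fps, tps, y_score[threshold_idxs]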
Example 2
    def test_cudnn_softmax_grad_opt(self):
        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
        # applied when cudnn is required
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
        # applied when cudnn is excluded or not available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)]) == 0
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 1

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization
        # does not crash on a manually constructed graph
        y = T.fvector("y")
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0
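
Scanning `f.maker.fgraph.toposort()` for a given op class recurs throughout these tests; a small helper in the same style keeps the assertions readable (a sketch built only on the API already used above):

def count_ops(f, op_class):
    """Count nodes whose op is an instance of op_class in a compiled function."""
    return sum(1 for node in f.maker.fgraph.toposort()
               if isinstance(node.op, op_class))

# e.g.: assert count_ops(f, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad) == 1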
Example 3
def test_0():

    N = 16*1000*10*1

    if 1:
        aval = abs(numpy.random.randn(N).astype('float32'))+.1
        bval = numpy.random.randn(N).astype('float32')
        a = T.fvector()
        b = T.fvector()
    else:
        aval = abs(numpy.random.randn(N))+.1
        bval = numpy.random.randn(N)
        a = T.dvector()
        b = T.dvector()

    f = theano.function([a,b], T.pow(a,b), mode='LAZY')
    theano_opencl.elemwise.swap_impls=False
    g = theano.function([a,b], T.pow(a,b), mode='LAZY')

    print('ocl   time', timeit.Timer(lambda: f(aval, bval)).repeat(3, 3))

    print('gcc   time', timeit.Timer(lambda: g(aval, bval)).repeat(3, 3))

    print('numpy time', timeit.Timer(lambda: aval**bval).repeat(3, 3))

    assert ((f(aval, bval) - aval**bval)**2).sum() < 1.1
    assert ((g(aval, bval) - aval**bval)**2).sum() < 1.1
Example 4
 def __init__(self, name, path, learning_rate=0.001):
     self.r_symbol = T.fvector('r')
     self.gamma_symbol = T.fscalar('gamma')
     self.action_symbol = T.fmatrix('action')
     self.y_symbol = T.fvector('y')
     super(ReinforcementModel, self).__init__(
         name, path, learning_rate=learning_rate)
Example 5
    def setUp(self):
        self.x_true = np.random.uniform(size=5).astype('float32')
        self.x_false = np.random.uniform(size=5).astype('float32')

        x_true_var = T.fvector()
        x_false_var = T.fvector()
        self.test = function(inputs=[x_true_var, x_false_var], outputs=max_margin_loss(x_true_var, x_false_var, 1))
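
`max_margin_loss` is not shown in this snippet; a common formulation it plausibly implements (an assumption, not the library's confirmed definition) is a hinge loss between the true and false scores:

import theano.tensor as T

def max_margin_loss(x_true, x_false, margin):
    # hinge loss: penalize whenever a false score comes within `margin` of a true score
    return T.maximum(0., margin - x_true + x_false).sum()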
Example 6
    def optimize(self, train_data, lam, fixed_length=3):
    
        i  = T.iscalar('i')
        lr = T.fscalar('lr')
        Xl = T.fvector('Xl')
        Xr = T.fvector('Xr')

        cost = self.ae.cost(Xl, Xr)  #+ lam * self.ae.penalty()
        grads = T.grad(cost, self.ae.params)
        update_vars = []

        for var, gvar in zip(self.ae.params, grads):
            if var.get_value().ndim == 1:
                update_vars.append((var, var - 0.1*lr*gvar))
            #elif var.get_value().ndim > 1:
            #    new_param = var - lr*gvar
            #    len_W = T.sqrt(T.sum(new_param**2, axis=0))
            #    desired_W = T.clip(len_W, 0., fixed_length)
            #    ratio = desired_W  / (len_W + 1e-7)
            #    new_param = new_param * ratio
            #    update_vars.append((var, new_param))
            else:
                update_vars.append((var, var - lr*gvar))

        opt = theano.function([i, lr], cost, updates=update_vars,
                givens={Xl: train_data[i,0], Xr: train_data[i,1]})#, allow_input_downcast=True)

        #get_grad = theano.function([], grads[3], givens={X:train_data[0]}, allow_input_downcast=True)
        #get_gradb = theano.function([], grads[-1], givens={X:train_data[0]}, allow_input_downcast=True)
        return opt#, get_grad, get_gradb
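
The compiled `opt` takes a sample index and a learning rate, with `givens` routing `train_data[i]` into the graph, so a hypothetical training loop might look like this (`n_samples` is assumed, not taken from the snippet):

n_samples = 1000                 # assumed; in practice train_data.get_value().shape[0]
for epoch in range(10):
    for i in range(n_samples):
        cost_i = opt(i, 0.01)    # the index selects the (Xl, Xr) pair via `givens`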
Example 7
def test_brier_score_loss_from_scikit_learn_example():
    """
    from sklearn docs...
    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import brier_score_loss
    >>> y_true = np.array([0, 1, 1, 0])
    >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])
    >>> brier_score_loss(y_true, y_prob)  
    0.037...

    """
    y_true = T.fvector('y_true')
    y_predicted = T.fvector('y_predicted')
    brier_score = tmetrics.brier_score_loss(y_true, y_predicted)
    f = theano.function([y_true, y_predicted], brier_score)
    yt = np.array([0, 1, 1, 0], 'float32')
    yp = np.array([.1, .9, .8, .3], theano.config.floatX)
    refscore = sklearn.metrics.brier_score_loss(yt, yp)
    tol = .01
    score = f(yt, yp)
    assert (refscore - tol) < score < (refscore + tol)

    #also test the function is numpy/pandas compatible
    assert (refscore - tol) < tmetrics.brier_score_loss(yt, yp) < (refscore + tol)
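
The Brier score is by definition the mean squared difference between predicted probabilities and binary outcomes, so the expression under test is equivalent to a one-liner (a sketch of the definition, not necessarily tmetrics' exact code):

import theano.tensor as T

def brier_score_loss(y_true, y_predicted):
    # mean squared error between outcomes in {0, 1} and predicted probabilities
    return T.mean(T.sqr(y_true - y_predicted))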
Example 8
    def setUp(self):
        self.x_true = np.random.uniform(low=0, high=1, size=5).astype('float32')
        self.x_false_list = [np.random.uniform(low=0, high=1, size=5).astype('float32') for i in range(10)]

        x_true_var = T.fvector()
        x_false_var_list = [T.fvector() for t in self.x_false_list]
        self.test = function(inputs=[x_true_var] + x_false_var_list, outputs=negative_sampling_loss(x_true_var, x_false_var_list))
Example 9
    def __init__(self, input_layers, *args, **kwargs):
        super(RMSEObjective, self).__init__(input_layers, *args, **kwargs)
        self.input_systole = input_layers["systole:value"]
        self.input_diastole = input_layers["diastole:value"]

        self.target_vars["systole:value"] = T.fvector("systole_target_value")
        self.target_vars["diastole:value"] = T.fvector("diastole_target_value")
Example 10
def theanoVecVecMul(In1, In2, opt):
    var1 = T.fvector('var1')
    var2 = T.fvector('var2')
    if opt == 'M':
        var3 = T.dot(var1, var2)
    else:
        var3 = T.mul(var1, var2)
    DivVec = function([var1,var2],var3)
    return DivVec(In1,In2)
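
Despite the `DivVec` name, the function returns a dot product when `opt == 'M'` and an elementwise product otherwise; for example:

import numpy as np

a = np.array([1., 2., 3.], dtype='float32')
b = np.array([4., 5., 6.], dtype='float32')
print(theanoVecVecMul(a, b, 'M'))   # dot product: 32.0
print(theanoVecVecMul(a, b, 'E'))   # elementwise product: [ 4. 10. 18.]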
Example 11
    def __init__(self, num_emb, emb_dim, hidden_dim, output_dim,
                 degree=2, learning_rate=0.01, momentum=0.9,
                 trainable_embeddings=True,
                 labels_on_nonroot_nodes=False):
        assert emb_dim > 1 and hidden_dim > 1
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.degree = degree
        self.learning_rate = learning_rate
        self.momentum = momentum

        self.params = []
        self.embeddings = theano.shared(self.init_matrix([self.num_emb, self.emb_dim]))
        if trainable_embeddings:
            self.params.append(self.embeddings)

        self.x = T.ivector(name='x')  # word indices
        self.tree = T.imatrix(name='tree')  # shape [None, self.degree]
        if labels_on_nonroot_nodes:
            self.y = T.fmatrix(name='y')  # output shape [None, self.output_dim]
            self.y_exists = T.fvector(name='y_exists')  # shape [None]
        else:
            self.y = T.fvector(name='y')  # output shape [self.output_dim]

        self.num_words = self.x.shape[0]  # total number of nodes (leaves + internal) in tree
        emb_x = self.embeddings[self.x]
        emb_x = emb_x * T.neq(self.x, -1).dimshuffle(0, 'x')  # zero-out non-existent embeddings

        self.tree_states = self.compute_tree(emb_x, self.tree)
        self.final_state = self.tree_states[-1]
        if labels_on_nonroot_nodes:
            self.output_fn = self.create_output_fn_multi()
            self.pred_y = self.output_fn(self.tree_states)
            self.loss = self.loss_fn_multi(self.y, self.pred_y, self.y_exists)
        else:
            self.output_fn = self.create_output_fn()
            self.pred_y = self.output_fn(self.final_state)
            self.loss = self.loss_fn(self.y, self.pred_y)

        updates = self.gradient_descent(self.loss)

        train_inputs = [self.x, self.tree, self.y]
        if labels_on_nonroot_nodes:
            train_inputs.append(self.y_exists)
        self._train = theano.function(train_inputs,
                                      [self.loss, self.pred_y],
                                      updates=updates)

        self._evaluate = theano.function([self.x, self.tree],
                                         self.final_state)

        self._predict = theano.function([self.x, self.tree],
                                        self.pred_y)
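
A minimal call sequence for this model could look as follows; the shapes are assumptions inferred from the comments above (word indices, a `[None, degree]` tree matrix, a target of length `output_dim`), not a documented API:

import numpy as np

# hypothetical driver, assuming `model` is an instance of the class above
x = np.array([4, 7, 2], dtype='int32')       # word indices for the tree's nodes
tree = np.array([[0, 1]], dtype='int32')     # one internal node with children 0 and 1
y = np.random.uniform(size=model.output_dim).astype('float32')

loss, pred_y = model._train(x, tree, y)
root_state = model._evaluate(x, tree)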
Example 12
def test_roc_auc_score():
    true = np.random.binomial(n=1, p=.5, size=50).astype('float32')
    #true = np.array([0, 0, 1, 1]).astype('float32')
    predicted = np.random.random(size=50).astype('float32')
    #predicted = np.array([0.1, 0.4, 0.35, 0.8]).astype('float32')
    yt = T.fvector('y_true')
    yp = T.fvector('y_predicted')
    roc_auc_score_expr = tmetrics.classification.roc_auc_score(yt, yp)
    refscore = sklearn.metrics.roc_auc_score(true, predicted)
    print('refscore')
    print(refscore)
    f = theano.function([yt, yp], roc_auc_score_expr)
    score = f(true, predicted)
    print('score')
    print(score)
    try:
        assert np.allclose(refscore, score)
    except AssertionError:
        fps, tps, thresholds = tmetrics.classification._binary_clf_curve(yt, yp)
        fpr, tpr, _thresh = tmetrics.classification.roc_curve(yt, yp)
        f = theano.function([yt, yp], [fps, tps, thresholds, fpr, tpr, _thresh, roc_auc_score_expr])
        result = f(true, predicted)
        print('** tmetrics **')
        print('fps')
        print(result[0])
        print('tps')
        print(result[1])
        print('thresholds')
        print(result[2])
        print('fpr')
        print(result[3])
        print('tpr')
        print(result[4])
        print('_thresh')
        print(result[5])
        print('roc score')
        print(result[6])

        print('** refscore **')
        curve = sklearn.metrics.ranking._binary_clf_curve(true, predicted)
        print('fps')
        print(curve[0])
        print('tps')
        print(curve[1])
        print('thresholds')
        print(curve[2])
        trapz = np.trapz(curve[1], curve[0])
        print('trapz')
        print(trapz)
        print('auc')
        print(sklearn.metrics.ranking.auc(curve[0], curve[1]))
        print('roc_curve')
        print(sklearn.metrics.roc_curve(true, predicted))
        raise
Example 13
def main():

    #loading in data set
    dataset_for_error = '/vega/stats/users/sl3368/Data_LC/NormData/LC_stim_15.mat'
    stimuli = load_class_data_batch(dataset_for_error)
    stim = stimuli[0]
    data = theano.shared( stim, borrow=True)
    print('Number of rows: ')
    print(stim.shape[0])

    #setting variable for error 
    init = numpy.float64(0.0)
    mean_error = shared(init)

    #writing theano functions for computing mean square error for one lag 
    
    prediction = T.fvector('predict') # 60 row vector representing time t

    real = T.fvector('real') #row representing time t+1 

    cost = T.mean( (real - prediction) ** 2)

    #function for updating mean error
    batch_error = theano.function([prediction,real],cost,updates=[(mean_error, mean_error + cost)])


    increment = stim.shape[0] // 100
    #iterating over batch and computing the error
    for index in range(stim.shape[0]-1):
        if index % increment == 0:
            print(str(index // increment) + '% done...')
        recent = batch_error(stim[index], stim[index+1])

    #m_e_avg = mean_error / 9000000

    #printing result
    print('Total error: ')
    print(mean_error.get_value())

    print('Finding padding amount...')
    num_zero = float(0.0)
    #calculating zeros amount
    for index in range(stim.shape[0]):
        is_zero = True
        for i in range(60):
            if stim[index][i] != 0:
               is_zero = False
   
        if is_zero:
            num_zero = num_zero + 1

    print('Percent Zero: ' + str(float(num_zero / (increment * 100))))
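
Because the one-lag cost is just the mean squared difference between consecutive rows, the accumulation loop above can be cross-checked with a vectorized NumPy computation (a sketch, assuming `stim` is a dense in-memory array):

import numpy as np

# per-step MSE between each row and its successor, then summed over steps
per_step_mse = np.mean((stim[1:] - stim[:-1]) ** 2, axis=1)
total_error = per_step_mse.sum()   # comparable to mean_error.get_value()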
Example 14
    def test_softmax_grad(self):
        def cmp(n, m, f, f_gpu):
            data = numpy.arange(n * m, dtype="float32").reshape(n, m)
            gdata = numpy.asarray(data)[:, :, None, None]

            out = f(data)
            gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
            utt.assert_allclose(out, gout)

        x = T.matrix("x", "float32")
        x_gpu = T.tensor4("x_gpu", "float32")
        f_z = T.nnet.softmax_op
        f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")

        # Verify the grad operation
        dims = (2, 3, 4, 5)
        gdata = numpy.arange(numpy.product(dims), dtype="float32").reshape(dims)
        T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

        # Verify that the CPU and GPU implementations return the same results
        # up to a tolerance.

        self._test_softmax(x, x_gpu, f_z, f_gpu, cmp)

        self._test_softmax(x, x, f_z, f_z, self._cmp)

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is applied when cudnn is required
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is not applied when cudnn is excluded or not
        # available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 0
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 1

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization
        # does not crash on a manually constructed graph
        y = T.fvector("y")
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0
Example 15
    def get_div_function(self):
        tind = T.ivector('ind')
        if self.NMF_updates == 'beta':
            self.div = theano.function(inputs=[tind],
                                       outputs=costs.beta_div(self.X_buff[tind[1]:tind[2], ],
                                                              self.W[tind[0]].T,
                                                              self.H[tind[3]:tind[4], ],
                                                              self.beta),
                                       name="div",
                                       allow_input_downcast=True)
        if self.NMF_updates == 'groupNMF':
            tcomp = T.ivector('comp')
            tlambda = T.fvector('lambda')
            tSc = T.ivector('Sc')
            tCs = T.ivector('Cs')
            tparams = [tind, tcomp, tlambda, tSc, tCs]
            cost, beta_div, cls_dist, ses_dist = costs.group_div(self.X_buff[tind[1]:tind[2], ],
                                                                 self.W,
                                                                 self.H[tind[3]:tind[4], ],
                                                                 self.beta,
                                                                 tparams)

            self.div = theano.function(inputs=[tind, tcomp, tlambda, tSc, tCs],
                                       outputs=[cost,
                                                beta_div,
                                                cls_dist,
                                                ses_dist],
                                       name="div",
                                       allow_input_downcast=True,
                                       on_unused_input='ignore')

        if self.NMF_updates == 'noiseNMF':
            tcomp = T.ivector('comp')
            tlambda = T.fvector('lambda')
            tSc = T.ivector('Sc')
            tparams = [tind, tcomp, tlambda, tSc]
            cost, beta_div, cls_dist, ses_dist = costs.noise_div(self.X_buff[tind[1]:tind[2], ],
                                                                 self.W,
                                                                 self.Wn,
                                                                 self.H[tind[3]:tind[4], ],
                                                                 self.beta,
                                                                 tparams)

            self.div = theano.function(inputs=[tind, tcomp, tlambda, tSc],
                                       outputs=[cost,
                                                beta_div,
                                                cls_dist,
                                                ses_dist],
                                       name="div",
                                       allow_input_downcast=True,
                                       on_unused_input='ignore')
Example 16
def test_1D_roc_auc_scores():
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    y = np.array([0, 0, 1, 1]).astype('float32')
    scores = np.array([0.1, 0.4, 0.35, 0.8]).astype('float32')
    ref_fpr, ref_tpr, ref_thresh = sklearn.metrics.roc_curve(y, scores)
    roc_auc_scores = tmetrics.classification.roc_auc_scores(yt, yp)
    fpr, tpr, thresh = tmetrics.classification.roc_curves(yt, yp)
    f = theano.function([yt, yp], [fpr, tpr, thresh, roc_auc_scores])
    score_fpr, score_tpr, score_thresh, score_auc = f(y, scores)
    assert np.allclose(ref_fpr, score_fpr)
    assert np.allclose(ref_tpr, score_tpr)
    assert np.allclose(ref_thresh, score_thresh)
    assert np.allclose(sklearn.metrics.roc_auc_score(y, scores), score_auc)
Example 17
def test_precisison_recall_curves_vector(n_iter=1):
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    p_expr, r_expr, thresh_expr = tmetrics.classification.precision_recall_curves(yt, yp)
    f = theano.function([yt, yp], [p_expr, r_expr, thresh_expr])
    for iterator in range(n_iter):
        y = np.random.binomial(n=1, p=.5, size=20).astype('float32')
        scores = np.random.random(20).astype('float32')
        ref_precision, ref_recall, ref_thresh = sklearn.metrics.precision_recall_curve(y, scores)
        precision, recall, thresh = f(y, scores)
        #assert np.allclose(ref_precision, precision)
        #assert np.allclose(ref_recall, recall)
        #assert np.allclose(ref_thresh, thresh)
        try:
            assert np.allclose(sklearn.metrics.auc(ref_recall, ref_precision), sklearn.metrics.auc(recall, precision))
        except AssertionError:
            print('n_iter: {}'.format(n_iter))
            print('y')
            print(y)
            print('scores')
            print(scores)
            print('ref precision')
            print(ref_precision)
            print(ref_precision.shape)
            #print(np.r_[precision[1:], 1])
            #print(np.allclose(ref_precision, np.r_[precision[1:], 1]))
            print(sklearn.metrics.auc(ref_recall, ref_precision))
            print(sklearn.metrics.auc(recall, precision))
            print()
            print('ref recall')
            print(ref_recall)
            print(ref_recall.shape)
            print()
            print('ref thresh')
            print(ref_thresh)
            print(ref_thresh.shape)
            print()
            print('score precision')
            print(precision)
            print(precision.shape)
            print()
            print('score recall')
            print(recall)
            print(recall.shape)
            print()
            print('score threshold')
            print(thresh)
            print(thresh.shape)
            raise
Example 18
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer product (by performing rank-1 matrix update"""

    shape = (3,4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        print(i, node)
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'), theano._asarray(numpy.random.rand(3), dtype='float32'))
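
The broadcasted update in this test is a rank-1 (outer-product) update; in NumPy terms, with `c` indexing rows and `b` columns as the dimshuffles dictate, the same step is:

import numpy as np

a_val = np.random.rand(3, 4).astype('float32')
b_val = np.random.rand(4).astype('float32')
c_val = np.random.rand(3).astype('float32')
# equivalent to a + b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')
a_val += np.outer(c_val, b_val)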
Example 19
def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float64', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('float64')(p, u)
    assert m.dtype == 'float64', m.dtype
Example 20
def test_hammming_loss():
    true = np.random.binomial(n=1, p=.5, size=10).astype('float32')
    predicted = np.round(np.random.random(10))
    refscore = hamming(true, predicted)
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    f = theano.function([yt, yp], tmetrics.classification.hamming_loss(yt, yp), allow_input_downcast=True)
    score = f(true, predicted)
    print('true')
    print(true)
    print('predicted')
    print(predicted)
    print('refscore {}'.format(refscore))
    print('score {}'.format(score))
    assert np.allclose(refscore, score)
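
For binary label vectors the Hamming loss is just the fraction of positions where the labels disagree, so the tested expression should be equivalent to this sketch (the definition, not necessarily tmetrics' exact code):

import theano.tensor as T

def hamming_loss(y_true, y_predicted):
    # fraction of mismatched labels
    return T.mean(T.neq(y_true, y_predicted))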
Example 21
def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
    m = multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float64", m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform("float64")(p, u)
    assert m.dtype == "float64", m.dtype
Example 22
def find_sigma(X_shared, sigma_shared, N, perplexity, sigma_iters,
               metric, verbose=0):
    """Binary search on sigma for a given perplexity."""
    X = T.fmatrix('X')
    sigma = T.fvector('sigma')

    target = np.log(perplexity)

    P = T.maximum(p_Xp_given_X_var(X, sigma, metric), epsilon)

    entropy = -T.sum(P*T.log(P), axis=1)

    # Setting update for binary search interval
    sigmin_shared = theano.shared(np.full(N, np.sqrt(epsilon), dtype=floath))
    sigmax_shared = theano.shared(np.full(N, np.inf, dtype=floath))

    sigmin = T.fvector('sigmin')
    sigmax = T.fvector('sigmax')

    upmin = T.switch(T.lt(entropy, target), sigma, sigmin)
    upmax = T.switch(T.gt(entropy, target), sigma, sigmax)

    givens = {X: X_shared, sigma: sigma_shared, sigmin: sigmin_shared,
              sigmax: sigmax_shared}
    updates = [(sigmin_shared, upmin), (sigmax_shared, upmax)]

    update_intervals = theano.function([], entropy, givens=givens,
                                       updates=updates)

    # Setting update for sigma according to search interval
    upsigma = T.switch(T.isinf(sigmax), sigma*2, (sigmin + sigmax)/2.)

    givens = {sigma: sigma_shared, sigmin: sigmin_shared,
              sigmax: sigmax_shared}
    updates = [(sigma_shared, upsigma)]

    update_sigma = theano.function([], sigma, givens=givens, updates=updates)

    for i in range(sigma_iters):
        e = update_intervals()
        update_sigma()
        if verbose:
            print('Iteration: {0}.'.format(i+1))
            print('Perplexities in [{0:.4f}, {1:.4f}].'.format(np.exp(e.min()),
                  np.exp(e.max())))

    if np.any(np.isnan(np.exp(e))):
        raise Exception('Invalid sigmas. The perplexity is probably too low.')
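
The update rules above are easier to follow in plain NumPy; this sketch mirrors the same interval-halving logic (illustrative only, not the package's actual code; `entropy_fn` stands in for the symbolic entropy computation):

import numpy as np

def find_sigma_numpy(entropy_fn, N, perplexity, sigma_iters):
    """Per-point binary search for sigmas matching a target entropy."""
    target = np.log(perplexity)
    sigma = np.ones(N)
    sigmin = np.full(N, 1e-12)
    sigmax = np.full(N, np.inf)
    for _ in range(sigma_iters):
        e = entropy_fn(sigma)                         # entropy per point
        sigmin = np.where(e < target, sigma, sigmin)  # entropy too low: raise the lower bound
        sigmax = np.where(e > target, sigma, sigmax)  # entropy too high: lower the upper bound
        # double sigma while the interval is unbounded, otherwise bisect it
        sigma = np.where(np.isinf(sigmax), sigma * 2, (sigmin + sigmax) / 2.)
    return sigma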
Example 23
    def make_node(self, activations, labels, input_lengths):
        t_activations = T.as_tensor_variable(activations)
        # Ensure activations array is C-contiguous
        t_activations = cpu_contiguous(t_activations)

        t_labels = T.as_tensor_variable(labels)
        t_input_lengths = T.as_tensor_variable(input_lengths)

        if t_activations.type.dtype != 'float32':
            raise TypeError('activations must use the float32 type!')

        if t_activations.ndim != 3:
            raise ValueError('activations must have 3 dimensions.')

        if t_labels.type.dtype != 'int32':
            raise TypeError('labels must use the int32 type!')

        if t_labels.ndim != 2:
            raise ValueError('labels must have 2 dimensions.')

        if t_input_lengths.type.dtype != 'int32':
            raise TypeError('input_lengths must use the int32 type!')

        if t_input_lengths.ndim != 1:
            raise ValueError('input_lengths must have 1 dimension.')

        costs = T.fvector(name="ctc_cost")
        outputs = [costs]
        if self.compute_grad:
            gradients = T.ftensor3(name="ctc_grad")
            outputs += [gradients]

        return gof.Apply(self, inputs=[t_activations, t_labels, t_input_lengths],
                         outputs=outputs)
Example 24
    def __init__(self, nh, init_scale=0.2):

        self.W = theano.shared(name='W', value=init_scale * np.random.uniform(-1.0, 1.0, (nh, 1))
                               .astype(theano.config.floatX))
        self.b = theano.shared(name='b', value=np.array(0,
                                                        dtype=theano.config.floatX))

        self.params = [self.b, self.W]

        h = T.fmatrix('h')
        y = T.fvector('y')
        lr = T.scalar('lr')

        y_pred = T.dot(h, self.W) + self.b

        loss = T.sum(T.square(y_pred[:, 0] - y))

        gradients = T.grad(loss, self.params)

        updates = OrderedDict((p, p - lr * g) for p, g in zip(self.params, gradients))

        # These all assume a minibatch size > 1; "mb" functions below will massage single examples as required
        self.predict = theano.function(inputs=[h], outputs=y_pred)
        self.calc_loss = theano.function(inputs=[h, y], outputs=loss, updates=None)
        self.train = theano.function(inputs=[h, y, lr], outputs=loss, updates=updates)
        self.calc_gradients = theano.function(inputs=[h, y], outputs=gradients, updates=None)
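
Training this regressor then reduces to repeated calls of `train` with a feature matrix, a target vector, and a learning rate (a hypothetical driver, assuming `model` is an instance of the class above with `nh=64`):

import numpy as np

h_batch = np.random.rand(32, 64).astype('float32')   # minibatch of hidden states
y_batch = np.random.rand(32).astype('float32')       # regression targets
for epoch in range(100):
    loss = model.train(h_batch, y_batch, 0.01)       # inputs=[h, y, lr]
preds = model.predict(h_batch)                        # shape (32, 1)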
Example 25
    def test_allow_downcast_floatX(self):
        a = tensor.fscalar('a')
        b = tensor.fvector('b')

        f = pfunc([a, b], (a + b), allow_input_downcast=True)
        g = pfunc([a, b], (a + b), allow_input_downcast=False)
        h = pfunc([a, b], (a + b), allow_input_downcast=None)

        # If the values can be accurately represented, OK
        assert numpy.all(f(0, [0]) == 0)
        assert numpy.all(g(0, [0]) == 0)
        assert numpy.all(h(0, [0]) == 0)

        # For the vector: OK iff allow_input_downcast is True
        assert numpy.allclose(f(0, [0.1]), 0.1)
        self.assertRaises(TypeError, g, 0, [0.1])
        self.assertRaises(TypeError, h, 0, [0.1])

        # For the scalar: OK if allow_input_downcast is True,
        # or None and floatX==float32
        assert numpy.allclose(f(0.1, [0]), 0.1)
        self.assertRaises(TypeError, g, 0.1, [0])
        if config.floatX == 'float32':
            assert numpy.allclose(h(0.1, [0]), 0.1)
        else:
            self.assertRaises(TypeError, h, 0.1, [0])
Example 26
    def compile(self):
        # 1D: n_words, 2D: batch * n_cands
        self.x = T.imatrix()
        self.y = T.fvector()
        self.train_inputs = [self.x, self.y]
        self.pred_inputs = [self.x]

        self.activation = self.args.activation
        self.n_d = self.args.hidden_dim
        self.n_e = self.emb_layers[0].n_d
        self.pad_id = self.emb_layers[0].vocab_map[PAD]
        self.dropout = theano.shared(np.float32(self.args.dropout).astype(theano.config.floatX))

        self._set_layers(args=self.args, n_d=self.n_d, n_e=self.n_e)

        ###########
        # Network #
        ###########
        h_in = self._input_layer(x=self.x)
        h = self._mid_layer(h_prev=h_in, x=self.x, pad_id=self.pad_id)
        y_scores = self._output_layer(h=h)
        self.y_pred = T.le(0.5, y_scores)

        #########################
        # Set an objective func #
        #########################
        self.set_params(layers=self.layers)
        self.loss = self.set_loss(self.y, y_scores)
        self.cost = self.set_cost(args=self.args, params=self.params, loss=self.loss)
Example 27
	def __init__(self,
				 word_vec_width,
				 batch_size,
				 num_hidden,
				 learning_rate=0.1):
		self.num_hidden = num_hidden
		self.learning_rate = learning_rate
		self.word_vec_width = word_vec_width
		self.batch_size = batch_size

		self.vocab_mat = T.fmatrix('vocab')
		self.word_onehot = T.fmatrix('word_onehot')
		b = T.fvector('b')
		W = T.fmatrix('W')
		f = 1 / (1 + T.exp(-(W * (self.word_onehot.dot(self.vocab_mat) + b))))
		s = T.sum(f)

		self.exec_fn = theano.function(
			[self.word_onehot, b, W, self.vocab_mat],
			f,
			allow_input_downcast=True)

		self.word_onehot_c = T.fmatrix('word_onehot_c')
		f_c = 1 / (1 + T.exp(-(W * (self.word_onehot_c.dot(self.vocab_mat)) + b)))
		s_c = T.sum(f_c)

		J = T.largest(0, 1 - s + s_c)
		self.grad = theano.grad(J, [b, W, self.vocab_mat])

		self.grad_fn = theano.function(
			[self.word_onehot, self.word_onehot_c, b, W, self.vocab_mat],
			self.grad,
			allow_input_downcast=True)
Example 28
    def test_select_distinct(self):
        # Tests that ChoiceFromUniform always selects distinct elements

        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 1000
        all_indices = range(n_elements)
        np.random.seed(12345)
        expected = [
            np.asarray([[931, 318, 185, 209, 559]]),
            np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
            np.asarray([[546, 28, 79, 665, 295, 779, 433, 531, 411, 716, 244, 234, 70, 88, 612, 639, 383, 335,
                         451, 100, 175, 492, 848, 771, 559, 214, 568, 596, 370, 486, 855, 925, 138, 300, 528, 507,
                         730, 199, 882, 357, 58, 195, 705, 900, 66, 468, 513, 410, 816, 672]])]

        for i in [5, 10, 50, 100, 500, n_elements]:
            uni = np.random.rand(i).astype(config.floatX)
            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)
            for ii in range(len(expected)):
                if expected[ii].shape == res.shape:
                    assert (expected[ii] == res).all()
            res = np.squeeze(res)
            assert len(res) == i
            assert np.all(np.in1d(np.unique(res), all_indices)), res
Example 29
    def test_select_proportional_to_weight(self):
        """
        Tests that MultinomialWOReplacementFromUniform selects elements, on average,
        proportional to the their probabilities
        """
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        numpy.random.seed(12345)
        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)

        for rep in range(10000):
            uni = numpy.random.rand(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
            res = numpy.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
        avg_diff = numpy.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff
Example 30
    def __init__(self, config=None, defaults=defaults, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 use_data_layer=None, rand_crop=None, batch_size=None):
        # combine everything by passing to Model's init
        super(AlexNet, self).__init__(**{arg: val for (arg, val) in locals().items() if arg != 'self'})
        # configs can now be accessed through self dictionary

        if self.inputs_hook or self.hiddens_hook or self.params_hook:
            log.error("Inputs_hook, hiddens_hook, and params_hook not implemented yet for AlexNet!")

        self.flag_datalayer = self.use_data_layer

        ####################
        # Theano variables #
        ####################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.rand = T.fvector('rand')

        ##########
        # params #
        ##########
        self.params = []

        # make the network!
        self.build_computation_graph()
Example 31
def main(args):

    theano.config.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'dp_disall-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    pickleModel = args['pickleModel']

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])
    loadType = int(args['loadType'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print(str(windows))

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 200
    y2s_dim = 200
    z2s_dim = 200
    target_dim = k  # as the appliances are separated into theta_mu1, theta_mu2, etc., each one is generated from k Gaussians

    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25,
        typeLoad=loadType,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    print("Mean ", reader.meanTrain)
    print("Std", reader.stdTrain)
    instancesPlot = {0: [4]}

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    #from experiment 18-05-31_18-48
    fmodel = open(pickleModel, 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    #define layers
    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu1 = mainloop.model.nodes[11]
    theta_sig1 = mainloop.model.nodes[12]
    coeff1 = mainloop.model.nodes[13]

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    params = mainloop.model.params

    dynamicOutput = [None, None, None, None, None, None, None, None]
    #dynamicOutput_val = [None, None, None, None, None, None,None,  None, None]
    if (y_dim > 1):
        theta_mu2 = mainloop.model.nodes[14]
        theta_sig2 = mainloop.model.nodes[15]
        coeff2 = mainloop.model.nodes[16]
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None]  # mu, sig, coeff and pred
    if (y_dim > 2):
        theta_mu3 = mainloop.model.nodes[17]
        theta_sig3 = mainloop.model.nodes[18]
        coeff3 = mainloop.model.nodes[19]
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        theta_mu4 = mainloop.model.nodes[20]
        theta_sig4 = mainloop.model.nodes[21]
        coeff4 = mainloop.model.nodes[22]
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 4):
        theta_mu5 = mainloop.model.nodes[23]
        theta_sig5 = mainloop.model.nodes[24]
        coeff5 = mainloop.model.nodes[25]
        nodes = nodes + [theta_mu5, theta_sig5, coeff5]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 5):
        theta_mu6 = mainloop.model.nodes[26]
        theta_sig6 = mainloop.model.nodes[27]
        coeff6 = mainloop.model.nodes[28]
        nodes = nodes + [theta_mu6, theta_sig6, coeff6]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 6):
        theta_mu7 = mainloop.model.nodes[29]
        theta_sig7 = mainloop.model.nodes[30]
        coeff7 = mainloop.model.nodes[31]
        nodes = nodes + [theta_mu7, theta_sig7, coeff7]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 7):
        theta_mu8 = mainloop.model.nodes[32]
        theta_sig8 = mainloop.model.nodes[33]
        coeff8 = mainloop.model.nodes[34]
        nodes = nodes + [theta_mu8, theta_sig8, coeff8]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)  # a plain Gaussian sample, as in the original code; the GMM is used for generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        if (y_dim > 4):
            theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
            theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
            coeff5_t = coeff5.fprop([theta_1_t], params)
            y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
            y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)
            tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t,
                                       y_pred5)

        if (y_dim > 5):
            theta_mu6_t = theta_mu6.fprop([theta_1_t], params)
            theta_sig6_t = theta_sig6.fprop([theta_1_t], params)
            coeff6_t = coeff6.fprop([theta_1_t], params)
            y_pred6 = GMM_sampleY(theta_mu6_t, theta_sig6_t, coeff6_t)
            y_pred1 = T.concatenate([y_pred1, y_pred6], axis=1)
            tupleMulti = tupleMulti + (theta_mu6_t, theta_sig6_t, coeff6_t,
                                       y_pred6)

        if (y_dim > 6):
            theta_mu7_t = theta_mu7.fprop([theta_1_t], params)
            theta_sig7_t = theta_sig7.fprop([theta_1_t], params)
            coeff7_t = coeff7.fprop([theta_1_t], params)
            y_pred7 = GMM_sampleY(theta_mu7_t, theta_sig7_t, coeff7_t)
            y_pred1 = T.concatenate([y_pred1, y_pred7], axis=1)
            tupleMulti = tupleMulti + (theta_mu7_t, theta_sig7_t, coeff7_t,
                                       y_pred7)

        if (y_dim > 7):
            theta_mu8_t = theta_mu8.fprop([theta_1_t], params)
            theta_sig8_t = theta_sig8.fprop([theta_1_t], params)
            coeff8_t = coeff8.fprop([theta_1_t], params)
            y_pred8 = GMM_sampleY(theta_mu8_t, theta_sig8_t, coeff8_t)
            y_pred1 = T.concatenate([y_pred1, y_pred8], axis=1)
            tupleMulti = tupleMulti + (theta_mu8_t, theta_sig8_t, coeff8_t,
                                       y_pred8)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t, ) + tupleMulti
        #corr_temp, binary_temp

    (otherResults_val, updates_val) = theano.scan(fn=inner_fn_test,
                                                  sequences=[x_1_temp],
                                                  outputs_info=output_fn_val)

    for k, v in updates_val.items():
        k.default_update = v

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    ######################## TEST (GENERATION) TIME
    s_temp_val, prior_mu_temp_val, prior_sig_temp_val, \
      theta_mu1_temp_val, theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val = otherResults_val[:7]
    restResults_val = otherResults_val[7:]

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'
    y_pred1_temp_val = T.clip(y_pred1_temp_val, 0.0, np.inf)
    prediction_val = y_pred1_temp_val

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1], 1)))**2)
    mae1_val = T.mean(
        T.abs_(y_pred1_temp_val -
               y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTrain[0]) + reader.meanTrain[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTrain[0]) + reader.meanTrain[0]
    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    totaMSE_val = mse1_val
    totaMAE_val = mae1_val
    indexSepDynamic_val = 5

    #Initializing values of mse and mae
    mse2_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae2_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse3_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae3_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse4_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae4_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse5_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae5_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse6_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae6_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse7_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae7_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse8_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae8_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))

    relErr2_val = T.zeros((1, ))
    relErr3_val = T.zeros((1, ))
    relErr4_val = T.zeros((1, ))
    relErr5_val = T.zeros((1, ))
    relErr6_val = T.zeros((1, ))
    relErr7_val = T.zeros((1, ))
    relErr8_val = T.zeros((1, ))

    propAssigned2_val = T.zeros((1, ))
    propAssigned3_val = T.zeros((1, ))
    propAssigned4_val = T.zeros((1, ))
    propAssigned5_val = T.zeros((1, ))
    propAssigned6_val = T.zeros((1, ))
    propAssigned7_val = T.zeros((1, ))
    propAssigned8_val = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu2_temp_val.name = 'theta_mu2_val'
        theta_sig2_temp_val.name = 'theta_sig2_val'
        coeff2_temp_val.name = 'coeff2_val'
        y_pred2_temp_val.name = 'disaggregation2_val'
        y_pred2_temp_val = T.clip(y_pred2_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred2_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae2_val = T.mean(
            T.abs_(y_pred2_temp_val -
                   y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred2_temp_val)
        totReal = T.sum(y[:, :, 1])
        relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned2_val = 1 - T.sum(
            T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,1] * reader.stdTrain[1]) + reader.meanTrain[1]
        #y_pred2_temp_val = (y_pred2_temp_val * reader.stdTrain[1]) + reader.meanTrain[1]
        #mse2_valUnNorm = T.mean((y_pred2_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae2_valUnNorm = T.mean( T.abs_(y_pred2_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse2_val.name = 'mse2_val'
        mae2_val.name = 'mae2_val'

        theta_mu2_in_val = theta_mu2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig2_in_val = theta_sig2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val

        totaMSE_val += mse2_val
        totaMAE_val += mae2_val
        indexSepDynamic_val += 2

    if (y_dim > 2):
        theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu3_temp_val.name = 'theta_mu3_val'
        theta_sig3_temp_val.name = 'theta_sig3_val'
        coeff3_temp_val.name = 'coeff3_val'
        y_pred3_temp_val.name = 'disaggregation3_val'
        y_pred3_temp_val = T.clip(y_pred3_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred3_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae3_val = T.mean(
            T.abs_(y_pred3_temp_val -
                   y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred3_temp_val)
        totReal = T.sum(y[:, :, 2])
        relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned3_val = 1 - T.sum(
            T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,2] * reader.stdTrain[2]) + reader.meanTrain[2]
        #y_pred3_temp_val = (y_pred3_temp_val * reader.stdTrain[2]) + reader.meanTrain[2]
        #mse3_valUnNorm = T.mean((y_pred3_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae3_valUnNorm = T.mean( T.abs_(y_pred3_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse3_val.name = 'mse3_val'
        mae3_val.name = 'mae3_val'

        theta_mu3_in_val = theta_mu3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig3_in_val = theta_sig3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu3_in_val, theta_sig3_in_val,
                                     coeff3_in_val)
        totaMSE_val += mse3_val
        totaMAE_val += mae3_val
        indexSepDynamic_val += 2

    if (y_dim > 3):
        theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu4_temp_val.name = 'theta_mu4_val'
        theta_sig4_temp_val.name = 'theta_sig4_val'
        coeff4_temp_val.name = 'coeff4_val'
        y_pred4_temp_val.name = 'disaggregation4_val'
        y_pred4_temp_val = T.clip(y_pred4_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred4_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae4_val = T.mean(
            T.abs_(y_pred4_temp_val -
                   y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred4_temp_val)
        totReal = T.sum(y[:, :, 3])
        relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned4_val = 1 - T.sum(
            T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,3] * reader.stdTrain[3]) + reader.meanTrain[3]
        #y_pred4_temp_val = (y_pred4_temp_val * reader.stdTrain[3]) + reader.meanTrain[3]
        #mse4_valUnNorm = T.mean((y_pred4_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae4_valUnNorm = T.mean( T.abs_(y_pred4_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse4_val.name = 'mse4_val'
        mae4_val.name = 'mae4_val'

        theta_mu4_in_val = theta_mu4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig4_in_val = theta_sig4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu4_in_val, theta_sig4_in_val,
                                     coeff4_in_val)
        totaMSE_val += mse4_val
        totaMAE_val += mae4_val
        indexSepDynamic_val += 2

    if (y_dim > 4):
        theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu5_temp_val.name = 'theta_mu5_val'
        theta_sig5_temp_val.name = 'theta_sig5_val'
        coeff5_temp_val.name = 'coeff5_val'
        y_pred5_temp_val.name = 'disaggregation5_val'
        y_pred5_temp_val = T.clip(y_pred5_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred5_temp_val],
                                       axis=2)  # before it gets unnormalized

        mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae5_val = T.mean(
            T.abs_(y_pred5_temp_val -
                   y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred5_temp_val)
        totReal = T.sum(y[:, :, 4])
        relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned5_val = 1 - T.sum(
            T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,4] * reader.stdTrain[4]) + reader.meanTrain[4]
        #y_pred5_temp_val = (y_pred5_temp_val * reader.stdTrain[4]) + reader.meanTrain[4]
        #mse5_valUnNorm = T.mean((y_pred5_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae5_valUnNorm = T.mean( T.abs_(y_pred5_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse5_val.name = 'mse5_val'
        mae5_val.name = 'mae5_val'

        theta_mu5_in_val = theta_mu5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig5_in_val = theta_sig5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu5_in_val, theta_sig5_in_val,
                                     coeff5_in_val)
        totaMSE_val += mse5_val
        totaMAE_val += mae5_val
        indexSepDynamic_val += 2

    if (y_dim > 5):
        theta_mu6_temp_val, theta_sig6_temp_val, coeff6_temp_val, y_pred6_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu6_temp_val.name = 'theta_mu6_val'
        theta_sig6_temp_val.name = 'theta_sig6_val'
        coeff6_temp_val.name = 'coeff6_val'
        y_pred6_temp_val.name = 'disaggregation6_val'
        y_pred6_temp_val = T.clip(y_pred6_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred6_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse6_val = T.mean((y_pred6_temp_val - y[:, :, 5].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae6_val = T.mean(
            T.abs_(y_pred6_temp_val -
                   y[:, :, 5].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred6_temp_val)
        totReal = T.sum(y[:, :, 5])
        relErr6_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned6_val = 1 - T.sum(
            T.abs_(y_pred6_temp_val - y[:, :, 5].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,5] * reader.stdTrain[5]) + reader.meanTrain[5]
        #y_pred6_temp_val = (y_pred6_temp_val * reader.stdTrain[5]) + reader.meanTrain[5]
        #mse6_valUnNorm = T.mean((y_pred6_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae6_valUnNorm = T.mean( T.abs_(y_pred6_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse6_val.name = 'mse6_val'
        mae6_val.name = 'mae6_val'

        theta_mu6_in_val = theta_mu6_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig6_in_val = theta_sig6_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff6_in_val = coeff6_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu6_in_val, theta_sig6_in_val,
                                     coeff6_in_val)
        totaMSE_val += mse6_val
        totaMAE_val += mae6_val
        indexSepDynamic_val += 2

    if (y_dim > 6):
        theta_mu7_temp_val, theta_sig7_temp_val, coeff7_temp_val, y_pred7_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu7_temp_val.name = 'theta_mu7_val'
        theta_sig7_temp_val.name = 'theta_sig7_val'
        coeff7_temp_val.name = 'coeff7_val'
        y_pred7_temp_val.name = 'disaggregation7_val'
        y_pred7_temp_val = T.clip(y_pred7_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred7_temp_val],
                                       axis=2)  # before it gets unnormalized

        mse7_val = T.mean((y_pred7_temp_val - y[:, :, 6].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae7_val = T.mean(
            T.abs_(y_pred7_temp_val -
                   y[:, :, 6].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred7_temp_val)
        totReal = T.sum(y[:, :, 6])
        relErr7_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned7_val = 1 - T.sum(
            T.abs_(y_pred7_temp_val - y[:, :, 6].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,6] * reader.stdTrain[6]) + reader.meanTrain[6]
        #y_pred7_temp_val = (y_pred7_temp_val * reader.stdTrain[6]) + reader.meanTrain[6]
        #mse7_valUnNorm = T.mean((y_pred7_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae7_valUnNorm = T.mean( T.abs_(y_pred7_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse7_val.name = 'mse7_val'
        mae7_val.name = 'mae7_val'

        theta_mu7_in_val = theta_mu7_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig7_in_val = theta_sig7_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff7_in_val = coeff7_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu7_in_val, theta_sig7_in_val,
                                     coeff7_in_val)
        totaMSE_val += mse7_val
        totaMAE_val += mae7_val
        indexSepDynamic_val += 2

    if (y_dim > 7):
        theta_mu8_temp_val, theta_sig8_temp_val, coeff8_temp_val, y_pred8_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu8_temp_val.name = 'theta_mu8_val'
        theta_sig8_temp_val.name = 'theta_sig8_val'
        coeff8_temp_val.name = 'coeff8_val'
        y_pred8_temp_val.name = 'disaggregation8_val'
        y_pred8_temp_val = T.clip(y_pred8_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred8_temp_val],
                                       axis=2)  # before it gets unnormalized

        mse8_val = T.mean((y_pred8_temp_val - y[:, :, 7].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae8_val = T.mean(
            T.abs_(y_pred8_temp_val -
                   y[:, :, 7].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred8_temp_val)
        totReal = T.sum(y[:, :, 7])
        relErr8_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned8_val = 1 - T.sum(
            T.abs_(y_pred8_temp_val - y[:, :, 7].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,7] * reader.stdTrain[7]) + reader.meanTrain[7]
        #y_pred8_temp_val = (y_pred8_temp_val * reader.stdTrain[7]) + reader.meanTrain[7]
        #mse8_valUnNorm = T.mean((y_pred8_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae8_valUnNorm = T.mean( T.abs_(y_pred8_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse8_val.name = 'mse8_val'
        mae8_val.name = 'mae8_val'

        theta_mu8_in_val = theta_mu8_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig8_in_val = theta_sig8_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff8_in_val = coeff8_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu8_in_val, theta_sig8_in_val,
                                     coeff8_in_val)
        totaMSE_val += mse8_val
        totaMAE_val += mae8_val
        indexSepDynamic_val += 2

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        *argsGMM_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = totaMSE_val / y_dim
    totaMAE_val = totaMAE_val / y_dim

    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'
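    # recon_val has shape (time, batch): the sum over axis 0 accumulates the
    # reconstruction cost across time steps, and .mean() then averages it over
    # the batch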

    ######################

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"

    lr_iterations = {0: lr}

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],  #[x, y],
        #givens={x:Xtest},
        #on_unused_input='ignore',
        #z=( ,200,1)
        allow_input_downcast=True,
        outputs=[
            prediction_val,
            recon_term_val,
            totaMSE_val,
            totaMAE_val,
            mse1_val,
            mse2_val,
            mse3_val,
            mse4_val,
            mse5_val,
            mse6_val,
            mse7_val,
            mse8_val,
            mae1_val,
            mae2_val,
            mae3_val,
            mae4_val,
            mae5_val,
            mae6_val,
            mae7_val,
            mae8_val,  #unnormalized mae and mse 16 items#
            relErr1_val,
            relErr2_val,
            relErr3_val,
            relErr4_val,
            relErr5_val,
            relErr6_val,
            relErr7_val,
            relErr8_val,
            propAssigned1_val,
            propAssigned2_val,
            propAssigned3_val,
            propAssigned4_val,
            propAssigned5_val,
            propAssigned6_val,
            propAssigned7_val,
            propAssigned8_val
        ],
        updates=updates_val)
    testOutput = []
    testMetrics2 = []
    perEnergyAssig = []

    bestInstancesPred = []
    bestInstancesDisa = []
    bestInstancesAggr = []

    numBatchTest = 0

    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(
            outputGeneration[1:20])  #before 36 including unnormalized metrics
        testMetrics2.append(outputGeneration[20:])

        ########## best mae
        predTest = np.transpose(outputGeneration[0], [1, 0, 2]).clip(min=0)
        realTest = np.transpose(batch[2], [1, 0, 2])

        batchMAE = np.mean(np.absolute(predTest - realTest), axis=(1, 2))
        idxMin = np.argmin(batchMAE)

        #print(np.asarray(idxMin).reshape(1,-1)[0,:])
        #print(batchMAE[idxMin])
        for idx in np.asarray(idxMin).reshape(1, -1)[0, :]:

            plt.figure(1)
            plt.plot(predTest[idx])
            plt.legend(appliances)
            plt.savefig(
                save_path +
                "/vrnn_disall_test-b{}_Pred_0-{}".format(numBatchTest, idx),
                format='eps')
            plt.clf()

            plt.figure(2)
            plt.plot(realTest[idx])
            plt.legend(appliances)
            plt.savefig(save_path +
                        "/vrnn_disall_test-b{}_RealDisag_0-{}".format(
                            numBatchTest, idx),
                        format='eps')
            plt.clf()

            plt.figure(3)
            plt.plot(np.transpose(batch[0], [1, 0, 2])[idx])
            plt.savefig(
                save_path +
                "/vrnn_disall_test-b{}_Realagg_0-{}".format(numBatchTest, idx),
                format='eps')
            plt.clf()

            bestInstancesPred.append(predTest[idx])
            bestInstancesDisa.append(realTest[idx])
            bestInstancesAggr.append(np.transpose(batch[0], [1, 0, 2])[idx])

        numBatchTest += 1

        sumNumPred = np.sum(predTest, axis=(0, 1))
        sumNumReal = np.sum(batch[2], axis=(0, 1))
        perEnergy = np.sum(batch[0], axis=(0, 1))
        perEnergyAssig.append((sumNumReal / perEnergy, sumNumPred / perEnergy))

    scipy.io.savemat(save_path + '/testInstances.mat',
                     mdict={
                         'pred': bestInstancesPred,
                         'disag': bestInstancesDisa,
                         'agg': bestInstancesAggr
                     })

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)

    testOutput[:, 19:] = 1000 * testOutput[:, 19:]  # kW to watts
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 11].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 12].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 13].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 14].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 15].mean()
    mse6_test = testOutput[:, 8].mean()
    mae6_test = testOutput[:, 16].mean()
    mse7_test = testOutput[:, 9].mean()
    mae7_test = testOutput[:, 17].mean()
    mse8_test = testOutput[:, 10].mean()
    mae8_test = testOutput[:, 18].mean()

    print(testOutput[:, 3:11].mean(), testOutput[:, 11:19].mean())

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()
    relErr6_test = testMetrics2[:, 5].mean()
    relErr7_test = testMetrics2[:, 6].mean()
    relErr8_test = testMetrics2[:, 7].mean()

    propAssigned1_test = testMetrics2[:, 8].mean()
    propAssigned2_test = testMetrics2[:, 9].mean()
    propAssigned3_test = testMetrics2[:, 10].mean()
    propAssigned4_test = testMetrics2[:, 11].mean()
    propAssigned5_test = testMetrics2[:, 12].mean()
    propAssigned6_test = testMetrics2[:, 13].mean()
    propAssigned7_test = testMetrics2[:, 14].mean()
    propAssigned8_test = testMetrics2[:, 15].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mse6_test,mse7_test,mse8_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mae6_test,mae7_test,mae8_test,mseTest,maeTest\n"
    )
    #fLog.write("Unnorm,{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},0.0,0.0\n\n".format(mse1_valUnNorm,mse2_valUnNorm,mse3_valUnNorm,mse4_valUnNorm,mse5_valUnNorm, mse6_valUnNorm,mse7_valUnNorm,mse8_valUnNorm,mae1_valUnNorm,mae2_valUnNorm,mae3_valUnNorm,mae4_valUnNorm,mae5_valUnNorm, mae6_valUnNorm,mae7_valUnNorm,mae8_valUnNorm))
    fLog.write(
        "{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n\n"
        .format(recon_test, mse1_test, mse2_test, mse3_test, mse4_test,
                mse5_test, mse6_test, mse7_test, mse8_test, mae1_test,
                mae2_test, mae3_test, mae4_test, mae5_test, mae6_test,
                mae7_test, mae8_test, mse_test, mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,relErr6,relErr7,relErr8,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5,propAssigned6,propAssigned7,propAssigned8\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        relErr6_test, relErr7_test, relErr8_test, propAssigned1_test,
        propAssigned2_test, propAssigned3_test, propAssigned4_test,
        propAssigned5_test, propAssigned6_test, propAssigned7_test,
        propAssigned8_test))

    fLog.write(
        "batch,perReal1,perReal2,perReal3,perReal4,perReal5,perReal6,perReal7,perReal8,perPredict1,perPredict2,perPredict3,perPredict4,perPredict5,perPredict6,perPredict7,perPredict8\n"
    )
    for batch, item in enumerate(perEnergyAssig):
        fLog.write(
            "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                batch, item[0][0], item[0][1], item[0][2], item[0][3],
                item[0][4], item[0][5], item[0][6], item[0][7], item[1][0],
                item[1][1], item[1][2], item[1][3], item[1][4], item[1][5],
                item[1][6], item[1][7]))
    fLog.write(pickleModel)
    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
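The per-appliance metrics built in the graph above are the two standard NILM evaluation quantities: relErrN_val is the signed relative error of the total predicted energy, and propAssignedN_val is the proportion of total energy correctly assigned. A minimal NumPy sketch of the same formulas outside the Theano graph (function and argument names here are illustrative, not from the original script):

import numpy as np

def nilm_metrics(pred, real, aggregate):
    # pred, real: (time, batch, 1) arrays for one appliance
    # aggregate:  (time, batch, 1) mains signal
    tot_pred, tot_real = pred.sum(), real.sum()
    # signed relative error of the total energy, in [-1, 1]
    rel_err = (tot_pred - tot_real) / max(tot_pred, tot_real)
    # proportion of energy correctly assigned (1.0 is perfect)
    prop_assigned = 1.0 - np.abs(pred - real).sum() / (2.0 * aggregate.sum())
    return rel_err, prop_assigned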
Esempio n. 32
0
def evaluate_lenet5(learning_rate=0.02, n_epochs=4, L2_weight=1e-5, extra_size=4, emb_size=300, batch_size=100, filter_size=[3,3], maxSentLen=40, hidden_size=[300,300], max_term_len=4, p_mode = 'conc'):

    model_options = locals().copy()
    print "model options", model_options

    seed=1234
    np.random.seed(seed)
    rng = np.random.RandomState(seed)    # random seed, so the model generates the same results


    # all_sentences_l, all_masks_l, all_sentences_r, all_masks_r, all_word1,all_word2,all_word1_mask,all_word2_mask,all_labels, all_extra, word2id  =load_wordnet_hyper_vs_all_with_words(maxlen=maxSentLen, wordlen=max_term_len)  #minlen, include one label, at least one word in the sentence
    # test_sents_l, test_masks_l, test_sents_r, test_masks_r, test_labels, word2id  =load_ACE05_dataset(maxSentLen, word2id)
    word2id = load_word2id(root_dic+'LenciBenotto_word2id.pkl')
    test_sents_l, test_masks_l, test_sents_r, test_masks_r, test_word1,test_word2,test_word1_mask,test_word2_mask,test_labels, test_extra, word2id, group_size_list = load_task_hyper_vs_all_with_allDefComb(LenciBenotto_file,maxSentLen, word2id, wordlen=max_term_len)



    test_sents_l=np.asarray(test_sents_l, dtype='int32')

    test_masks_l=np.asarray(test_masks_l, dtype=theano.config.floatX)


    test_sents_r=np.asarray(test_sents_r, dtype='int32')


    test_masks_r=np.asarray(test_masks_r, dtype=theano.config.floatX)


    test_word1=np.asarray(test_word1, dtype='int32')
    test_word2=np.asarray(test_word2, dtype='int32')


    test_word1_mask=np.asarray(test_word1_mask, dtype=theano.config.floatX)
    test_word2_mask=np.asarray(test_word2_mask, dtype=theano.config.floatX)


    test_labels_store=np.asarray(test_labels, dtype='int32')

    test_extra=np.asarray(test_extra, dtype=theano.config.floatX)

    # train_size=len(train_labels_store)
    # dev_size=len(dev_labels_store)
    test_size=len(test_sents_l)
    print ' test size: ', test_size

    vocab_size=len(word2id)+1


    rand_values=rng.normal(0.0, 0.01, (vocab_size, emb_size))   #generate a matrix by Gaussian distribution
    rand_values[0]=np.array(np.zeros(emb_size),dtype=theano.config.floatX)
    id2word = {y:x for x,y in word2id.iteritems()}
    word2vec=load_word2vec()
    rand_values=load_word2vec_to_init(rand_values, id2word, word2vec)
    init_embeddings=theano.shared(value=np.array(rand_values,dtype=theano.config.floatX), borrow=True)   #wrap up the python variable "rand_values" into theano variable
    # load_model_from_file(root_dic+'Weeds_best_para_init_embeddings', [init_embeddings])

    #now, start to build the input form of the model
    sents_ids_l=T.imatrix()
    sents_mask_l=T.fmatrix()
    sents_ids_r=T.imatrix()
    sents_mask_r=T.fmatrix()
    word1_ids = T.imatrix()
    word2_ids = T.imatrix()
    word1_mask = T.fmatrix()
    word2_mask = T.fmatrix()
    extra = T.fvector()
    labels=T.ivector()
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    def embed_input(emb_matrix, sent_ids):
        return emb_matrix[sent_ids.flatten()].reshape((batch_size,maxSentLen, emb_size)).dimshuffle(0,2,1)

    embed_input_l=embed_input(init_embeddings, sents_ids_l)#embeddings[sents_ids_l.flatten()].reshape((batch_size,maxSentLen, emb_size)).dimshuffle(0,2,1) #the input format can be adapted into CNN or GRU or LSTM
    embed_input_r=embed_input(init_embeddings, sents_ids_r)#embeddings[sents_ids_r.flatten()].reshape((batch_size,maxSentLen, emb_size)).dimshuffle(0,2,1)

    embed_word1 = init_embeddings[word1_ids.flatten()].reshape((batch_size,word1_ids.shape[1], emb_size))
    embed_word2 = init_embeddings[word2_ids.flatten()].reshape((batch_size,word2_ids.shape[1], emb_size))
    word1_embedding = T.sum(embed_word1*word1_mask.dimshuffle(0,1,'x'), axis=1)
    word2_embedding = T.sum(embed_word2*word2_mask.dimshuffle(0,1,'x'), axis=1)


    '''create_AttentiveConv_params '''
    conv_W, conv_b=create_conv_para(rng, filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]))
    conv_W_context, conv_b_context=create_conv_para(rng, filter_shape=(hidden_size[1], 1, emb_size, 1))

    NN_para=[conv_W, conv_b,conv_W_context]

    '''
    attentive convolution function
    '''
    term_vs_term_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_word1.dimshuffle(0,2,1),
            origin_input_tensor3_r = embed_word2.dimshuffle(0,2,1),
            input_tensor3=embed_word1.dimshuffle(0,2,1),
            input_tensor3_r = embed_word2.dimshuffle(0,2,1),
             mask_matrix = word1_mask,
             mask_matrix_r = word2_mask,
             image_shape=(batch_size, 1, emb_size, max_term_len),
             image_shape_r = (batch_size, 1, emb_size, max_term_len),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    tt_embeddings_l = term_vs_term_layer.attentive_maxpool_vec_l
    tt_embeddings_r = term_vs_term_layer.attentive_maxpool_vec_r

    p_ww = T.concatenate([tt_embeddings_l,tt_embeddings_r,tt_embeddings_l*tt_embeddings_r,tt_embeddings_l-tt_embeddings_r], axis=1)

    term_vs_def_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_word1.dimshuffle(0,2,1),
            origin_input_tensor3_r = embed_input_r,
            input_tensor3=embed_word1.dimshuffle(0,2,1),
            input_tensor3_r = embed_input_r,
             mask_matrix = word1_mask,
             mask_matrix_r = sents_mask_r,
             image_shape=(batch_size, 1, emb_size, max_term_len),
             image_shape_r = (batch_size, 1, emb_size, maxSentLen),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    td_embeddings_l = term_vs_def_layer.attentive_maxpool_vec_l
    td_embeddings_r = term_vs_def_layer.attentive_maxpool_vec_r
    p_wd = T.concatenate([td_embeddings_l,td_embeddings_r,td_embeddings_l*td_embeddings_r,td_embeddings_l-td_embeddings_r], axis=1)


    def_vs_term_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_input_l,
            origin_input_tensor3_r = embed_word2.dimshuffle(0,2,1),
            input_tensor3=embed_input_l,
            input_tensor3_r = embed_word2.dimshuffle(0,2,1),
             mask_matrix = sents_mask_l,
             mask_matrix_r = word2_mask,
             image_shape=(batch_size, 1, emb_size, maxSentLen),
             image_shape_r = (batch_size, 1, emb_size, max_term_len),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    dt_embeddings_l = def_vs_term_layer.attentive_maxpool_vec_l
    dt_embeddings_r = def_vs_term_layer.attentive_maxpool_vec_r

    p_dw = T.concatenate([dt_embeddings_l,dt_embeddings_r,dt_embeddings_l*dt_embeddings_r,dt_embeddings_l-dt_embeddings_r], axis=1)


    def_vs_def_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_input_l,
            origin_input_tensor3_r = embed_input_r,
            input_tensor3=embed_input_l,
            input_tensor3_r = embed_input_r,
             mask_matrix = sents_mask_l,
             mask_matrix_r = sents_mask_r,
             image_shape=(batch_size, 1, emb_size, maxSentLen),
             image_shape_r = (batch_size, 1, emb_size, maxSentLen),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    dd_embeddings_l = def_vs_def_layer.attentive_maxpool_vec_l
    dd_embeddings_r = def_vs_def_layer.attentive_maxpool_vec_r
    p_dd = T.concatenate([dd_embeddings_l,dd_embeddings_r,dd_embeddings_l*dd_embeddings_r,dd_embeddings_l-dd_embeddings_r], axis=1)

    if p_mode == 'conc':
        p=T.concatenate([p_ww, p_wd, p_dw, p_dd], axis=1)
        p_len = 4*4*hidden_size[1]
    else:
        p = T.max(T.concatenate([p_ww.dimshuffle('x',0,1),p_wd.dimshuffle('x',0,1),p_dw.dimshuffle('x',0,1),p_dd.dimshuffle('x',0,1)],axis=0), axis=0)
        p_len =4*hidden_size[1]
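    # 'conc' keeps the four pairwise representations (term-term, term-def,
    # def-term, def-def) side by side, giving p_len = 4 blocks of
    # 4*hidden_size[1] units; otherwise an element-wise max is taken over the
    # four representations instead.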

    # HL_input = T.concatenate([p,cosine_matrix1_matrix2_rowwise(word1_embedding,word2_embedding).dimshuffle(0,'x'),extra.dimshuffle(0,'x')],axis=1)
    # HL_input_size=p_len+1+1
    #
    # HL_layer_1=HiddenLayer(rng, input=HL_input, n_in=HL_input_size, n_out=hidden_size[1], activation=T.tanh)


    "form input to LR classifier"
    LR_input = T.concatenate([p,cosine_matrix1_matrix2_rowwise(word1_embedding,word2_embedding).dimshuffle(0,'x'),extra.dimshuffle(0,'x')],axis=1)
    LR_input_size=p_len+1+1
    # LR_input = HL_layer_1.output
    # LR_input_size = hidden_size[1]

    U_a = create_ensemble_para(rng, 2, LR_input_size) # the weight matrix hidden_size*2
    LR_b = theano.shared(value=np.zeros((2,),dtype=theano.config.floatX),name='LR_b', borrow=True)  #bias for each target class
    LR_para=[U_a, LR_b]


    layer_LR=LogisticRegression(rng, input=LR_input, n_in=LR_input_size, n_out=2, W=U_a, b=LR_b, bias=0.25) #basically it is a multiplication between weight matrix and input feature vector
    loss=layer_LR.negative_log_likelihood(labels)  #for classification tasks we usually use negative log-likelihood as the loss; the lower the better.


    # L2_reg = (conv_W**2).sum()+(conv_W_context**2).sum()+(U_a**2).sum()





    params = NN_para+LR_para #[init_embeddings]


    # load_model_from_file('/save/wenpeng/datasets/HypeNet/HyperDef_label_meta_best_para_0.938730853392', params)
    load_model_from_file(root_dic+'LenciBenotto_best_para_0.557286573332', params)

    '''
    0.552587544259; current ap: 0.574037513126 ap@100 0.918481316424
    0.557286573332; current ap: 0.576498645289 ap@100 0.909032657538
    '''

    test_model = theano.function([sents_ids_l, sents_mask_l, sents_ids_r, sents_mask_r, word1_ids,word2_ids,word1_mask,word2_mask,extra], [layer_LR.y_pred,layer_LR.prop_for_posi], allow_input_downcast=True, on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000000000  # look at this many examples regardless
    start_time = time.time()
    mid_time = start_time
    past_time= mid_time
    epoch = 0
    done_looping = False


    n_test_batches=test_size/batch_size
    n_test_remain = test_size%batch_size
    if n_test_remain!=0:
        test_batch_start=list(np.arange(n_test_batches)*batch_size)+[test_size-batch_size]
    else:
        test_batch_start=list(np.arange(n_test_batches)*batch_size)



    # max_acc_dev=0.0
    max_ap_test=0.0
    max_ap_topk_test=0.0
    max_f1=0.0


    pred_labels =[]
    probs = []
    gold_labels =[]
    error_sum=0.0
    for idd, test_batch_id in enumerate(test_batch_start): # for each test batch
        pred_i, prob_i=test_model(
                test_sents_l[test_batch_id:test_batch_id+batch_size],
                test_masks_l[test_batch_id:test_batch_id+batch_size],
                test_sents_r[test_batch_id:test_batch_id+batch_size],
                test_masks_r[test_batch_id:test_batch_id+batch_size],
                test_word1[test_batch_id:test_batch_id+batch_size],
                test_word2[test_batch_id:test_batch_id+batch_size],
                test_word1_mask[test_batch_id:test_batch_id+batch_size],
                test_word2_mask[test_batch_id:test_batch_id+batch_size],
                test_extra[test_batch_id:test_batch_id+batch_size])

        # error_sum+=error_i
        pred_labels+=list(pred_i)
        probs+=list(prob_i)

    print len(test_sents_l), len(probs)
    if n_test_remain !=0:
        probs = probs[:(len(test_batch_start)-1)*batch_size]+probs[-n_test_remain:]
    print len(test_sents_l), len(probs)
    assert len(test_sents_l) == len(probs)
    assert sum(group_size_list) == len(probs)
    #max prob in group
    max_probs = []
    prior_size = 0
    for i in range(len(group_size_list)):

        sub_probs = probs[prior_size:prior_size+group_size_list[i]]
        prior_size += group_size_list[i]
        max_probs.append(max(sub_probs))

    print len(group_size_list),len(max_probs),len(test_labels)
    assert len(test_labels) == len(max_probs)
    # test_acc=1.0-error_sum/(len(test_batch_start))
    test_ap = apk(test_labels, max_probs, k=len(test_labels))
    test_ap_top100 = apk(test_labels, max_probs, k=100)


    # if test_ap > max_ap_test:
    #     max_ap_test=test_ap
    #     store_model_to_file('/save/wenpeng/datasets/EVALution/HyperDef_label_4ways_conc_test_on_EVA_allDefComb_best_para_'+str(max_ap_test), params)
    # if test_ap_top100 > max_ap_topk_test:
    #     max_ap_topk_test=test_ap_top100
    print '\t\tcurrent ap:', test_ap,'ap@100', test_ap_top100
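Since test_size is generally not a multiple of batch_size, the evaluation above starts the final batch at test_size - batch_size and later trims the re-predicted overlap out of probs. A standalone sketch of that batching scheme (illustrative names, not from the original code):

import numpy as np

def overlapping_batch_starts(n_examples, batch_size):
    # fixed-size batches covering all examples; the last batch may
    # overlap the previous one when n_examples % batch_size != 0
    starts = list(np.arange(n_examples // batch_size) * batch_size)
    if n_examples % batch_size != 0:
        starts.append(n_examples - batch_size)
    return starts

# e.g. overlapping_batch_starts(10, 4) -> [0, 4, 6]; after prediction the
# duplicated outputs from the overlap are dropped, exactly as probs is
# trimmed above before the group-wise max is taken.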
Esempio n. 33
0
  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
    self.K = K
    lr = np.float32(2.5e-4)
    mu = np.float32(0)
    decay = np.float32(0.99)
    eps = np.float32(1e-10)

    # inputs and targets
    X = T.ftensor4('X')
    G = T.fvector('G')
    actions = T.ivector('actions')

    # create the graph
    self.conv_layers = []
    num_input_filters = 4 # number of filters / color channels
    for num_output_filters, filtersz, stride in conv_layer_sizes:
      layer = ConvLayer(num_input_filters, num_output_filters, filtersz, stride)
      self.conv_layers.append(layer)
      num_input_filters = num_output_filters


    ##### debug #####
    # Z = X / 255.0
    # j = 0
    # for layer in self.conv_layers:
    #   Z = layer.forward(Z)
    #   out = Z
    #   op = theano.function(inputs=[X], outputs=out, allow_input_downcast=True)
    #   test = op(np.random.randn(1, 4, IM_SIZE, IM_SIZE))
    #   print("output size after conv %d: %s" % (j, test.shape))
    #   j += 1


    # get conv output size
    Z = X / 255.0
    for layer in self.conv_layers:
      Z = layer.forward(Z)
    conv_out = Z.flatten(ndim=2)
    conv_out_op = theano.function(inputs=[X], outputs=conv_out, allow_input_downcast=True)
    test = conv_out_op(np.random.randn(1, 4, IM_SIZE, IM_SIZE))
    flattened_output_size = test.shape[1]


    # build fully connected layers
    self.layers = []
    M1 = flattened_output_size
    for M2 in hidden_layer_sizes:
      layer = HiddenLayer(M1, M2)
      self.layers.append(layer)
      M1 = M2

    # final layer
    layer = HiddenLayer(M1, K, lambda x: x)
    self.layers.append(layer)

    # collect params for copy
    self.params = []
    for layer in (self.conv_layers + self.layers):
      self.params += layer.params
    

    # calculate final output and cost
    Z = conv_out
    for layer in self.layers:
      Z = layer.forward(Z)
    Y_hat = Z

    selected_action_values = Y_hat[T.arange(actions.shape[0]), actions]
    cost = T.mean((G - selected_action_values)**2)

    # create train function
    # we need to ensure cache is updated before parameter update
    # by creating a list of new_caches
    # and using them in the parameter update
    grads = T.grad(cost, self.params)
    caches = [theano.shared(np.ones_like(p.get_value())) for p in self.params]
    new_caches = [decay*c + (np.float32(1) - decay)*g*g for c, g in zip(caches, grads)]

    c_update = [(c, new_c) for c, new_c in zip(caches, new_caches)]
    g_update = [
      (p, p - lr*g / T.sqrt(new_c + eps)) for p, new_c, g in zip(self.params, new_caches, grads)
    ]
    updates = c_update + g_update

    # compile functions
    self.train_op = theano.function(
      inputs=[X, G, actions],
      updates=updates,
      allow_input_downcast=True
    )

    self.predict_op = theano.function(
      inputs=[X],
      outputs=Y_hat,
      allow_input_downcast=True
    )
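The updates list above is deliberately ordered so that every parameter step divides by new_c, the freshly decayed cache, rather than the stale one. The same RMSProp step in plain NumPy, as a sketch using the constants from above (not the original class):

import numpy as np

def rmsprop_step(params, grads, caches, lr=2.5e-4, decay=0.99, eps=1e-10):
    # refresh the cache first, then scale the gradient by the refreshed
    # value -- this mirrors c_update followed by g_update
    for i, (p, g) in enumerate(zip(params, grads)):
        caches[i] = decay * caches[i] + (1.0 - decay) * g * g
        params[i] = p - lr * g / np.sqrt(caches[i] + eps)
    return params, caches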
Esempio n. 34
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=1,
                 activation_method="Sigmoid"):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.activation = T.nnet.sigmoid

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data

        # the data is presented as rasterized images
        self.x = T.matrix('x')

        # the labels are presented as 1D vector of [int] labels
        self.y = T.fvector('y')

        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=self.activation)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LinearRegression(input=self.sigmoid_layers[-1].output,
                                         n_in=hidden_layers_sizes[-1],
                                         n_out=n_outs,
                                         l2=0,
                                         l1=0)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
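The weight sharing described in the comments above works because the sigmoid layer and its RBM hold references to the same Theano shared variables rather than copies, so RBM pretraining moves the MLP weights in place. A minimal sketch of the mechanism (toy shapes, not the original classes):

import numpy as np
import theano

W = theano.shared(np.zeros((4, 3), dtype=theano.config.floatX), name='W')
mlp_params = [W]
rbm_params = [W]                      # same object, not a copy

rbm_params[0].set_value(rbm_params[0].get_value() + 1.0)  # one 'pretraining' step
print(mlp_params[0].get_value())      # the MLP sees the updated weights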
Esempio n. 35
0
def RelationStackMaker(chips, params, graph=False, weighted=False, batched=False):
    if batched:
        emb_input = T.itensor3('emb_input')
        entities_tv = [T.fmatrix('enidx_'+str(i)).astype(theano.config.floatX) for i in range(params['num_entity'])]
        if graph:
            if weighted:
                masks = T.ftensor4('child_mask')
            else:
                masks = T.ftensor3('child_mask')
        else:
            masks = T.fmatrix('batch_mask')
    else:
        emb_input = T.imatrix('emb_input')
        entities_tv = [T.fvector('enidx_'+str(i)).astype(theano.config.floatX) for i in range(params['num_entity'])]
        if graph:
            if weighted:
                masks = T.ftensor3('child_mask')
            else:
                masks = T.fmatrix('child_mask')
        else:
            masks = None
    #print masks, type(masks), masks.ndim
    current_chip = Start(params['voc_size'], emb_input)  
    print ('\n', 'Building Stack now', '\n', 'Start: ', params['voc_size'], 'out_tv dim:', current_chip.output_tv.ndim)
    instantiated_chips = stackLayers(chips, current_chip, params, entity_size=params['num_entity'])
    regularizable_params = computeLayers(instantiated_chips, current_chip, params, entities_input=entities_tv, mask=masks)
    ### Debug use: Get the attention co-efficiency and visualize. ###
    for c in instantiated_chips:
        if c[1].endswith('Entity_Att'):
            assert hasattr(c[0], 'att_wt_arry')
            assert hasattr(c[0], 'entity_tvs')
            attention_weights = c[0].att_wt_arry
            entity_tvs = c[0].entity_tvs
    
    current_chip = instantiated_chips[-1][0]
    if current_chip.output_tv.ndim == 2:
        pred_y = current_chip.output_tv #T.argmax(current_chip.output_tv, axis=1)
    else:
        pred_y = current_chip.output_tv #T.argmax(current_chip.output_tv) #, axis=1)
    gold_y = (current_chip.gold_y
            if hasattr(current_chip, 'gold_y')
            else None)
    # Show all parameters that would be needed in this system
    params_needed = calculate_params_needed(instantiated_chips)
    print ("Parameters Needed", params_needed)
    for k in params_needed:
        assert k in params, k
        print (k, params[k])
    assert hasattr(current_chip, 'score')
    cost = current_chip.score #/ params['nsentences'] 
    cost_arr = [cost]
    for layer in instantiated_chips[:-1]:
        if hasattr(layer[0], 'score'):
            print (layer[1])
            cost += params['cost_coef'] * layer[0].score
            cost_arr.append(params['cost_coef'] * layer[0].score)

    grads = T.grad(cost,
            wrt=regularizable_params)
            #[params[k] for k in params if (hasattr(params[k], 'is_regularizable') and params[k].is_regularizable)])
    print ('Regularizable parameters:')
    for k, v in params.items():
        if hasattr(v, 'is_regularizable'):
            print (k, v, v.is_regularizable)
    if graph or batched:
        #return (emb_input, masks, entities_tv, attention_weights, entity_tvs, gold_y, pred_y, cost, grads, regularizable_params) 
        return (emb_input, masks, entities_tv, gold_y, pred_y, cost, grads, regularizable_params) 
    else: 
        return (emb_input, entities_tv, gold_y, pred_y, cost, grads, regularizable_params) 
Esempio n. 36
0
def main(args):

    theano.config.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_disall-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])
    loadType = int(args['loadType'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])
    typeActivFunc = args['typeActivFunc']

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print(str(windows))

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 200
    y2s_dim = 200
    z2s_dim = 200
    lr_iterations = {0: lr}

    target_dim = k  # As different appliances are separated into theta_mu1, theta_mu2, etc., each one is created from k different Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_redd(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25,
        typeLoad=loadType,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    print(Xtrain.shape, Xval.shape, Xtest.shape, ytrain.shape, yval.shape,
          ytest.shape)
    print("Mean ", reader.meanTraining)
    print("Std", reader.stdTraining)
    instancesPlot = {0: [4]}

    train_data = Redd(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Redd(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Redd(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit=typeActivFunc,
                                    init_W=init_W,
                                    init_b=init_b)

    if (y_dim > 1):
        theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    if (y_dim > 2):
        theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    if (y_dim > 3):
        theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    if (y_dim > 1):
        theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    if (y_dim > 2):
        theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    if (y_dim > 3):
        theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    if (y_dim > 1):
        coeff2 = FullyConnectedLayer(name='coeff2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    if (y_dim > 2):
        coeff3 = FullyConnectedLayer(name='coeff3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    if (y_dim > 3):
        coeff4 = FullyConnectedLayer(name='coeff4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    dynamicOutput = [None, None, None, None, None, None, None, None]
    if (y_dim > 1):
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None]  # mu, sig, coeff and pred
    if (y_dim > 2):
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn(x_t, y_t, scheduleSamplingMask, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  # in the original code this is Gaussian; the GMM is used for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        ## prediction 1
        y_pred = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred = T.concatenate([y_pred, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred = T.concatenate([y_pred, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred = T.concatenate([y_pred, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        #s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)

        if (scheduleSamplingMask == 1):
            s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        else:
            y_t_aux = y_1.fprop([y_pred], params)
            s_t = rnn.fprop([[x_t, z_1_t, y_t_aux], [s_tm1]], params)

        return (s_t, ) + tupleMulti

        #corr_temp, binary_temp

    (otherResults, updates) = theano.scan(
        fn=inner_fn,
        sequences=[x_1_temp, y_1_temp, scheduleSamplingMask],
        outputs_info=output_fn)  #[s_0, (None)]

    s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,\
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = otherResults[:9]
    restResults = otherResults[9:]

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    # with axis=None, T.mean averages over all elements
    mse1 = T.mean((y_pred1_temp -
                   y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))) ** 2)
    mae1 = T.mean(T.abs_(y_pred1_temp -
                         y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    ddoutMSEA = []
    ddoutYpreds = [y_pred1_temp]
    indexSepDynamic = 7  # nll, recon, kl, totalMSE, totalMAE, mse1, mae1

    totaMAE = mae1.copy()
    totaMSE = mse1.copy()
    argsGMM = ()  # extra GMM parameters for output dims 2..4, filled below
    mse2 = T.zeros((1, ))
    mae2 = T.zeros((1, ))
    mse3 = T.zeros((1, ))
    mae3 = T.zeros((1, ))
    mse4 = T.zeros((1, ))
    mae4 = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[:4]
        restResults = restResults[4:]
        theta_mu2_temp.name = 'theta_mu2'
        theta_sig2_temp.name = 'theta_sig2'
        coeff2_temp.name = 'coeff2'
        y_pred2_temp.name = 'disaggregation2'
        mse2 = T.mean((y_pred2_temp -
                       y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))) ** 2)
        mae2 = T.mean(T.abs_(y_pred2_temp -
                             y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
        mse2.name = 'mse2'
        mae2.name = 'mae2'

        theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu2_in, theta_sig2_in, coeff2_in)

        ddoutMSEA = ddoutMSEA + [mse2, mae2]
        ddoutYpreds = ddoutYpreds + [y_pred2_temp]
        #totaMSE+=mse2
        indexSepDynamic += 2

    if (y_dim > 2):
        theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[:4]
        restResults = restResults[4:]
        theta_mu3_temp.name = 'theta_mu3'
        theta_sig3_temp.name = 'theta_sig3'
        coeff3_temp.name = 'coeff3'
        y_pred3_temp.name = 'disaggregation3'
        mse3 = T.mean((y_pred3_temp -
                       y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))) ** 2)
        mae3 = T.mean(T.abs_(y_pred3_temp -
                             y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
        mse3.name = 'mse3'
        mae3.name = 'mae3'

        theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in)
        ddoutMSEA = ddoutMSEA + [mse3, mae3]
        ddoutYpreds = ddoutYpreds + [y_pred3_temp]
        #totaMSE+=mse3
        indexSepDynamic += 2

    if (y_dim > 3):
        theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[:4]
        restResults = restResults[4:]
        theta_mu4_temp.name = 'theta_mu4'
        theta_sig4_temp.name = 'theta_sig4'
        coeff4_temp.name = 'coeff4'
        y_pred4_temp.name = 'disaggregation4'
        mse4 = T.mean((y_pred4_temp -
                       y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))) ** 2)
        mae4 = T.mean(T.abs_(y_pred4_temp -
                             y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
        mse4.name = 'mse4'
        mae4.name = 'mae4'

        theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in)
        ddoutMSEA = ddoutMSEA + [mse4, mae4]
        ddoutYpreds = ddoutYpreds + [y_pred4_temp]
        #totaMSE+=mse4
        indexSepDynamic += 2

    totaMSE = (mse1 + mse2 + mse3 + mse4) / y_dim
    totaMSE.name = 'mse'

    totaMAE = (mae1 + mae2 + mae3 + mae4) / y_dim
    totaMAE.name = 'mae'

    recon = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    if (flgMSE == 1):
        nll_upper_bound = recon_term + kl_term + totaMSE
    else:
        nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, totaMSE, totaMAE, mse1,
                mae1
            ] + ddoutMSEA + ddoutYpreds,
            indexSep=indexSepDynamic,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, totaMSE, totaMAE],
        n_steps=n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp)

    mainloop.run()
    '''
    data=Iterator(test_data, batch_size)

    test_fn = theano.function(inputs=[x, y],#[x, y],
                              #givens={x:Xtest},
                              #on_unused_input='ignore',
                              #z=( ,200,1)
                              allow_input_downcast=True,
                              outputs=[prediction_val, recon_term_val, totaMSE_val, totaMAE_val, 
                                        mse1_val,mse2_val,mse3_val,mse4_val,
                                        mae1_val,mae2_val,mae3_val,mae4_val, #unnormalized mae and mse 16 items#
                                        relErr1_val,relErr2_val,relErr3_val,relErr4_val,
                                        propAssigned1_val, propAssigned2_val,propAssigned3_val,propAssigned4_val],
                              updates=updates_val
                              )
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
      outputGeneration = test_fn(batch[0], batch[2])
      testOutput.append(outputGeneration[1:12]) #before 36 including unnormalized metrics
      testMetrics2.append(outputGeneration[12:])
      #{0:[4,20], 2:[5,10]} 
      #if (numBatchTest==0):

      plt.figure(1)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(2)
      plt.plot(np.transpose(batch[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(3)
      plt.plot(np.transpose(batch[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
      plt.clf()
      numBatchTest+=1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)

    testOutput[:,19:] = 1000 * testOutput[:,19:] # kW to watts
    recon_test = testOutput[:, 0].mean()
    mse_test =  testOutput[:, 1].mean()
    mae_test =  testOutput[:, 2].mean()
    mse1_test =  testOutput[:, 3].mean()
    mae1_test =  testOutput[:, 7].mean()
    mse2_test =  testOutput[:, 4].mean()
    mae2_test =  testOutput[:, 8].mean()
    mse3_test =  testOutput[:, 5].mean()
    mae3_test =  testOutput[:, 9].mean()
    mse4_test =  testOutput[:, 6].mean()
    mae4_test =  testOutput[:, 10].mean()


    print(testOutput[:,3:11].mean(),testOutput[:,11:19].mean())

    relErr1_test = testMetrics2[:,0].mean()
    relErr2_test = testMetrics2[:,1].mean()
    relErr3_test = testMetrics2[:,2].mean()
    relErr4_test = testMetrics2[:,3].mean()

    propAssigned1_test = testMetrics2[:, 8].mean()
    propAssigned2_test = testMetrics2[:, 9].mean()
    propAssigned3_test = testMetrics2[:, 10].mean()
    propAssigned4_test = testMetrics2[:, 11].mean()
    '''

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n\n")

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write("epoch,log,kl,mse1,mse2,mse3,mse4,mae1,mae2,mae3,mae4\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        e, f, g, n, p, q = 0, 0, 0, 0, 0, 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse1'][i]
        m = mainloop.trainlog.monitor['mae1'][i]

        if (y_dim > 1):
            e = mainloop.trainlog.monitor['mse2'][i]
            n = mainloop.trainlog.monitor['mae2'][i]
        if (y_dim > 2):
            f = mainloop.trainlog.monitor['mse3'][i]
            p = mainloop.trainlog.monitor['mae3'][i]
        if (y_dim > 3):
            g = mainloop.trainlog.monitor['mse4'][i]
            q = mainloop.trainlog.monitor['mae4'][i]

        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, d, e, f, g, m, n, p, q))
    fLog.close()
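
# For reference, the KLGaussianGaussian term used above is the analytic
# KL divergence between two diagonal Gaussians. A minimal NumPy sketch,
# assuming `sig` holds standard deviations (illustrative only, not part
# of the original code):
import numpy as np

def kl_gaussian_gaussian(mu_q, sig_q, mu_p, sig_p):
    # KL(q || p) = log(sig_p / sig_q)
    #              + (sig_q^2 + (mu_q - mu_p)^2) / (2 * sig_p^2) - 1/2,
    # summed over the last axis
    return np.sum(np.log(sig_p / sig_q)
                  + (sig_q ** 2 + (mu_q - mu_p) ** 2) / (2.0 * sig_p ** 2)
                  - 0.5, axis=-1)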
Esempio n. 37
0
    def __init__(
        self, model, dataset, train, percept_preprocessor, action_map,
        base_dir, model_pickle_path, save_rate=100,
        epsilon=1, epsilon_anneal_frames=1000000, epsilon_end=0.1,
        discount_factor=0.8, k=4,
    ):
        # Validate and store parameters
        assert(model)
        self.model = model

        assert(dataset)
        self.dataset = dataset

        assert(train)
        self.train = train

        assert(percept_preprocessor)
        self.percept_preprocessor = percept_preprocessor

        assert(action_map and type(action_map) == dict)
        self.action_map = action_map

        assert(os.path.exists(base_dir))
        self.base_dir = base_dir

        assert(os.path.exists(os.path.dirname(model_pickle_path)))
        self.model_pickle_path = model_pickle_path

        assert(save_rate > 0)
        self.save_rate = save_rate

        assert(discount_factor > 0)
        if (discount_factor >= 1):
            log.warning("Discount factor >= 1, learning may diverge.")
        self.discount_factor = discount_factor

        assert(epsilon >= 0 and epsilon <= 1)
        self.epsilon = epsilon

        assert(epsilon_anneal_frames >= 0)
        self.epsilon_anneal_frames = epsilon_anneal_frames

        assert(epsilon_end >= 0)
        self.epsilon_end = epsilon_end

        self.epsilon_annealing_rate = 0
        if self.epsilon_anneal_frames > 0:
            self.epsilon_annealing_rate = float(self.epsilon - self.epsilon_end)
            self.epsilon_annealing_rate /= float(self.epsilon_anneal_frames)
        log.info('Epsilon annealing rate: %0.10f' % self.epsilon_annealing_rate)

        assert(k > 0)
        self.k = k

        self.train.dataset = self

        # How many actual actions does RL-Glue/ALE support? Can we query the available actions
        # for a given game and make this part more efficient? Using 20 for now.
        self.action_log = {i: 0 for i in range(20)}

        # Init helper member variables
        self.action_count = 0
        self.reward = 0  # Accumulator for reward values

        # Init frame memory
        self.frame_memory = col.deque(maxlen=self.k)

        # Compile action function
        log.info('BASIC AGENT: Compiling action function...')
        phi_eq = T.tensor4()
        q_eq = self.model.fprop(phi_eq)
        action_eq = T.argmax(q_eq, axis=1)
        self.action_func = function([phi_eq], action_eq)
        log.info('Done.')

        # Compile max q
        log.info('BASIC AGENT: Compiling max q function...')
        max_action_eq = T.max(q_eq, axis=1)
        self.max_q_func = function([phi_eq], max_action_eq)
        log.info('Done.')

        # Compile Q-learning target function
        log.info('BASIC AGENT: Compiling target y function...')
        r = T.fvector('r')
        gamma = T.fscalar('gamma')
        y = r + gamma*max_action_eq
        self.y_func = function([r, gamma, phi_eq], y)
        log.info('Done.')
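
# For reference, `y_func` computes the one-step Q-learning target
# y_i = r_i + gamma * max_a Q(s'_i, a). A minimal NumPy sketch of the
# same computation (illustrative, not part of this agent):
import numpy as np

def q_target(rewards, gamma, q_next):
    # q_next: array of shape (batch, n_actions) with Q-values for the
    # successor states s'
    return rewards + gamma * q_next.max(axis=1)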
Esempio n. 38
0
def train_mlprnn(weight_path=sys.argv[1],
                 file_name1=sys.argv[2],
                 L1_reg=0.0,
                 L2_reg=0.0000,
                 path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    x3 = T.fvector('x3')
    ht1 = T.fvector('ht1')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP_RNN(rng=rng,
                         input1=x1,
                         input2=x2,
                         input3=x3,
                         initial_hidden=ht1,
                         n_in=vocab_size,
                         fea_dim=int(sys.argv[3]),
                         context_size=2,
                         n_hidden=int(sys.argv[4]),
                         n_out=vocab_size)

    hidden_state = theano.shared(
        numpy.empty((int(sys.argv[4]), ), dtype='float32'))

    cost = classifier.cost(y)

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = 0.05, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           x3: test_set_x3,
                                           ht1: hidden_state,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               x3: valid_set_x3,
                                               ht1: hidden_state,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.RNNhiddenlayer.output], updates = updates, \
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           x3: train_set_x3,
                                           ht1: hidden_state,
                                           y: train_set_y})
    f = h5py.File(weight_path + file_name1, "r")
    for i in xrange(0, classifier.no_of_layers, 2):
        path_modified = '/' + 'MLP' + str(2) + '/layer' + str(i / 2)
        if i == 4:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified +
                                                              "/W"].value,
                                                            dtype='float32'),
                                              borrow=True)
        else:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified +
                                                              "/W"].value,
                                                            dtype='float32'),
                                              borrow=True)
            classifier.MLPparams[i + 1].set_value(numpy.asarray(
                f[path_modified + "/b"].value, dtype='float32'),
                                                  borrow=True)
    f.close()

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x3.set_value(numpy.asarray(temp_features3,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = train_model(
                    numpy.array(learnrate_schedular.get_rate(),
                                dtype='float32'))
                hidden_state.set_value(numpy.asarray(out[1], dtype='float32'),
                                       borrow=True)

            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_x3.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # also marks the end of the training phase for this epoch
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x3.set_value(numpy.asarray(temp_features3,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = validate_model()
                #error_rate = out[0]
                likelihoods = out[0]
                #valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        #this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # also marks the end of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features3 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            temp_features3[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x3.set_value(numpy.asarray(temp_features3,
                                                dtype='float32'),
                                  borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'),
                                 borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                           %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    print numpy.sum(log_likelihood)
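
# The train/valid/test loops above build one-hot inputs by hand; a small
# helper capturing the same pattern (illustrative, not part of the
# original code):
import numpy

def one_hot(index, size):
    # dense one-hot vector of length `size` with a single 1 at `index`
    vec = numpy.zeros(size, dtype='float32')
    vec[index] = 1.0
    return vec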
Esempio n. 39
0
    def __theano_build__(self):
        E, W, U, V = self.E, self.W, self.U, self.V

        x = T.fvector('x')
        y = T.fvector('y')

        # initial hidden vector
        initial_hidden_vector = np.zeros(self.hidden_dim)

        def calculate(x, h_t_prev, E, W, U, V):
            x_t = T.dot(E, x)

            z_t = T.nnet.sigmoid(T.dot(W[0], x_t) + U[0].dot(h_t_prev))
            r_t = T.nnet.sigmoid(T.dot(W[1], x_t) + U[1].dot(h_t_prev))
            _h_t = T.tanh(T.dot(W[2], x_t) + U[2].dot(h_t_prev * r_t))
            h_t = (T.ones_like(z_t) - z_t) * h_t_prev + z_t * _h_t

            # softmax returns a matrix with one row only -- the row we want
            o_t = T.nnet.softmax(V.dot(h_t))[0][0]

            # return the updated hidden state so it carries through the scan
            return [o_t, h_t]

        [o, h] , updates = theano.scan(
            calculate,
            # outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
            outputs_info=[None, initial_hidden_vector],
            non_sequences = [E, W, U, V],
            sequences=x,
        )

        prediction = T.argmax(o, axis=0)
        prediction_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        # Total cost (Regularization can be done here)
        cost = prediction_error

        # gradients
        dE = T.grad(cost, E)
        dW = T.grad(cost, W)
        dU = T.grad(cost, U)
        dV = T.grad(cost, V)

        # assign functions
        self.predict = theano.function([x], o)
        self.prediction_class = theano.function([x], prediction)
        self.c_error = theano.function([x,y], cost)
        self.bptt = theano.function([x, y], [dW, dU, dV])

        # SGD parameters
        learning_rate = T.scalar('learning_rate')
        decay = T.scalar('decay')

        # rmsprop cache updates
        mE = decay * self.mE + (1 - decay) * dE ** 2
        mU = decay * self.mU + (1 - decay) * dU ** 2
        mW = decay * self.mW + (1 - decay) * dW ** 2
        mV = decay * self.mV + (1 - decay) * dV ** 2

        self.sgd_step = theano.function(
                [x, y, learning_rate, theano.In(decay, value=0.9)],
                [],
                updates = [
                            (E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
                            (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
                            (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
                            (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
                            (self.mE, mE),
                            (self.mU, mU),
                            (self.mW, mW),
                            (self.mV, mV)
                ]
        )
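
# The updates above follow the standard rmsprop rule. A minimal NumPy
# sketch of one parameter update (illustrative; `cache` must persist
# across calls, as self.mW etc. do above):
import numpy as np

def rmsprop_step(param, grad, cache, lr=0.01, decay=0.9, eps=1e-6):
    # exponential moving average of squared gradients
    cache = decay * cache + (1.0 - decay) * grad ** 2
    # scale the step by the root of the cache
    param = param - lr * grad / np.sqrt(cache + eps)
    return param, cache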
Esempio n. 40
0
 #Pi model variables:
 if model.network_type=="pi":
     input_b_var = T.tensor3('inputs_b')
     mask_train=T.vector('mask_train')
     unsup_weight_var = T.scalar('unsup_weight')
 elif model.network_type=="tempens":
 #tempens model variables:
     z_target_var = T.matrix('z_targets')
     mask_train = T.vector('mask_train')
     unsup_weight_var = T.scalar('unsup_weight')
 
 learning_rate_var = T.scalar('learning_rate')
 adam_beta1_var = T.scalar('adam_beta1')
   
 #negative loss
 negative_loss_alpha=T.fvector("negative_loss_alpha")
 negative_loss_lamda=T.fscalar("negative_loss_lamda") 
 
 #Keywords-attention
 input_root=T.fmatrix("input_root")
 input_e1=T.fmatrix("input_e1")
 input_e2=T.fmatrix("input_e2")
 
 """
 2.
 Bulit GRU network
 ADAM
 """
 gru_network,l_in,l_mask,l_gru_forward,l_split_cnn=model.bulit_gru(input_var,mask_var,input_root,input_e1,input_e2)
 
 # mask_train_input: entries equal to "1" are passed through (labeled); entries equal to "0" are masked out.
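 
 # A hedged sketch (not from this file) of the Gaussian ramp-up schedule
 # commonly used for `unsup_weight` in Pi/tempens training; the names and
 # the default ramp-up length are assumptions:
 import numpy as np
 
 def unsup_weight_rampup(epoch, rampup_length=80, w_max=1.0):
     # w(t) = w_max * exp(-5 * (1 - t/T)^2) during ramp-up, then w_max
     if epoch >= rampup_length:
         return w_max
     p = 1.0 - float(epoch) / rampup_length
     return w_max * float(np.exp(-5.0 * p * p))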
Esempio n. 41
0
def policy_network(state):
    input_state = InputLayer(input_var=state, shape=(None, n_input))

    dense_1 = DenseLayer(input_state, num_units=n_input, nonlinearity=tanh)

    dense_2 = DenseLayer(dense_1, num_units=n_input, nonlinearity=tanh)

    probs = DenseLayer(dense_2, num_units=n_output, nonlinearity=softmax)

    return probs


X_state = T.fmatrix()
X_action = T.bvector()
X_reward = T.fvector()

X_action_hot = to_one_hot(X_action, n_output)

prob_values = policy_network(X_state)

policy_ = get_output(prob_values)
policy = theano.function(inputs=[X_state],
                         outputs=policy_,
                         allow_input_downcast=True)

loss = categorical_crossentropy(policy_, X_action_hot) * X_reward
loss = loss.mean()

params = get_all_params(prob_values)
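
# The loss above is the REINFORCE objective: the cross-entropy of the
# taken action, weighted by the observed return. A NumPy sketch of the
# same per-batch loss (illustrative names, not part of the original):
import numpy as np

def reinforce_loss(probs, actions, rewards):
    # probs: (batch, n_actions) policy outputs; actions: int indices
    picked = probs[np.arange(len(actions)), actions]
    # -log pi(a_t | s_t) * R_t, averaged over the batch
    return np.mean(-np.log(picked) * rewards)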
Esempio n. 42
0
    def test_cudnn_softmax_grad(self):
        if not cuda.dnn.dnn_available():
            raise SkipTest(cuda.dnn.dnn_available.msg)

        def cmp(n, m, f, f_gpu):
            data = numpy.arange(n * m, dtype='float32').reshape(n, m)
            gdata = numpy.asarray(data)[:, :, None, None]
            out = f(data)
            gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
            assert numpy.allclose(out, gout), numpy.absolute(out - gout)

        x = T.matrix('x', 'float32')
        x_gpu = T.tensor4('x_gpu', 'float32')
        f_z = T.nnet.softmax
        f_gpu = theano.sandbox.cuda.dnn.GpuDnnSoftmax('bc01', 'accurate',
                                                      'channel')

        # Verify the grad operation
        dims = (2, 3, 4, 5)
        gdata = numpy.arange(numpy.product(dims),
                             dtype='float32').reshape(dims)
        T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

        def check_types(graph, graph_gpu):
            self._check_types(graph, graph_gpu, -1, type(f_z),
                              theano.sandbox.cuda.dnn.GpuDnnSoftmax)

        def check_types_opt(graph, graph_gpu):
            assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
            assert len([
                n for n in graph_gpu.maker.fgraph.toposort()
                if isinstance(n.op, theano.sandbox.cuda.dnn.GpuDnnSoftmax)
            ]) == 1

        # Verify that the CPU and GPU implementations return the same results
        # up to a tolerance.
        self._test_softmax(x, x_gpu, f_z, f_gpu, cmp, mode_with_gpu,
                           check_types)

        mode_w_cudnn = mode_with_gpu.including("cudnn")
        self._test_softmax(x, x, f_z, f_z, self._cmp, mode_w_cudnn,
                           check_types_opt)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
        # applied when cudnn is required
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
        ]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
        # applied when cudnn is excluded or not available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
        ]) == 0)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 1)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
        # crash with manual graph
        y = T.fvector('y')
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
        ]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)
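
# The asserts above repeat one pattern: counting ops of a given type in
# a compiled graph. A small helper capturing it (illustrative name):
def count_ops(fn, op_type):
    # number of nodes of `op_type` in the compiled function's graph
    return len([node for node in fn.maker.fgraph.toposort()
                if isinstance(node.op, op_type)])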
Esempio n. 43
0
def MultitaskRelationStackMaker(Shared, Classifiers, params, num_tasks, graph=False, weighted=False, batched=False):
    if batched:
        emb_inputs = [T.itensor3('emb_input_'+str(i)) for i in range(num_tasks)]
        entities_tv = [[T.fmatrix('enidx_'+str(j)+'_t_'+str(i))
                    for j in range(params['num_entity_d'+str(i)])] 
                    for i in range(num_tasks)]
        if graph:
            if weighted:
                masks = [T.ftensor4('child_mask_d'+str(i)) for i in range(num_tasks)]
            else:
                masks = [T.ftensor3('child_mask_d'+str(i)) for i in range(num_tasks)]
        else:
            masks = [T.fmatrix('batch_mask_d'+str(i)) for i in range(num_tasks)]
    else:
        emb_inputs = [T.imatrix('emb_input_'+str(i)) for i in range(num_tasks)]
        entities_tv = [[T.fvector('enidx_'+str(j)+'_t_'+str(i))
                    for j in range(params['num_entity_d'+str(i)])] 
                    for i in range(num_tasks)]
        if graph:
            if weighted:
                masks = [T.ftensor3('child_mask_d'+str(i)) for i in range(num_tasks)]
            else:
                masks = [T.fmatrix('child_mask_d'+str(i)) for i in range(num_tasks)]
        else:
            masks = None
    current_chip = Start(params['voc_size'], None) 
    instantiated_chips = stackLayers(Shared, current_chip, params)
    print ('Building Classifiers for tasks, input dim:', current_chip.out_dim)
    pred_ys = []
    gold_ys = []
    costs_arr = []
    grads_arr = []
    regularizable_param_arr = []
    global_regularizable_params = []
    for i, clsfier in enumerate(Classifiers):
        #feature_size = len(params['features2idx_dicts'][i]) #params['feature_size_'+str(i)]
        current_chip = instantiated_chips[-1][0]
        decoder_chips = stackLayers(clsfier, current_chip, params, entity_size=params['num_entity_d'+str(i)])
        ## Note: this implementation only uses the LSTM hidden layer
        temp_chips = instantiated_chips + decoder_chips
        init_chip = Start(params['voc_size'], emb_inputs[i])
        if batched:
            regularizable_params = computeLayers(temp_chips, init_chip, params, entities_input=entities_tv[i], mask=masks[i])
        else:
            regularizable_params = computeLayers(temp_chips, init_chip, params, entities_input=entities_tv[i])
        global_regularizable_params.extend(regularizable_params)
        regularizable_param_arr.append(regularizable_params)
        #task_chips.append(temp_chips)
        current_chip = temp_chips[-1][0]
        if current_chip.output_tv.ndim == 2:
            pred_ys.append(current_chip.output_tv) #T.argmax(current_chip.output_tv, axis=1))
        else:
            pred_ys.append(current_chip.output_tv) #T.argmax(current_chip.output_tv, axis=0))
        gold_ys.append(current_chip.gold_y)
        assert hasattr(current_chip, 'score')
        cost = current_chip.score 
        costs_arr.append(cost) #/params['nsentences']
        grads_arr.append( T.grad(cost,
            wrt=regularizable_params) )
        # Show all parameters that would be needed in this system
        params_needed = ['voc_size', 'feature_size_'+str(i)]
        params_needed += calculate_params_needed(temp_chips)
    #cost = sum(costs_arr)
    #global_regularizable_params = list(set(global_regularizable_params))
    #grads = T.grad(cost,
    #        wrt=global_regularizable_params)
    print ('The joint model regularizable parameters:')
    for k, v in params.items():
        if hasattr(v, 'is_regularizable'):
            print (k, v, v.is_regularizable)
    #return (emb_inputs, entities_tv, gold_ys, pred_ys, costs_arr, cost, grads_arr, grads, regularizable_param_arr, global_regularizable_params)
    if batched or graph:
        return (emb_inputs, entities_tv, masks, gold_ys, pred_ys, costs_arr, grads_arr, regularizable_param_arr)
    else:
        return (emb_inputs, entities_tv, gold_ys, pred_ys, costs_arr, grads_arr, regularizable_param_arr)
Esempio n. 44
0
    def fit(self):

        #if self.batch_size is not None:

        index = T.lscalar('index')

        # create shared data-sets in case of mini-batch
        train_X = self.shared_dataset(self.X_dat)
        train_y = self.shared_dataset(self.y_dat)
        test_X = self.shared_dataset(self.X_test)

        if self.batch_size is not None:

            n_train_batches = train_X.get_value(
                borrow=True).shape[0] / self.batch_size
            n_test_batches = test_X.get_value(
                borrow=True).shape[0] / self.batch_size

        X = T.matrix()

        if self.linear_regression:

            Y = T.fvector()

        else:

            Y = T.matrix()

        if self.linear_regression:

            self.w = self.initialize_weights(
                (self.X_dat.shape[1]), self.X_dat.shape[1], 1,
                self.weights_initialization
            )  # initialize weights for the parameters ( linear regression )

            if self.add_bias:

                self.b = theano.shared(
                    np.asarray(0, dtype=theano.config.floatX)
                )  # initialize bias to zero ( linear regression -- a single value )

                py_x = T.dot(
                    X,
                    self.w) + self.b  # get predictions for linear regression

            else:

                py_x = T.dot(X, self.w)

        else:

            self.w = self.initialize_weights(
                (self.X_dat.shape[1], self.y_dat.shape[1]),
                self.X_dat.shape[1], self.y_dat.shape[1],
                self.weights_initialization
            )  # initialize weights for the parameters ( logistic regression )

            if self.add_bias:

                self.b = theano.shared(
                    np.zeros((self.y_dat.shape[1], ),
                             dtype=theano.config.floatX)
                )  # initialize bias to zeros ( logistic regression -- a numpy array )

                py_x = T.nnet.softmax(T.dot(X, self.w) +
                                      self.b)  # get probability predictions

            else:

                py_x = T.nnet.softmax(T.dot(X, self.w))

        cost = T.mean(
            self.objectives(py_x, Y, self.objective,
                            self.X_dat.shape[0]))  # objective function

        if self.L1 > 0.0 or self.L2 > 0.0:  # L1, L2 regularization [ when both used then 'elastic-net' ]

            if self.add_bias:

                reg_param_L1 = T.sum(abs(self.w)) + T.sum(
                    abs(self.b))  # L1 regularization

                reg_param_L2 = T.sum(T.sqr(self.w)) + T.sum(T.sqr(
                    self.b))  # L2 regularization

                cost = cost + self.L1 * reg_param_L1 + self.L2 * reg_param_L2

            else:

                reg_param_L1 = T.sum(abs(self.w))  # L1 regularization

                reg_param_L2 = T.sum(T.sqr(self.w))  # L2 regularization

                cost = cost + self.L1 * reg_param_L1 + self.L2 * reg_param_L2

        if self.add_bias:

            Params = [self.w, self.b]

        else:

            Params = [self.w]

        if self.batch_size is None:

            train = theano.function(
                inputs=[index],
                outputs=cost,
                updates=Optimizers_update(cost, Params, self.learning_rate,
                                          self.optimizer).run_optimizer(),
                givens={
                    X: train_X[0:index],
                    Y: train_y[0:index]
                },
                allow_input_downcast=True
            )  # Compile [ call external class Optimizers_update ]

            predict_valid = theano.function(inputs=[index],
                                            outputs=py_x,
                                            givens={X: test_X[0:index]},
                                            allow_input_downcast=True)

        else:

            train = theano.function(
                inputs=[index],
                outputs=cost,
                updates=Optimizers_update(cost, Params, self.learning_rate,
                                          self.optimizer).run_optimizer(),
                givens={
                    X:
                    train_X[index * self.batch_size:(index + 1) *
                            self.batch_size],
                    Y:
                    train_y[index * self.batch_size:(index + 1) *
                            self.batch_size]
                },
                allow_input_downcast=True)

            predict_valid = theano.function(
                inputs=[index],
                outputs=py_x,
                givens={
                    X:
                    test_X[index * self.batch_size:(index + 1) *
                           self.batch_size]
                },
                allow_input_downcast=True
            )  # prediction function for validation set

        self.predict = theano.function(inputs=[X],
                                       outputs=py_x)  # predictions function

        early_stopping = []  # early stopping

        consecutive_increases_OR_decreases = 0

        for i in range(self.iters):

            if self.batch_size is None:

                cost_train = train(self.X_dat.shape[0])

                if self.custom_eval is None:

                    cost_valid = self.evaluate_early_stopping(
                        self.Y_test, self.predict(self.X_test),
                        self.linear_regression)

                else:

                    cost_valid = self.custom_eval[0](self.Y_test,
                                                     self.predict(self.X_test))

            else:

                for batch_index_train in range(n_train_batches):

                    cost_train = train(batch_index_train)

                if self.custom_eval is None:

                    cost_valid = np.mean([
                        self.evaluate_early_stopping(
                            self.Y_test[batch_index_test *
                                        self.batch_size:(batch_index_test +
                                                         1) * self.batch_size],
                            predict_valid(batch_index_test),
                            self.linear_regression)
                        for batch_index_test in range(n_test_batches)
                    ])

                else:

                    cost_valid = np.mean([
                        self.custom_eval[0](
                            self.Y_test[batch_index_test *
                                        self.batch_size:(batch_index_test +
                                                         1) * self.batch_size],
                            predict_valid(batch_index_test))
                        for batch_index_test in range(n_test_batches)
                    ])

            try:

                if self.custom_eval is None:

                    print 'iter', str(i + 1), '  train_loss ', str(
                        np.round(cost_train, 3)), '  test_loss ', str(
                            np.round(cost_valid, 3))

                else:

                    print 'iter', str(i + 1), '  train_loss ', str(
                        np.round(
                            cost_train,
                            3)), '  test_' + self.custom_eval[1], ' ', str(
                                np.round(cost_valid, 3))

            except ValueError:

                pass

            early_stopping.append(cost_valid)

            if not self.maximize:

                change_sign = len(early_stopping) >= 2 and early_stopping[
                    -1] > early_stopping[-2]
                increase = 'increases'

            else:

                change_sign = len(early_stopping) >= 2 and early_stopping[
                    -1] < early_stopping[-2]
                decrease = 'decreases'

            if change_sign:

                consecutive_increases_OR_decreases += 1
            else:
                consecutive_increases_OR_decreases = 0

            if (consecutive_increases_OR_decreases >=
                    self.early_stopping_rounds):

                if not self.maximize:

                    print 'regression stopped after ', str(
                        consecutive_increases_OR_decreases
                    ), ' consecutive ', increase, ' of loss and ', str(
                        i + 1), ' Epochs'

                    break

                else:
                    print 'regression stopped after ', str(
                        consecutive_increases_OR_decreases
                    ), ' consecutive ', decrease, ' of loss and ', str(
                        i + 1), ' Epochs'

                    break

            if np.isinf(cost_valid) or np.isnan(cost_valid):

                print 'Inf or nan values present after', str(i), 'Epochs'

                break
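
# The loop above stops after `early_stopping_rounds` consecutive
# worsenings of the validation metric. A compact sketch of the same
# patience rule (illustrative, outside the class):
def should_stop(history, patience, maximize=False):
    worse = 0
    for prev, cur in zip(history, history[1:]):
        bad = (cur < prev) if maximize else (cur > prev)
        worse = worse + 1 if bad else 0
        if worse >= patience:
            return True
    return False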
Esempio n. 45
0
def model_eval(get_scores):
    entityPairs = T.fmatrix()
    entities = T.fmatrix()
    relations = T.fmatrix()
    testData_DM = T.imatrix()
    testData_MF = T.imatrix()
    entity_oov_embedding = T.fvector()
    entityPair_oov_embedding = T.fvector()
    normalize_eval = T.iscalar()
    normalize = T.iscalar()
    '''
        for a given (e1, ?) we can partition the filtered candidate e2s into:

        1) e2s such that (e1,e2) was seen in training -> set1_e2 (their pairs are allowedEP_MF)
        2) e2s such that (e1,e2) is unseen but e2 itself is not OOV -> set2_e2
        3) OOV e2s, of which only the count is needed -> set3_e2
    '''
    allowedEP_MF = theano.typed_list.TypedListType(T.ivector)()
    set1_e2 = theano.typed_list.TypedListType(T.ivector)()
    set2_e2 = theano.typed_list.TypedListType(T.ivector)()
    set3_e2 = T.ivector()

    oov_flag_e1_DM = T.ivector()
    oov_flag_e2_DM = T.ivector()
    oov_flags_MF = T.ivector()

    nnet_W1 = T.fmatrix()
    nnet_W2 = T.fmatrix()
    nnet_W3 = T.fmatrix()

    nnet_b1 = T.fvector()
    nnet_b2 = T.fvector()
    nnet_b3 = T.fvector()
    aux_features = T.fmatrix()

    layers = [(nnet_W1, nnet_b1), (nnet_W2, nnet_b2), (nnet_W3, nnet_b3)]

    normalize_DM_W1 = T.fmatrix()
    normalize_DM_b1 = T.fvector()
    normalize_MF_W1 = T.fmatrix()
    normalize_MF_b1 = T.fvector()

    layers_normalize_DM = [(normalize_DM_W1, normalize_DM_b1)]
    layers_normalize_MF = [(normalize_MF_W1, normalize_MF_b1)]

    def MF_fn(testPoint_DM, testPoint_MF, i, oov_flag_e1, oov_flag_e2,
              oov_flag, entityPairs, entities, relations,
              entityPair_oov_embedding, entity_oov_embedding,
              allowed_entityPair, set1_e2, set2_e2, set3_e2, normalize_eval,
              normalize):
        # score of allowed e2s
        scores_MF = T.tanh(
            T.dot(entityPairs[allowed_entityPair[i]],
                  relations[testPoint_MF[0]]))
        #scores_MF = T.dot(entityPairs[allowed_entityPair[i]], relations[testPoint_MF[0]])
        # score for oov (e1,e2)s
        score_oov_MF = T.tanh(
            T.dot(entityPair_oov_embedding, relations[testPoint_MF[0]]))
        #score_oov_MF = T.dot(entityPair_oov_embedding,relations[testPoint_MF[0]])
        score_nonOOV_MF = T.tanh(
            T.dot(entityPairs[testPoint_MF[1]], relations[testPoint_MF[0]]))
        #score_nonOOV_MF = T.dot(entityPairs[testPoint_MF[1]], relations[testPoint_MF[0]])

        # based on whether (e1,e2) is OOV pick the score for the current testPoint
        score_testPoint_MF = T.switch(oov_flag, score_oov_MF, score_nonOOV_MF)

        e1_fact_embedding = T.switch(oov_flag_e1, entity_oov_embedding,
                                     entities[testPoint_DM[0]])
        e2_fact_embedding = T.switch(oov_flag_e2, entity_oov_embedding,
                                     entities[testPoint_DM[2]])

        # score of allowed e2s -> (e1,e2) seen -> e2 seen
        scores_DM = T.tanh(
            T.dot(e1_fact_embedding * entities[set1_e2[i]],
                  relations[testPoint_DM[1]]))
        #scores_DM   = T.dot(e1_fact_embedding*entities[set1_e2[i]], relations[testPoint_DM[1]])
        # score for the test point
        score_testPoint_DM = T.tanh(
            T.dot(relations[testPoint_DM[1]],
                  e1_fact_embedding * e2_fact_embedding))
        #score_testPoint_DM  = T.dot(relations[testPoint_DM[1]], e1_fact_embedding*e2_fact_embedding)
        score_oov_DM = T.tanh(
            T.dot(relations[testPoint_DM[1]],
                  e1_fact_embedding * entity_oov_embedding))
        #score_oov_DM = T.dot(relations[testPoint_DM[1]], e1_fact_embedding*entity_oov_embedding)

        # score for e2s such that (e1,e2) was not seen but e2 is not OOV
        scores_DM_set2 = T.tanh(
            T.dot(e1_fact_embedding * entities[set2_e2[i]],
                  relations[testPoint_DM[1]]))
        #scores_DM_set2 = T.dot(e1_fact_embedding*entities[set2_e2[i]], relations[testPoint_DM[1]])

        #Normalize scores using pretrained weights
        scores_MF = T.switch(
            normalize, get_normalized_scores(layers_normalize_MF, scores_MF),
            scores_MF)
        score_testPoint_MF = T.switch(
            normalize,
            get_normalized_scores(layers_normalize_MF, score_testPoint_MF),
            score_testPoint_MF)
        score_oov_MF = T.switch(
            normalize, get_normalized_scores(layers_normalize_MF,
                                             score_oov_MF), score_oov_MF)
        scores_DM = T.switch(
            normalize, get_normalized_scores(layers_normalize_DM, scores_DM),
            scores_DM)
        score_testPoint_DM = T.switch(
            normalize,
            get_normalized_scores(layers_normalize_DM, score_testPoint_DM),
            score_testPoint_DM)
        score_oov_DM = T.switch(
            normalize, get_normalized_scores(layers_normalize_DM,
                                             score_oov_DM), score_oov_DM)
        scores_DM_set2 = T.switch(
            normalize,
            get_normalized_scores(layers_normalize_DM, scores_DM_set2),
            scores_DM_set2)

        #DM and MF score normalization

        mean_DM, std_DM = get_data_stats(
            T.concatenate([scores_DM, scores_DM_set2,
                           T.stack([score_oov_DM])]))
        scores_DM = T.switch(normalize_eval,
                             normalize_data(scores_DM, mean_DM, std_DM),
                             scores_DM)
        mean_MF, std_MF = get_data_stats(
            T.concatenate([scores_MF, T.stack([score_oov_MF])]))
        scores_MF = T.switch(normalize_eval,
                             normalize_data(scores_MF, mean_MF, std_MF),
                             scores_MF)
        score_oov_DM = T.switch(normalize_eval,
                                normalize_data(score_oov_DM, mean_DM, std_DM),
                                score_oov_DM)
        score_oov_MF = T.switch(normalize_eval,
                                normalize_data(score_oov_MF, mean_MF, std_MF),
                                score_oov_MF)
        score_testPoint_MF = T.switch(normalize_eval,
                                      (score_testPoint_MF - mean_MF) / std_MF,
                                      score_testPoint_MF)
        score_testPoint_DM = T.switch(normalize_eval,
                                      (score_testPoint_DM - mean_DM) / std_DM,
                                      score_testPoint_DM)

        score_testPoint, scores_set1, scores_set2, scores_set3, f1 = get_scores(
            layers, aux_features[i], [scores_MF, scores_DM],
            [T.stack(score_oov_MF), scores_DM_set2],
            [score_oov_MF, score_oov_DM],
            [score_testPoint_MF, score_testPoint_DM])

        rank = 1 + T.sum(scores_set1 > score_testPoint) + T.sum(
            scores_set2 > score_testPoint)
        oov_comparison = score_testPoint < scores_set3
        rank = T.switch(oov_comparison, rank + set3_e2[i], rank)
        rank = T.switch(oov_flag_e2, rank + (set3_e2[i] / 2.0), rank)

        same = T.sum(T.eq(scores_set1, score_testPoint)) + T.sum(
            T.eq(scores_set2, score_testPoint))

        rank += same / 2.0

        same = same / (scores_set1.shape[0] + scores_set2.shape[0] * 1.0)
        '''
        dataStats = T.concatenate([get_data_stats(T.concatenate([scores_set1,scores_set2])),
        get_data_stats(scores_MF),
        get_data_stats(T.concatenate([scores_DM,scores_DM_set2]))])
        oov_scores = T.stack([score_oov_MF, score_oov_DM])
            
        return rank, f1, score_testPoint_DM, score_testPoint_MF, dataStats, oov_scores
        '''
        return rank, f1, score_testPoint_DM, score_testPoint_MF, same * 100.0

    ranks, ignore = theano.scan(MF_fn,
                                non_sequences=[
                                    entityPairs, entities, relations,
                                    entityPair_oov_embedding,
                                    entity_oov_embedding, allowedEP_MF,
                                    set1_e2, set2_e2, set3_e2, normalize_eval,
                                    normalize
                                ],
                                sequences=[
                                    testData_DM, testData_MF,
                                    theano.tensor.arange(testData_DM.shape[0]),
                                    oov_flag_e1_DM, oov_flag_e2_DM,
                                    oov_flags_MF
                                ])
    f = theano.function([
        normalize_eval, normalize, entityPairs, entities, relations,
        entityPair_oov_embedding, entity_oov_embedding, testData_DM,
        testData_MF, allowedEP_MF, set1_e2, set2_e2, oov_flag_e1_DM,
        oov_flag_e2_DM, oov_flags_MF, set3_e2, aux_features, nnet_W1, nnet_b1,
        nnet_W2, nnet_b2, nnet_W3, nnet_b3, normalize_DM_W1, normalize_DM_b1,
        normalize_MF_W1, normalize_MF_b1
    ],
                        ranks,
                        allow_input_downcast=True)

    return f
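
A minimal NumPy sketch (not part of the original snippet) of the ranking scheme built above: candidates scoring strictly higher than the test point push the rank up, ties contribute half a position each, and OOV candidates are charged half of their count on average.

import numpy as np

def rank_with_ties(test_score, candidate_scores, num_oov=0):
    # rank = 1 + number of strictly better candidates ...
    higher = np.sum(candidate_scores > test_score)
    # ... plus half a position for every tie ...
    ties = np.sum(candidate_scores == test_score)
    rank = 1 + higher + ties / 2.0
    # ... plus half of the unseen (OOV) candidates, which are assumed
    # to land above the test point half of the time on average
    return rank + num_oov / 2.0

# 3 better candidates, 2 ties, 4 OOV -> 1 + 3 + 1 + 2 = 7
print(rank_with_ties(0.5, np.array([0.9, 0.8, 0.7, 0.5, 0.5, 0.1]), num_oov=4))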
Esempio n. 46
0
def main(args):

    theano.config.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps
    typeLoad = int(args['typeLoad'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 350
    p_z_dim = 400
    p_x_dim = 450
    x2s_dim = 400
    y2s_dim = 200
    z2s_dim = 350
    target_dim = k  # each appliance head (theta_mu1, theta_mu2, ...) is modelled by its own mixture of k Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        typeLoad=typeLoad,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25)

    instancesPlot = {0: [5]}
    #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = UKdale(
        name='test',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu5 = FullyConnectedLayer(name='theta_mu5',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig5 = FullyConnectedLayer(name='theta_sig5',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff2 = FullyConnectedLayer(name='coeff2',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff3 = FullyConnectedLayer(name='coeff3',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff4 = FullyConnectedLayer(name='coeff4',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff5 = FullyConnectedLayer(name='coeff5',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1,
        theta_mu2,
        theta_sig2,
        coeff2,
        theta_mu3,
        theta_sig3,
        coeff3,
        theta_mu4,
        theta_sig4,
        coeff4,
        theta_mu5,
        theta_sig5,
        coeff5
    ]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            prior_mu_t, prior_sig_t
        )  # in the original code this is a plain Gaussian sample; the GMM is only used for the output generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
        y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
        y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
        y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
        y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5
        #corr_temp, binary_temp

    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu1_temp_val,
      theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val,
      theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val,
      y_pred2_temp_val, theta_mu3_temp_val, theta_sig3_temp_val,
      coeff3_temp_val, y_pred3_temp_val, theta_mu4_temp_val,
      theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val,
      theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val,
      y_pred5_temp_val),
     updates_val) = theano.scan(fn=inner_fn_test,
                                sequences=[x_1_temp],
                                outputs_info=[
                                    s_0, None, None, None, None, None, None,
                                    None, None, None, None, None, None, None,
                                    None, None, None, None, None, None, None,
                                    None, None
                                ])

    for k, v in updates_val.iteritems():
        k.default_update = v
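    # (the loop above folds the scan's random-stream updates into the graph
    # via default_update, so later compiled functions sample correctly
    # without having these updates passed in explicitly)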

    def inner_fn(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  # in the original code this is a plain Gaussian sample; the GMM is only used for the output generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)

        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5
        #corr_temp, binary_temp

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp,
      theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp,
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp,
      theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp,
      theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp),
     updates) = theano.scan(fn=inner_fn,
                            sequences=[x_1_temp, y_1_temp],
                            outputs_info=[
                                s_0, None, None, None, None, None, None, None,
                                None, None, None, None, None, None, None, None,
                                None, None, None, None, None, None, None, None,
                                None
                            ])

    for k, v in updates.iteritems():
        k.default_update = v

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1], 1)))**2)
    mae1 = T.mean(
        T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)
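    # KLGaussianGaussian is assumed to compute the closed-form KL between two
    # diagonal Gaussians per time step:
    #   KL(q||p) = 0.5 * sum(2*log(sig_p/sig_q)
    #                        + (sig_q**2 + (mu_q - mu_p)**2) / sig_p**2 - 1)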

    x_shape = x.shape
    y_shape = y.shape

    theta_mu2_temp.name = 'theta_mu2'
    theta_sig2_temp.name = 'theta_sig2'
    coeff2_temp.name = 'coeff2'
    y_pred2_temp.name = 'disaggregation2'
    mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae2 = T.mean(
        T.abs_(y_pred2_temp - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
    mse2.name = 'mse2'
    mae2.name = 'mae2'

    theta_mu3_temp.name = 'theta_mu3'
    theta_sig3_temp.name = 'theta_sig3'
    coeff3_temp.name = 'coeff3'
    y_pred3_temp.name = 'disaggregation3'
    mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae3 = T.mean(
        T.abs_(y_pred3_temp - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
    mse3.name = 'mse3'
    mae3.name = 'mae3'

    theta_mu4_temp.name = 'theta_mu4'
    theta_sig4_temp.name = 'theta_sig4'
    coeff4_temp.name = 'coeff4'
    y_pred4_temp.name = 'disaggregation4'
    mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae4 = T.mean(
        T.abs_(y_pred4_temp - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
    mse4.name = 'mse4'
    mae4.name = 'mae4'

    theta_mu5_temp.name = 'theta_mu5'
    theta_sig5_temp.name = 'theta_sig5'
    coeff5_temp.name = 'coeff5'
    y_pred5_temp.name = 'disaggregation5'
    mse5 = T.mean((y_pred5_temp - y[:, :, 4].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae5 = T.mean(
        T.abs_(y_pred5_temp - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))
    mse5.name = 'mse5'
    mae5.name = 'mae5'


    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1))

    #x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in,
                          theta_mu2_in, theta_sig2_in, coeff2_in, theta_mu3_in,
                          theta_sig3_in, coeff3_in, theta_mu4_in,
                          theta_sig4_in, coeff4_in, theta_mu5_in,
                          theta_sig5_in, coeff5_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################## TEST (GENERATION) TIME

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'

    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae1_val = T.mean(
        T.abs_(y_pred1_temp_val -
               y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    #NEURALNILM #(sum_output - sum_target) / max(sum_output, sum_target))
    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0]

    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))))
    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_temp_val.name = 'theta_mu2_val'
    theta_sig2_temp_val.name = 'theta_sig2_val'
    coeff2_temp_val.name = 'coeff2_val'
    y_pred2_temp_val.name = 'disaggregation2_val'
    mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae2_val = T.mean(
        T.abs_(y_pred2_temp_val -
               y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred2_temp_val)
    totReal = T.sum(y[:, :, 1])
    relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned2_val = 1 - T.sum(
        T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse2_val.name = 'mse2_val'
    mae2_val.name = 'mae2_val'

    theta_mu2_in_val = theta_mu2_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig2_in_val = theta_sig2_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_temp_val.name = 'theta_mu3_val'
    theta_sig3_temp_val.name = 'theta_sig3_val'
    coeff3_temp_val.name = 'coeff3_val'
    y_pred3_temp_val.name = 'disaggregation3_val'
    mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae3_val = T.mean(
        T.abs_(y_pred3_temp_val -
               y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred3_temp_val)
    totReal = T.sum(y[:, :, 2])
    relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned3_val = 1 - T.sum(
        T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse3_val.name = 'mse3_val'
    mae3_val.name = 'mae3_val'

    theta_mu3_in_val = theta_mu3_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig3_in_val = theta_sig3_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_temp_val.name = 'theta_mu4_val'
    theta_sig4_temp_val.name = 'theta_sig4_val'
    coeff4_temp_val.name = 'coeff4_val'
    y_pred4_temp_val.name = 'disaggregation4_val'
    mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae4_val = T.mean(
        T.abs_(y_pred4_temp_val -
               y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred4_temp_val)
    totReal = T.sum(y[:, :, 3])
    relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned4_val = 1 - T.sum(
        T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse4_val.name = 'mse4_val'
    mae4_val.name = 'mae4_val'

    theta_mu4_in_val = theta_mu4_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig4_in_val = theta_sig4_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_temp_val.name = 'theta_mu5_val'
    theta_sig5_temp_val.name = 'theta_sig5_val'
    coeff5_temp_val.name = 'coeff5_val'
    y_pred5_temp_val.name = 'disaggregation5_val'
    mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae5_val = T.mean(
        T.abs_(y_pred5_temp_val -
               y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred5_temp_val)
    totReal = T.sum(y[:, :, 4])
    relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned5_val = 1 - T.sum(
        T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse5_val.name = 'mse5_val'
    mae5_val.name = 'mae5_val'

    theta_mu5_in_val = theta_mu5_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig5_in_val = theta_sig5_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    prediction_val = T.concatenate([
        y_pred1_temp_val, y_pred2_temp_val, y_pred3_temp_val, y_pred4_temp_val,
        y_pred5_temp_val
    ],
                                   axis=2)

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val, theta_mu3_in_val,
        theta_sig3_in_val, coeff3_in_val, theta_mu4_in_val, theta_sig4_in_val,
        coeff4_in_val, theta_mu5_in_val, theta_sig5_in_val, coeff5_in_val)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = (mse1_val + mse2_val + mse3_val + mse4_val +
                   mse5_val) / y_dim
    totaMAE_val = (mae1_val + mae2_val + mae3_val + mae4_val +
                   mae5_val) / y_dim
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse1, mae1, mse2, mae2,
                mse3, mae3, mse4, mae4, mse5, mae5, y_pred1_temp, y_pred2_temp,
                y_pred3_temp, y_pred4_temp, y_pred5_temp
            ],
            indexSep=13,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        n_steps=n_steps,
                        extension=extension,
                        lr_iterations=lr_iterations,
                        k_speedOfconvergence=30)
    mainloop.run()

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],
        allow_input_downcast=True,
        outputs=[
            prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val,
            mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val,
            mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val,
            relErr3_val, relErr4_val, relErr5_val, propAssigned1_val,
            propAssigned2_val, propAssigned3_val, propAssigned4_val,
            propAssigned5_val
        ],
        updates=updates_val)
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(outputGeneration[1:14])
        testMetrics2.append(outputGeneration[14:])

        plt.figure(1)
        plt.plot(np.transpose(outputGeneration[0],
                              [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(2)
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(3)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 8].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 9].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 10].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 11].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 12].mean()

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()

    propAssigned1_test = testMetrics2[:, 5].mean()
    propAssigned2_test = testMetrics2[:, 6].mean()
    propAssigned3_test = testMetrics2[:, 7].mean()
    propAssigned4_test = testMetrics2[:, 8].mean()
    propAssigned5_test = testMetrics2[:, 9].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format(
        recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test,
        mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, mse_test,
        mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        propAssigned1_test, propAssigned2_test, propAssigned3_test,
        propAssigned4_test, propAssigned5_test))

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write(
        "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['mse1'][i]
        h = mainloop.trainlog.monitor['mae1'][i]

        d = mainloop.trainlog.monitor['mse2'][i]
        j = mainloop.trainlog.monitor['mae2'][i]
        e = mainloop.trainlog.monitor['mse3'][i]
        k = mainloop.trainlog.monitor['mae3'][i]
        f = mainloop.trainlog.monitor['mse4'][i]
        l = mainloop.trainlog.monitor['mae4'][i]
        g = mainloop.trainlog.monitor['mse5'][i]
        m = mainloop.trainlog.monitor['mae5'][i]
        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, c, d, e, f, g, h, j, k, l, m))

    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
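
For reference, a self-contained NumPy sketch of drawing one sample per row from a univariate Gaussian mixture -- the assumed behaviour of the GMM_sampleY helper used above (its real implementation lives elsewhere in this project):

import numpy as np

def gmm_sample(mu, sig, coeff, rng=np.random):
    # mu, sig, coeff: (batch, k) component means, std-devs and mixture
    # weights; each row of coeff is assumed to sum to 1 (softmax output)
    batch, k = mu.shape
    out = np.empty((batch, 1), dtype=mu.dtype)
    for n in range(batch):
        j = rng.choice(k, p=coeff[n])                 # pick a component
        out[n, 0] = rng.normal(mu[n, j], sig[n, j])   # sample from it
    return out

mu = np.array([[0.0, 5.0]])
sig = np.array([[0.1, 0.1]])
coeff = np.array([[0.9, 0.1]])
print(gmm_sample(mu, sig, coeff))  # usually near 0, sometimes near 5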
Esempio n. 47
0
    def __init__(
            self,
            glimpse_shape,
            glimpse_times,
            dim_hidden,
            dim_fc,
            dim_out,
            reward_base,
            rng_std=1.0,
            activation=T.tanh,
            bptt_truncate=-1,
            lmbd=0.1  # gdupdate + lmbd*rlupdate
    ):
        if reward_base is None:
            reward_base = np.zeros((glimpse_times)).astype('float32')
            reward_base[-1] = 1.0
        x = T.ftensor3('x')  # N * W * H
        y = T.ivector('y')  # label
        lr = T.fscalar('lr')
        reward_base = theano.shared(name='reward_base',
                                    value=np.array(reward_base).astype(
                                        theano.config.floatX),
                                    borrow=True)  # Time (vector)
        reward_bias = T.fvector('reward_bias')
        rng = MRG_RandomStreams(np.random.randint(9999999))
        #       rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))

        i = InputLayer(x)
        au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng,
                           rng_std, activation, bptt_truncate)
        #       All hidden states are put into decoder
        #       layers = [i, au, InputLayer(au.output[:,:,:].flatten(2))]
        #       dim_fc = [glimpse_times*dim_hidden] + dim_fc + [dim_out]
        #       Only the last hidden states
        layers = [i, au, InputLayer(au.output[:, -1, :])]
        dim_fc = [dim_hidden] + dim_fc + [dim_out]
        for Idim, Odim in zip(dim_fc[:-1], dim_fc[1:]):
            fc = FullConnectLayer(layers[-1].output, Idim, Odim, activation,
                                  'FC')
            layers.append(fc)
        sm = SoftmaxLayer(layers[-1].output)
        layers.append(sm)

        output = sm.output  # N * classes
        hidoutput = au.output  # N * dim_output
        location = au.location  # N * T * dim_hidden
        prediction = output.argmax(1)  # N

        # calc
        equalvec = T.eq(prediction, y)  # [0, 1, 0, 0, 1 ...]
        correct = T.cast(T.sum(equalvec), 'float32')
        #       noequalvec = T.neq(prediction, y)
        #       nocorrect = T.cast(T.sum(noequalvec), 'float32')
        logLoss = T.log(output)[T.arange(y.shape[0]), y]
        reward_biased = T.outer(equalvec,
                                reward_base) - reward_bias.dimshuffle('x', 0)
        # N * Time
        # (R_t - b_t), where b = E[R]

        # gradient descent
        gdobjective = logLoss.sum() / x.shape[
            0]  # correct * dim_output (only has value on the correctly predicted sample)
        gdparams = reduce(lambda x, y: x + y.params, layers, [])
        gdupdates = map(lambda x: (x, x + lr * T.grad(gdobjective, x)),
                        gdparams)

        # reinforce learning
        rlobjective = (reward_biased.dimshuffle(0, 1, 'x') *
                       T.log(au.location_p)).sum() / x.shape[0]
        # location_p: N * Time * 2
        # location_logp: N * Time
        # reward_biased: N * 2
        rlparams = au.reinforceParams
        rlupdates = map(lambda x: (x, x + lr * lmbd * T.grad(rlobjective, x)),
                        rlparams)

        # how much the mean hidden state changes between consecutive steps
        deltas = T.stack(*[((au.output[:, i, :].mean(0) -
                             au.output[:, i + 1, :].mean(0))**2).sum()
                           for i in xrange(glimpse_times - 1)])
        # N * Time * dim_hidden

        print 'compile step()'
        self.step = theano.function([x, y, lr, reward_bias], [
            gdobjective, rlobjective, correct,
            T.outer(equalvec, reward_base)
        ],
                                    updates=gdupdates + rlupdates)
        #       print 'compile gdstep()'
        #       self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
        #       print 'compile rlstep()'
        #       self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
        print 'compile predict()'
        self.predict = theano.function([x], prediction)
        #       print 'compile forward()'
        #       self.forward = theano.function([x], map(lambda x: x.output, layers)) #[layers[-3].output, fc.output])
        #       print 'compile error()'
        #       self.error = theano.function([x, y], gdobjective)
        print 'compile locate()'
        self.locate = theano.function(
            [x],
            [au.location_mean, location])  #[layers[-3].output, fc.output])
        print 'compile debug()'
        self.debug = theano.function([x, y, lr, reward_bias],
                                     [deltas, au.location_p],
                                     on_unused_input='warn')

        # self.xxx
        self.glimpse_times = glimpse_times
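
A small NumPy sketch of the baseline-subtracted REINFORCE objective built above: the log-probabilities of the sampled glimpse locations are weighted by the advantage (R_t - b_t), so locations that led to better-than-baseline rewards are reinforced. Names and shapes here are assumptions, not part of the original class.

import numpy as np

def reinforce_objective(log_p, reward, baseline):
    # log_p:    (N, T) log-probabilities of the sampled actions
    # reward:   (N, T) per-step reward (here: 1 at the last step if correct)
    # baseline: (T,)   running estimate of E[R_t]
    advantage = reward - baseline[None, :]        # (R_t - b_t)
    # maximizing this pushes up log p of actions with positive advantage
    return (advantage * log_p).sum() / log_p.shape[0]

log_p = np.log(np.full((4, 3), 0.5))
reward = np.zeros((4, 3)); reward[:2, -1] = 1.0   # first two samples correct
print(reinforce_objective(log_p, reward, baseline=reward.mean(axis=0)))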
Esempio n. 48
0
    def __init__(self,
                 num_actions,
                 id_num,
                 shared_arr=None,
                 num_moves=None,
                 args=None):
        print "USING OPTION CRITIC"
        self.args = args
        self.id_num = id_num
        self.num_actions = num_actions
        self.num_moves = num_moves
        self.reset_storing()
        self.rng = np.random.RandomState(100 + id_num)
        # input is 8x8
        model_network = [{
            "model_type": "conv",
            "filter_size": [4, 4],
            "pool": [1, 1],
            "stride": [2, 2],
            "out_size": 32,
            "activation": "relu"
        }, {
            "model_type": "conv",
            "filter_size": [3, 3],
            "pool": [1, 1],
            "stride": [2, 2],
            "out_size": 64,
            "activation": "relu"
        }, {
            "model_type": "mlp",
            "out_size": 48,
            "activation": "relu"
        }, {
            "model_type": "mlp",
            "out_size": 32,
            "activation": "relu"
        }]
        out = [None, model_network[-1]["out_size"]]
        self.conv = Model(model_network,
                          input_size=[
                              None, args.concat_frames *
                              (1 if args.grayscale else 3), 8, 8
                          ])
        self.termination_model = Model([{
            "model_type": "mlp",
            "out_size": args.num_options,
            "activation": "sigmoid",
            "W": 0
        }],
                                       input_size=out)
        self.Q_val_model = Model([{
            "model_type": "mlp",
            "out_size": args.num_options,
            "activation": "linear",
            "W": 0
        }],
                                 input_size=out)
        self.options_model = MLP3D(input_size=out[1],
                                   num_options=args.num_options,
                                   out_size=num_actions,
                                   activation="softmax")
        self.params = self.conv.params + self.Q_val_model.params + self.options_model.params + self.termination_model.params
        self.set_rms_shared_weights(shared_arr)

        x = T.ftensor4()
        y = T.fvector()
        a = T.ivector()
        o = T.ivector()
        delib = T.fscalar()

        s = self.conv.apply(x / np.float32(255))
        intra_option_policy = self.options_model.apply(s, o)

        q_vals = self.Q_val_model.apply(s)
        disc_q = theano.gradient.disconnected_grad(q_vals)
        current_option_q = q_vals[T.arange(o.shape[0]), o]
        disc_opt_q = disc_q[T.arange(o.shape[0]), o]
        terms = self.termination_model.apply(s)
        o_term = terms[T.arange(o.shape[0]), o]
        V = T.max(q_vals, axis=1) * (1 - self.args.option_epsilon) + (
            self.args.option_epsilon * T.mean(q_vals, axis=1))
        disc_V = theano.gradient.disconnected_grad(V)

        aggr = T.mean  # T.sum
        log_eps = 0.0001

        critic_cost = aggr(args.critic_coef * 0.5 *
                           T.sqr(y - current_option_q))
        termination_grad = aggr(o_term * ((disc_opt_q - disc_V) + delib))
        entropy = -aggr(
            T.sum(intra_option_policy * T.log(intra_option_policy + log_eps),
                  axis=1)) * args.entropy_reg
        pg = aggr(
            (T.log(intra_option_policy[T.arange(a.shape[0]), a] + log_eps)) *
            (y - disc_opt_q))
        cost = pg + entropy - critic_cost - termination_grad

        grads = T.grad(cost * args.update_freq, self.params)
        # grads = T.grad(cost, self.params)
        updates, grad_rms, self.rms_weights = rmsprop(self.params,
                                                      grads,
                                                      clip=args.clip,
                                                      clip_type=args.clip_type)
        self.share_rms(shared_arr)

        self.get_state = theano.function([x], s, on_unused_input='warn')
        self.get_policy = theano.function([s, o], intra_option_policy)
        self.get_termination = theano.function([x], terms)
        self.get_q = theano.function([x], q_vals)
        self.get_q_from_s = theano.function([s], q_vals)
        self.get_V = theano.function([x], V)

        self.rms_grads = theano.function([x, a, y, o, delib],
                                         grad_rms,
                                         updates=updates,
                                         on_unused_input='warn')
        print "ALL COMPILED"

        if not self.args.testing:
            self.init_tracker()
        self.initialized = False
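
The state value V above mixes greedy and uniform option selection (epsilon-soft options). A minimal NumPy sketch of that expression:

import numpy as np

def option_value(q_vals, option_epsilon):
    # with prob. (1 - eps) the best option is taken, with prob. eps a
    # uniformly random one, so the state value is the weighted mix
    return (q_vals.max(axis=1) * (1.0 - option_epsilon)
            + option_epsilon * q_vals.mean(axis=1))

q = np.array([[1.0, 3.0], [2.0, 2.0]])
print(option_value(q, option_epsilon=0.1))  # [0.9*3 + 0.1*2, 2.0] = [2.9, 2.0]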
Esempio n. 49
0
    def fit(self,
            learning_rate=1e-6,
            momentum=1e-8,
            batch=200,
            activation=T.tanh,
            depth=7):
        self.f = activation

        #define set of inputs and corresponding outputs for supervised learning
        X = [[]]
        Y = [[]]

        #theano input-output vectors
        thX = T.fvector('X')
        thY = T.ivector('Y')  # target indices must be integers for indexing
        thK = T.iscalar('depth')

        #recurrent evaluation step: returns the next pair of hidden/output values
        def recurrence(x_t, h_t1):
            #update the recurrent hidden state
            #h_t = f(Wx*x + Wh*h_t1 + b)
            h_t = self.f(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
            #compute the current output; in this model it is the distribution
            #over the next time step: y_t = f(Wo*h_t + b)
            y_t = self.f(h_t.dot(self.Wo) + self.bo)
            return h_t, y_t

        #define theano scan function for call
        [h, y], _ = th.scan(
            fn=recurrence,
            outputs_info=[self.h0, None],
            sequences=thX,
            n_steps=thK,
        )

        #define the prediction: normalize the outputs with a softmax
        prediction = T.nnet.softmax(y)

        #define the learning model
        #the cost is the usual log-loss (negative log-likelihood)
        cost = -T.mean(T.log(prediction[T.arange(thY.shape[0]), thY]))
        #use theano's grad function for the gradients
        grads = T.grad(cost, self.params)
        #calculate the change of params for momentum
        #init to all zero
        dparams = [theano.shared(p.get_value() * 0) for p in self.params]

        #define the update using gradient descent with momentum
        #i.e. w <- w + momentum * dw - n * grad_w(E)
        #     dw <- momentum * dw - n * grad_w(E)
        updates = [(p, p + momentum * dp - learning_rate * g)
                   for p, dp, g in zip(self.params, dparams, grads)
                   ] + [(dp, momentum * dp - learning_rate * g)
                        for dp, g in zip(dparams, grads)]

        #define the complete training model for theano
        self.predict_op = th.function(inputs=[thX, thK], outputs=prediction)
        self.train_op = th.function(inputs=[thX, thY, thK],  # thK feeds the scan's n_steps
                                    outputs=[cost, prediction, y],
                                    updates=updates)
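
A plain NumPy sketch of the momentum rule encoded by the paired update lists above (the parameter and its velocity move in lock-step):

import numpy as np

def momentum_step(w, dw, grad, learning_rate, momentum):
    # dw <- momentum * dw - lr * grad_w(E)
    # w  <- w + dw      (equivalent to w + momentum*dw - lr*grad)
    dw_new = momentum * dw - learning_rate * grad
    return w + dw_new, dw_new

w, dw = np.array([1.0]), np.array([0.0])
for _ in range(3):
    grad = 2.0 * w   # gradient of E(w) = w**2
    w, dw = momentum_step(w, dw, grad, learning_rate=0.1, momentum=0.9)
print(w)  # decreasing toward the minimum at 0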
Esempio n. 50
0
    def build_train_func(self,
                         solver_mode="sgd",
                         cost_factors=[],
                         use_acc_mode=False,
                         skip_build=False):

        #arguments to function
        logging.info(
            "Building training functions - solver: %s, use_acc_mode: %s" %
            (solver_mode, use_acc_mode))
        iteration = tensor.fscalar()
        learn_rate = tensor.fscalar()
        momentum = tensor.fvector()
        decay = tensor.fscalar()

        #find costs
        self.yt = []
        self.cost_list = []
        self.cost_layers = []
        self.cost_layer_names = []
        for layer in self.layers:
            yt_index = tensor.lvector("target index %i" %
                                      len(self.cost_layers))
            yt_value = tensor.fvector("target value %i" %
                                      len(self.cost_layers))
            cost = layer.cost(yt_index, yt_value)
            if cost is not None:
                self.yt += [yt_index, yt_value]
                self.cost_list.append(cost)
                self.cost_layers.append(layer)
                self.cost_layer_names.append(layer.type_name)

        self.cost_factors = [1.0] * len(self.cost_list) if len(
            cost_factors) == 0 else cost_factors
        assert len(self.cost_factors) == len(
            self.cost_list
        ), "Different number of cost factors (%i) and cost layers (%i)" % (len(
            self.cost_factors), len(self.cost_layers))
        logging.info("Found %i costs in model:" % len(self.cost_layers),
                     list(zip(self.cost_layer_names, self.cost_factors)))

        self.train_cost = tensor.as_tensor_variable(0)
        for i, cost in enumerate(self.cost_list):
            self.train_cost += self.cost_factors[i] * cost

        if self.gradient_clip > 0.0:
            logging.info("Clipping gradient to [%f,%f]" %
                         (-self.gradient_clip, self.gradient_clip))
            self.train_cost = theano.gradient.grad_clip(
                self.train_cost, -self.gradient_clip, self.gradient_clip)
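            # grad_clip does not change the forward value of the cost; the
            # clipping only applies to gradients flowing back through it.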

        #find split points
        split_points = [0]
        self.use_split_mode = False
        for index, layer in enumerate(self.layers):
            if layer.has_split:
                self.use_split_mode = True
                split_points.append(index)
        split_points.append(len(self.layers))

        if self.use_split_mode:
            logging.verbose("Using split mode with split points:",
                            split_points)
            self.func["train_fwd"] = []
            self.func["train_bwd"] = []

        self.updates = []
        for sp in range(len(split_points) - 1):

            logging.info("Building training functions for layers %i-%i" %
                         (split_points[sp], split_points[sp + 1]))

            split_start = self.layers[split_points[sp]] if sp > 0 else None
            split_end = self.layers[split_points[sp + 1]] if (
                sp + 2) < len(split_points) else None
            split_cost = self.train_cost if split_end is None else None
            split_layers = []
            for i, layer in enumerate(self.layers):
                if (i > split_points[sp]) and (i < split_points[sp + 1]):
                    split_layers.append(layer)

            #determine known_grads provided by previous backward passes
            from collections import OrderedDict
            split_known_grads = OrderedDict()
            for i in range(sp + 1, len(split_points) - 1):
                split_known_grads.update(
                    self.layers[split_points[i]].split_known_grads())

            if len(split_known_grads) == 0:
                split_known_grads = None

            #per-parameter update rules for the supported solvers
            def get_sgd_updates(p, g):
                m = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                rho = tensor.switch(tensor.gt(iteration, 0), momentum[0], 0.0)
                m_update = rho * m + (1.0 - rho) * g
                p_update = p - learn_rate * m_update
                return [(p, p_update), (m, m_update)]

            def get_torch_updates(p, g):
                m = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                rho = tensor.switch(tensor.gt(iteration, 0), momentum[0], 0.0)
                m_update = rho * m + g
                p_update = p - learn_rate * (g + momentum[0] * m_update)
                return [(p, p_update), (m, m_update)]

            def get_adam_updates(p, g):
                eps = 1e-8
                m = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                v = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                m_update = momentum[0] * m + (1.0 - momentum[0]) * g
                v_update = momentum[1] * v + (1.0 - momentum[1]) * (g * g)
                m_hat = m_update / (1.0 -
                                    tensor.pow(momentum[0], iteration + 1))
                v_hat = v_update / (1.0 -
                                    tensor.pow(momentum[1], iteration + 1))
                p_update = p - learn_rate * m_hat / (tensor.sqrt(v_hat) + eps)
                return [(p, p_update), (m, m_update), (v, v_update)]
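            # Note on the three rules above: get_sgd_updates keeps an
            # exponential moving average of the gradient, get_torch_updates
            # appears to follow torch-style SGD with a Nesterov-like
            # correction (p - lr * (g + momentum * m)), and get_adam_updates
            # applies the standard Adam bias correction through
            # 1 - momentum**(iteration + 1).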

            #append parameter updates
            params = []
            params_decay = []
            for layer in split_layers:
                params += layer.weights()
                params_decay += [True] * len(layer.weights())
                params += layer.biases()
                params_decay += [False] * len(layer.biases())

            #build updates
            print("known grads:", split_known_grads)
            grads = tensor.grad(split_cost,
                                params,
                                known_grads=split_known_grads)
            solver_updates = []
            for p, g, p_decay in zip(params, grads, params_decay):

                #add L2 weight decay if needed
                if p_decay or self.bias_decay:
                    g += decay * p

                if solver_mode == "adam":
                    solver_updates += get_adam_updates(p, g)
                elif solver_mode == "torch" or solver_mode == "nesterov":
                    solver_updates += get_torch_updates(p, g)
                else:
                    solver_updates += get_sgd_updates(p, g)

            #append per layer updates
            local_updates = solver_updates + sum(
                [layer.updates(self.train_cost) for layer in split_layers], [])

            #all updates
            self.updates += local_updates

            #skip building the actual theano functions (useful if you only want the updates)
            if skip_build:
                continue

            global debug_train
            if debug_train:
                logging.warning("WARNING: Debug mode is active!")
                from theano.compile.nanguardmode import NanGuardMode
                debug_mode = theano.compile.MonitorMode(
                    post_func=debug_detect_errors)
            else:
                debug_mode = None

            if self.use_split_mode:

                if split_end is not None:
                    updates = sum(
                        [layer.split_forward() for layer in split_layers], [])
                    updates += split_end.split_forward()

                    print("fwd updates:", updates)
                    f = theano.function([self.input], [],
                                        updates=updates,
                                        givens=[(denet.layer.get_train(),
                                                 tensor.cast(1, 'int8'))],
                                        on_unused_input='ignore',
                                        mode=debug_mode)
                    self.func["train_fwd"].append(f)

                outputs = ([self.train_cost] +
                           self.cost_list) if split_end is None else []
                updates = sum([
                    layer.split_backward(split_cost, split_known_grads)
                    for layer in split_layers
                ], [])
                if split_start is not None:
                    updates += split_start.split_backward(
                        split_cost, split_known_grads)

                print("bwd updates:", updates)
                updates += local_updates
                f = theano.function([
                    denet.layer.get_epoch(), iteration, learn_rate, momentum,
                    decay, self.input
                ] + self.yt,
                                    outputs,
                                    updates=updates,
                                    givens=[(denet.layer.get_train(),
                                             tensor.cast(1, 'int8'))],
                                    on_unused_input='ignore',
                                    mode=debug_mode)
                self.func["train_bwd"].insert(0, f)

            elif use_acc_mode:
                acc_counter = theano.shared(
                    numpy.array(0, dtype=theano.config.floatX))
                begin_updates = [(acc_counter, tensor.zeros_like(acc_counter))]
                step_updates = [(acc_counter, acc_counter + 1)]
                end_updates = []
                self.acc_params = []
                for p_dest, p_src in self.updates:
                    p_acc = theano.shared(numpy.zeros(
                        p_dest.shape.eval(), dtype=theano.config.floatX),
                                          broadcastable=p_dest.broadcastable,
                                          borrow=True)
                    begin_updates.append((p_acc, tensor.zeros_like(p_acc)))
                    step_updates.append((p_acc, p_acc + p_src))
                    end_updates.append((p_dest, p_acc / acc_counter))
                    self.acc_params.append(p_acc)

                logging.info(
                    "Constructing parameter accumulate update functions (solver=%s)"
                    % solver_mode)
                self.func["train_begin"] = theano.function(
                    [], [], updates=begin_updates)
                self.func["train_step"] = theano.function(
                    [
                        denet.layer.get_epoch(), iteration, learn_rate,
                        momentum, decay, self.input
                    ] + self.yt, [self.train_cost] + self.cost_list,
                    updates=step_updates,
                    givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                    on_unused_input='ignore',
                    allow_input_downcast=True,
                    mode=debug_mode)
                self.func["train_end"] = theano.function([], [],
                                                         updates=end_updates)
            else:
                logging.info(
                    "Constructing parameter update function (solver=%s)" %
                    solver_mode)

                #wrap inputs with borrow=True to avoid unnecessary copying
                f_input = theano.In(self.input, borrow=True)
                f_yt = [theano.In(yt, borrow=True) for yt in self.yt]
                self.func["train_step"] = theano.function(
                    [
                        denet.layer.get_epoch(), iteration, learn_rate,
                        momentum, decay, f_input
                    ] + f_yt, [self.train_cost] + self.cost_list,
                    updates=self.updates,
                    givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                    on_unused_input='ignore',
                    allow_input_downcast=True,
                    mode=debug_mode)

                logging.verbose("Exporting graph...")
                with open("graph.txt", "w") as f:
                    theano.printing.debugprint(self.func["train_step"],
                                               file=f,
                                               print_type=True)
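The use_acc_mode branch above averages the proposed parameter values over several train_step calls before committing them. A minimal sketch of the same begin/step/end pattern in plain NumPy; all names here are illustrative, not from the original:

import numpy as np

acc, counter = None, 0

def train_begin(p):
    global acc, counter
    acc, counter = np.zeros_like(p), 0        # reset the accumulator

def train_step(p_new):
    global acc, counter
    acc, counter = acc + p_new, counter + 1   # accumulate proposed values

def train_end():
    return acc / counter                      # commit the average

p = np.ones(4)
train_begin(p)
train_step(p * 0.9)
train_step(p * 1.1)
print(train_end())   # -> [1. 1. 1. 1.]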
Esempio n. 51
0
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """
    This is basic test for GpuCrossentropySoftmaxArgmax1HotWithBias

    We check that we loop when their is too much threads

    """

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    x = T.fmatrix('x')
    y = T.lvector('y')

    b = T.fvector('b')
    #W = T.fmatrix('W')

    #we precompute the dot product with a big shape beforehand so that the test
    #of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    #"the launch timed out and was terminated" on GPU cards that are not
    #powerful enough. We need the big shape to check the corner cases.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
                       dtype=numpy.float32)
    #yy = numpy.ones((batch_size,), dtype='float32')
    yy = numpy.ones((batch_size, ), dtype='int32')
    b_values = numpy.zeros((n_out, ), dtype='float32')
    W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')

    dot_value = numpy.asarray(numpy.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
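    #for this loss the gradient w.r.t. the pre-softmax activations is
    #(softmax(dot_result + b) - one_hot(y)) / batch_size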
    classify = theano.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)
    #theano.printing.debugprint(classify)
    #theano.printing.debugprint(classify_gpu)

    assert any([
        isinstance(node.op, T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    ])
    assert any([
        isinstance(node.op, cuda.nnet.GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    ])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    assert numpy.allclose(out[0], gout[0])
    assert numpy.allclose(out[2], gout[2],
                          atol=3e-6), numpy.absolute(gout - out).max()
    assert numpy.allclose(out[1],
                          gout[1]), [(id, out[1][id], gout[1][id], val)
                                     for id, val in enumerate(out[1] - gout[1])
                                     if val != 0]
Esempio n. 52
0
    margin = T.scalar('margin')

    loss = mean_loss_kl_div(predictions, targets, margin)
    loss_fun = theano.function([predictions, targets, margin], loss)
    mean_err = loss_fun(test_pred, test_targ, test_margin)

    foreach_prep = foreach(predictions, targets, margin)
    foreach_fun = theano.function([predictions, targets, margin], foreach_prep)
    err_mat = foreach_fun(test_pred, test_targ, test_margin)
    err = err_mat.sum() / ((len(err_mat) - 1) * len(err_mat))

    def loss(predictions, targets, margin, f):
        assert len(predictions) == len(targets)
        L_sum = 0
        for i in range(len(predictions)):
            for j in range(len(predictions)):
                L_sum += f(predictions[i], targets[i], predictions[j],
                           targets[j], margin)
        return L_sum / (2 * len(predictions))

    xp = T.scalar('xp')
    xq = T.scalar('xq')
    p = T.fvector('P')
    q = T.fvector('Q')
    result = loss_with_kl_div(p, xp, q, xq, margin)
    f = theano.function([p, xp, q, xq, margin], result)
    mean_np = loss(test_pred, test_targ, test_margin, f)

    assert (mean_err == err == mean_np)
    print('Run without errors!')
Esempio n. 53
0
def soft_cascade_LR_1LNN(trX1, trY1, teX1, teY1, trX2, teX2, lambda_vector,
                         K1):

    (N, D1) = trX2.shape
    D = trX1.shape[1]
    C = 2
    t1 = ComputeComplexity([D1, C])
    t2 = ComputeComplexity([D, K1, C])

    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz_first = np.zeros((len(lambda_vector), 1))

    for i, plambda in enumerate(lambda_vector):

        X = T.fmatrix()
        F = T.fmatrix()
        Y = T.fvector()

        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        # w_l.set_value(np.zeros((D1,)))
        # b_l.set_value(np.zeros((1,)))

        w_h1 = CF.init_weights((D, K1))
        b1 = CF.init_weights((K1, ))
        w_o = CF.init_weights((K1, ))
        bo = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                           broadcastable=(True, ))

        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        pygx_final = pygx1 * pygx2

        yhat1 = (pygx1 > 0.5)
        yhat = (pygx2 > 0.5)

        reg = T.mean(t1 + t2 * pygx1)
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final,
                                                 Y)) + plambda * reg

        params = [w_l, b_l, w_h1, w_o, b1, bo]
        updates = lasagne.updates.rmsprop(cost,
                                          params,
                                          learning_rate=0.001 * 5,
                                          rho=0.9,
                                          epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)

        train = theano.function(inputs=[X, F, Y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        reg_value = theano.function(inputs=[F],
                                    outputs=reg,
                                    allow_input_downcast=True)

        predict_first = theano.function(inputs=[F],
                                        outputs=yhat1,
                                        allow_input_downcast=True)
        predict_second = theano.function(inputs=[X],
                                         outputs=yhat,
                                         allow_input_downcast=True)

        max_iter = 300
        for j in range(max_iter):
            c = train(trX1, trX2, trY1)
            r = reg_value(trX2)
            print(c - plambda * r, plambda * r)

        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX2)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]

        # check that we get 100 percent recall from the first stage
        inds_true = np.where(teY1 == 1)[0]
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %
              (r1, p1, a1))

        teX11 = teX1[inds_test, :]

        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1

        teY2 = np.zeros(teY1.shape, dtype=int)
        teY2.fill(0)
        teY2[inds_test] = teQ2

        inds_second = np.where(teY2 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %
              (r2, p2, a2))
        F1[i] = 2 * r2 * p2 / (r2 + p2)
        accuracy1[i] = a2

    return time1, accuracy1, F1, nnz_first
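The regularizer T.mean(t1 + t2 * pygx1) used above is the expected per-example evaluation cost of the cascade: every example pays the first-stage cost t1, and only the fraction forwarded by the first stage (probability pygx1) also pays the second-stage cost t2. A small NumPy illustration with made-up costs and probabilities:

import numpy as np

t1, t2 = 1.0, 50.0                      # per-stage evaluation costs
p1 = np.array([0.9, 0.1, 0.4])          # first-stage pass probabilities
expected_cost = np.mean(t1 + t2 * p1)   # 1.0 + 50.0 * mean(p1)
print(expected_cost)                    # 24.33...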
Esempio n. 54
0
def cascade_three_stage(trX1, trY1, teX1, teY1, trX2, teX2, trX3, teX3, w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co, plambda, a):
    
    (N,D) = trX3.shape
    lambda_vector = plambda
    
    n_it = 10000
    time1 = np.zeros((len(lambda_vector),1))
    accuracy1 = np.zeros((len(lambda_vector),1))
    F1 = np.zeros((len(lambda_vector),1))
    nnz_first = np.zeros((len(lambda_vector),1))
    nnz_second = np.zeros((len(lambda_vector),1))
    
    for i,plambda in enumerate(lambda_vector):
                    
        X = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()
               
        w_l = CF.init_weights((D,))
        b_l  = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True,))        
        w_l.set_value(np.zeros((D,)))    
        b_l.set_value(np.zeros((1,)))               
               
        pygx1 = CF.model00(E, w_l, b_l)
        pygx2 = CF.model3(F, v_h1, v_o, c1, co, 0, 1)
        pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        
        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat = (pygx > 0.5)
        
        f = lambda x, a: 1/(1+T.exp(-a*(x-0.5)))
        
        pygx_final = (1-f(pygx1,a))*pygx1 + (1-f(pygx2,a))*f(pygx1,a)*pygx2 + f(pygx1, a)*f(pygx2, a)*pygx
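        # soft routing: examples the first stage scores low (f(pygx1,a) near 0)
        # keep the cheap first-stage prediction; higher-scoring examples are
        # forwarded, with the gates f(.) blending the three stages' outputs.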

        reg = T.mean(f(pygx1,a))  
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda*reg
         
        params = [w_l, b_l]
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.5, rho=0.9, epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)
        
        train = theano.function(inputs=[X, F, E, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[E], outputs=reg, allow_input_downcast=True)
        
        predict_first = theano.function(inputs=[E], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[F], outputs=yhat2, allow_input_downcast=True)
        predict_third = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        
        max_iter = 500
        for j in range(max_iter):
            # c = train(trX1, trY1)
            c = train(trX1, trX2, trX3, trY1) 
            # r = reg_value(trX1)
            r = reg_value(trX3) 
            print(c-plambda*r,plambda*r)
            # cost = train(trX1, trY1)
        
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX3)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]

        # check that we get 100 percent recall from the first stage
        inds_true = np.where( teY1 == 1 )[0]
        int_result = np.intersect1d(inds_test,inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %(inds_test.shape[0],inds_true.shape[0],int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %(r1,p1,a1))
        
        teX22 = teX2[inds_test,:]
                
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX22)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz_second[i] = inds_test2.shape[0]
            
        teY2 = np.zeros(teY1.shape,dtype = int)
        teY2.fill(0)
        teY2[inds_test] = teQ2
        
        inds_second = np.where( teY2 == 1 )[0]            
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %(inds_second.shape[0],inds_true.shape[0],int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %(r2,p2,a2))
            
        # teX1 = teX1[inds_test2,:]
        teX11 = teX1[inds_test[inds_test2],:]
            
        start1 = time.clock()
        for t in range(n_it):
            teQ3 = predict_third(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1            
            
        teY3 = np.zeros(teY1.shape,dtype = int)
        teY3.fill(0)
        teY3[inds_test[inds_test2]] = teQ3
        accuracy1[i] = np.mean(teY1 == teY3)    
        
        inds_third = np.where( teY3 == 1 )[0]
        int_result2 = np.intersect1d(inds_third,inds_true)
        print("third stage nzs:%d,true nzs:%d,intersection:%d" %(inds_third.shape[0],inds_true.shape[0],int_result2.shape[0]))
        r3 = int_result2.shape[0] / inds_true.shape[0]
        p3 = int_result2.shape[0] / inds_third.shape[0]
        print("third stage: recall = %f, precision = %f, accuracy = %f" %(r3, p3, accuracy1[i]))
        F1[i] = 2*r3*p3/(r3 + p3)
        
    return time1, accuracy1, F1, nnz_first, nnz_second
Esempio n. 55
0
    def test_softmax_grad(self):
        def cmp(n, m, f, f_gpu):
            data = numpy.arange(n * m, dtype='float32').reshape(n, m)
            gdata = numpy.asarray(data)[:, :, None, None]

            out = f(data)
            gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
            utt.assert_allclose(out, gout)

        x = T.matrix('x', 'float32')
        x_gpu = T.tensor4('x_gpu', 'float32')
        f_z = T.nnet.softmax_op
        f_gpu = dnn.GpuDnnSoftmax('accurate', 'channel')

        # Verify the grad operation
        dims = (2, 3, 4, 5)
        gdata = numpy.arange(numpy.product(dims),
                             dtype='float32').reshape(dims)
        T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

        # Verify that the CPU and GPU implementations return the same results
        # up to a tolerance.

        self._test_softmax(x, x_gpu, f_z, f_gpu, cmp)

        self._test_softmax(x, x, f_z, f_z, self._cmp)

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is applied when cudnn is required
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        val = numpy.random.rand(5).astype('float32')
        out_dnn = f(val)
        assert (len(
            [i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is not applied when cudnn is excluded or not
        # available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        out_cpu = f(val)
        utt.assert_allclose(out_dnn, out_cpu)
        assert (len(
            [i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 0)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 1)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
        # crash with manual graph
        y = T.fvector('y')
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert (len(
            [i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)
Esempio n. 56
0
import theano
import theano.tensor as T
import numpy as np
import odl
import odl.contrib.theano

# --- Wrap ODL operator as Theano operator --- #

# Define ODL operator
matrix = np.array([[1., 2.], [0., 0.], [0., 1.]])
odl_op = odl.MatrixOperator(matrix)

# Define evaluation point
x = [1., 2.]

# Create Theano placeholders
x_theano = T.fvector('x')

# Create Theano layer from ODL operator
odl_op_layer = odl.contrib.theano.TheanoOperator(odl_op)

# Build computation graph
y_theano = odl_op_layer(x_theano)
y_theano_func = theano.function([x_theano], y_theano)

# Evaluate using Theano and compare to odl_op(x)
print('Theano eval    : ', y_theano_func(x))
print('ODL eval       : ', odl_op(x))

# --- Wrap ODL functional as Theano operator --- #

# Define ODL cost and composed functional
Esempio n. 57
0
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=2000,
                    word_nkerns=500,
                    char_nkerns=100,
                    batch_size=1,
                    window_width=3,
                    emb_size=500,
                    char_emb_size=100,
                    hidden_size=200,
                    margin=0.5,
                    L2_weight=0.0003,
                    update_freq=1,
                    norm_threshold=5.0,
                    max_truncate=40,
                    max_char_len=40,
                    max_des_len=20,
                    max_relation_len=5,
                    max_Q_len=30,
                    train_neg_size=6,
                    neg_all=100,
                    train_size=75893,
                    test_size=19168,
                    mark='_BiasedMaxPool_lr0.1_word500_char100_noDes_ent2.0'
                    ):  #train_size=75909, test_size=17386
    #     maxSentLength=max_truncate+2*(window_width-1)
    model_options = locals().copy()
    print "model options", model_options
    rootPath = '/mounts/data/proj/wenpeng/Dataset/freebase/SimpleQuestions_v2/'
    triple_files = [
        'annotated_fb_data_train.entitylinking.top20_succSet_asInput.txt',
        'annotated_fb_data_test.entitylinking.top20_succSet_asInput.fromMo_FB5M.txt'
    ]

    rng = numpy.random.RandomState(23455)
    word2id, char2id = load_word2id_char2id(mark)
    #     datasets, datasets_test, length_per_example_test, vocab_size, char_size=load_test_or_valid(triple_files[0], triple_files[1], max_char_len, max_des_len, max_relation_len, max_Q_len, train_size, test_size)#max_char_len, max_des_len, max_relation_len, max_Q_len

    datasets_test, length_per_example_test, word2id, char2id = load_test_or_valid(
        triple_files[1], char2id, word2id, max_char_len, max_des_len,
        max_relation_len, max_Q_len, test_size)
    vocab_size = len(word2id)
    char_size = len(char2id)
    print 'vocab_size:', vocab_size, 'char_size:', char_size

    #     train_data=datasets
    #     valid_data=datasets[1]
    test_data = datasets_test
    #     result=(pos_entity_char, pos_entity_des, relations, entity_char_lengths, entity_des_lengths, relation_lengths, mention_char_ids, remainQ_word_ids, mention_char_lens, remainQ_word_lens, entity_scores)
    #
    #     train_pos_entity_char=train_data[0]
    #     train_pos_entity_des=train_data[1]
    #     train_relations=train_data[2]
    #     train_entity_char_lengths=train_data[3]
    #     train_entity_des_lengths=train_data[4]
    #     train_relation_lengths=train_data[5]
    #     train_mention_char_ids=train_data[6]
    #     train_remainQ_word_ids=train_data[7]
    #     train_mention_char_lens=train_data[8]
    #     train_remainQ_word_len=train_data[9]
    #     train_entity_scores=train_data[10]

    test_pos_entity_char = test_data[0]
    #    test_pos_entity_des=test_data[1]
    test_relations = test_data[2]
    test_entity_char_lengths = test_data[3]
    #    test_entity_des_lengths=test_data[4]
    test_relation_lengths = test_data[5]
    test_mention_char_ids = test_data[6]
    test_remainQ_word_ids = test_data[7]
    test_mention_char_lens = test_data[8]
    test_remainQ_word_len = test_data[9]
    test_entity_scores = test_data[10]
    #
    #     test_pos_entity_char=test_data[0]       #matrix, each row for line example, all head and tail entity, iteratively: 40*2*51
    #     test_pos_entity_des=test_data[1]        #matrix, each row for a examle: 20*2*51
    #     test_relations=test_data[2]             #matrix, each row for a example: 5*51
    #     test_entity_char_lengths=test_data[3]   #matrix, each row for a example: 3*2*51  (three valies for one entity)
    #     test_entity_des_lengths=test_data[4]    #matrix, each row for a example: 3*2*51  (three values for one entity)
    #     test_relation_lengths=test_data[5]      #matrix, each row for a example: 3*51
    #     test_mention_char_ids=test_data[6]      #matrix, each row for a mention: 40
    #     test_remainQ_word_ids=test_data[7]      #matrix, each row for a question: 30
    #     test_mention_char_lens=test_data[8]     #matrix, each three values for a mention: 3
    #     test_remainQ_word_len=test_data[9]      #matrix, each three values for a remain question: 3

    #     train_sizes=[len(train_pos_entity_char), len(train_pos_entity_des), len(train_relations), len(train_entity_char_lengths), len(train_entity_des_lengths),\
    #            len(train_relation_lengths), len(train_mention_char_ids), len(train_remainQ_word_ids), len(train_mention_char_lens), len(train_remainQ_word_len), len(train_entity_scores)]
    #     if sum(train_sizes)/len(train_sizes)!=train_size:
    #         print 'weird size:', train_sizes
    #         exit(0)

    test_sizes=[len(test_pos_entity_char), len(test_relations), len(test_entity_char_lengths),\
           len(test_relation_lengths), len(test_mention_char_ids), len(test_remainQ_word_ids), len(test_mention_char_lens), len(test_remainQ_word_len), len(test_entity_scores)]
    if sum(test_sizes) / len(test_sizes) != test_size:
        print 'weird size:', test_sizes
        exit(0)


#     n_train_batches=train_size/batch_size
#     n_test_batches=test_size/batch_size

#     train_batch_start=list(numpy.arange(n_train_batches)*batch_size)
#     test_batch_start=list(numpy.arange(n_test_batches)*batch_size)

#     indices_train_pos_entity_char=pythonList_into_theanoIntMatrix(train_pos_entity_char)
#     indices_train_pos_entity_des=pythonList_into_theanoIntMatrix(train_pos_entity_des)
#     indices_train_relations=pythonList_into_theanoIntMatrix(train_relations)
#     indices_train_entity_char_lengths=pythonList_into_theanoIntMatrix(train_entity_char_lengths)
#     indices_train_entity_des_lengths=pythonList_into_theanoIntMatrix(train_entity_des_lengths)
#     indices_train_relation_lengths=pythonList_into_theanoIntMatrix(train_relation_lengths)
#     indices_train_mention_char_ids=pythonList_into_theanoIntMatrix(train_mention_char_ids)
#     indices_train_remainQ_word_ids=pythonList_into_theanoIntMatrix(train_remainQ_word_ids)
#     indices_train_mention_char_lens=pythonList_into_theanoIntMatrix(train_mention_char_lens)
#     indices_train_remainQ_word_len=pythonList_into_theanoIntMatrix(train_remainQ_word_len)
#     indices_train_entity_scores=pythonList_into_theanoFloatMatrix(train_entity_scores)

#     indices_test_pos_entity_char=pythonList_into_theanoIntMatrix(test_pos_entity_char)
#     indices_test_pos_entity_des=pythonList_into_theanoIntMatrix(test_pos_entity_des)
#     indices_test_relations=pythonList_into_theanoIntMatrix(test_relations)
#     indices_test_entity_char_lengths=pythonList_into_theanoIntMatrix(test_entity_char_lengths)
#     indices_test_entity_des_lengths=pythonList_into_theanoIntMatrix(test_entity_des_lengths)
#     indices_test_relation_lengths=pythonList_into_theanoIntMatrix(test_relation_lengths)
#     indices_test_mention_char_ids=pythonList_into_theanoIntMatrix(test_mention_char_ids)
#     indices_test_remainQ_word_ids=pythonList_into_theanoIntMatrix(test_remainQ_word_ids)
#     indices_test_mention_char_lens=pythonList_into_theanoIntMatrix(test_mention_char_lens)
#     indices_test_remainQ_word_len=pythonList_into_theanoIntMatrix(test_remainQ_word_len)
#     indices_test_entity_scores=pythonList_into_theanoIntMatrix(test_entity_scores)

    rand_values = random_value_normal((vocab_size + 1, emb_size),
                                      theano.config.floatX,
                                      numpy.random.RandomState(1234))
    #     rand_values[0]=numpy.array(numpy.zeros(emb_size),dtype=theano.config.floatX)
    #rand_values[0]=numpy.array([1e-50]*emb_size)
    #     rand_values=load_word2vec_to_init(rand_values, rootPath+'word_emb.txt')
    embeddings = theano.shared(value=rand_values, borrow=True)

    char_rand_values = random_value_normal((char_size + 1, char_emb_size),
                                           theano.config.floatX,
                                           numpy.random.RandomState(1234))
    #     char_rand_values[0]=numpy.array(numpy.zeros(char_emb_size),dtype=theano.config.floatX)
    char_embeddings = theano.shared(value=char_rand_values, borrow=True)

    # allocate symbolic variables for the data
    index = T.iscalar()
    chosed_indices = T.ivector()

    ent_char_ids_M = T.imatrix()
    ent_lens_M = T.imatrix()
    men_char_ids_M = T.imatrix()
    men_lens_M = T.imatrix()
    rel_word_ids_M = T.imatrix()
    rel_word_lens_M = T.imatrix()
    #desH_word_ids_M=T.imatrix()
    #desH_word_lens_M=T.imatrix()
    q_word_ids_M = T.imatrix()
    q_word_lens_M = T.imatrix()
    ent_scores = T.fvector()

    filter_size = (emb_size, window_width)
    char_filter_size = (char_emb_size, window_width)
    #poolsize1=(1, ishape[1]-filter_size[1]+1)
    #     length_after_wideConv=ishape[1]+filter_size[1]-1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    char_filter_shape = (char_nkerns, 1, char_filter_size[0],
                         char_filter_size[1])
    word_filter_shape = (word_nkerns, 1, filter_size[0], filter_size[1])
    char_conv_W, char_conv_b = create_conv_para(rng,
                                                filter_shape=char_filter_shape)
    q_rel_conv_W, q_rel_conv_b = create_conv_para(
        rng, filter_shape=word_filter_shape)
    #q_desH_conv_W, q_desH_conv_b=create_conv_para(rng, filter_shape=word_filter_shape)
    params = [
        char_embeddings, embeddings, char_conv_W, char_conv_b, q_rel_conv_W,
        q_rel_conv_b
    ]  #, q_desH_conv_W, q_desH_conv_b]
    load_model_from_file(rootPath, params, mark)

    def SimpleQ_matches_Triple(ent_char_ids_f, ent_lens_f, rel_word_ids_f,
                               rel_word_lens_f, men_char_ids_f, q_word_ids_f,
                               men_lens_f, q_word_lens_f):

        #         rng = numpy.random.RandomState(23455)
        ent_char_input = char_embeddings[ent_char_ids_f.flatten()].reshape(
            (batch_size, max_char_len,
             char_emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
        men_char_input = char_embeddings[men_char_ids_f.flatten()].reshape(
            (batch_size, max_char_len,
             char_emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

        rel_word_input = embeddings[rel_word_ids_f.flatten()].reshape(
            (batch_size, max_relation_len,
             emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
        #desH_word_input = embeddings[desH_word_ids_f.flatten()].reshape((batch_size,max_des_len, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

        #         desT_word_input = embeddings[desT_word_ids_f.flatten()].reshape((batch_size,max_des_len, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
        q_word_input = embeddings[q_word_ids_f.flatten()].reshape(
            (batch_size, max_Q_len,
             emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

        #ent_mention
        ent_char_conv = Conv_with_input_para(rng,
                                             input=ent_char_input,
                                             image_shape=(batch_size, 1,
                                                          char_emb_size,
                                                          max_char_len),
                                             filter_shape=char_filter_shape,
                                             W=char_conv_W,
                                             b=char_conv_b)
        men_char_conv = Conv_with_input_para(rng,
                                             input=men_char_input,
                                             image_shape=(batch_size, 1,
                                                          char_emb_size,
                                                          max_char_len),
                                             filter_shape=char_filter_shape,
                                             W=char_conv_W,
                                             b=char_conv_b)
        #q-rel
        q_rel_conv = Conv_with_input_para(rng,
                                          input=q_word_input,
                                          image_shape=(batch_size, 1, emb_size,
                                                       max_Q_len),
                                          filter_shape=word_filter_shape,
                                          W=q_rel_conv_W,
                                          b=q_rel_conv_b)
        rel_conv = Conv_with_input_para(rng,
                                        input=rel_word_input,
                                        image_shape=(batch_size, 1, emb_size,
                                                     max_relation_len),
                                        filter_shape=word_filter_shape,
                                        W=q_rel_conv_W,
                                        b=q_rel_conv_b)
        #q_desH
        #q_desH_conv = Conv_with_input_para(rng, input=q_word_input,
        #        image_shape=(batch_size, 1, emb_size, max_Q_len),
        #        filter_shape=word_filter_shape, W=q_desH_conv_W, b=q_desH_conv_b)
        #desH_conv = Conv_with_input_para(rng, input=desH_word_input,
        #        image_shape=(batch_size, 1, emb_size, max_des_len),
        #        filter_shape=word_filter_shape, W=q_desH_conv_W, b=q_desH_conv_b)

        ent_conv_pool = Max_Pooling(rng,
                                    input_l=ent_char_conv.output,
                                    left_l=ent_lens_f[0],
                                    right_l=ent_lens_f[2])
        men_conv_pool = Max_Pooling(rng,
                                    input_l=men_char_conv.output,
                                    left_l=men_lens_f[0],
                                    right_l=men_lens_f[2])

        #q_rel_pool=Max_Pooling(rng, input_l=q_rel_conv.output, left_l=q_word_lens_f[0], right_l=q_word_lens_f[2])
        rel_conv_pool = Max_Pooling(rng,
                                    input_l=rel_conv.output,
                                    left_l=rel_word_lens_f[0],
                                    right_l=rel_word_lens_f[2])
        q_rel_pool = Average_Pooling_for_SimpleQA(
            rng,
            input_l=q_rel_conv.output,
            input_r=rel_conv_pool.output_maxpooling,
            left_l=q_word_lens_f[0],
            right_l=q_word_lens_f[2],
            length_l=q_word_lens_f[1] + filter_size[1] - 1,
            dim=max_Q_len + filter_size[1] - 1,
            topk=2)

        #q_desH_pool=Max_Pooling(rng, input_l=q_desH_conv.output, left_l=q_word_lens_f[0], right_l=q_word_lens_f[2])
        #desH_conv_pool=Max_Pooling(rng, input_l=desH_conv.output, left_l=desH_word_lens_f[0], right_l=desH_word_lens_f[2])


        overall_simi=cosine(ent_conv_pool.output_maxpooling, men_conv_pool.output_maxpooling)*0.33333+\
                    cosine(q_rel_pool.output_maxpooling, rel_conv_pool.output_maxpooling)*0.55
        #           0.0*cosine(q_desH_pool.output_maxpooling, desH_conv_pool.output_maxpooling)
        #                     cosine(q_desT_pool.output_maxpooling, desT_conv_pool.output_maxpooling)
        return overall_simi

    simi_list, updates = theano.scan(SimpleQ_matches_Triple,
                                     sequences=[
                                         ent_char_ids_M, ent_lens_M,
                                         rel_word_ids_M, rel_word_lens_M,
                                         men_char_ids_M, q_word_ids_M,
                                         men_lens_M, q_word_lens_M
                                     ])

    simi_list += 0.2 * ent_scores

    posi_simi = simi_list[0]
    nega_simies = simi_list[1:]
    loss_simi_list = T.maximum(
        0.0, margin - posi_simi.reshape((1, 1)) + nega_simies)
    loss_simi = T.sum(loss_simi_list)
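    #hinge ranking loss: every negative candidate whose score comes within
    #`margin` of the positive candidate (index 0) contributes to the loss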

    test_model = theano.function([
        ent_char_ids_M, ent_lens_M, men_char_ids_M, men_lens_M, rel_word_ids_M,
        rel_word_lens_M, q_word_ids_M, q_word_lens_M, ent_scores
    ], [loss_simi, simi_list],
                                 on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... testing'

    start_time = time.clock()
    mid_time = start_time

    epoch = 0

    test_loss = []
    succ = 0
    for i in range(test_size):

        #prepare data
        test_ent_char_ids_M = numpy.asarray(test_pos_entity_char[i],
                                            dtype='int32').reshape(
                                                (length_per_example_test[i],
                                                 max_char_len))
        test_ent_lens_M = numpy.asarray(test_entity_char_lengths[i],
                                        dtype='int32').reshape(
                                            (length_per_example_test[i], 3))
        test_men_char_ids_M = numpy.asarray(test_mention_char_ids[i],
                                            dtype='int32').reshape(
                                                (length_per_example_test[i],
                                                 max_char_len))
        test_men_lens_M = numpy.asarray(test_mention_char_lens[i],
                                        dtype='int32').reshape(
                                            (length_per_example_test[i], 3))
        test_rel_word_ids_M = numpy.asarray(test_relations[i],
                                            dtype='int32').reshape(
                                                (length_per_example_test[i],
                                                 max_relation_len))
        test_rel_word_lens_M = numpy.asarray(test_relation_lengths[i],
                                             dtype='int32').reshape(
                                                 (length_per_example_test[i],
                                                  3))
        #test_desH_word_ids_M =numpy.asarray( test_pos_entity_des[i], dtype='int32').reshape((length_per_example_test[i], max_des_len))
        #test_desH_word_lens_M = numpy.asarray(test_entity_des_lengths[i], dtype='int32').reshape((length_per_example_test[i], 3))
        test_q_word_ids_M = numpy.asarray(test_remainQ_word_ids[i],
                                          dtype='int32').reshape(
                                              (length_per_example_test[i],
                                               max_Q_len))
        test_q_word_lens_M = numpy.asarray(test_remainQ_word_len[i],
                                           dtype='int32').reshape(
                                               (length_per_example_test[i], 3))
        test_ent_scores = numpy.asarray(test_entity_scores[i],
                                        dtype=theano.config.floatX)

        loss_simi_i, simi_list_i = test_model(
            test_ent_char_ids_M, test_ent_lens_M, test_men_char_ids_M,
            test_men_lens_M, test_rel_word_ids_M, test_rel_word_lens_M,
            test_q_word_ids_M, test_q_word_lens_M, test_ent_scores)
        #                     print 'simi_list_i:', simi_list_i[:10]
        test_loss.append(loss_simi_i)
        if len(simi_list_i) == 1 or simi_list_i[0] >= max(simi_list_i[1:]):
            succ += 1
        if i % 1000 == 0:
            print 'testing', i, '...acc:', (succ * 1.0 /
                                            (i + 1)) * (19168 * 1.0 / 21687)
    succ = succ * 100.0 / 21687
    #now, check MAP and MRR
    print 'accu:', succ

    #     store_model_to_file(rootPath, params, succ, mark)

    print 'Epoch ', epoch, 'uses ', (time.clock() - mid_time) / 60.0, 'min'
Esempio n. 58
0
    def ready(self):
	args = self.args
	w_emb_layer = self.w_emb_layer
	c_emb_layer = self.c_emb_layer
	r_emb_layers = self.r_emb_layers
	r_matrix_layers = self.r_matrix_layers	

	char_dim = self.char_dim = args.char_dim
	char_lstm_dim = self.char_lstm_dim = args.char_lstm_dim
	word_dim = self.word_dim = args.word_dim
	word_lstm_dim = self.word_lstm_dim = args.word_lstm_dim
	
	dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

	word_ids = self.word_ids = T.ivector('word_ids')
	char_ids = self.char_ids = T.imatrix('char_ids')
	char_lens = self.char_lens = T.fvector('char_lens')
	char_masks = self.char_masks = T.imatrix('char_masks')
	up_ids = self.up_ids = T.imatrix('up_ids')
	up_rels = self.up_rels = T.imatrix('up_rels')
	up_id_masks = self.up_id_masks = T.imatrix('up_id_masks')
	down_ids = self.down_ids = T.imatrix('down_ids')
	down_rels = self.down_rels = T.imatrix('down_rels')
	down_id_masks = self.down_id_masks = T.imatrix('down_id_masks')
	tag_ids = self.tag_ids = T.ivector('tag_ids')
	
	layers = self.layers = [w_emb_layer, c_emb_layer]
	layers.extend(r_emb_layers)
	layers.extend(r_matrix_layers)	

	inputs = self.inputs = []

	inputs.append(self.word_ids)
	inputs.append(self.char_ids)
	inputs.append(self.char_lens)
	inputs.append(self.char_masks)
	inputs.append(self.up_ids)
	inputs.append(self.up_rels)
	inputs.append(self.up_id_masks)
	inputs.append(self.down_ids)
	inputs.append(self.down_rels)
	inputs.append(self.down_id_masks)
	inputs.append(self.tag_ids)
	wslices = w_emb_layer.forward(word_ids)
	cslices = c_emb_layer.forward(char_ids.ravel())
	cslices = cslices.reshape((char_ids.shape[0], char_ids.shape[1], char_dim))
	cslices = cslices.dimshuffle(1, 0, 2)
	
	bv_ur_slicess = []
        bv_dr_slicess = []
        b_ur_slicess = []
        b_dr_slicess = []
	
	bv_ur_matrixss = []
	bv_dr_matrixss = []
	b_ur_matrixss = []
	b_dr_matrixss = []
	
	for r_matrix_layer in r_matrix_layers:
            bv_ur_matrixs = r_matrix_layer.forward1(up_rels.ravel())
            bv_dr_matrixs = r_matrix_layer.forward1(down_rels.ravel())
            b_ur_matrixs = r_matrix_layer.forward2(up_rels.ravel())
            b_dr_matrixs = r_matrix_layer.forward2(down_rels.ravel())
            bv_ur_matrixss.append(bv_ur_matrixs.reshape((up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            bv_dr_matrixss.append(bv_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1], word_dim, word_dim)))
            b_ur_matrixss.append(b_ur_matrixs.reshape((up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            b_dr_matrixss.append(b_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1], word_dim, word_dim)))
	
	for r_emb_layer in r_emb_layers:
            bv_ur_slices = r_emb_layer.forward(up_rels.ravel())
            bv_dr_slices = r_emb_layer.forward(down_rels.ravel())
            b_ur_slices = r_emb_layer.forward2(up_rels.ravel())
            b_dr_slices = r_emb_layer.forward2(down_rels.ravel())
            bv_ur_slicess.append(bv_ur_slices.reshape((up_rels.shape[0], up_rels.shape[1], word_dim)))
            bv_dr_slicess.append(bv_dr_slices.reshape((down_rels.shape[0], down_rels.shape[1], word_dim)))
            b_ur_slicess.append(b_ur_slices.reshape((up_rels.shape[0], up_rels.shape[1], word_dim)))
            b_dr_slicess.append(b_dr_slices.reshape((down_rels.shape[0], down_rels.shape[1], word_dim)))

	char_masks = char_masks.dimshuffle(1, 0)

	prev_output = wslices
	prev_size = word_dim

	if char_dim:
	    layers.append(LSTM(
		n_in = char_dim,
		n_out = char_lstm_dim,
		direction = 'bi' if args.char_bidirect else 'si'	
	    ))
	    prev_output_2 = cslices
	    prev_output_2 = apply_dropout(prev_output_2, dropout, v2 = True)
	    prev_output_2 = layers[-1].forward_all(cslices, char_masks)
	    prev_output_2 = T.sum(prev_output_2, axis = 0)
	    prev_output_2 = prev_output_2 / (1e-6 * T.ones_like(char_lens) + char_lens).dimshuffle(0, 'x')

	    prev_size += char_lstm_dim
	    prev_output = T.concatenate([prev_output, prev_output_2], axis = 1)
	
	prev_output = apply_dropout(prev_output, dropout)
	if args.conv != 0:
	    for i in range(args.clayer):
            	layers.append(GKNNMultiHeadGate(
                        n_in = prev_size,
                        n_out = prev_size,
			n_head = args.head
                        ))
	    	prev_output = layers[-1].forward_all(prev_output, up_ids, up_id_masks, bv_ur_slicess[0], down_ids, down_id_masks, bv_dr_slicess[0])
	    	prev_output = apply_dropout(prev_output, dropout)
	
	
	#prev_size *= 2
	#layers.append(LSTM(
	#    n_in = prev_size,
	#    n_out = word_lstm_dim,
	#    direction = 'bi' if args.word_bidirect else 'si'
	#))
	
	#prev_output = prev_output.dimshuffle(0, 'x', 1)
	#prev_output = layers[-1].forward_all(prev_output)
	#prev_output = prev_output.reshape((prev_output.shape[0], prev_output.shape[-1]))
	
	#prev_size = word_lstm_dim
	
	layers.append(Layer(
	    n_in = prev_size,
	    n_out = args.classes,
	    activation = linear, #ReLU,
	    has_bias = False
	))

	n_tags = args.classes
	s_len = char_ids.shape[0]
	tags_scores = layers[-1].forward(prev_output)
	transitions = shared((n_tags + 2, n_tags + 2), 'transitions')
	small = -1000
        b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
        observations = T.concatenate(
            [tags_scores, small * T.ones((s_len, 2))],
            axis=1
        )
	
        observations = T.concatenate(
            [b_s, observations, e_s],
            axis=0
        )

        real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()
	b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
        e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
        padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
	
	pre_ids = T.arange(s_len + 1)
	
	s_ids = T.arange(s_len + 1) + 1
	
        real_path_score += transitions[
           padded_tags_ids[pre_ids],
           padded_tags_ids[s_ids]
        ].sum()
	
	all_paths_scores = CRFForward(observations, transitions)
        self.nll_loss = nll_loss = - (real_path_score - all_paths_scores)
        preds = CRFForward(observations, transitions, viterbi = True,
                        return_alpha = False, return_best_sequence=True)
        
	self.pred = preds[1:-1]
	
	self.l2_sqr = None
        params = self.params = [transitions]
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

	
	#for l, i in zip(layers[3:], range(len(layers[3:]))):
        for l, i in zip(layers[2+len(r_emb_layers)+len(r_matrix_layers):], range(len(layers[2+len(r_emb_layers)+len(r_matrix_layers):]))):
	    say("layer {}: n_in={}\tn_out={}\n".format(
                    i, l.n_in, l.n_out
                ))

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))
	
	cost = self.nll_loss + self.l2_sqr

	lr_method_name = args.learning
	lr_method_parameters = {}
	lr_method_parameters['lr'] = args.learning_rate
	updates = Optimization(clip=5.0).get_updates(lr_method_name, cost, params, **lr_method_parameters)
	
	f_train = theano.function(
	    	inputs = self.inputs,
		outputs = [cost, nll_loss],
		updates = updates,
		allow_input_downcast = True
	)

	f_eval = theano.function(
		inputs = self.inputs[:-1],
		outputs = self.pred,
		allow_input_downcast = True
	)
	
	return f_train, f_eval
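CRFForward above returns the log-partition function over all tag sequences (and, with viterbi=True, the best-scoring path). A minimal NumPy sketch of the forward recursion such an op presumably implements; obs and trans are illustrative names, not from the original:

import numpy as np

def crf_log_partition(obs, trans):
    # obs: (seq_len, n_tags) emission scores; trans: (n_tags, n_tags) transition scores
    alpha = obs[0]
    for t in range(1, len(obs)):
        # log-sum-exp over the previous tag, for each current tag
        scores = alpha[:, None] + trans + obs[t][None, :]
        m = scores.max(axis=0)
        alpha = m + np.log(np.exp(scores - m).sum(axis=0))
    m = alpha.max()
    return m + np.log(np.exp(alpha - m).sum())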
Esempio n. 59
0
beta = theano.shared(
    numpy.asarray(numpy.random.randn(784, 1), dtype=theano.config.floatX))
py_x = T.nnet.softmax(T.dot(X, beta))
y_pred = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, y))


# energy function for normal distribution with normal momentum
def normal_en(pos, mom):
    total_en = T.dot(pos, pos) / 2 + T.dot(mom, mom) / 2
    f = theano.function([pos, mom], total_en)
    return (f)


beta_0 = T.fvector()
p_0 = T.fvector()
en = lambda beta_0, p_0: T.dot(beta_0, beta_0) * 0.5 + T.dot(p_0, p_0) * 0.5

#en_f = theano.function([],en)


def simulate_dynamics(initial_pos, initial_mom, stepsize, n_steps, energy_fn):
    def leapfrog(pos, mom, step):
        # position update: pos(t + step) from dH/dmom at the current state
        dE_dmom = T.grad(energy_fn(pos, mom), mom)
        new_pos = pos + step * dE_dmom
        # momentum update: mom(t + step) from dH/dpos at the new position
        dE_dpos = T.grad(energy_fn(new_pos, mom), new_pos)
        new_mom = mom - step * dE_dpos
        # scan-style return value (the source snippet is truncated just below)
        return [new_pos, new_mom], {}
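
    # Hypothetical continuation (an assumption, not in the source, which cuts
    # off here): in the standard Theano HMC tutorial the step function is
    # iterated with theano.scan like this.
    (all_pos, all_mom), scan_updates = theano.scan(
        fn=leapfrog,
        outputs_info=[initial_pos, initial_mom],
        non_sequences=[stepsize],
        n_steps=n_steps)
    return all_pos[-1], all_mom[-1]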
Example n. 60
    def __init__(self, config):
        ModelBase.__init__(self)

        self.config = config
        self.verbose = self.config['verbose']
        self.name = 'alexnet'
        batch_size = config['batch_size']
        flag_datalayer = config['use_data_layer']
        lib_conv = config['lib_conv']
        n_softmax_out = config['n_softmax_out']
        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        x = T.ftensor4('x')
        y = T.lvector('y')
        rand = T.fvector('rand')
        lr = T.scalar('lr')

        if self.verbose: print 'AlexNet 2/16'
        self.layers = []
        params = []
        weight_types = []

        if flag_datalayer:
            data_layer = DataLayer(input=x,
                                   image_shape=(3, 256, 256, batch_size),
                                   cropsize=227,
                                   rand=rand,
                                   mirror=True,
                                   flag_rand=config['rand_crop'])

            layer1_input = data_layer.output
        else:
            layer1_input = x

        convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                        image_shape=(3, 227, 227, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4,
                                        padsize=0,
                                        group=1,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.0,
                                        lrn=True,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

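        # group=2 splits the channels into two independent filter groups (48
        # of the 96 input channels each here), a holdover from AlexNet's
        # original two-GPU layout.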
        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, 27, 27, batch_size),
                                        filter_shape=(96, 5, 5, 256),
                                        convstride=1,
                                        padsize=2,
                                        group=2,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.1,
                                        lrn=True,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, 13, 13, batch_size),
                                        filter_shape=(256, 3, 3, 384),
                                        convstride=1,
                                        padsize=1,
                                        group=1,
                                        poolsize=1,
                                        poolstride=0,
                                        bias_init=0.0,
                                        lrn=False,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 384),
                                        convstride=1,
                                        padsize=1,
                                        group=2,
                                        poolsize=1,
                                        poolstride=0,
                                        bias_init=0.1,
                                        lrn=False,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 256),
                                        convstride=1,
                                        padsize=1,
                                        group=2,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.0,
                                        lrn=False,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

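        # The conv stack keeps activations in (channels, rows, cols, batch)
        # layout, as the image_shape arguments above show; dimshuffle(3, 0, 1, 2)
        # moves the batch axis first, and flatten(..., 2) yields a
        # (batch_size, 256 * 6 * 6) = (batch_size, 9216) matrix for the FC
        # layers.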
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input,
                            n_in=9216,
                            n_out=4096,
                            verbose=self.verbose)
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type

        dropout_layer6 = DropoutLayer(fc_layer6.output,
                                      n_in=4096,
                                      n_out=4096,
                                      verbose=self.verbose)

        fc_layer7 = FCLayer(input=dropout_layer6.output,
                            n_in=4096,
                            n_out=4096,
                            verbose=self.verbose)
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type

        dropout_layer7 = DropoutLayer(fc_layer7.output,
                                      n_in=4096,
                                      n_out=4096,
                                      verbose=self.verbose)

        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output,
                                      n_in=4096,
                                      n_out=n_softmax_out,
                                      verbose=self.verbose)
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################
        self.p_y_given_x = softmax_layer8.p_y_given_x
        self.y_pred = softmax_layer8.y_pred

        self.output = self.p_y_given_x

        self.cost = softmax_layer8.negative_log_likelihood(y)
        self.error = softmax_layer8.errors(y)
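        # Top-5 error only makes sense with at least 5 classes; otherwise
        # fall back to top-n_softmax_out.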
        if n_softmax_out < 5:
            self.error_top_5 = softmax_layer8.errors_top_x(y, n_softmax_out)
        else:
            self.error_top_5 = softmax_layer8.errors_top_x(y, 5)
        self.params = params

        # inputs
        self.x = x
        self.y = y
        self.rand = rand
        self.lr = lr
        self.shared_x = theano.shared(
            np.zeros(
                (3, config['input_width'], config['input_height'],
                 config['file_batch_size']),  # for loading large batch
                dtype=theano.config.floatX),
            borrow=True)

        self.shared_y = theano.shared(np.zeros((config['file_batch_size'], ),
                                               dtype=int),
                                      borrow=True)
        self.shared_lr = theano.shared(np.float32(config['learning_rate']))

        # training related
        self.base_lr = np.float32(config['learning_rate'])
        self.step_idx = 0
        self.mu = config['momentum']  # def: 0.9 # momentum
        self.eta = config['weight_decay']  #0.0002 # weight decay
        self.weight_types = weight_types
        self.batch_size = batch_size

        self.grads = T.grad(self.cost, self.params)

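        # A whole "file batch" is staged on the GPU in shared_x / shared_y;
        # subb_ind picks out one sub-batch window of batch_size examples
        # along the batch axis for each training step.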
        subb_ind = T.iscalar('subb')  # sub batch index
        #print self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval()
        self.subb_ind = subb_ind
        self.shared_x_slice = self.shared_x[:, :, :, subb_ind *
                                            self.batch_size:(subb_ind + 1) *
                                            self.batch_size]
        self.shared_y_slice = self.shared_y[subb_ind *
                                            self.batch_size:(subb_ind + 1) *
                                            self.batch_size]