Example 1
def test_vector_clf_curve():
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    curve = tmetrics.classification._vector_clf_curve(yt, yp)
    f = theano.function([yt, yp], curve, allow_input_downcast=True)
    true, predicted = np.random.binomial(n=1, p=.5, size=10).astype('float32'), np.random.random(10).astype('float32')
    fps, tps, _ = f(true, predicted)
    s_fps, s_tps, s_ = sklearn.metrics.ranking._binary_clf_curve(true, predicted)
    np.set_printoptions(suppress=True)
    print('true')
    print(true)
    print('predicted')
    print(predicted)
    print('fps')
    print(fps)
    print('sklearn fps')
    print(s_fps)
    print('tps')
    print(tps)
    print('sklearn tps')
    print(s_tps)
    print('threshold values')
    print(_)
    print('sklearn threshold values')
    print(s_)
    assert np.allclose(fps, s_fps)
    assert np.allclose(tps, s_tps)
    assert np.allclose(_, s_)
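
For reference, the curve this test checks can be reproduced in a few lines of NumPy; the following is a minimal sketch of the same sort-and-cumsum logic that sklearn's `_binary_clf_curve` uses (illustrative only, not tmetrics' actual implementation):

import numpy as np

def binary_clf_curve(y_true, y_score):
    order = np.argsort(y_score)[::-1]           # sort scores descending
    y_true, y_score = y_true[order], y_score[order]
    distinct = np.where(np.diff(y_score))[0]    # indices where the sorted score changes
    threshold_idxs = np.r_[distinct, y_true.size - 1]
    tps = np.cumsum(y_true)[threshold_idxs]     # true positives at each threshold
    fps = 1 + threshold_idxs - tps              # false positives at each threshold
    return fps, tps, y_score[threshold_idxs]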
Example 2
    def test_cudnn_softmax_grad_opt(self):
        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
        # applied when cudnn is required
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
        # applied when cudnn is excluded or not available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)]) == 0
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 1

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization
        # does not crash on a manually constructed graph
        y = T.fvector("y")
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0
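
Scanning `f.maker.fgraph.toposort()` for a given op class recurs throughout these tests; a small helper in the same style keeps the assertions readable (a sketch built only on the API already used above):

def count_ops(f, op_class):
    """Count nodes whose op is an instance of op_class in a compiled function."""
    return sum(1 for node in f.maker.fgraph.toposort()
               if isinstance(node.op, op_class))

# e.g.: assert count_ops(f, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad) == 1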
Example 3
def test_0():

    N = 16*1000*10*1

    if 1:
        aval = abs(numpy.random.randn(N).astype('float32'))+.1
        bval = numpy.random.randn(N).astype('float32')
        a = T.fvector()
        b = T.fvector()
    else:
        aval = abs(numpy.random.randn(N))+.1
        bval = numpy.random.randn(N)
        a = T.dvector()
        b = T.dvector()

    f = theano.function([a,b], T.pow(a,b), mode='LAZY')
    theano_opencl.elemwise.swap_impls=False
    g = theano.function([a,b], T.pow(a,b), mode='LAZY')

    print('ocl   time', timeit.Timer(lambda: f(aval, bval)).repeat(3, 3))

    print('gcc   time', timeit.Timer(lambda: g(aval, bval)).repeat(3, 3))

    print('numpy time', timeit.Timer(lambda: aval**bval).repeat(3, 3))

    assert ((f(aval, bval) - aval**bval)**2).sum() < 1.1
    assert ((g(aval, bval) - aval**bval)**2).sum() < 1.1
Example 4
 def __init__(self, name, path, learning_rate=0.001):
     self.r_symbol = T.fvector('r')
     self.gamma_symbol = T.fscalar('gamma')
     self.action_symbol = T.fmatrix('action')
     self.y_symbol = T.fvector('y')
     super(ReinforcementModel, self).__init__(
         name, path, learning_rate=learning_rate)
Example 5
    def setUp(self):
        self.x_true = np.random.uniform(size=5).astype('float32')
        self.x_false = np.random.uniform(size=5).astype('float32')

        x_true_var = T.fvector()
        x_false_var = T.fvector()
        self.test = function(inputs=[x_true_var, x_false_var], outputs=max_margin_loss(x_true_var, x_false_var, 1))
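
`max_margin_loss` is not shown in this snippet; a common formulation it plausibly implements (an assumption, not the library's confirmed definition) is a hinge loss between the true and false scores:

import theano.tensor as T

def max_margin_loss(x_true, x_false, margin):
    # hinge loss: penalize whenever a false score comes within `margin` of a true score
    return T.maximum(0., margin - x_true + x_false).sum()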
Example 6
    def optimize(self, train_data, lam, fixed_length=3):
    
        i  = T.iscalar('i')
        lr = T.fscalar('lr')
        Xl = T.fvector('Xl')
        Xr = T.fvector('Xr')

        cost = self.ae.cost(Xl, Xr)  #+ lam * self.ae.penalty()
        grads = T.grad(cost, self.ae.params)
        update_vars = []

        for var, gvar in zip(self.ae.params, grads):
            if var.get_value().ndim == 1:
                update_vars.append((var, var - 0.1*lr*gvar))
            #elif var.get_value().ndim > 1:
            #    new_param = var - lr*gvar
            #    len_W = T.sqrt(T.sum(new_param**2, axis=0))
            #    desired_W = T.clip(len_W, 0., fixed_length)
            #    ratio = desired_W  / (len_W + 1e-7)
            #    new_param = new_param * ratio
            #    update_vars.append((var, new_param))
            else:
                update_vars.append((var, var - lr*gvar))

        opt = theano.function([i, lr], cost, updates=update_vars,
                givens={Xl: train_data[i,0], Xr: train_data[i,1]})#, allow_input_downcast=True)

        #get_grad = theano.function([], grads[3], givens={X:train_data[0]}, allow_input_downcast=True)
        #get_gradb = theano.function([], grads[-1], givens={X:train_data[0]}, allow_input_downcast=True)
        return opt#, get_grad, get_gradb
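
The compiled `opt` takes a sample index and a learning rate, with `givens` routing `train_data[i]` into the graph, so a hypothetical training loop might look like this (`n_samples` is assumed, not taken from the snippet):

n_samples = 1000                 # assumed; in practice train_data.get_value().shape[0]
for epoch in range(10):
    for i in range(n_samples):
        cost_i = opt(i, 0.01)    # the index selects the (Xl, Xr) pair via `givens`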
Example 7
def test_brier_score_loss_from_scikit_learn_example():
    """
    from sklearn docs...
    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import brier_score_loss
    >>> y_true = np.array([0, 1, 1, 0])
    >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])
    >>> brier_score_loss(y_true, y_prob)  
    0.037...

    """
    y_true = T.fvector('y_true')
    y_predicted = T.fvector('y_predicted')
    brier_score = tmetrics.brier_score_loss(y_true, y_predicted)
    f = theano.function([y_true, y_predicted], brier_score)
    yt = np.array([0, 1, 1, 0], 'float32')
    yp = np.array([.1, .9, .8, .3], theano.config.floatX)
    refscore = sklearn.metrics.brier_score_loss(yt, yp)
    tol = .01
    score = f(yt, yp)
    assert (refscore - tol) < score < (refscore + tol)

    #also test the function is numpy/pandas compatible
    assert (refscore - tol) < tmetrics.brier_score_loss(yt, yp) < (refscore + tol)
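
The Brier score is by definition the mean squared difference between predicted probabilities and binary outcomes, so the expression under test is equivalent to a one-liner (a sketch of the definition, not necessarily tmetrics' exact code):

import theano.tensor as T

def brier_score_loss(y_true, y_predicted):
    # mean squared error between outcomes in {0, 1} and predicted probabilities
    return T.mean(T.sqr(y_true - y_predicted))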
Example 8
    def setUp(self):
        self.x_true = np.random.uniform(low=0, high=1, size=5).astype('float32')
        self.x_false_list = [np.random.uniform(low=0, high=1, size=5).astype('float32') for i in range(10)]

        x_true_var = T.fvector()
        x_false_var_list = [T.fvector() for t in self.x_false_list]
        self.test = function(inputs=[x_true_var] + x_false_var_list, outputs=negative_sampling_loss(x_true_var, x_false_var_list))
Example 9
    def __init__(self, input_layers, *args, **kwargs):
        super(RMSEObjective, self).__init__(input_layers, *args, **kwargs)
        self.input_systole = input_layers["systole:value"]
        self.input_diastole = input_layers["diastole:value"]

        self.target_vars["systole:value"] = T.fvector("systole_target_value")
        self.target_vars["diastole:value"] = T.fvector("diastole_target_value")
Example 10
def theanoVecVecMul(In1, In2, opt):
    var1 = T.fvector('var1')
    var2 = T.fvector('var2')
    if opt == 'M':
        var3 = T.dot(var1, var2)
    else:
        var3 = T.mul(var1, var2)
    DivVec = function([var1,var2],var3)
    return DivVec(In1,In2)
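
Despite the `DivVec` name, the function returns a dot product when `opt == 'M'` and an elementwise product otherwise; for example:

import numpy as np

a = np.array([1., 2., 3.], dtype='float32')
b = np.array([4., 5., 6.], dtype='float32')
print(theanoVecVecMul(a, b, 'M'))   # dot product: 32.0
print(theanoVecVecMul(a, b, 'E'))   # elementwise product: [ 4. 10. 18.]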
Example 11
    def __init__(self, num_emb, emb_dim, hidden_dim, output_dim,
                 degree=2, learning_rate=0.01, momentum=0.9,
                 trainable_embeddings=True,
                 labels_on_nonroot_nodes=False):
        assert emb_dim > 1 and hidden_dim > 1
        self.num_emb = num_emb
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.degree = degree
        self.learning_rate = learning_rate
        self.momentum = momentum

        self.params = []
        self.embeddings = theano.shared(self.init_matrix([self.num_emb, self.emb_dim]))
        if trainable_embeddings:
            self.params.append(self.embeddings)

        self.x = T.ivector(name='x')  # word indices
        self.tree = T.imatrix(name='tree')  # shape [None, self.degree]
        if labels_on_nonroot_nodes:
            self.y = T.fmatrix(name='y')  # output shape [None, self.output_dim]
            self.y_exists = T.fvector(name='y_exists')  # shape [None]
        else:
            self.y = T.fvector(name='y')  # output shape [self.output_dim]

        self.num_words = self.x.shape[0]  # total number of nodes (leaves + internal) in tree
        emb_x = self.embeddings[self.x]
        emb_x = emb_x * T.neq(self.x, -1).dimshuffle(0, 'x')  # zero-out non-existent embeddings

        self.tree_states = self.compute_tree(emb_x, self.tree)
        self.final_state = self.tree_states[-1]
        if labels_on_nonroot_nodes:
            self.output_fn = self.create_output_fn_multi()
            self.pred_y = self.output_fn(self.tree_states)
            self.loss = self.loss_fn_multi(self.y, self.pred_y, self.y_exists)
        else:
            self.output_fn = self.create_output_fn()
            self.pred_y = self.output_fn(self.final_state)
            self.loss = self.loss_fn(self.y, self.pred_y)

        updates = self.gradient_descent(self.loss)

        train_inputs = [self.x, self.tree, self.y]
        if labels_on_nonroot_nodes:
            train_inputs.append(self.y_exists)
        self._train = theano.function(train_inputs,
                                      [self.loss, self.pred_y],
                                      updates=updates)

        self._evaluate = theano.function([self.x, self.tree],
                                         self.final_state)

        self._predict = theano.function([self.x, self.tree],
                                        self.pred_y)
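
A minimal call sequence for this model could look as follows; the shapes are assumptions inferred from the comments above (word indices, a `[None, degree]` tree matrix, a target of length `output_dim`), not a documented API:

import numpy as np

# hypothetical driver, assuming `model` is an instance of the class above
x = np.array([4, 7, 2], dtype='int32')       # word indices for the tree's nodes
tree = np.array([[0, 1]], dtype='int32')     # one internal node with children 0 and 1
y = np.random.uniform(size=model.output_dim).astype('float32')

loss, pred_y = model._train(x, tree, y)
root_state = model._evaluate(x, tree)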
Example 12
def test_roc_auc_score():
    true = np.random.binomial(n=1, p=.5, size=50).astype('float32')
    #true = np.array([0, 0, 1, 1]).astype('float32')
    predicted = np.random.random(size=50).astype('float32')
    #predicted = np.array([0.1, 0.4, 0.35, 0.8]).astype('float32')
    yt = T.fvector('y_true')
    yp = T.fvector('y_predicted')
    roc_auc_score_expr = tmetrics.classification.roc_auc_score(yt, yp)
    refscore = sklearn.metrics.roc_auc_score(true, predicted)
    print('refscore')
    print(refscore)
    f = theano.function([yt, yp], roc_auc_score_expr)
    score = f(true, predicted)
    print('score')
    print(score)
    try:
        assert np.allclose(refscore, score)
    except AssertionError:
        fps, tps, thresholds = tmetrics.classification._binary_clf_curve(yt, yp)
        fpr, tpr, _thresh = tmetrics.classification.roc_curve(yt, yp)
        f = theano.function([yt, yp], [fps, tps, thresholds, fpr, tpr, _thresh, roc_auc_score_expr])
        result = f(true, predicted)
        print('** tmetrics **')
        print('fps')
        print(result[0])
        print('tps')
        print(result[1])
        print('thresholds')
        print(result[2])
        print('fpr')
        print(result[3])
        print('tpr')
        print(result[4])
        print('_thresh')
        print(result[5])
        print('roc score')
        print(result[6])

        print('** refscore **')
        curve = sklearn.metrics.ranking._binary_clf_curve(true, predicted)
        print('fps')
        print(curve[0])
        print('tps')
        print(curve[1])
        print('thresholds')
        print(curve[2])
        trapz = np.trapz(curve[1], curve[0])
        print('trapz')
        print(trapz)
        print('auc')
        print(sklearn.metrics.ranking.auc(curve[0], curve[1]))
        print('roc_curve')
        print(sklearn.metrics.roc_curve(true, predicted))
        raise
Example 13
def main():

    #loading in data set
    dataset_for_error = '/vega/stats/users/sl3368/Data_LC/NormData/LC_stim_15.mat'
    stimuli = load_class_data_batch(dataset_for_error)
    stim = stimuli[0]
    data = theano.shared( stim, borrow=True)
    print('Number of rows: ')
    print(stim.shape[0])

    #setting variable for error 
    init = numpy.float64(0.0)
    mean_error = shared(init)

    #writing theano functions for computing mean square error for one lag 
    
    prediction = T.fvector('predict') # 60 row vector representing time t

    real = T.fvector('real') #row representing time t+1 

    cost = T.mean( (real - prediction) ** 2)

    #function for updating mean error
    batch_error = theano.function([prediction,real],cost,updates=[(mean_error, mean_error + cost)])


    increment = stim.shape[0] // 100
    #iterating over batch and computing the error
    for index in range(stim.shape[0]-1):
        if index % increment == 0:
            print(str(index // increment) + '% done...')
        recent = batch_error(stim[index], stim[index+1])

    #m_e_avg = mean_error / 9000000

    #printing result
    print('Total error: ')
    print(mean_error.get_value())

    print('Finding padding amount...')
    num_zero = float(0.0)
    #calculating zeros amount
    for index in range(stim.shape[0]):
        is_zero = True
        for i in range(60):
            if stim[index][i] != 0:
               is_zero = False
   
        if is_zero:
            num_zero = num_zero + 1

    print('Percent Zero: ' + str(float(num_zero / (increment * 100))))
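
Because the one-lag cost is just the mean squared difference between consecutive rows, the accumulation loop above can be cross-checked with a vectorized NumPy computation (a sketch, assuming `stim` is a dense in-memory array):

import numpy as np

# per-step MSE between each row and its successor, then summed over steps
per_step_mse = np.mean((stim[1:] - stim[:-1]) ** 2, axis=1)
total_error = per_step_mse.sum()   # comparable to mean_error.get_value()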
Example 14
    def test_softmax_grad(self):
        def cmp(n, m, f, f_gpu):
            data = numpy.arange(n * m, dtype="float32").reshape(n, m)
            gdata = numpy.asarray(data)[:, :, None, None]

            out = f(data)
            gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
            utt.assert_allclose(out, gout)

        x = T.matrix("x", "float32")
        x_gpu = T.tensor4("x_gpu", "float32")
        f_z = T.nnet.softmax_op
        f_gpu = dnn.GpuDnnSoftmax("accurate", "channel")

        # Verify the grad operation
        dims = (2, 3, 4, 5)
        gdata = numpy.arange(numpy.product(dims), dtype="float32").reshape(dims)
        T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

        # Verify that the CPU and GPU implementations return the same results
        # up to a tolerance.

        self._test_softmax(x, x_gpu, f_z, f_gpu, cmp)

        self._test_softmax(x, x, f_z, f_z, self._cmp)

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is applied when cudnn is required
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is not applied when cudnn is excluded or not
        # available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector("y")
        f = theano.function([y], T.grad(T.nnet.softmax(y).mean(), y), mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 0
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 1

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization
        # does not crash on a manually constructed graph
        y = T.fvector("y")
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert len([i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1
        assert len([i for i in sorted_f if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)]) == 0
Example 15
    def get_div_function(self):
        tind = T.ivector('ind')
        if self.NMF_updates == 'beta':
            self.div = theano.function(inputs=[tind],
                                       outputs=costs.beta_div(self.X_buff[tind[1]:tind[2], ],
                                                              self.W[tind[0]].T,
                                                              self.H[tind[3]:tind[4], ],
                                                              self.beta),
                                       name="div",
                                       allow_input_downcast=True)
        if self.NMF_updates == 'groupNMF':
            tcomp = T.ivector('comp')
            tlambda = T.fvector('lambda')
            tSc = T.ivector('Sc')
            tCs = T.ivector('Cs')
            tparams = [tind, tcomp, tlambda, tSc, tCs]
            cost, beta_div, cls_dist, ses_dist = costs.group_div(self.X_buff[tind[1]:tind[2], ],
                                                                 self.W,
                                                                 self.H[tind[3]:tind[4], ],
                                                                 self.beta,
                                                                 tparams)

            self.div = theano.function(inputs=[tind, tcomp, tlambda, tSc, tCs],
                                       outputs=[cost,
                                                beta_div,
                                                cls_dist,
                                                ses_dist],
                                       name="div",
                                       allow_input_downcast=True,
                                       on_unused_input='ignore')

        if self.NMF_updates == 'noiseNMF':
            tcomp = T.ivector('comp')
            tlambda = T.fvector('lambda')
            tSc = T.ivector('Sc')
            tparams = [tind, tcomp, tlambda, tSc]
            cost, beta_div, cls_dist, ses_dist = costs.noise_div(self.X_buff[tind[1]:tind[2], ],
                                                                 self.W,
                                                                 self.Wn,
                                                                 self.H[tind[3]:tind[4], ],
                                                                 self.beta,
                                                                 tparams)

            self.div = theano.function(inputs=[tind, tcomp, tlambda, tSc],
                                       outputs=[cost,
                                                beta_div,
                                                cls_dist,
                                                ses_dist],
                                       name="div",
                                       allow_input_downcast=True,
                                       on_unused_input='ignore')
Example 16
def test_1D_roc_auc_scores():
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    y = np.array([0, 0, 1, 1]).astype('float32')
    scores = np.array([0.1, 0.4, 0.35, 0.8]).astype('float32')
    ref_fpr, ref_tpr, ref_thresh = sklearn.metrics.roc_curve(y, scores)
    roc_auc_scores = tmetrics.classification.roc_auc_scores(yt, yp)
    fpr, tpr, thresh = tmetrics.classification.roc_curves(yt, yp)
    f = theano.function([yt, yp], [fpr, tpr, thresh, roc_auc_scores])
    score_fpr, score_tpr, score_thresh, score_auc = f(y, scores)
    assert np.allclose(ref_fpr, score_fpr)
    assert np.allclose(ref_tpr, score_tpr)
    assert np.allclose(ref_thresh, score_thresh)
    assert np.allclose(sklearn.metrics.roc_auc_score(y, scores), score_auc)
Example 17
def test_precisison_recall_curves_vector(n_iter=1):
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    p_expr, r_expr, thresh_expr = tmetrics.classification.precision_recall_curves(yt, yp)
    f = theano.function([yt, yp], [p_expr, r_expr, thresh_expr])
    for iterator in range(n_iter):
        y = np.random.binomial(n=1, p=.5, size=20).astype('float32')
        scores = np.random.random(20).astype('float32')
        ref_precision, ref_recall, ref_thresh = sklearn.metrics.precision_recall_curve(y, scores)
        precision, recall, thresh = f(y, scores)
        #assert np.allclose(ref_precision, precision)
        #assert np.allclose(ref_recall, recall)
        #assert np.allclose(ref_thresh, thresh)
        try:
            assert np.allclose(sklearn.metrics.auc(ref_recall, ref_precision), sklearn.metrics.auc(recall, precision))
        except AssertionError:
            print('n_iter: {}'.format(n_iter))
            print('y')
            print(y)
            print('scores')
            print(scores)
            print('ref precision')
            print(ref_precision)
            print(ref_precision.shape)
            #print(np.r_[precision[1:], 1])
            #print(np.allclose(ref_precision, np.r_[precision[1:], 1]))
            print(sklearn.metrics.auc(ref_recall, ref_precision))
            print(sklearn.metrics.auc(recall, precision))
            print()
            print('ref recall')
            print(ref_recall)
            print(ref_recall.shape)
            print()
            print('ref thresh')
            print(ref_thresh)
            print(ref_thresh.shape)
            print()
            print('score precision')
            print(precision)
            print(precision.shape)
            print()
            print('score recall')
            print(recall)
            print(recall.shape)
            print()
            print('score threshold')
            print(thresh)
            print(thresh.shape)
            raise
Example 18
def test_elemwise4():
    """ Test that two vectors can be broadcast to form an outer product (by performing rank-1 matrix update"""

    shape = (3,4)
    a = tcn.shared_constructor(theano._asarray(numpy.random.rand(*shape), dtype='float32'), 'a')
    b = tensor.fvector()
    c = tensor.fvector()
    f = pfunc([b,c], [], updates=[(a, (a+b.dimshuffle('x', 0)*c.dimshuffle(0, 'x')))], mode=mode_with_gpu)
    has_elemwise = False
    for i, node in enumerate(f.maker.env.toposort()):
        print(i, node)
        has_elemwise = has_elemwise or isinstance(node.op, tensor.Elemwise)
    assert not has_elemwise
    #let debugmode catch errors
    f(theano._asarray(numpy.random.rand(4), dtype='float32'), theano._asarray(numpy.random.rand(3), dtype='float32'))
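
The broadcasted update in this test is a rank-1 (outer-product) update; in NumPy terms, with `c` indexing rows and `b` columns as the dimshuffles dictate, the same step is:

import numpy as np

a_val = np.random.rand(3, 4).astype('float32')
b_val = np.random.rand(4).astype('float32')
c_val = np.random.rand(3).astype('float32')
# equivalent to a + b.dimshuffle('x', 0) * c.dimshuffle(0, 'x')
a_val += np.outer(c_val, b_val)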
Example 19
def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float64', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('auto')(p, u)
    assert m.dtype == 'float32', m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform('float64')(p, u)
    assert m.dtype == 'float64', m.dtype
Example 20
def test_hammming_loss():
    true = np.random.binomial(n=1, p=.5, size=10).astype('float32')
    predicted = np.round(np.random.random(10))
    refscore = hamming(true, predicted)
    yt = T.fvector('yt')
    yp = T.fvector('yp')
    f = theano.function([yt, yp], tmetrics.classification.hamming_loss(yt, yp), allow_input_downcast=True)
    score = f(true, predicted)
    print('true')
    print(true)
    print('predicted')
    print(predicted)
    print('refscore {}'.format(refscore))
    print('score {}'.format(score))
    assert np.allclose(refscore, score)
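
For binary label vectors the Hamming loss is just the fraction of positions where the labels disagree, so the tested expression should be equivalent to this sketch (the definition, not necessarily tmetrics' exact code):

import theano.tensor as T

def hamming_loss(y_true, y_predicted):
    # fraction of mismatched labels
    return T.mean(T.neq(y_true, y_predicted))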
Example 21
def test_multinomial_dtypes():
    p = tensor.dmatrix()
    u = tensor.dvector()
    m = multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float64", m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform("auto")(p, u)
    assert m.dtype == "float32", m.dtype

    p = tensor.fmatrix()
    u = tensor.fvector()
    m = multinomial.MultinomialFromUniform("float64")(p, u)
    assert m.dtype == "float64", m.dtype
Example 22
def find_sigma(X_shared, sigma_shared, N, perplexity, sigma_iters,
               metric, verbose=0):
    """Binary search on sigma for a given perplexity."""
    X = T.fmatrix('X')
    sigma = T.fvector('sigma')

    target = np.log(perplexity)

    P = T.maximum(p_Xp_given_X_var(X, sigma, metric), epsilon)

    entropy = -T.sum(P*T.log(P), axis=1)

    # Setting update for binary search interval
    sigmin_shared = theano.shared(np.full(N, np.sqrt(epsilon), dtype=floath))
    sigmax_shared = theano.shared(np.full(N, np.inf, dtype=floath))

    sigmin = T.fvector('sigmin')
    sigmax = T.fvector('sigmax')

    upmin = T.switch(T.lt(entropy, target), sigma, sigmin)
    upmax = T.switch(T.gt(entropy, target), sigma, sigmax)

    givens = {X: X_shared, sigma: sigma_shared, sigmin: sigmin_shared,
              sigmax: sigmax_shared}
    updates = [(sigmin_shared, upmin), (sigmax_shared, upmax)]

    update_intervals = theano.function([], entropy, givens=givens,
                                       updates=updates)

    # Setting update for sigma according to search interval
    upsigma = T.switch(T.isinf(sigmax), sigma*2, (sigmin + sigmax)/2.)

    givens = {sigma: sigma_shared, sigmin: sigmin_shared,
              sigmax: sigmax_shared}
    updates = [(sigma_shared, upsigma)]

    update_sigma = theano.function([], sigma, givens=givens, updates=updates)

    for i in range(sigma_iters):
        e = update_intervals()
        update_sigma()
        if verbose:
            print('Iteration: {0}.'.format(i+1))
            print('Perplexities in [{0:.4f}, {1:.4f}].'.format(np.exp(e.min()),
                  np.exp(e.max())))

    if np.any(np.isnan(np.exp(e))):
        raise Exception('Invalid sigmas. The perplexity is probably too low.')
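
The update rules above are easier to follow in plain NumPy; this sketch mirrors the same interval-halving logic (illustrative only, not the package's actual code; `entropy_fn` stands in for the symbolic entropy computation):

import numpy as np

def find_sigma_numpy(entropy_fn, N, perplexity, sigma_iters):
    """Per-point binary search for sigmas matching a target entropy."""
    target = np.log(perplexity)
    sigma = np.ones(N)
    sigmin = np.full(N, 1e-12)
    sigmax = np.full(N, np.inf)
    for _ in range(sigma_iters):
        e = entropy_fn(sigma)                         # entropy per point
        sigmin = np.where(e < target, sigma, sigmin)  # entropy too low: raise the lower bound
        sigmax = np.where(e > target, sigma, sigmax)  # entropy too high: lower the upper bound
        # double sigma while the interval is unbounded, otherwise bisect it
        sigma = np.where(np.isinf(sigmax), sigma * 2, (sigmin + sigmax) / 2.)
    return sigma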
Example 23
    def make_node(self, activations, labels, input_lengths):
        t_activations = T.as_tensor_variable(activations)
        # Ensure activations array is C-contiguous
        t_activations = cpu_contiguous(t_activations)

        t_labels = T.as_tensor_variable(labels)
        t_input_lengths = T.as_tensor_variable(input_lengths)

        if t_activations.type.dtype != 'float32':
            raise TypeError('activations must use the float32 type!')

        if t_activations.ndim != 3:
            raise ValueError('activations must have 3 dimensions.')

        if t_labels.type.dtype != 'int32':
            raise TypeError('labels must use the int32 type!')

        if t_labels.ndim != 2:
            raise ValueError('labels must have 2 dimensions.')

        if t_input_lengths.type.dtype != 'int32':
            raise TypeError('input_lengths must use the int32 type!')

        if t_input_lengths.ndim != 1:
            raise ValueError('input_lengths must have 1 dimension.')

        costs = T.fvector(name="ctc_cost")
        outputs = [costs]
        if self.compute_grad:
            gradients = T.ftensor3(name="ctc_grad")
            outputs += [gradients]

        return gof.Apply(self, inputs=[t_activations, t_labels, t_input_lengths],
                         outputs=outputs)
Example 24
    def __init__(self, nh, init_scale=0.2):

        self.W = theano.shared(name='W', value=init_scale * np.random.uniform(-1.0, 1.0, (nh, 1))
                               .astype(theano.config.floatX))
        self.b = theano.shared(name='b', value=np.array(0,
                                                        dtype=theano.config.floatX))

        self.params = [self.b, self.W]

        h = T.fmatrix('h')
        y = T.fvector('y')
        lr = T.scalar('lr')

        y_pred = T.dot(h, self.W) + self.b

        loss = T.sum(T.square(y_pred[:, 0] - y))

        gradients = T.grad(loss, self.params)

        updates = OrderedDict((p, p - lr * g) for p, g in zip(self.params, gradients))

        # These all assume a minibatch size > 1; "mb" functions below will massage single examples as required
        self.predict = theano.function(inputs=[h], outputs=y_pred)
        self.calc_loss = theano.function(inputs=[h, y], outputs=loss, updates=None)
        self.train = theano.function(inputs=[h, y, lr], outputs=loss, updates=updates)
        self.calc_gradients = theano.function(inputs=[h, y], outputs=gradients, updates=None)
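
Training this regressor then reduces to repeated calls of `train` with a feature matrix, a target vector, and a learning rate (a hypothetical driver, assuming `model` is an instance of the class above with `nh=64`):

import numpy as np

h_batch = np.random.rand(32, 64).astype('float32')   # minibatch of hidden states
y_batch = np.random.rand(32).astype('float32')       # regression targets
for epoch in range(100):
    loss = model.train(h_batch, y_batch, 0.01)       # inputs=[h, y, lr]
preds = model.predict(h_batch)                        # shape (32, 1)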
Example 25
    def test_allow_downcast_floatX(self):
        a = tensor.fscalar('a')
        b = tensor.fvector('b')

        f = pfunc([a, b], (a + b), allow_input_downcast=True)
        g = pfunc([a, b], (a + b), allow_input_downcast=False)
        h = pfunc([a, b], (a + b), allow_input_downcast=None)

        # If the values can be accurately represented, OK
        assert numpy.all(f(0, [0]) == 0)
        assert numpy.all(g(0, [0]) == 0)
        assert numpy.all(h(0, [0]) == 0)

        # For the vector: OK iff allow_input_downcast is True
        assert numpy.allclose(f(0, [0.1]), 0.1)
        self.assertRaises(TypeError, g, 0, [0.1])
        self.assertRaises(TypeError, h, 0, [0.1])

        # For the scalar: OK if allow_input_downcast is True,
        # or None and floatX==float32
        assert numpy.allclose(f(0.1, [0]), 0.1)
        self.assertRaises(TypeError, g, 0.1, [0])
        if config.floatX == 'float32':
            assert numpy.allclose(h(0.1, [0]), 0.1)
        else:
            self.assertRaises(TypeError, h, 0.1, [0])
Example 26
    def compile(self):
        # 1D: n_words, 2D: batch * n_cands
        self.x = T.imatrix()
        self.y = T.fvector()
        self.train_inputs = [self.x, self.y]
        self.pred_inputs = [self.x]

        self.activation = self.args.activation
        self.n_d = self.args.hidden_dim
        self.n_e = self.emb_layers[0].n_d
        self.pad_id = self.emb_layers[0].vocab_map[PAD]
        self.dropout = theano.shared(np.float32(self.args.dropout).astype(theano.config.floatX))

        self._set_layers(args=self.args, n_d=self.n_d, n_e=self.n_e)

        ###########
        # Network #
        ###########
        h_in = self._input_layer(x=self.x)
        h = self._mid_layer(h_prev=h_in, x=self.x, pad_id=self.pad_id)
        y_scores = self._output_layer(h=h)
        self.y_pred = T.le(0.5, y_scores)

        #########################
        # Set an objective func #
        #########################
        self.set_params(layers=self.layers)
        self.loss = self.set_loss(self.y, y_scores)
        self.cost = self.set_cost(args=self.args, params=self.params, loss=self.loss)
Example 27
	def __init__(self,
				 word_vec_width,
				 batch_size,
				 num_hidden,
				 learning_rate=0.1):
		self.num_hidden = num_hidden
		self.learning_rate = learning_rate
		self.word_vec_width = word_vec_width
		self.batch_size = batch_size

		self.vocab_mat = T.fmatrix('vocab')
		self.word_onehot = T.fmatrix('word_onehot')
		b = T.fvector('b')
		W = T.fmatrix('W')
		f = 1 / (1 + T.exp(-(W * (self.word_onehot.dot(self.vocab_mat) + b))))
		s = T.sum(f)

		self.exec_fn = theano.function(
			[self.word_onehot, b, W, self.vocab_mat],
			f,
			allow_input_downcast=True)

		self.word_onehot_c = T.fmatrix('word_onehot_c')
		f_c = 1 / (1 + T.exp(-(W * (self.word_onehot_c.dot(self.vocab_mat)) + b)))
		s_c = T.sum(f_c)

		J = T.largest(0, 1 - s + s_c)
		self.grad = theano.grad(J, [b, W, self.vocab_mat])

		self.grad_fn = theano.function(
			[self.word_onehot, self.word_onehot_c, b, W, self.vocab_mat],
			self.grad,
			allow_input_downcast=True)
Example 28
    def test_select_distinct(self):
        # Tests that ChoiceFromUniform always selects distinct elements

        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.ChoiceFromUniform(odtype='auto')(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 1000
        all_indices = range(n_elements)
        np.random.seed(12345)
        expected = [
            np.asarray([[931, 318, 185, 209, 559]]),
            np.asarray([[477, 887, 2, 717, 333, 665, 159, 559, 348, 136]]),
            np.asarray([[546, 28, 79, 665, 295, 779, 433, 531, 411, 716, 244, 234, 70, 88, 612, 639, 383, 335,
                         451, 100, 175, 492, 848, 771, 559, 214, 568, 596, 370, 486, 855, 925, 138, 300, 528, 507,
                         730, 199, 882, 357, 58, 195, 705, 900, 66, 468, 513, 410, 816, 672]])]

        for i in [5, 10, 50, 100, 500, n_elements]:
            uni = np.random.rand(i).astype(config.floatX)
            pvals = np.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
            pvals /= pvals.sum(1)
            res = f(pvals, uni, i)
            for ii in range(len(expected)):
                if expected[ii].shape == res.shape:
                    assert (expected[ii] == res).all()
            res = np.squeeze(res)
            assert len(res) == i
            assert np.all(np.in1d(np.unique(res), all_indices)), res
Example 29
    def test_select_proportional_to_weight(self):
        """
        Tests that MultinomialWOReplacementFromUniform selects elements, on average,
        proportional to the their probabilities
        """
        p = tensor.fmatrix()
        u = tensor.fvector()
        n = tensor.iscalar()
        m = multinomial.MultinomialWOReplacementFromUniform('auto')(p, u, n)

        f = function([p, u, n], m, allow_input_downcast=True)

        n_elements = 100
        n_selected = 10
        mean_rtol = 0.0005
        numpy.random.seed(12345)
        pvals = numpy.random.randint(1, 100, (1, n_elements)).astype(config.floatX)
        pvals /= pvals.sum(1)
        avg_pvals = numpy.zeros((n_elements,), dtype=config.floatX)

        for rep in range(10000):
            uni = numpy.random.rand(n_selected).astype(config.floatX)
            res = f(pvals, uni, n_selected)
            res = numpy.squeeze(res)
            avg_pvals[res] += 1
        avg_pvals /= avg_pvals.sum()
        avg_diff = numpy.mean(abs(avg_pvals - pvals))
        assert avg_diff < mean_rtol, avg_diff
Example 30
    def __init__(self, config=None, defaults=defaults, inputs_hook=None, hiddens_hook=None, params_hook=None,
                 use_data_layer=None, rand_crop=None, batch_size=None):
        # combine everything by passing to Model's init
        super(AlexNet, self).__init__(**{arg: val for (arg, val) in locals().items() if arg != 'self'})
        # configs can now be accessed through self dictionary

        if self.inputs_hook or self.hiddens_hook or self.params_hook:
            log.error("Inputs_hook, hiddens_hook, and params_hook not implemented yet for AlexNet!")

        self.flag_datalayer = self.use_data_layer

        ####################
        # Theano variables #
        ####################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        self.x = T.ftensor4('x')
        self.y = T.lvector('y')
        self.rand = T.fvector('rand')

        ##########
        # params #
        ##########
        self.params = []

        # make the network!
        self.build_computation_graph()
Example 31
def main(args):

    theano.config.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'dp_disall-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    pickleModel = args['pickleModel']

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])
    loadType = int(args['loadType'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print(str(windows))

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 200
    y2s_dim = 200
    z2s_dim = 200
    target_dim = k  # as the appliances are separated into theta_mu1, theta_mu2, etc., each one is generated from k Gaussians

    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25,
        typeLoad=loadType,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    print("Mean ", reader.meanTrain)
    print("Std", reader.stdTrain)
    instancesPlot = {0: [4]}

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    #from experiment 18-05-31_18-48
    fmodel = open(pickleModel, 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    #define layers
    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu1 = mainloop.model.nodes[11]
    theta_sig1 = mainloop.model.nodes[12]
    coeff1 = mainloop.model.nodes[13]

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    params = mainloop.model.params

    dynamicOutput = [None, None, None, None, None, None, None, None]
    #dynamicOutput_val = [None, None, None, None, None, None,None,  None, None]
    if (y_dim > 1):
        theta_mu2 = mainloop.model.nodes[14]
        theta_sig2 = mainloop.model.nodes[15]
        coeff2 = mainloop.model.nodes[16]
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None]  # mu, sig, coeff and pred
    if (y_dim > 2):
        theta_mu3 = mainloop.model.nodes[17]
        theta_sig3 = mainloop.model.nodes[18]
        coeff3 = mainloop.model.nodes[19]
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        theta_mu4 = mainloop.model.nodes[20]
        theta_sig4 = mainloop.model.nodes[21]
        coeff4 = mainloop.model.nodes[22]
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 4):
        theta_mu5 = mainloop.model.nodes[23]
        theta_sig5 = mainloop.model.nodes[24]
        coeff5 = mainloop.model.nodes[25]
        nodes = nodes + [theta_mu5, theta_sig5, coeff5]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 5):
        theta_mu6 = mainloop.model.nodes[26]
        theta_sig6 = mainloop.model.nodes[27]
        coeff6 = mainloop.model.nodes[28]
        nodes = nodes + [theta_mu6, theta_sig6, coeff6]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 6):
        theta_mu7 = mainloop.model.nodes[29]
        theta_sig7 = mainloop.model.nodes[30]
        coeff7 = mainloop.model.nodes[31]
        nodes = nodes + [theta_mu7, theta_sig7, coeff7]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 7):
        theta_mu8 = mainloop.model.nodes[32]
        theta_sig8 = mainloop.model.nodes[33]
        coeff8 = mainloop.model.nodes[34]
        nodes = nodes + [theta_mu8, theta_sig8, coeff8]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)  # a plain Gaussian sample, as in the original code; the GMM is used for generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        if (y_dim > 4):
            theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
            theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
            coeff5_t = coeff5.fprop([theta_1_t], params)
            y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
            y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)
            tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t,
                                       y_pred5)

        if (y_dim > 5):
            theta_mu6_t = theta_mu6.fprop([theta_1_t], params)
            theta_sig6_t = theta_sig6.fprop([theta_1_t], params)
            coeff6_t = coeff6.fprop([theta_1_t], params)
            y_pred6 = GMM_sampleY(theta_mu6_t, theta_sig6_t, coeff6_t)
            y_pred1 = T.concatenate([y_pred1, y_pred6], axis=1)
            tupleMulti = tupleMulti + (theta_mu6_t, theta_sig6_t, coeff6_t,
                                       y_pred6)

        if (y_dim > 6):
            theta_mu7_t = theta_mu7.fprop([theta_1_t], params)
            theta_sig7_t = theta_sig7.fprop([theta_1_t], params)
            coeff7_t = coeff7.fprop([theta_1_t], params)
            y_pred7 = GMM_sampleY(theta_mu7_t, theta_sig7_t, coeff7_t)
            y_pred1 = T.concatenate([y_pred1, y_pred7], axis=1)
            tupleMulti = tupleMulti + (theta_mu7_t, theta_sig7_t, coeff7_t,
                                       y_pred7)

        if (y_dim > 7):
            theta_mu8_t = theta_mu8.fprop([theta_1_t], params)
            theta_sig8_t = theta_sig8.fprop([theta_1_t], params)
            coeff8_t = coeff8.fprop([theta_1_t], params)
            y_pred8 = GMM_sampleY(theta_mu8_t, theta_sig8_t, coeff8_t)
            y_pred1 = T.concatenate([y_pred1, y_pred8], axis=1)
            tupleMulti = tupleMulti + (theta_mu8_t, theta_sig8_t, coeff8_t,
                                       y_pred8)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t, ) + tupleMulti
        #corr_temp, binary_temp

    (otherResults_val, updates_val) = theano.scan(fn=inner_fn_test,
                                                  sequences=[x_1_temp],
                                                  outputs_info=output_fn_val)

    for k, v in updates_val.items():
        k.default_update = v

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    ######################## TEST (GENERATION) TIME
    s_temp_val, prior_mu_temp_val, prior_sig_temp_val, \
      theta_mu1_temp_val, theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val = otherResults_val[:7]
    restResults_val = otherResults_val[7:]

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'
    y_pred1_temp_val = T.clip(y_pred1_temp_val, 0.0, np.inf)
    prediction_val = y_pred1_temp_val

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1], 1)))**2)
    mae1_val = T.mean(
        T.abs_(y_pred1_temp_val -
               y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTrain[0]) + reader.meanTrain[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTrain[0]) + reader.meanTrain[0]
    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    totaMSE_val = mse1_val
    totaMAE_val = mae1_val
    indexSepDynamic_val = 5

    #Initializing values of mse and mae
    mse2_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae2_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse3_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae3_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse4_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae4_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse5_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae5_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse6_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae6_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse7_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae7_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mse8_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))
    mae8_val = T.mean(T.zeros((y.shape[0], y.shape[1], 1)))

    relErr2_val = T.zeros((1, ))
    relErr3_val = T.zeros((1, ))
    relErr4_val = T.zeros((1, ))
    relErr5_val = T.zeros((1, ))
    relErr6_val = T.zeros((1, ))
    relErr7_val = T.zeros((1, ))
    relErr8_val = T.zeros((1, ))

    propAssigned2_val = T.zeros((1, ))
    propAssigned3_val = T.zeros((1, ))
    propAssigned4_val = T.zeros((1, ))
    propAssigned5_val = T.zeros((1, ))
    propAssigned6_val = T.zeros((1, ))
    propAssigned7_val = T.zeros((1, ))
    propAssigned8_val = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu2_temp_val.name = 'theta_mu2_val'
        theta_sig2_temp_val.name = 'theta_sig2_val'
        coeff2_temp_val.name = 'coeff2_val'
        y_pred2_temp_val.name = 'disaggregation2_val'
        y_pred2_temp_val = T.clip(y_pred2_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred2_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae2_val = T.mean(
            T.abs_(y_pred2_temp_val -
                   y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred2_temp_val)
        totReal = T.sum(y[:, :, 1])
        relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned2_val = 1 - T.sum(
            T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,1] * reader.stdTrain[1]) + reader.meanTrain[1]
        #y_pred2_temp_val = (y_pred2_temp_val * reader.stdTrain[1]) + reader.meanTrain[1]
        #mse2_valUnNorm = T.mean((y_pred2_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae2_valUnNorm = T.mean( T.abs_(y_pred2_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse2_val.name = 'mse2_val'
        mae2_val.name = 'mae2_val'

        theta_mu2_in_val = theta_mu2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig2_in_val = theta_sig2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val

        totaMSE_val += mse2_val
        totaMAE_val += mae2_val
        indexSepDynamic_val += 2

    if (y_dim > 2):
        theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu3_temp_val.name = 'theta_mu3_val'
        theta_sig3_temp_val.name = 'theta_sig3_val'
        coeff3_temp_val.name = 'coeff3_val'
        y_pred3_temp_val.name = 'disaggregation3_val'
        y_pred3_temp_val = T.clip(y_pred3_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred3_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae3_val = T.mean(
            T.abs_(y_pred3_temp_val -
                   y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred3_temp_val)
        totReal = T.sum(y[:, :, 2])
        relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned3_val = 1 - T.sum(
            T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,2] * reader.stdTrain[2]) + reader.meanTrain[2]
        #y_pred3_temp_val = (y_pred3_temp_val * reader.stdTrain[2]) + reader.meanTrain[2]
        #mse3_valUnNorm = T.mean((y_pred3_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae3_valUnNorm = T.mean( T.abs_(y_pred3_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse3_val.name = 'mse3_val'
        mae3_val.name = 'mae3_val'

        theta_mu3_in_val = theta_mu3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig3_in_val = theta_sig3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu3_in_val, theta_sig3_in_val,
                                     coeff3_in_val)
        totaMSE_val += mse3_val
        totaMAE_val += mae3_val
        indexSepDynamic_val += 2

    if (y_dim > 3):
        theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu4_temp_val.name = 'theta_mu4_val'
        theta_sig4_temp_val.name = 'theta_sig4_val'
        coeff4_temp_val.name = 'coeff4_val'
        y_pred4_temp_val.name = 'disaggregation4_val'
        y_pred4_temp_val = T.clip(y_pred4_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred4_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae4_val = T.mean(
            T.abs_(y_pred4_temp_val -
                   y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred4_temp_val)
        totReal = T.sum(y[:, :, 3])
        relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned4_val = 1 - T.sum(
            T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,3] * reader.stdTrain[3]) + reader.meanTrain[3]
        #y_pred4_temp_val = (y_pred4_temp_val * reader.stdTrain[3]) + reader.meanTrain[3]
        #mse4_valUnNorm = T.mean((y_pred4_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae4_valUnNorm = T.mean( T.abs_(y_pred4_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse4_val.name = 'mse4_val'
        mae4_val.name = 'mae4_val'

        theta_mu4_in_val = theta_mu4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig4_in_val = theta_sig4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu4_in_val, theta_sig4_in_val,
                                     coeff4_in_val)
        totaMSE_val += mse4_val
        totaMAE_val += mae4_val
        indexSepDynamic_val += 2

    if (y_dim > 4):
        theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu5_temp_val.name = 'theta_mu5_val'
        theta_sig5_temp_val.name = 'theta_sig5_val'
        coeff5_temp_val.name = 'coeff5_val'
        y_pred5_temp_val.name = 'disaggregation5_val'
        y_pred5_temp_val = T.clip(y_pred5_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred5_temp_val],
                                       axis=2)  # before it gets unnormalized

        mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae5_val = T.mean(
            T.abs_(y_pred5_temp_val -
                   y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred5_temp_val)
        totReal = T.sum(y[:, :, 4])
        relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned5_val = 1 - T.sum(
            T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,4] * reader.stdTrain[4]) + reader.meanTrain[4]
        #y_pred5_temp_val = (y_pred5_temp_val * reader.stdTrain[4]) + reader.meanTrain[4]
        #mse5_valUnNorm = T.mean((y_pred5_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae5_valUnNorm = T.mean( T.abs_(y_pred5_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse5_val.name = 'mse5_val'
        mae5_val.name = 'mae5_val'

        theta_mu5_in_val = theta_mu5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig5_in_val = theta_sig5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu5_in_val, theta_sig5_in_val,
                                     coeff5_in_val)
        totaMSE_val += mse5_val
        totaMAE_val += mae5_val
        indexSepDynamic_val += 2

    if (y_dim > 5):
        theta_mu6_temp_val, theta_sig6_temp_val, coeff6_temp_val, y_pred6_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu6_temp_val.name = 'theta_mu6_val'
        theta_sig6_temp_val.name = 'theta_sig6_val'
        coeff6_temp_val.name = 'coeff6_val'
        y_pred6_temp_val.name = 'disaggregation6_val'
        y_pred6_temp_val = T.clip(y_pred6_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred6_temp_val],
                                       axis=2)  #before it gets unnormalized

        mse6_val = T.mean((y_pred6_temp_val - y[:, :, 5].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae6_val = T.mean(
            T.abs_(y_pred6_temp_val -
                   y[:, :, 5].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred6_temp_val)
        totReal = T.sum(y[:, :, 5])
        relErr6_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned6_val = 1 - T.sum(
            T.abs_(y_pred6_temp_val - y[:, :, 5].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,5] * reader.stdTrain[5]) + reader.meanTrain[5]
        #y_pred6_temp_val = (y_pred6_temp_val * reader.stdTrain[5]) + reader.meanTrain[5]
        #mse6_valUnNorm = T.mean((y_pred6_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae6_valUnNorm = T.mean( T.abs_(y_pred6_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse6_val.name = 'mse6_val'
        mae6_val.name = 'mae6_val'

        theta_mu6_in_val = theta_mu6_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig6_in_val = theta_sig6_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff6_in_val = coeff6_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu6_in_val, theta_sig6_in_val,
                                     coeff6_in_val)
        totaMSE_val += mse6_val
        totaMAE_val += mae6_val
        indexSepDynamic_val += 2

    if (y_dim > 6):
        theta_mu7_temp_val, theta_sig7_temp_val, coeff7_temp_val, y_pred7_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu7_temp_val.name = 'theta_mu7_val'
        theta_sig7_temp_val.name = 'theta_sig7_val'
        coeff7_temp_val.name = 'coeff7_val'
        y_pred7_temp_val.name = 'disaggregation7_val'
        y_pred7_temp_val = T.clip(y_pred7_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred7_temp_val],
                                       axis=2)  # before it gets unnormalized

        mse7_val = T.mean((y_pred7_temp_val - y[:, :, 6].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae7_val = T.mean(
            T.abs_(y_pred7_temp_val -
                   y[:, :, 6].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred7_temp_val)
        totReal = T.sum(y[:, :, 6])
        relErr7_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned7_val = 1 - T.sum(
            T.abs_(y_pred7_temp_val - y[:, :, 6].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,6] * reader.stdTrain[6]) + reader.meanTrain[6]
        #y_pred7_temp_val = (y_pred7_temp_val * reader.stdTrain[6]) + reader.meanTrain[6]
        #mse7_valUnNorm = T.mean((y_pred7_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae7_valUnNorm = T.mean( T.abs_(y_pred7_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse7_val.name = 'mse7_val'
        mae7_val.name = 'mae7_val'

        theta_mu7_in_val = theta_mu7_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig7_in_val = theta_sig7_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff7_in_val = coeff7_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu7_in_val, theta_sig7_in_val,
                                     coeff7_in_val)
        totaMSE_val += mse7_val
        totaMAE_val += mae7_val
        indexSepDynamic_val += 2

    if (y_dim > 7):
        theta_mu8_temp_val, theta_sig8_temp_val, coeff8_temp_val, y_pred8_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu8_temp_val.name = 'theta_mu8_val'
        theta_sig8_temp_val.name = 'theta_sig8_val'
        coeff8_temp_val.name = 'coeff8_val'
        y_pred8_temp_val.name = 'disaggregation8_val'
        y_pred8_temp_val = T.clip(y_pred8_temp_val, 0.0, np.inf)

        prediction_val = T.concatenate([prediction_val, y_pred8_temp_val],
                                       axis=2)  # before it gets unnormalized

        mse8_val = T.mean((y_pred8_temp_val - y[:, :, 7].reshape(
            (y.shape[0], y.shape[1], 1)))**2)
        mae8_val = T.mean(
            T.abs_(y_pred8_temp_val -
                   y[:, :, 7].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred8_temp_val)
        totReal = T.sum(y[:, :, 7])
        relErr8_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned8_val = 1 - T.sum(
            T.abs_(y_pred8_temp_val - y[:, :, 7].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        #y_unNormalize = (y[:,:,7] * reader.stdTrain[7]) + reader.meanTrain[7]
        #y_pred8_temp_val = (y_pred8_temp_val * reader.stdTrain[7]) + reader.meanTrain[7]
        #mse8_valUnNorm = T.mean((y_pred8_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
        #mae8_valUnNorm = T.mean( T.abs_(y_pred8_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))) )

        mse8_val.name = 'mse8_val'
        mae8_val.name = 'mae8_val'

        theta_mu8_in_val = theta_mu8_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig8_in_val = theta_sig8_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff8_in_val = coeff8_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu8_in_val, theta_sig8_in_val,
                                     coeff8_in_val)
        totaMSE_val += mse8_val
        totaMAE_val += mae8_val
        indexSepDynamic_val += 2

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        *argsGMM_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = totaMSE_val / y_dim
    totaMAE_val = totaMAE_val / y_dim

    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'
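    # recon_val has shape (time, batch): the sum over axis 0 accumulates the
    # reconstruction cost across time steps, and .mean() then averages it over
    # the batch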

    ######################

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"

    lr_iterations = {0: lr}

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],  #[x, y],
        #givens={x:Xtest},
        #on_unused_input='ignore',
        #z=( ,200,1)
        allow_input_downcast=True,
        outputs=[
            prediction_val,
            recon_term_val,
            totaMSE_val,
            totaMAE_val,
            mse1_val,
            mse2_val,
            mse3_val,
            mse4_val,
            mse5_val,
            mse6_val,
            mse7_val,
            mse8_val,
            mae1_val,
            mae2_val,
            mae3_val,
            mae4_val,
            mae5_val,
            mae6_val,
            mae7_val,
            mae8_val,  #unnormalized mae and mse 16 items#
            relErr1_val,
            relErr2_val,
            relErr3_val,
            relErr4_val,
            relErr5_val,
            relErr6_val,
            relErr7_val,
            relErr8_val,
            propAssigned1_val,
            propAssigned2_val,
            propAssigned3_val,
            propAssigned4_val,
            propAssigned5_val,
            propAssigned6_val,
            propAssigned7_val,
            propAssigned8_val
        ],
        updates=updates_val)
    testOutput = []
    testMetrics2 = []
    perEnergyAssig = []

    bestInstancesPred = []
    bestInstancesDisa = []
    bestInstancesAggr = []

    numBatchTest = 0

    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(
            outputGeneration[1:20])  #before 36 including unnormalized metrics
        testMetrics2.append(outputGeneration[20:])

        ########## best mae
        predTest = np.transpose(outputGeneration[0], [1, 0, 2]).clip(min=0)
        realTest = np.transpose(batch[2], [1, 0, 2])

        batchMAE = np.mean(np.absolute(predTest - realTest), axis=(1, 2))
        idxMin = np.argmin(batchMAE)

        #print(np.asarray(idxMin).reshape(1,-1)[0,:])
        #print(batchMAE[idxMin])
        for idx in np.asarray(idxMin).reshape(1, -1)[0, :]:

            plt.figure(1)
            plt.plot(predTest[idx])
            plt.legend(appliances)
            plt.savefig(
                save_path +
                "/vrnn_disall_test-b{}_Pred_0-{}".format(numBatchTest, idx),
                format='eps')
            plt.clf()

            plt.figure(2)
            plt.plot(realTest[idx])
            plt.legend(appliances)
            plt.savefig(save_path +
                        "/vrnn_disall_test-b{}_RealDisag_0-{}".format(
                            numBatchTest, idx),
                        format='eps')
            plt.clf()

            plt.figure(3)
            plt.plot(np.transpose(batch[0], [1, 0, 2])[idx])
            plt.savefig(
                save_path +
                "/vrnn_disall_test-b{}_Realagg_0-{}".format(numBatchTest, idx),
                format='eps')
            plt.clf()

            bestInstancesPred.append(predTest[idx])
            bestInstancesDisa.append(realTest[idx])
            bestInstancesAggr.append(np.transpose(batch[0], [1, 0, 2])[idx])

        numBatchTest += 1

        sumNumPred = np.sum(predTest, axis=(0, 1))
        sumNumReal = np.sum(batch[2], axis=(0, 1))
        perEnergy = np.sum(batch[0], axis=(0, 1))
        perEnergyAssig.append((sumNumReal / perEnergy, sumNumPred / perEnergy))

    scipy.io.savemat(save_path + '/testInstances.mat',
                     mdict={
                         'pred': bestInstancesPred,
                         'disag': bestInstancesDisa,
                         'agg': bestInstancesAggr
                     })

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)

    testOutput[:, 19:] = 1000 * testOutput[:, 19:]  # kW to watts
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 11].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 12].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 13].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 14].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 15].mean()
    mse6_test = testOutput[:, 8].mean()
    mae6_test = testOutput[:, 16].mean()
    mse7_test = testOutput[:, 9].mean()
    mae7_test = testOutput[:, 17].mean()
    mse8_test = testOutput[:, 10].mean()
    mae8_test = testOutput[:, 18].mean()

    print(testOutput[:, 3:11].mean(), testOutput[:, 11:19].mean())

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()
    relErr6_test = testMetrics2[:, 5].mean()
    relErr7_test = testMetrics2[:, 6].mean()
    relErr8_test = testMetrics2[:, 7].mean()

    propAssigned1_test = testMetrics2[:, 8].mean()
    propAssigned2_test = testMetrics2[:, 9].mean()
    propAssigned3_test = testMetrics2[:, 10].mean()
    propAssigned4_test = testMetrics2[:, 11].mean()
    propAssigned5_test = testMetrics2[:, 12].mean()
    propAssigned6_test = testMetrics2[:, 13].mean()
    propAssigned7_test = testMetrics2[:, 14].mean()
    propAssigned8_test = testMetrics2[:, 15].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mse6_test,mse7_test,mse8_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mae6_test,mae7_test,mae8_test,mseTest,maeTest\n"
    )
    #fLog.write("Unnorm,{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},0.0,0.0\n\n".format(mse1_valUnNorm,mse2_valUnNorm,mse3_valUnNorm,mse4_valUnNorm,mse5_valUnNorm, mse6_valUnNorm,mse7_valUnNorm,mse8_valUnNorm,mae1_valUnNorm,mae2_valUnNorm,mae3_valUnNorm,mae4_valUnNorm,mae5_valUnNorm, mae6_valUnNorm,mae7_valUnNorm,mae8_valUnNorm))
    fLog.write(
        "{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n\n"
        .format(recon_test, mse1_test, mse2_test, mse3_test, mse4_test,
                mse5_test, mse6_test, mse7_test, mse8_test, mae1_test,
                mae2_test, mae3_test, mae4_test, mae5_test, mae6_test,
                mae7_test, mae8_test, mse_test, mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,relErr6,relErr7,relErr8,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5,propAssigned6,propAssigned7,propAssigned8\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        relErr6_test, relErr7_test, relErr8_test, propAssigned1_test,
        propAssigned2_test, propAssigned3_test, propAssigned4_test,
        propAssigned5_test, propAssigned6_test, propAssigned7_test,
        propAssigned8_test))

    fLog.write(
        "batch,perReal1,perReal2,perReal3,perReal4,perReal5,perReal6,perReal7,perReal8,perPredict1,perPredict2,perPredict3,perPredict4,perPredict5,perPredict6,perPredict7,perPredict8\n"
    )
    for batch, item in enumerate(perEnergyAssig):
        fLog.write(
            "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                batch, item[0][0], item[0][1], item[0][2], item[0][3],
                item[0][4], item[0][5], item[0][6], item[0][7], item[1][0],
                item[1][1], item[1][2], item[1][3], item[1][4], item[1][5],
                item[1][6], item[1][7]))
    fLog.write(pickleModel)
    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
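The per-appliance metrics built in the graph above are the two standard NILM evaluation quantities: relErrN_val is the signed relative error of the total predicted energy, and propAssignedN_val is the proportion of total energy correctly assigned. A minimal NumPy sketch of the same formulas outside the Theano graph (function and argument names here are illustrative, not from the original script):

import numpy as np

def nilm_metrics(pred, real, aggregate):
    # pred, real: (time, batch, 1) arrays for one appliance
    # aggregate:  (time, batch, 1) mains signal
    tot_pred, tot_real = pred.sum(), real.sum()
    # signed relative error of the total energy, in [-1, 1]
    rel_err = (tot_pred - tot_real) / max(tot_pred, tot_real)
    # proportion of energy correctly assigned (1.0 is perfect)
    prop_assigned = 1.0 - np.abs(pred - real).sum() / (2.0 * aggregate.sum())
    return rel_err, prop_assigned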
Esempio n. 32
0
def evaluate_lenet5(learning_rate=0.02, n_epochs=4, L2_weight=1e-5, extra_size=4, emb_size=300, batch_size=100, filter_size=[3,3], maxSentLen=40, hidden_size=[300,300], max_term_len=4, p_mode = 'conc'):

    model_options = locals().copy()
    print "model options", model_options

    seed=1234
    np.random.seed(seed)
    rng = np.random.RandomState(seed)    # random seed, so the model generates the same results


    # all_sentences_l, all_masks_l, all_sentences_r, all_masks_r, all_word1,all_word2,all_word1_mask,all_word2_mask,all_labels, all_extra, word2id  =load_wordnet_hyper_vs_all_with_words(maxlen=maxSentLen, wordlen=max_term_len)  #minlen, include one label, at least one word in the sentence
    # test_sents_l, test_masks_l, test_sents_r, test_masks_r, test_labels, word2id  =load_ACE05_dataset(maxSentLen, word2id)
    word2id = load_word2id(root_dic+'LenciBenotto_word2id.pkl')
    test_sents_l, test_masks_l, test_sents_r, test_masks_r, test_word1,test_word2,test_word1_mask,test_word2_mask,test_labels, test_extra, word2id, group_size_list = load_task_hyper_vs_all_with_allDefComb(LenciBenotto_file,maxSentLen, word2id, wordlen=max_term_len)



    test_sents_l=np.asarray(test_sents_l, dtype='int32')

    test_masks_l=np.asarray(test_masks_l, dtype=theano.config.floatX)


    test_sents_r=np.asarray(test_sents_r, dtype='int32')


    test_masks_r=np.asarray(test_masks_r, dtype=theano.config.floatX)


    test_word1=np.asarray(test_word1, dtype='int32')
    test_word2=np.asarray(test_word2, dtype='int32')


    test_word1_mask=np.asarray(test_word1_mask, dtype=theano.config.floatX)
    test_word2_mask=np.asarray(test_word2_mask, dtype=theano.config.floatX)


    test_labels_store=np.asarray(test_labels, dtype='int32')

    test_extra=np.asarray(test_extra, dtype=theano.config.floatX)

    # train_size=len(train_labels_store)
    # dev_size=len(dev_labels_store)
    test_size=len(test_sents_l)
    print ' test size: ', test_size

    vocab_size=len(word2id)+1


    rand_values=rng.normal(0.0, 0.01, (vocab_size, emb_size))   #generate a matrix by Gaussian distribution
    rand_values[0]=np.array(np.zeros(emb_size),dtype=theano.config.floatX)
    id2word = {y:x for x,y in word2id.iteritems()}
    word2vec=load_word2vec()
    rand_values=load_word2vec_to_init(rand_values, id2word, word2vec)
    init_embeddings=theano.shared(value=np.array(rand_values,dtype=theano.config.floatX), borrow=True)   #wrap up the python variable "rand_values" into theano variable
    # load_model_from_file(root_dic+'Weeds_best_para_init_embeddings', [init_embeddings])

    #now, start to build the input form of the model
    sents_ids_l=T.imatrix()
    sents_mask_l=T.fmatrix()
    sents_ids_r=T.imatrix()
    sents_mask_r=T.fmatrix()
    word1_ids = T.imatrix()
    word2_ids = T.imatrix()
    word1_mask = T.fmatrix()
    word2_mask = T.fmatrix()
    extra = T.fvector()
    labels=T.ivector()
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'
    def embed_input(emb_matrix, sent_ids):
        return emb_matrix[sent_ids.flatten()].reshape((batch_size,maxSentLen, emb_size)).dimshuffle(0,2,1)

    embed_input_l=embed_input(init_embeddings, sents_ids_l)#embeddings[sents_ids_l.flatten()].reshape((batch_size,maxSentLen, emb_size)).dimshuffle(0,2,1) #the input format can be adapted into CNN or GRU or LSTM
    embed_input_r=embed_input(init_embeddings, sents_ids_r)#embeddings[sents_ids_r.flatten()].reshape((batch_size,maxSentLen, emb_size)).dimshuffle(0,2,1)

    embed_word1 = init_embeddings[word1_ids.flatten()].reshape((batch_size,word1_ids.shape[1], emb_size))
    embed_word2 = init_embeddings[word2_ids.flatten()].reshape((batch_size,word2_ids.shape[1], emb_size))
    word1_embedding = T.sum(embed_word1*word1_mask.dimshuffle(0,1,'x'), axis=1)
    word2_embedding = T.sum(embed_word2*word2_mask.dimshuffle(0,1,'x'), axis=1)


    '''create_AttentiveConv_params '''
    conv_W, conv_b=create_conv_para(rng, filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]))
    conv_W_context, conv_b_context=create_conv_para(rng, filter_shape=(hidden_size[1], 1, emb_size, 1))

    NN_para=[conv_W, conv_b,conv_W_context]

    '''
    attentive convolution function
    '''
    term_vs_term_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_word1.dimshuffle(0,2,1),
            origin_input_tensor3_r = embed_word2.dimshuffle(0,2,1),
            input_tensor3=embed_word1.dimshuffle(0,2,1),
            input_tensor3_r = embed_word2.dimshuffle(0,2,1),
             mask_matrix = word1_mask,
             mask_matrix_r = word2_mask,
             image_shape=(batch_size, 1, emb_size, max_term_len),
             image_shape_r = (batch_size, 1, emb_size, max_term_len),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    tt_embeddings_l = term_vs_term_layer.attentive_maxpool_vec_l
    tt_embeddings_r = term_vs_term_layer.attentive_maxpool_vec_r

    p_ww = T.concatenate([tt_embeddings_l,tt_embeddings_r,tt_embeddings_l*tt_embeddings_r,tt_embeddings_l-tt_embeddings_r], axis=1)

    term_vs_def_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_word1.dimshuffle(0,2,1),
            origin_input_tensor3_r = embed_input_r,
            input_tensor3=embed_word1.dimshuffle(0,2,1),
            input_tensor3_r = embed_input_r,
             mask_matrix = word1_mask,
             mask_matrix_r = sents_mask_r,
             image_shape=(batch_size, 1, emb_size, max_term_len),
             image_shape_r = (batch_size, 1, emb_size, maxSentLen),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    td_embeddings_l = term_vs_def_layer.attentive_maxpool_vec_l
    td_embeddings_r = term_vs_def_layer.attentive_maxpool_vec_r
    p_wd = T.concatenate([td_embeddings_l,td_embeddings_r,td_embeddings_l*td_embeddings_r,td_embeddings_l-td_embeddings_r], axis=1)


    def_vs_term_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_input_l,
            origin_input_tensor3_r = embed_word2.dimshuffle(0,2,1),
            input_tensor3=embed_input_l,
            input_tensor3_r = embed_word2.dimshuffle(0,2,1),
             mask_matrix = sents_mask_l,
             mask_matrix_r = word2_mask,
             image_shape=(batch_size, 1, emb_size, maxSentLen),
             image_shape_r = (batch_size, 1, emb_size, max_term_len),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    dt_embeddings_l = def_vs_term_layer.attentive_maxpool_vec_l
    dt_embeddings_r = def_vs_term_layer.attentive_maxpool_vec_r

    p_dw = T.concatenate([dt_embeddings_l,dt_embeddings_r,dt_embeddings_l*dt_embeddings_r,dt_embeddings_l-dt_embeddings_r], axis=1)


    def_vs_def_layer = Conv_for_Pair(rng,
            origin_input_tensor3=embed_input_l,
            origin_input_tensor3_r = embed_input_r,
            input_tensor3=embed_input_l,
            input_tensor3_r = embed_input_r,
             mask_matrix = sents_mask_l,
             mask_matrix_r = sents_mask_r,
             image_shape=(batch_size, 1, emb_size, maxSentLen),
             image_shape_r = (batch_size, 1, emb_size, maxSentLen),
             filter_shape=(hidden_size[1], 1, emb_size, filter_size[0]),
             filter_shape_context=(hidden_size[1], 1,emb_size, 1),
             W=conv_W, b=conv_b,
             W_context=conv_W_context, b_context=conv_b_context)
    dd_embeddings_l = def_vs_def_layer.attentive_maxpool_vec_l
    dd_embeddings_r = def_vs_def_layer.attentive_maxpool_vec_r
    p_dd = T.concatenate([dd_embeddings_l,dd_embeddings_r,dd_embeddings_l*dd_embeddings_r,dd_embeddings_l-dd_embeddings_r], axis=1)

    if p_mode == 'conc':
        p=T.concatenate([p_ww, p_wd, p_dw, p_dd], axis=1)
        p_len = 4*4*hidden_size[1]
    else:
        p = T.max(T.concatenate([p_ww.dimshuffle('x',0,1),p_wd.dimshuffle('x',0,1),p_dw.dimshuffle('x',0,1),p_dd.dimshuffle('x',0,1)],axis=0), axis=0)
        p_len =4*hidden_size[1]
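    # 'conc' keeps the four pairwise representations (term-term, term-def,
    # def-term, def-def) side by side, giving p_len = 4 blocks of
    # 4*hidden_size[1] units; otherwise an element-wise max is taken over the
    # four representations instead.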

    # HL_input = T.concatenate([p,cosine_matrix1_matrix2_rowwise(word1_embedding,word2_embedding).dimshuffle(0,'x'),extra.dimshuffle(0,'x')],axis=1)
    # HL_input_size=p_len+1+1
    #
    # HL_layer_1=HiddenLayer(rng, input=HL_input, n_in=HL_input_size, n_out=hidden_size[1], activation=T.tanh)


    "form input to LR classifier"
    LR_input = T.concatenate([p,cosine_matrix1_matrix2_rowwise(word1_embedding,word2_embedding).dimshuffle(0,'x'),extra.dimshuffle(0,'x')],axis=1)
    LR_input_size=p_len+1+1
    # LR_input = HL_layer_1.output
    # LR_input_size = hidden_size[1]

    U_a = create_ensemble_para(rng, 2, LR_input_size) # the weight matrix hidden_size*2
    LR_b = theano.shared(value=np.zeros((2,),dtype=theano.config.floatX),name='LR_b', borrow=True)  #bias for each target class
    LR_para=[U_a, LR_b]


    layer_LR=LogisticRegression(rng, input=LR_input, n_in=LR_input_size, n_out=2, W=U_a, b=LR_b, bias=0.25) #basically it is a multiplication between weight matrix and input feature vector
    loss=layer_LR.negative_log_likelihood(labels)  #for classification tasks we usually use negative log-likelihood as the loss; the lower the better.


    # L2_reg = (conv_W**2).sum()+(conv_W_context**2).sum()+(U_a**2).sum()





    params = NN_para+LR_para #[init_embeddings]


    # load_model_from_file('/save/wenpeng/datasets/HypeNet/HyperDef_label_meta_best_para_0.938730853392', params)
    load_model_from_file(root_dic+'LenciBenotto_best_para_0.557286573332', params)

    '''
    0.552587544259; current ap: 0.574037513126 ap@100 0.918481316424
    0.557286573332; current ap: 0.576498645289 ap@100 0.909032657538
    '''

    test_model = theano.function([sents_ids_l, sents_mask_l, sents_ids_r, sents_mask_r, word1_ids,word2_ids,word1_mask,word2_mask,extra], [layer_LR.y_pred,layer_LR.prop_for_posi], allow_input_downcast=True, on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000000000  # look at this many examples regardless
    start_time = time.time()
    mid_time = start_time
    past_time= mid_time
    epoch = 0
    done_looping = False


    n_test_batches=test_size/batch_size
    n_test_remain = test_size%batch_size
    if n_test_remain!=0:
        test_batch_start=list(np.arange(n_test_batches)*batch_size)+[test_size-batch_size]
    else:
        test_batch_start=list(np.arange(n_test_batches)*batch_size)



    # max_acc_dev=0.0
    max_ap_test=0.0
    max_ap_topk_test=0.0
    max_f1=0.0


    pred_labels =[]
    probs = []
    gold_labels =[]
    error_sum=0.0
    for idd, test_batch_id in enumerate(test_batch_start): # for each test batch
        pred_i, prob_i=test_model(
                test_sents_l[test_batch_id:test_batch_id+batch_size],
                test_masks_l[test_batch_id:test_batch_id+batch_size],
                test_sents_r[test_batch_id:test_batch_id+batch_size],
                test_masks_r[test_batch_id:test_batch_id+batch_size],
                test_word1[test_batch_id:test_batch_id+batch_size],
                test_word2[test_batch_id:test_batch_id+batch_size],
                test_word1_mask[test_batch_id:test_batch_id+batch_size],
                test_word2_mask[test_batch_id:test_batch_id+batch_size],
                test_extra[test_batch_id:test_batch_id+batch_size])

        # error_sum+=error_i
        pred_labels+=list(pred_i)
        probs+=list(prob_i)

    print len(test_sents_l), len(probs)
    if n_test_remain !=0:
        probs = probs[:(len(test_batch_start)-1)*batch_size]+probs[-n_test_remain:]
    print len(test_sents_l), len(probs)
    assert len(test_sents_l) == len(probs)
    assert sum(group_size_list) == len(probs)
    #max prob in group
    max_probs = []
    prior_size = 0
    for i in range(len(group_size_list)):

        sub_probs = probs[prior_size:prior_size+group_size_list[i]]
        prior_size += group_size_list[i]
        max_probs.append(max(sub_probs))

    print len(group_size_list),len(max_probs),len(test_labels)
    assert len(test_labels) == len(max_probs)
    # test_acc=1.0-error_sum/(len(test_batch_start))
    test_ap = apk(test_labels, max_probs, k=len(test_labels))
    test_ap_top100 = apk(test_labels, max_probs, k=100)


    # if test_ap > max_ap_test:
    #     max_ap_test=test_ap
    #     store_model_to_file('/save/wenpeng/datasets/EVALution/HyperDef_label_4ways_conc_test_on_EVA_allDefComb_best_para_'+str(max_ap_test), params)
    # if test_ap_top100 > max_ap_topk_test:
    #     max_ap_topk_test=test_ap_top100
    print '\t\tcurrent ap:', test_ap,'ap@100', test_ap_top100
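Since test_size is generally not a multiple of batch_size, the evaluation above starts the final batch at test_size - batch_size and later trims the re-predicted overlap out of probs. A standalone sketch of that batching scheme (illustrative names, not from the original code):

import numpy as np

def overlapping_batch_starts(n_examples, batch_size):
    # fixed-size batches covering all examples; the last batch may
    # overlap the previous one when n_examples % batch_size != 0
    starts = list(np.arange(n_examples // batch_size) * batch_size)
    if n_examples % batch_size != 0:
        starts.append(n_examples - batch_size)
    return starts

# e.g. overlapping_batch_starts(10, 4) -> [0, 4, 6]; after prediction the
# duplicated outputs from the overlap are dropped, exactly as probs is
# trimmed above before the group-wise max is taken.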
Esempio n. 33
0
  def __init__(self, K, conv_layer_sizes, hidden_layer_sizes, gamma):
    self.K = K
    lr = np.float32(2.5e-4)
    mu = np.float32(0)
    decay = np.float32(0.99)
    eps = np.float32(1e-10)

    # inputs and targets
    X = T.ftensor4('X')
    G = T.fvector('G')
    actions = T.ivector('actions')

    # create the graph
    self.conv_layers = []
    num_input_filters = 4 # number of filters / color channels
    for num_output_filters, filtersz, stride in conv_layer_sizes:
      layer = ConvLayer(num_input_filters, num_output_filters, filtersz, stride)
      self.conv_layers.append(layer)
      num_input_filters = num_output_filters


    ##### debug #####
    # Z = X / 255.0
    # j = 0
    # for layer in self.conv_layers:
    #   Z = layer.forward(Z)
    #   out = Z
    #   op = theano.function(inputs=[X], outputs=out, allow_input_downcast=True)
    #   test = op(np.random.randn(1, 4, IM_SIZE, IM_SIZE))
    #   print("output size after conv %d: %s" % (j, test.shape))
    #   j += 1


    # get conv output size
    Z = X / 255.0
    for layer in self.conv_layers:
      Z = layer.forward(Z)
    conv_out = Z.flatten(ndim=2)
    conv_out_op = theano.function(inputs=[X], outputs=conv_out, allow_input_downcast=True)
    test = conv_out_op(np.random.randn(1, 4, IM_SIZE, IM_SIZE))
    flattened_output_size = test.shape[1]


    # build fully connected layers
    self.layers = []
    M1 = flattened_output_size
    for M2 in hidden_layer_sizes:
      layer = HiddenLayer(M1, M2)
      self.layers.append(layer)
      M1 = M2

    # final layer
    layer = HiddenLayer(M1, K, lambda x: x)
    self.layers.append(layer)

    # collect params for copy
    self.params = []
    for layer in (self.conv_layers + self.layers):
      self.params += layer.params
    

    # calculate final output and cost
    Z = conv_out
    for layer in self.layers:
      Z = layer.forward(Z)
    Y_hat = Z

    selected_action_values = Y_hat[T.arange(actions.shape[0]), actions]
    cost = T.mean((G - selected_action_values)**2)

    # create train function
    # we need to ensure cache is updated before parameter update
    # by creating a list of new_caches
    # and using them in the parameter update
    grads = T.grad(cost, self.params)
    caches = [theano.shared(np.ones_like(p.get_value())) for p in self.params]
    new_caches = [decay*c + (np.float32(1) - decay)*g*g for c, g in zip(caches, grads)]

    c_update = [(c, new_c) for c, new_c in zip(caches, new_caches)]
    g_update = [
      (p, p - lr*g / T.sqrt(new_c + eps)) for p, new_c, g in zip(self.params, new_caches, grads)
    ]
    updates = c_update + g_update

    # compile functions
    self.train_op = theano.function(
      inputs=[X, G, actions],
      updates=updates,
      allow_input_downcast=True
    )

    self.predict_op = theano.function(
      inputs=[X],
      outputs=Y_hat,
      allow_input_downcast=True
    )
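The updates list above is deliberately ordered so that every parameter step divides by new_c, the freshly decayed cache, rather than the stale one. The same RMSProp step in plain NumPy, as a sketch using the constants from above (not the original class):

import numpy as np

def rmsprop_step(params, grads, caches, lr=2.5e-4, decay=0.99, eps=1e-10):
    # refresh the cache first, then scale the gradient by the refreshed
    # value -- this mirrors c_update followed by g_update
    for i, (p, g) in enumerate(zip(params, grads)):
        caches[i] = decay * caches[i] + (1.0 - decay) * g * g
        params[i] = p - lr * g / np.sqrt(caches[i] + eps)
    return params, caches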
Esempio n. 34
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=1,
                 activation_method="Sigmoid"):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.activation = T.nnet.sigmoid

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = MRG_RandomStreams(numpy_rng.randint(2**30))

        # allocate symbolic variables for the data

        # the data is presented as rasterized images
        self.x = T.matrix('x')

        # the labels are presented as 1D vector of [int] labels
        self.y = T.fvector('y')

        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in range(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=self.activation)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question... but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shared weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LinearRegression(input=self.sigmoid_layers[-1].output,
                                         n_in=hidden_layers_sizes[-1],
                                         n_out=n_outs,
                                         l2=0,
                                         l1=0)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)
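The weight sharing described in the comments above works because the sigmoid layer and its RBM hold references to the same Theano shared variables rather than copies, so RBM pretraining moves the MLP weights in place. A minimal sketch of the mechanism (toy shapes, not the original classes):

import numpy as np
import theano

W = theano.shared(np.zeros((4, 3), dtype=theano.config.floatX), name='W')
mlp_params = [W]
rbm_params = [W]                      # same object, not a copy

rbm_params[0].set_value(rbm_params[0].get_value() + 1.0)  # one 'pretraining' step
print(mlp_params[0].get_value())      # the MLP sees the updated weights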
Esempio n. 35
0
def RelationStackMaker(chips, params, graph=False, weighted=False, batched=False):
    if batched:
        emb_input = T.itensor3('emb_input')
        entities_tv = [T.fmatrix('enidx_'+str(i)).astype(theano.config.floatX) for i in range(params['num_entity'])]
        if graph:
            if weighted:
                masks = T.ftensor4('child_mask')
            else:
                masks = T.ftensor3('child_mask')
        else:
            masks = T.fmatrix('batch_mask')
    else:
        emb_input = T.imatrix('emb_input')
        entities_tv = [T.fvector('enidx_'+str(i)).astype(theano.config.floatX) for i in range(params['num_entity'])]
        if graph:
            if weighted:
                masks = T.ftensor3('child_mask')
            else:
                masks = T.fmatrix('child_mask')
        else:
            masks = None
    #print masks, type(masks), masks.ndim
    current_chip = Start(params['voc_size'], emb_input)  
    print ('\n', 'Building Stack now', '\n', 'Start: ', params['voc_size'], 'out_tv dim:', current_chip.output_tv.ndim)
    instantiated_chips = stackLayers(chips, current_chip, params, entity_size=params['num_entity'])
    regularizable_params = computeLayers(instantiated_chips, current_chip, params, entities_input=entities_tv, mask=masks)
    ### Debug use: Get the attention co-efficiency and visualize. ###
    for c in instantiated_chips:
        if c[1].endswith('Entity_Att'):
            assert hasattr(c[0], 'att_wt_arry')
            assert hasattr(c[0], 'entity_tvs')
            attention_weights = c[0].att_wt_arry
            entity_tvs = c[0].entity_tvs
    
    current_chip = instantiated_chips[-1][0]
    if current_chip.output_tv.ndim == 2:
        pred_y = current_chip.output_tv #T.argmax(current_chip.output_tv, axis=1)
    else:
        pred_y = current_chip.output_tv #T.argmax(current_chip.output_tv) #, axis=1)
    gold_y = (current_chip.gold_y
            if hasattr(current_chip, 'gold_y')
            else None)
    # Show all parameters that would be needed in this system
    params_needed = calculate_params_needed(instantiated_chips)
    print ("Parameters Needed", params_needed)
    for k in params_needed:
        assert k in params, k
        print (k, params[k])
    assert hasattr(current_chip, 'score')
    cost = current_chip.score #/ params['nsentences'] 
    cost_arr = [cost]
    for layer in instantiated_chips[:-1]:
        if hasattr(layer[0], 'score'):
            print (layer[1])
            cost += params['cost_coef'] * layer[0].score
            cost_arr.append(params['cost_coef'] * layer[0].score)

    grads = T.grad(cost,
            wrt=regularizable_params)
            #[params[k] for k in params if (hasattr(params[k], 'is_regularizable') and params[k].is_regularizable)])
    print ('Regularizable parameters:')
    for k, v in params.items():
        if hasattr(v, 'is_regularizable'):
            print (k, v, v.is_regularizable)
    if graph or batched:
        #return (emb_input, masks, entities_tv, attention_weights, entity_tvs, gold_y, pred_y, cost, grads, regularizable_params) 
        return (emb_input, masks, entities_tv, gold_y, pred_y, cost, grads, regularizable_params) 
    else: 
        return (emb_input, entities_tv, gold_y, pred_y, cost, grads, regularizable_params) 
Esempio n. 36
0
def main(args):

    theano.config.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_disall-sch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = int(args['stride_test'])
    loadType = int(args['loadType'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])
    typeActivFunc = args['typeActivFunc']

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path
    print(str(windows))

    q_z_dim = 500
    p_z_dim = 500
    p_x_dim = 500
    x2s_dim = 200
    y2s_dim = 200
    z2s_dim = 200
    lr_iterations = {0: lr}

    target_dim = k  # As different appliances are separated into theta_mu1, theta_mu2, etc., each one is created from k different Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_redd(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25,
        typeLoad=loadType,
        flgAggSumScaled=1,
        flgFilterZeros=1)

    print(Xtrain.shape, Xval.shape, Xtest.shape, ytrain.shape, yval.shape,
          ytest.shape)
    print("Mean ", reader.meanTraining)
    print("Std", reader.stdTraining)
    instancesPlot = {0: [4]}

    train_data = Redd(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Redd(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Redd(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit=typeActivFunc,
                                    init_W=init_W,
                                    init_b=init_b)

    if (y_dim > 1):
        theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    if (y_dim > 2):
        theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    if (y_dim > 3):
        theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                        parent=['theta_1'],
                                        parent_dim=[p_x_dim],
                                        nout=target_dim,
                                        unit=typeActivFunc,
                                        init_W=init_W,
                                        init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    if (y_dim > 1):
        theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    if (y_dim > 2):
        theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    if (y_dim > 3):
        theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                         parent=['theta_1'],
                                         parent_dim=[p_x_dim],
                                         nout=target_dim,
                                         unit='softplus',
                                         cons=1e-4,
                                         init_W=init_W,
                                         init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    if (y_dim > 1):
        coeff2 = FullyConnectedLayer(name='coeff2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    if (y_dim > 2):
        coeff3 = FullyConnectedLayer(name='coeff3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    if (y_dim > 3):
        coeff4 = FullyConnectedLayer(name='coeff4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=k,
                                     unit='softmax',
                                     init_W=init_W,
                                     init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    dynamicOutput = [None, None, None, None, None, None, None, None]
    if (y_dim > 1):
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None]  # mu, sig, coeff and pred
    if (y_dim > 2):
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn(x_t, y_t, scheduleSamplingMask, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  # in the original code this is Gaussian; the GMM is used for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        ## prediction 1
        y_pred = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred = T.concatenate([y_pred, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred = T.concatenate([y_pred, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred = T.concatenate([y_pred, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        #s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)

        if (scheduleSamplingMask == 1):
            s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        else:
            y_t_aux = y_1.fprop([y_pred], params)
            s_t = rnn.fprop([[x_t, z_1_t, y_t_aux], [s_tm1]], params)

        return (s_t, ) + tupleMulti

        #corr_temp, binary_temp

    (otherResults, updates) = theano.scan(
        fn=inner_fn,
        sequences=[x_1_temp, y_1_temp, scheduleSamplingMask],
        outputs_info=output_fn)  #[s_0, (None)]

    s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,\
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = otherResults[:9]
    restResults = otherResults[9:]

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    # with axis=None, T.mean averages over all elements
    mse1 = T.mean((y_pred1_temp -
                   y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))) ** 2)
    mae1 = T.mean(T.abs_(y_pred1_temp -
                         y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    ddoutMSEA = []
    ddoutYpreds = [y_pred1_temp]
    indexSepDynamic = 7  # nll, recon, kl, totalMSE, totalMAE, mse1, mae1

    totaMAE = mae1.copy()
    totaMSE = mse1.copy()
    argsGMM = ()  # extra GMM parameters for output dims 2..4, filled below
    mse2 = T.zeros((1, ))
    mae2 = T.zeros((1, ))
    mse3 = T.zeros((1, ))
    mae3 = T.zeros((1, ))
    mse4 = T.zeros((1, ))
    mae4 = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[:4]
        restResults = restResults[4:]
        theta_mu2_temp.name = 'theta_mu2'
        theta_sig2_temp.name = 'theta_sig2'
        coeff2_temp.name = 'coeff2'
        y_pred2_temp.name = 'disaggregation2'
        mse2 = T.mean((y_pred2_temp -
                       y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))) ** 2)
        mae2 = T.mean(T.abs_(y_pred2_temp -
                             y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
        mse2.name = 'mse2'
        mae2.name = 'mae2'

        theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu2_in, theta_sig2_in, coeff2_in)

        ddoutMSEA = ddoutMSEA + [mse2, mae2]
        ddoutYpreds = ddoutYpreds + [y_pred2_temp]
        #totaMSE+=mse2
        indexSepDynamic += 2

    if (y_dim > 2):
        theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[:4]
        restResults = restResults[4:]
        theta_mu3_temp.name = 'theta_mu3'
        theta_sig3_temp.name = 'theta_sig3'
        coeff3_temp.name = 'coeff3'
        y_pred3_temp.name = 'disaggregation3'
        mse3 = T.mean((y_pred3_temp -
                       y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))) ** 2)
        mae3 = T.mean(T.abs_(y_pred3_temp -
                             y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
        mse3.name = 'mse3'
        mae3.name = 'mae3'

        theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in)
        ddoutMSEA = ddoutMSEA + [mse3, mae3]
        ddoutYpreds = ddoutYpreds + [y_pred3_temp]
        #totaMSE+=mse3
        indexSepDynamic += 2

    if (y_dim > 3):
        theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[:4]
        restResults = restResults[4:]
        theta_mu4_temp.name = 'theta_mu4'
        theta_sig4_temp.name = 'theta_sig4'
        coeff4_temp.name = 'coeff4'
        y_pred4_temp.name = 'disaggregation4'
        mse4 = T.mean((y_pred4_temp -
                       y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))) ** 2)
        mae4 = T.mean(T.abs_(y_pred4_temp -
                             y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
        mse4.name = 'mse4'
        mae4.name = 'mae4'

        theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
        theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
        coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in)
        ddoutMSEA = ddoutMSEA + [mse4, mae4]
        ddoutYpreds = ddoutYpreds + [y_pred4_temp]
        #totaMSE+=mse4
        indexSepDynamic += 2

    totaMSE = (mse1 + mse2 + mse3 + mse4) / y_dim
    totaMSE.name = 'mse'

    totaMAE = (mae1 + mae2 + mae3 + mae4) / y_dim
    totaMAE.name = 'mae'

    recon = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    if (flgMSE == 1):
        nll_upper_bound = recon_term + kl_term + totaMSE
    else:
        nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, totaMSE, totaMAE, mse1,
                mae1
            ] + ddoutMSEA + ddoutYpreds,
            indexSep=indexSepDynamic,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, totaMSE, totaMAE],
        n_steps=n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp)

    mainloop.run()
    '''
    data=Iterator(test_data, batch_size)

    test_fn = theano.function(inputs=[x, y],#[x, y],
                              #givens={x:Xtest},
                              #on_unused_input='ignore',
                              #z=( ,200,1)
                              allow_input_downcast=True,
                              outputs=[prediction_val, recon_term_val, totaMSE_val, totaMAE_val, 
                                        mse1_val,mse2_val,mse3_val,mse4_val,
                                        mae1_val,mae2_val,mae3_val,mae4_val, #unnormalized mae and mse 16 items#
                                        relErr1_val,relErr2_val,relErr3_val,relErr4_val,
                                        propAssigned1_val, propAssigned2_val,propAssigned3_val,propAssigned4_val],
                              updates=updates_val
                              )
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
      outputGeneration = test_fn(batch[0], batch[2])
      testOutput.append(outputGeneration[1:12]) #before 36 including unnormalized metrics
      testMetrics2.append(outputGeneration[12:])
      #{0:[4,20], 2:[5,10]} 
      #if (numBatchTest==0):

      plt.figure(1)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(2)
      plt.plot(np.transpose(batch[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(3)
      plt.plot(np.transpose(batch[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
      plt.clf()
      numBatchTest+=1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)

    testOutput[:,19:] = 1000 * testOutput[:,19:] # kW to watts
    recon_test = testOutput[:, 0].mean()
    mse_test =  testOutput[:, 1].mean()
    mae_test =  testOutput[:, 2].mean()
    mse1_test =  testOutput[:, 3].mean()
    mae1_test =  testOutput[:, 7].mean()
    mse2_test =  testOutput[:, 4].mean()
    mae2_test =  testOutput[:, 8].mean()
    mse3_test =  testOutput[:, 5].mean()
    mae3_test =  testOutput[:, 9].mean()
    mse4_test =  testOutput[:, 6].mean()
    mae4_test =  testOutput[:, 10].mean()


    print(testOutput[:,3:11].mean(),testOutput[:,11:19].mean())

    relErr1_test = testMetrics2[:,0].mean()
    relErr2_test = testMetrics2[:,1].mean()
    relErr3_test = testMetrics2[:,2].mean()
    relErr4_test = testMetrics2[:,3].mean()

    propAssigned1_test = testMetrics2[:, 8].mean()
    propAssigned2_test = testMetrics2[:, 9].mean()
    propAssigned3_test = testMetrics2[:, 10].mean()
    propAssigned4_test = testMetrics2[:, 11].mean()
    '''

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n\n")

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write("epoch,log,kl,mse1,mse2,mse3,mse4,mae1,mae2,mae3,mae4\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        e, f, g, n, p, q = 0, 0, 0, 0, 0, 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse1'][i]
        m = mainloop.trainlog.monitor['mae1'][i]

        if (y_dim > 1):
            e = mainloop.trainlog.monitor['mse2'][i]
            n = mainloop.trainlog.monitor['mae2'][i]
        if (y_dim > 2):
            f = mainloop.trainlog.monitor['mse3'][i]
            p = mainloop.trainlog.monitor['mae3'][i]
        if (y_dim > 3):
            g = mainloop.trainlog.monitor['mse4'][i]
            q = mainloop.trainlog.monitor['mae4'][i]

        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, d, e, f, g, m, n, p, q))
    fLog.close()
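
# For reference, the KLGaussianGaussian term used above is the analytic
# KL divergence between two diagonal Gaussians. A minimal NumPy sketch,
# assuming `sig` holds standard deviations (illustrative only, not part
# of the original code):
import numpy as np

def kl_gaussian_gaussian(mu_q, sig_q, mu_p, sig_p):
    # KL(q || p) = log(sig_p / sig_q)
    #              + (sig_q^2 + (mu_q - mu_p)^2) / (2 * sig_p^2) - 1/2,
    # summed over the last axis
    return np.sum(np.log(sig_p / sig_q)
                  + (sig_q ** 2 + (mu_q - mu_p) ** 2) / (2.0 * sig_p ** 2)
                  - 0.5, axis=-1)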
Esempio n. 37
0
    def __init__(
        self, model, dataset, train, percept_preprocessor, action_map,
        base_dir, model_pickle_path, save_rate=100,
        epsilon=1, epsilon_anneal_frames=1000000, epsilon_end=0.1,
        discount_factor=0.8, k=4,
    ):
        # Validate and store parameters
        assert(model)
        self.model = model

        assert(dataset)
        self.dataset = dataset

        assert(train)
        self.train = train

        assert(percept_preprocessor)
        self.percept_preprocessor = percept_preprocessor

        assert(action_map and type(action_map) == dict)
        self.action_map = action_map

        assert(os.path.exists(base_dir))
        self.base_dir = base_dir

        assert(os.path.exists(os.path.dirname(model_pickle_path)))
        self.model_pickle_path = model_pickle_path

        assert(save_rate > 0)
        self.save_rate = save_rate

        assert(discount_factor > 0)
        if (discount_factor >= 1):
            log.warning("Discount factor >= 1, learning may diverge.")
        self.discount_factor = discount_factor

        assert(epsilon >= 0 and epsilon <= 1)
        self.epsilon = epsilon

        assert(epsilon_anneal_frames >= 0)
        self.epsilon_anneal_frames = epsilon_anneal_frames

        assert(epsilon_end >= 0)
        self.epsilon_end = epsilon_end

        self.epsilon_annealing_rate = 0
        if self.epsilon_anneal_frames > 0:
            self.epsilon_annealing_rate = float(self.epsilon - self.epsilon_end)
            self.epsilon_annealing_rate /= float(self.epsilon_anneal_frames)
        log.info('Epsilon annealing rate: %0.10f' % self.epsilon_annealing_rate)

        assert(k > 0)
        self.k = k

        self.train.dataset = self

        # How many actual actions does RL-Glue/ALE support? Can we query the available actions
        # for a given game and make this part more efficient? Using 20 for now.
        self.action_log = {i: 0 for i in range(20)}

        # Init helper member variables
        self.action_count = 0
        self.reward = 0  # Accumulator for reward values

        # Init frame memory
        self.frame_memory = col.deque(maxlen=self.k)

        # Compile action function
        log.info('BASIC AGENT: Compiling action function...')
        phi_eq = T.tensor4()
        q_eq = self.model.fprop(phi_eq)
        action_eq = T.argmax(q_eq, axis=1)
        self.action_func = function([phi_eq], action_eq)
        log.info('Done.')

        # Compile max q
        log.info('BASIC AGENT: Compiling max q function...')
        max_action_eq = T.max(q_eq, axis=1)
        self.max_q_func = function([phi_eq], max_action_eq)
        log.info('Done.')

        # Compile Q-learning target function
        log.info('BASIC AGENT: Compiling target y function...')
        r = T.fvector('r')
        gamma = T.fscalar('gamma')
        y = r + gamma*max_action_eq
        self.y_func = function([r, gamma, phi_eq], y)
        log.info('Done.')
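
# For reference, `y_func` computes the one-step Q-learning target
# y_i = r_i + gamma * max_a Q(s'_i, a). A minimal NumPy sketch of the
# same computation (illustrative, not part of this agent):
import numpy as np

def q_target(rewards, gamma, q_next):
    # q_next: array of shape (batch, n_actions) with Q-values for the
    # successor states s'
    return rewards + gamma * q_next.max(axis=1)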
Esempio n. 38
0
def train_mlprnn(weight_path=sys.argv[1],
                 file_name1=sys.argv[2],
                 L1_reg=0.0,
                 L2_reg=0.0000,
                 path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    x3 = T.fvector('x3')
    ht1 = T.fvector('ht1')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                 allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                                allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'),
                                allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'),
                               allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP_RNN(rng=rng,
                         input1=x1,
                         input2=x2,
                         input3=x3,
                         initial_hidden=ht1,
                         n_in=vocab_size,
                         fea_dim=int(sys.argv[3]),
                         context_size=2,
                         n_hidden=int(sys.argv[4]),
                         n_out=vocab_size)

    hidden_state = theano.shared(
        numpy.empty((int(sys.argv[4]), ), dtype='float32'))

    cost = classifier.cost(y)

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = 0.05, scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x1: test_set_x1,
                                           x2: test_set_x2,
                                           x3: test_set_x3,
                                           ht1: hidden_state,
                                           y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x1: valid_set_x1,
                                               x2: valid_set_x2,
                                               x3: valid_set_x3,
                                               ht1: hidden_state,
                                               y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.RNNhiddenlayer.output], updates = updates, \
                                 givens = {x1: train_set_x1,
                                           x2: train_set_x2,
                                           x3: train_set_x3,
                                           ht1: hidden_state,
                                           y: train_set_y})
    f = h5py.File(weight_path + file_name1, "r")
    for i in xrange(0, classifier.no_of_layers, 2):
        path_modified = '/' + 'MLP' + str(2) + '/layer' + str(i / 2)
        if i == 4:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified +
                                                              "/W"].value,
                                                            dtype='float32'),
                                              borrow=True)
        else:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified +
                                                              "/W"].value,
                                                            dtype='float32'),
                                              borrow=True)
            classifier.MLPparams[i + 1].set_value(numpy.asarray(
                f[path_modified + "/b"].value, dtype='float32'),
                                                  borrow=True)
    f.close()

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_x3.set_value(numpy.asarray(temp_features3,
                                                     dtype='float32'),
                                       borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = train_model(
                    numpy.array(learnrate_schedular.get_rate(),
                                dtype='float32'))
                hidden_state.set_value(numpy.asarray(out[1], dtype='float32'),
                                       borrow=True)

            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_x3.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # also marks the end of the training phase for this epoch
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_x3.set_value(numpy.asarray(temp_features3,
                                                     dtype='float32'),
                                       borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]],
                                                    dtype='int32'),
                                      borrow=True)
                out = validate_model()
                #error_rate = out[0]
                likelihoods = out[0]
                #valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        #this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # also marks the end of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features3 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            temp_features3[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2,
                                                dtype='float32'),
                                  borrow=True)
            test_set_x3.set_value(numpy.asarray(temp_features3,
                                                dtype='float32'),
                                  borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'),
                                 borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                           %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    print numpy.sum(log_likelihood)
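
# The train/valid/test loops above build one-hot inputs by hand; a small
# helper capturing the same pattern (illustrative, not part of the
# original code):
import numpy

def one_hot(index, size):
    # dense one-hot vector of length `size` with a single 1 at `index`
    vec = numpy.zeros(size, dtype='float32')
    vec[index] = 1.0
    return vec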
Esempio n. 39
0
    def __theano_build__(self):
        E, W, U, V = self.E, self.W, self.U, self.V

        x = T.fvector('x')
        y = T.fvector('y')

        # initial hidden vector
        initial_hidden_vector = np.zeros(self.hidden_dim)

        def calculate(x, h_t_prev, E, W, U, V):
            x_t = T.dot(E, x)

            z_t = T.nnet.sigmoid(T.dot(W[0], x_t) + U[0].dot(h_t_prev))
            r_t = T.nnet.sigmoid(T.dot(W[1], x_t) + U[1].dot(h_t_prev))
            _h_t = T.tanh(T.dot(W[2], x_t) + U[2].dot(h_t_prev * r_t))
            h_t = (T.ones_like(z_t) - z_t) * h_t_prev + z_t * _h_t

            # softmax returns a matrix with one row only -- the row we want
            o_t = T.nnet.softmax(V.dot(h_t))[0][0]

            # return the updated hidden state so it carries through the scan
            return [o_t, h_t]

        [o, h] , updates = theano.scan(
            calculate,
            # outputs_info=[None, dict(initial=T.zeros(self.hidden_dim))],
            outputs_info=[None, initial_hidden_vector],
            non_sequences = [E, W, U, V],
            sequences=x,
        )

        prediction = T.argmax(o, axis=0)
        prediction_error = T.sum(T.nnet.categorical_crossentropy(o, y))

        # Total cost (Regularization can be done here)
        cost = prediction_error

        # gradients
        dE = T.grad(cost, E)
        dW = T.grad(cost, W)
        dU = T.grad(cost, U)
        dV = T.grad(cost, V)

        # assign functions
        self.predict = theano.function([x], o)
        self.prediction_class = theano.function([x], prediction)
        self.c_error = theano.function([x,y], cost)
        self.bptt = theano.function([x, y], [dW, dU, dV])

        # SGD parameters
        learning_rate = T.scalar('learning_rate')
        decay = T.scalar('decay')

        # rmsprop cache updates
        mE = decay * self.mE + (1 - decay) * dE ** 2
        mU = decay * self.mU + (1 - decay) * dU ** 2
        mW = decay * self.mW + (1 - decay) * dW ** 2
        mV = decay * self.mV + (1 - decay) * dV ** 2

        self.sgd_step = theano.function(
                [x, y, learning_rate, theano.In(decay, value=0.9)],
                [],
                updates = [
                            (E, E - learning_rate * dE / T.sqrt(mE + 1e-6)),
                            (W, W - learning_rate * dW / T.sqrt(mW + 1e-6)),
                            (U, U - learning_rate * dU / T.sqrt(mU + 1e-6)),
                            (V, V - learning_rate * dV / T.sqrt(mV + 1e-6)),
                            (self.mE, mE),
                            (self.mU, mU),
                            (self.mW, mW),
                            (self.mV, mV)
                ]
        )
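
# The updates above follow the standard rmsprop rule. A minimal NumPy
# sketch of one parameter update (illustrative; `cache` must persist
# across calls, as self.mW etc. do above):
import numpy as np

def rmsprop_step(param, grad, cache, lr=0.01, decay=0.9, eps=1e-6):
    # exponential moving average of squared gradients
    cache = decay * cache + (1.0 - decay) * grad ** 2
    # scale the step by the root of the cache
    param = param - lr * grad / np.sqrt(cache + eps)
    return param, cache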
Esempio n. 40
0
 #Pi model variables:
 if model.network_type=="pi":
     input_b_var = T.tensor3('inputs_b')
     mask_train=T.vector('mask_train')
     unsup_weight_var = T.scalar('unsup_weight')
 elif model.network_type=="tempens":
 #tempens model variables:
     z_target_var = T.matrix('z_targets')
     mask_train = T.vector('mask_train')
     unsup_weight_var = T.scalar('unsup_weight')
 
 learning_rate_var = T.scalar('learning_rate')
 adam_beta1_var = T.scalar('adam_beta1')
   
 #negative loss
 negative_loss_alpha=T.fvector("negative_loss_alpha")
 negative_loss_lamda=T.fscalar("negative_loss_lamda") 
 
 #Keywords-attention
 input_root=T.fmatrix("input_root")
 input_e1=T.fmatrix("input_e1")
 input_e2=T.fmatrix("input_e2")
 
 """
 2.
 Bulit GRU network
 ADAM
 """
 gru_network,l_in,l_mask,l_gru_forward,l_split_cnn=model.bulit_gru(input_var,mask_var,input_root,input_e1,input_e2)
 
 # mask_train_input: entries equal to "1" are passed through (labeled); entries equal to "0" are masked out.
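 
 # A hedged sketch (not from this file) of the Gaussian ramp-up schedule
 # commonly used for `unsup_weight` in Pi/tempens training; the names and
 # the default ramp-up length are assumptions:
 import numpy as np
 
 def unsup_weight_rampup(epoch, rampup_length=80, w_max=1.0):
     # w(t) = w_max * exp(-5 * (1 - t/T)^2) during ramp-up, then w_max
     if epoch >= rampup_length:
         return w_max
     p = 1.0 - float(epoch) / rampup_length
     return w_max * float(np.exp(-5.0 * p * p))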
Esempio n. 41
0
def policy_network(state):
    input_state = InputLayer(input_var=state, shape=(None, n_input))

    dense_1 = DenseLayer(input_state, num_units=n_input, nonlinearity=tanh)

    dense_2 = DenseLayer(dense_1, num_units=n_input, nonlinearity=tanh)

    probs = DenseLayer(dense_2, num_units=n_output, nonlinearity=softmax)

    return probs


X_state = T.fmatrix()
X_action = T.bvector()
X_reward = T.fvector()

X_action_hot = to_one_hot(X_action, n_output)

prob_values = policy_network(X_state)

policy_ = get_output(prob_values)
policy = theano.function(inputs=[X_state],
                         outputs=policy_,
                         allow_input_downcast=True)

loss = categorical_crossentropy(policy_, X_action_hot) * X_reward
loss = loss.mean()

params = get_all_params(prob_values)
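
# The loss above is the REINFORCE objective: the cross-entropy of the
# taken action, weighted by the observed return. A NumPy sketch of the
# same per-batch loss (illustrative names, not part of the original):
import numpy as np

def reinforce_loss(probs, actions, rewards):
    # probs: (batch, n_actions) policy outputs; actions: int indices
    picked = probs[np.arange(len(actions)), actions]
    # -log pi(a_t | s_t) * R_t, averaged over the batch
    return np.mean(-np.log(picked) * rewards)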
Esempio n. 42
0
    def test_cudnn_softmax_grad(self):
        if not cuda.dnn.dnn_available():
            raise SkipTest(cuda.dnn.dnn_available.msg)

        def cmp(n, m, f, f_gpu):
            data = numpy.arange(n * m, dtype='float32').reshape(n, m)
            gdata = numpy.asarray(data)[:, :, None, None]
            out = f(data)
            gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
            assert numpy.allclose(out, gout), numpy.absolute(out - gout)

        x = T.matrix('x', 'float32')
        x_gpu = T.tensor4('x_gpu', 'float32')
        f_z = T.nnet.softmax
        f_gpu = theano.sandbox.cuda.dnn.GpuDnnSoftmax('bc01', 'accurate',
                                                      'channel')

        # Verify the grad operation
        dims = (2, 3, 4, 5)
        gdata = numpy.arange(numpy.product(dims),
                             dtype='float32').reshape(dims)
        T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

        def check_types(graph, graph_gpu):
            self._check_types(graph, graph_gpu, -1, type(f_z),
                              theano.sandbox.cuda.dnn.GpuDnnSoftmax)

        def check_types_opt(graph, graph_gpu):
            assert isinstance(graph.maker.fgraph.toposort()[-1].op, type(f_z))
            assert len([
                n for n in graph_gpu.maker.fgraph.toposort()
                if isinstance(n.op, theano.sandbox.cuda.dnn.GpuDnnSoftmax)
            ]) == 1

        # Verify that the CPU and GPU implementations return the same results
        # up to a tolerance.
        self._test_softmax(x, x_gpu, f_z, f_gpu, cmp, mode_with_gpu,
                           check_types)

        mode_w_cudnn = mode_with_gpu.including("cudnn")
        self._test_softmax(x, x, f_z, f_z, self._cmp, mode_w_cudnn,
                           check_types_opt)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is
        # applied when cudnn is required
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
        ]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad optimization is not
        # applied when cudnn is excluded or not available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
        ]) == 0)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 1)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
        # crash with manual graph
        y = T.fvector('y')
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.sandbox.cuda.dnn.GpuDnnSoftmaxGrad)
        ]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)
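
# The asserts above repeat one pattern: counting ops of a given type in
# a compiled graph. A small helper capturing it (illustrative name):
def count_ops(fn, op_type):
    # number of nodes of `op_type` in the compiled function's graph
    return len([node for node in fn.maker.fgraph.toposort()
                if isinstance(node.op, op_type)])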
Esempio n. 43
0
def MultitaskRelationStackMaker(Shared, Classifiers, params, num_tasks, graph=False, weighted=False, batched=False):
    if batched:
        emb_inputs = [T.itensor3('emb_input_'+str(i)) for i in range(num_tasks)]
        entities_tv = [[T.fmatrix('enidx_'+str(j)+'_t_'+str(i))
                    for j in range(params['num_entity_d'+str(i)])] 
                    for i in range(num_tasks)]
        if graph:
            if weighted:
                masks = [T.ftensor4('child_mask_d'+str(i)) for i in range(num_tasks)]
            else:
                masks = [T.ftensor3('child_mask_d'+str(i)) for i in range(num_tasks)]
        else:
            masks = [T.fmatrix('batch_mask_d'+str(i)) for i in range(num_tasks)]
    else:
        emb_inputs = [T.imatrix('emb_input_'+str(i)) for i in range(num_tasks)]
        entities_tv = [[T.fvector('enidx_'+str(j)+'_t_'+str(i))
                    for j in range(params['num_entity_d'+str(i)])] 
                    for i in range(num_tasks)]
        if graph:
            if weighted:
                masks = [T.ftensor3('child_mask_d'+str(i)) for i in range(num_tasks)]
            else:
                masks = [T.fmatrix('child_mask_d'+str(i)) for i in range(num_tasks)]
        else:
            masks = None
    current_chip = Start(params['voc_size'], None) 
    instantiated_chips = stackLayers(Shared, current_chip, params)
    print ('Building Classifiers for tasks, input dim:', current_chip.out_dim)
    pred_ys = []
    gold_ys = []
    costs_arr = []
    grads_arr = []
    regularizable_param_arr = []
    global_regularizable_params = []
    for i, clsfier in enumerate(Classifiers):
        #feature_size = len(params['features2idx_dicts'][i]) #params['feature_size_'+str(i)]
        current_chip = instantiated_chips[-1][0]
        decoder_chips = stackLayers(clsfier, current_chip, params, entity_size=params['num_entity_d'+str(i)])
        ## Note: this implementation only uses the LSTM hidden layer
        temp_chips = instantiated_chips + decoder_chips
        init_chip = Start(params['voc_size'], emb_inputs[i])
        if batched:
            regularizable_params = computeLayers(temp_chips, init_chip, params, entities_input=entities_tv[i], mask=masks[i])
        else:
            regularizable_params = computeLayers(temp_chips, init_chip, params, entities_input=entities_tv[i])
        global_regularizable_params.extend(regularizable_params)
        regularizable_param_arr.append(regularizable_params)
        #task_chips.append(temp_chips)
        current_chip = temp_chips[-1][0]
        if current_chip.output_tv.ndim == 2:
            pred_ys.append(current_chip.output_tv) #T.argmax(current_chip.output_tv, axis=1))
        else:
            pred_ys.append(current_chip.output_tv) #T.argmax(current_chip.output_tv, axis=0))
        gold_ys.append(current_chip.gold_y)
        assert hasattr(current_chip, 'score')
        cost = current_chip.score 
        costs_arr.append(cost) #/params['nsentences']
        grads_arr.append( T.grad(cost,
            wrt=regularizable_params) )
        # Show all parameters that would be needed in this system
        params_needed = ['voc_size', 'feature_size_'+str(i)]
        params_needed += calculate_params_needed(temp_chips)
    #cost = sum(costs_arr)
    #global_regularizable_params = list(set(global_regularizable_params))
    #grads = T.grad(cost,
    #        wrt=global_regularizable_params)
    print ('The joint model regularizable parameters:')
    for k, v in params.items():
        if hasattr(v, 'is_regularizable'):
            print (k, v, v.is_regularizable)
    #return (emb_inputs, entities_tv, gold_ys, pred_ys, costs_arr, cost, grads_arr, grads, regularizable_param_arr, global_regularizable_params)
    if batched or graph:
        return (emb_inputs, entities_tv, masks, gold_ys, pred_ys, costs_arr, grads_arr, regularizable_param_arr)
    else:
        return (emb_inputs, entities_tv, gold_ys, pred_ys, costs_arr, grads_arr, regularizable_param_arr)
Esempio n. 44
0
    def fit(self):

        #if self.batch_size is not None:

        index = T.lscalar('index')

        # create shared data-sets in case of mini-batch
        train_X = self.shared_dataset(self.X_dat)
        train_y = self.shared_dataset(self.y_dat)
        test_X = self.shared_dataset(self.X_test)

        if self.batch_size is not None:

            n_train_batches = train_X.get_value(
                borrow=True).shape[0] / self.batch_size
            n_test_batches = test_X.get_value(
                borrow=True).shape[0] / self.batch_size

        X = T.matrix()

        if self.linear_regression:

            Y = T.fvector()

        else:

            Y = T.matrix()

        if self.linear_regression:

            self.w = self.initialize_weights(
                (self.X_dat.shape[1]), self.X_dat.shape[1], 1,
                self.weights_initialization
            )  # initialize weights for the parameters ( linear regression )

            if self.add_bias:

                self.b = theano.shared(
                    np.asarray(0, dtype=theano.config.floatX)
                )  # initialize bias to zero ( linear regression -- a single value )

                py_x = T.dot(
                    X,
                    self.w) + self.b  # get predictions for linear regression

            else:

                py_x = T.dot(X, self.w)

        else:

            self.w = self.initialize_weights(
                (self.X_dat.shape[1], self.y_dat.shape[1]),
                self.X_dat.shape[1], self.y_dat.shape[1],
                self.weights_initialization
            )  # initialize weights for the parameters ( logistic regression )

            if self.add_bias:

                self.b = theano.shared(
                    np.zeros((self.y_dat.shape[1], ),
                             dtype=theano.config.floatX)
                )  # initialize bias to zeros ( logistic regression -- a numpy array )

                py_x = T.nnet.softmax(T.dot(X, self.w) +
                                      self.b)  # get probability predictions

            else:

                py_x = T.nnet.softmax(T.dot(X, self.w))

        cost = T.mean(
            self.objectives(py_x, Y, self.objective,
                            self.X_dat.shape[0]))  # objective function

        if self.L1 > 0.0 or self.L2 > 0.0:  # L1, L2 regularization [ when both used then 'elastic-net' ]

            if self.add_bias:

                reg_param_L1 = T.sum(abs(self.w)) + T.sum(
                    abs(self.b))  # L1 regularization

                reg_param_L2 = T.sum(T.sqr(self.w)) + T.sum(T.sqr(
                    self.b))  # L2 regularization

                cost = cost + self.L1 * reg_param_L1 + self.L2 * reg_param_L2

            else:

                reg_param_L1 = T.sum(abs(self.w))  # L1 regularization

                reg_param_L2 = T.sum(T.sqr(self.w))  # L2 regularization

                cost = cost + self.L1 * reg_param_L1 + self.L2 * reg_param_L2

        if self.add_bias:

            Params = [self.w, self.b]

        else:

            Params = [self.w]

        if self.batch_size is None:

            train = theano.function(
                inputs=[index],
                outputs=cost,
                updates=Optimizers_update(cost, Params, self.learning_rate,
                                          self.optimizer).run_optimizer(),
                givens={
                    X: train_X[0:index],
                    Y: train_y[0:index]
                },
                allow_input_downcast=True
            )  # Compile [ call external class Optimizers_update ]

            predict_valid = theano.function(inputs=[index],
                                            outputs=py_x,
                                            givens={X: test_X[0:index]},
                                            allow_input_downcast=True)

        else:

            train = theano.function(
                inputs=[index],
                outputs=cost,
                updates=Optimizers_update(cost, Params, self.learning_rate,
                                          self.optimizer).run_optimizer(),
                givens={
                    X:
                    train_X[index * self.batch_size:(index + 1) *
                            self.batch_size],
                    Y:
                    train_y[index * self.batch_size:(index + 1) *
                            self.batch_size]
                },
                allow_input_downcast=True)

            predict_valid = theano.function(
                inputs=[index],
                outputs=py_x,
                givens={
                    X:
                    test_X[index * self.batch_size:(index + 1) *
                           self.batch_size]
                },
                allow_input_downcast=True
            )  # prediction function for validation set

        self.predict = theano.function(inputs=[X],
                                       outputs=py_x)  # predictions function

        early_stopping = []  # early stopping

        consecutive_increases_OR_decreases = 0

        for i in range(self.iters):

            if self.batch_size is None:

                cost_train = train(self.X_dat.shape[0])

                if self.custom_eval is None:

                    cost_valid = self.evaluate_early_stopping(
                        self.Y_test, self.predict(self.X_test),
                        self.linear_regression)

                else:

                    cost_valid = self.custom_eval[0](self.Y_test,
                                                     self.predict(self.X_test))

            else:

                for batch_index_train in range(n_train_batches):

                    cost_train = train(batch_index_train)

                if self.custom_eval is None:

                    cost_valid = np.mean([
                        self.evaluate_early_stopping(
                            self.Y_test[batch_index_test *
                                        self.batch_size:(batch_index_test +
                                                         1) * self.batch_size],
                            predict_valid(batch_index_test),
                            self.linear_regression)
                        for batch_index_test in range(n_test_batches)
                    ])

                else:

                    cost_valid = np.mean([
                        self.custom_eval[0](
                            self.Y_test[batch_index_test *
                                        self.batch_size:(batch_index_test +
                                                         1) * self.batch_size],
                            predict_valid(batch_index_test))
                        for batch_index_test in range(n_test_batches)
                    ])

            try:

                if self.custom_eval is None:

                    print 'iter', str(i + 1), '  train_loss ', str(
                        np.round(cost_train, 3)), '  test_loss ', str(
                            np.round(cost_valid, 3))

                else:

                    print 'iter', str(i + 1), '  train_loss ', str(
                        np.round(
                            cost_train,
                            3)), '  test_' + self.custom_eval[1], ' ', str(
                                np.round(cost_valid, 3))

            except ValueError:

                pass

            early_stopping.append(cost_valid)

            if not self.maximize:

                change_sign = len(early_stopping) >= 2 and early_stopping[
                    -1] > early_stopping[-2]
                increase = 'increases'

            else:

                change_sign = len(early_stopping) >= 2 and early_stopping[
                    -1] < early_stopping[-2]
                decrease = 'decreases'

            if change_sign:

                consecutive_increases_OR_decreases += 1
            else:
                consecutive_increases_OR_decreases = 0

            if (consecutive_increases_OR_decreases >=
                    self.early_stopping_rounds):

                if not self.maximize:

                    print 'regression stopped after ', str(
                        consecutive_increases_OR_decreases
                    ), ' consecutive ', increase, ' of loss and ', str(
                        i + 1), ' Epochs'

                    break

                else:
                    print 'regression stopped after ', str(
                        consecutive_increases_OR_decreases
                    ), ' consecutive ', decrease, ' of loss and ', str(
                        i + 1), ' Epochs'

                    break

            if np.isinf(cost_valid) or np.isnan(cost_valid):

                print 'Inf or nan values present after', str(i), 'Epochs'

                break
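
# The loop above stops after `early_stopping_rounds` consecutive
# worsenings of the validation metric. A compact sketch of the same
# patience rule (illustrative, outside the class):
def should_stop(history, patience, maximize=False):
    worse = 0
    for prev, cur in zip(history, history[1:]):
        bad = (cur < prev) if maximize else (cur > prev)
        worse = worse + 1 if bad else 0
        if worse >= patience:
            return True
    return False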
Esempio n. 45
0
def model_eval(get_scores):
    entityPairs = T.fmatrix()
    entities = T.fmatrix()
    relations = T.fmatrix()
    testData_DM = T.imatrix()
    testData_MF = T.imatrix()
    entity_oov_embedding = T.fvector()
    entityPair_oov_embedding = T.fvector()
    normalize_eval = T.iscalar()
    normalize = T.iscalar()
    '''
        for a given (e1, ?) we can partition the filtered candidate e2s into:

        1) e2s such that (e1,e2) was seen in training -> set1_e2 (their pairs are allowedEP_MF)
        2) e2s such that (e1,e2) is unseen but e2 itself is not OOV -> set2_e2
        3) OOV e2s, of which only the count is needed -> set3_e2
    '''
    allowedEP_MF = theano.typed_list.TypedListType(T.ivector)()
    set1_e2 = theano.typed_list.TypedListType(T.ivector)()
    set2_e2 = theano.typed_list.TypedListType(T.ivector)()
    set3_e2 = T.ivector()

    oov_flag_e1_DM = T.ivector()
    oov_flag_e2_DM = T.ivector()
    oov_flags_MF = T.ivector()

    nnet_W1 = T.fmatrix()
    nnet_W2 = T.fmatrix()
    nnet_W3 = T.fmatrix()

    nnet_b1 = T.fvector()
    nnet_b2 = T.fvector()
    nnet_b3 = T.fvector()
    aux_features = T.fmatrix()

    layers = [(nnet_W1, nnet_b1), (nnet_W2, nnet_b2), (nnet_W3, nnet_b3)]

    normalize_DM_W1 = T.fmatrix()
    normalize_DM_b1 = T.fvector()
    normalize_MF_W1 = T.fmatrix()
    normalize_MF_b1 = T.fvector()

    layers_normalize_DM = [(normalize_DM_W1, normalize_DM_b1)]
    layers_normalize_MF = [(normalize_MF_W1, normalize_MF_b1)]

    def MF_fn(testPoint_DM, testPoint_MF, i, oov_flag_e1, oov_flag_e2,
              oov_flag, entityPairs, entities, relations,
              entityPair_oov_embedding, entity_oov_embedding,
              allowed_entityPair, set1_e2, set2_e2, set3_e2, normalize_eval,
              normalize):
        # score of allowed e2s
        scores_MF = T.tanh(
            T.dot(entityPairs[allowed_entityPair[i]],
                  relations[testPoint_MF[0]]))
        #scores_MF = T.dot(entityPairs[allowed_entityPair[i]], relations[testPoint_MF[0]])
        # score for oov (e1,e2)s
        score_oov_MF = T.tanh(
            T.dot(entityPair_oov_embedding, relations[testPoint_MF[0]]))
        #score_oov_MF = T.dot(entityPair_oov_embedding,relations[testPoint_MF[0]])
        score_nonOOV_MF = T.tanh(
            T.dot(entityPairs[testPoint_MF[1]], relations[testPoint_MF[0]]))
        #score_nonOOV_MF = T.dot(entityPairs[testPoint_MF[1]], relations[testPoint_MF[0]])

        # based on whether (e1,e2) is OOV pick the score for the current testPoint
        score_testPoint_MF = T.switch(oov_flag, score_oov_MF, score_nonOOV_MF)

        e1_fact_embedding = T.switch(oov_flag_e1, entity_oov_embedding,
                                     entities[testPoint_DM[0]])
        e2_fact_embedding = T.switch(oov_flag_e2, entity_oov_embedding,
                                     entities[testPoint_DM[2]])

        # score of allowed e2s -> (e1,e2) seen -> e2 seen
        scores_DM = T.tanh(
            T.dot(e1_fact_embedding * entities[set1_e2[i]],
                  relations[testPoint_DM[1]]))
        #scores_DM   = T.dot(e1_fact_embedding*entities[set1_e2[i]], relations[testPoint_DM[1]])
        # score for the test point
        score_testPoint_DM = T.tanh(
            T.dot(relations[testPoint_DM[1]],
                  e1_fact_embedding * e2_fact_embedding))
        #score_testPoint_DM  = T.dot(relations[testPoint_DM[1]], e1_fact_embedding*e2_fact_embedding)
        score_oov_DM = T.tanh(
            T.dot(relations[testPoint_DM[1]],
                  e1_fact_embedding * entity_oov_embedding))
        #score_oov_DM = T.dot(relations[testPoint_DM[1]], e1_fact_embedding*entity_oov_embedding)

        # score for e2s such that (e1,e2) was not seen but e2 is not OOV
        scores_DM_set2 = T.tanh(
            T.dot(e1_fact_embedding * entities[set2_e2[i]],
                  relations[testPoint_DM[1]]))
        #scores_DM_set2 = T.dot(e1_fact_embedding*entities[set2_e2[i]], relations[testPoint_DM[1]])

        #Normalize scores using pretrained weights
        scores_MF = T.switch(
            normalize, get_normalized_scores(layers_normalize_MF, scores_MF),
            scores_MF)
        score_testPoint_MF = T.switch(
            normalize,
            get_normalized_scores(layers_normalize_MF, score_testPoint_MF),
            score_testPoint_MF)
        score_oov_MF = T.switch(
            normalize, get_normalized_scores(layers_normalize_MF,
                                             score_oov_MF), score_oov_MF)
        scores_DM = T.switch(
            normalize, get_normalized_scores(layers_normalize_DM, scores_DM),
            scores_DM)
        score_testPoint_DM = T.switch(
            normalize,
            get_normalized_scores(layers_normalize_DM, score_testPoint_DM),
            score_testPoint_DM)
        score_oov_DM = T.switch(
            normalize, get_normalized_scores(layers_normalize_DM,
                                             score_oov_DM), score_oov_DM)
        scores_DM_set2 = T.switch(
            normalize,
            get_normalized_scores(layers_normalize_DM, scores_DM_set2),
            scores_DM_set2)

        #DM and MF score normalization

        mean_DM, std_DM = get_data_stats(
            T.concatenate([scores_DM, scores_DM_set2,
                           T.stack([score_oov_DM])]))
        scores_DM = T.switch(normalize_eval,
                             normalize_data(scores_DM, mean_DM, std_DM),
                             scores_DM)
        mean_MF, std_MF = get_data_stats(
            T.concatenate([scores_MF, T.stack([score_oov_MF])]))
        scores_MF = T.switch(normalize_eval,
                             normalize_data(scores_MF, mean_MF, std_MF),
                             scores_MF)
        score_oov_DM = T.switch(normalize_eval,
                                normalize_data(score_oov_DM, mean_DM, std_DM),
                                score_oov_DM)
        score_oov_MF = T.switch(normalize_eval,
                                normalize_data(score_oov_MF, mean_MF, std_MF),
                                score_oov_MF)
        score_testPoint_MF = T.switch(normalize_eval,
                                      (score_testPoint_MF - mean_MF) / std_MF,
                                      score_testPoint_MF)
        score_testPoint_DM = T.switch(normalize_eval,
                                      (score_testPoint_DM - mean_DM) / std_DM,
                                      score_testPoint_DM)

        score_testPoint, scores_set1, scores_set2, scores_set3, f1 = get_scores(
            layers, aux_features[i], [scores_MF, scores_DM],
            [T.stack(score_oov_MF), scores_DM_set2],
            [score_oov_MF, score_oov_DM],
            [score_testPoint_MF, score_testPoint_DM])

        rank = 1 + T.sum(scores_set1 > score_testPoint) + T.sum(
            scores_set2 > score_testPoint)
        oov_comparison = score_testPoint < scores_set3
        rank = T.switch(oov_comparison, rank + set3_e2[i], rank)
        rank = T.switch(oov_flag_e2, rank + (set3_e2[i] / 2.0), rank)

        same = T.sum(T.eq(scores_set1, score_testPoint)) + T.sum(
            T.eq(scores_set2, score_testPoint))

        rank += same / 2.0

        same = same / (scores_set1.shape[0] + scores_set2.shape[0] * 1.0)
        '''
        dataStats = T.concatenate([get_data_stats(T.concatenate([scores_set1,scores_set2])),
        get_data_stats(scores_MF),
        get_data_stats(T.concatenate([scores_DM,scores_DM_set2]))])
        oov_scores = T.stack([score_oov_MF, score_oov_DM])
            
        return rank, f1, score_testPoint_DM, score_testPoint_MF, dataStats, oov_scores
        '''
        return rank, f1, score_testPoint_DM, score_testPoint_MF, same * 100.0

    ranks, ignore = theano.scan(MF_fn,
                                non_sequences=[
                                    entityPairs, entities, relations,
                                    entityPair_oov_embedding,
                                    entity_oov_embedding, allowedEP_MF,
                                    set1_e2, set2_e2, set3_e2, normalize_eval,
                                    normalize
                                ],
                                sequences=[
                                    testData_DM, testData_MF,
                                    theano.tensor.arange(testData_DM.shape[0]),
                                    oov_flag_e1_DM, oov_flag_e2_DM,
                                    oov_flags_MF
                                ])
    f = theano.function([
        normalize_eval, normalize, entityPairs, entities, relations,
        entityPair_oov_embedding, entity_oov_embedding, testData_DM,
        testData_MF, allowedEP_MF, set1_e2, set2_e2, oov_flag_e1_DM,
        oov_flag_e2_DM, oov_flags_MF, set3_e2, aux_features, nnet_W1, nnet_b1,
        nnet_W2, nnet_b2, nnet_W3, nnet_b3, normalize_DM_W1, normalize_DM_b1,
        normalize_MF_W1, normalize_MF_b1
    ],
                        ranks,
                        allow_input_downcast=True)

    return f
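
A minimal NumPy sketch (not part of the original snippet) of the ranking scheme built above: candidates scoring strictly higher than the test point push the rank up, ties contribute half a position each, and OOV candidates are charged half of their count on average.

import numpy as np

def rank_with_ties(test_score, candidate_scores, num_oov=0):
    # rank = 1 + number of strictly better candidates ...
    higher = np.sum(candidate_scores > test_score)
    # ... plus half a position for every tie ...
    ties = np.sum(candidate_scores == test_score)
    rank = 1 + higher + ties / 2.0
    # ... plus half of the unseen (OOV) candidates, which are assumed
    # to land above the test point half of the time on average
    return rank + num_oov / 2.0

# 3 better candidates, 2 ties, 4 OOV -> 1 + 3 + 1 + 2 = 7
print(rank_with_ties(0.5, np.array([0.9, 0.8, 0.7, 0.5, 0.5, 0.1]), num_oov=4))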
Esempio n. 46
0
def main(args):

    theano.config.optimizer = 'fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps
    typeLoad = int(args['typeLoad'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 350
    p_z_dim = 400
    p_x_dim = 450
    x2s_dim = 400
    y2s_dim = 200
    z2s_dim = 350
    target_dim = k  # each appliance head (theta_mu1, theta_mu2, ...) is modelled by its own mixture of k Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        typeLoad=typeLoad,
        trainPer=0.5,
        valPer=0.25,
        testPer=0.25)

    instancesPlot = {0: [5]}
    #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = UKdale(
        name='test',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_mu5 = FullyConnectedLayer(name='theta_mu5',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='linear',
                                    init_W=init_W,
                                    init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    theta_sig5 = FullyConnectedLayer(name='theta_sig5',
                                     parent=['theta_1'],
                                     parent_dim=[p_x_dim],
                                     nout=target_dim,
                                     unit='softplus',
                                     cons=1e-4,
                                     init_W=init_W,
                                     init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff2 = FullyConnectedLayer(name='coeff2',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff3 = FullyConnectedLayer(name='coeff3',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff4 = FullyConnectedLayer(name='coeff4',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    coeff5 = FullyConnectedLayer(name='coeff5',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=k,
                                 unit='softmax',
                                 init_W=init_W,
                                 init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1,
        theta_mu2,
        theta_sig2,
        coeff2,
        theta_mu3,
        theta_sig3,
        coeff3,
        theta_mu4,
        theta_sig4,
        coeff4,
        theta_mu5,
        theta_sig5,
        coeff5
    ]

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            prior_mu_t, prior_sig_t
        )  # in the original code this is a plain Gaussian sample; the GMM is only used for the output generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
        y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
        y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
        y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
        y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5
        #corr_temp, binary_temp

    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu1_temp_val,
      theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val,
      theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val,
      y_pred2_temp_val, theta_mu3_temp_val, theta_sig3_temp_val,
      coeff3_temp_val, y_pred3_temp_val, theta_mu4_temp_val,
      theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val,
      theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val,
      y_pred5_temp_val),
     updates_val) = theano.scan(fn=inner_fn_test,
                                sequences=[x_1_temp],
                                outputs_info=[
                                    s_0, None, None, None, None, None, None,
                                    None, None, None, None, None, None, None,
                                    None, None, None, None, None, None, None,
                                    None, None
                                ])

    for k, v in updates_val.iteritems():
        k.default_update = v
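    # (the loop above folds the scan's random-stream updates into the graph
    # via default_update, so later compiled functions sample correctly
    # without having these updates passed in explicitly)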

    def inner_fn(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            phi_mu_t, phi_sig_t
        )  # in the original code this is a plain Gaussian sample; the GMM is only used for the output generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)

        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
        theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
        coeff2_t = coeff2.fprop([theta_1_t], params)
        y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)

        theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
        theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
        coeff3_t = coeff3.fprop([theta_1_t], params)
        y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)

        theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
        theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
        coeff4_t = coeff4.fprop([theta_1_t], params)
        y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)

        theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
        theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
        coeff5_t = coeff5.fprop([theta_1_t], params)
        y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)

        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1, theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2, theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3, theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4, theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5
        #corr_temp, binary_temp

    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp,
      theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp,
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp,
      theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp,
      theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp),
     updates) = theano.scan(fn=inner_fn,
                            sequences=[x_1_temp, y_1_temp],
                            outputs_info=[
                                s_0, None, None, None, None, None, None, None,
                                None, None, None, None, None, None, None, None,
                                None, None, None, None, None, None, None, None,
                                None
                            ])

    for k, v in updates.iteritems():
        k.default_update = v

    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    mse1 = T.mean((y_pred1_temp - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1], 1)))**2)
    mae1 = T.mean(
        T.abs_(y_pred1_temp - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)
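    # KLGaussianGaussian is assumed to compute the closed-form KL between two
    # diagonal Gaussians per time step:
    #   KL(q||p) = 0.5 * sum(2*log(sig_p/sig_q)
    #                        + (sig_q**2 + (mu_q - mu_p)**2) / sig_p**2 - 1)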

    x_shape = x.shape
    y_shape = y.shape

    theta_mu2_temp.name = 'theta_mu2'
    theta_sig2_temp.name = 'theta_sig2'
    coeff2_temp.name = 'coeff2'
    y_pred2_temp.name = 'disaggregation2'
    mse2 = T.mean((y_pred2_temp - y[:, :, 1].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae2 = T.mean(
        T.abs_(y_pred2_temp - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))
    mse2.name = 'mse2'
    mae2.name = 'mae2'

    theta_mu3_temp.name = 'theta_mu3'
    theta_sig3_temp.name = 'theta_sig3'
    coeff3_temp.name = 'coeff3'
    y_pred3_temp.name = 'disaggregation3'
    mse3 = T.mean((y_pred3_temp - y[:, :, 2].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae3 = T.mean(
        T.abs_(y_pred3_temp - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))
    mse3.name = 'mse3'
    mae3.name = 'mae3'

    theta_mu4_temp.name = 'theta_mu4'
    theta_sig4_temp.name = 'theta_sig4'
    coeff4_temp.name = 'coeff4'
    y_pred4_temp.name = 'disaggregation4'
    mse4 = T.mean((y_pred4_temp - y[:, :, 3].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae4 = T.mean(
        T.abs_(y_pred4_temp - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))
    mse4.name = 'mse4'
    mae4.name = 'mae4'

    theta_mu5_temp.name = 'theta_mu5'
    theta_sig5_temp.name = 'theta_sig5'
    coeff5_temp.name = 'coeff5'
    y_pred5_temp.name = 'disaggregation5'
    mse5 = T.mean((y_pred5_temp - y[:, :, 4].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae5 = T.mean(
        T.abs_(y_pred5_temp - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))
    mse5.name = 'mse5'
    mae5.name = 'mae5'


    theta_mu1_in = theta_mu1_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_in = theta_mu2_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig2_in = theta_sig2_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff2_in = coeff2_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_in = theta_mu3_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig3_in = theta_sig3_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff3_in = coeff3_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_in = theta_mu4_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig4_in = theta_sig4_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff4_in = coeff4_temp.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_in = theta_mu5_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig5_in = theta_sig5_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff5_in = coeff5_temp.reshape((x_shape[0] * x_shape[1], -1))

    #x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))

    recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in,
                          theta_mu2_in, theta_sig2_in, coeff2_in, theta_mu3_in,
                          theta_sig3_in, coeff3_in, theta_mu4_in,
                          theta_sig4_in, coeff4_in, theta_mu5_in,
                          theta_sig5_in, coeff5_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'

    ######################## TEST (GENERATION) TIME

    #s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'

    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae1_val = T.mean(
        T.abs_(y_pred1_temp_val -
               y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    #NEURALNILM #(sum_output - sum_target) / max(sum_output, sum_target))
    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0]

    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))))
    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu2_temp_val.name = 'theta_mu2_val'
    theta_sig2_temp_val.name = 'theta_sig2_val'
    coeff2_temp_val.name = 'coeff2_val'
    y_pred2_temp_val.name = 'disaggregation2_val'
    mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae2_val = T.mean(
        T.abs_(y_pred2_temp_val -
               y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred2_temp_val)
    totReal = T.sum(y[:, :, 1])
    relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned2_val = 1 - T.sum(
        T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse2_val.name = 'mse2_val'
    mae2_val.name = 'mae2_val'

    theta_mu2_in_val = theta_mu2_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig2_in_val = theta_sig2_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu3_temp_val.name = 'theta_mu3_val'
    theta_sig3_temp_val.name = 'theta_sig3_val'
    coeff3_temp_val.name = 'coeff3_val'
    y_pred3_temp_val.name = 'disaggregation3_val'
    mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae3_val = T.mean(
        T.abs_(y_pred3_temp_val -
               y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred3_temp_val)
    totReal = T.sum(y[:, :, 2])
    relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned3_val = 1 - T.sum(
        T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse3_val.name = 'mse3_val'
    mae3_val.name = 'mae3_val'

    theta_mu3_in_val = theta_mu3_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig3_in_val = theta_sig3_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu4_temp_val.name = 'theta_mu4_val'
    theta_sig4_temp_val.name = 'theta_sig4_val'
    coeff4_temp_val.name = 'coeff4_val'
    y_pred4_temp_val.name = 'disaggregation4_val'
    mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae4_val = T.mean(
        T.abs_(y_pred4_temp_val -
               y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred4_temp_val)
    totReal = T.sum(y[:, :, 3])
    relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned4_val = 1 - T.sum(
        T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse4_val.name = 'mse4_val'
    mae4_val.name = 'mae4_val'

    theta_mu4_in_val = theta_mu4_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig4_in_val = theta_sig4_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    theta_mu5_temp_val.name = 'theta_mu5_val'
    theta_sig5_temp_val.name = 'theta_sig5_val'
    coeff5_temp_val.name = 'coeff5_val'
    y_pred5_temp_val.name = 'disaggregation5_val'
    mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape(
        (y.shape[0], y.shape[1],
         1)))**2)  # axis=None: averaged over all elements
    mae5_val = T.mean(
        T.abs_(y_pred5_temp_val -
               y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

    totPred = T.sum(y_pred5_temp_val)
    totReal = T.sum(y[:, :, 4])
    relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned5_val = 1 - T.sum(
        T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

    mse5_val.name = 'mse5_val'
    mae5_val.name = 'mae5_val'

    theta_mu5_in_val = theta_mu5_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig5_in_val = theta_sig5_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    prediction_val = T.concatenate([
        y_pred1_temp_val, y_pred2_temp_val, y_pred3_temp_val, y_pred4_temp_val,
        y_pred5_temp_val
    ],
                                   axis=2)

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val, theta_mu3_in_val,
        theta_sig3_in_val, coeff3_in_val, theta_mu4_in_val, theta_sig4_in_val,
        coeff4_in_val, theta_mu5_in_val, theta_sig5_in_val, coeff5_in_val)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = (mse1_val + mse2_val + mse3_val + mse4_val +
                   mse5_val) / y_dim
    totaMAE_val = (mae1_val + mae2_val + mae3_val + mae4_val +
                   mae5_val) / y_dim
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse1, mae1, mse2, mae2,
                mse3, mae3, mse4, mae4, mse5, mae5, y_pred1_temp, y_pred2_temp,
                y_pred3_temp, y_pred4_temp, y_pred5_temp
            ],
            indexSep=13,
            indexDDoutPlot=[13],  # adding indexes of ddout for the plotting
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        n_steps=n_steps,
                        extension=extension,
                        lr_iterations=lr_iterations,
                        k_speedOfconvergence=30)
    mainloop.run()

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],
        allow_input_downcast=True,
        outputs=[
            prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val,
            mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val,
            mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val,
            relErr3_val, relErr4_val, relErr5_val, propAssigned1_val,
            propAssigned2_val, propAssigned3_val, propAssigned4_val,
            propAssigned5_val
        ],
        updates=updates_val)
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(outputGeneration[1:14])
        testMetrics2.append(outputGeneration[14:])

        plt.figure(1)
        plt.plot(np.transpose(outputGeneration[0],
                              [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(2)
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(3)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])  #ORIGINAL 1,0,2
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 8].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 9].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 10].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 11].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 12].mean()

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()

    propAssigned1_test = testMetrics2[:, 5].mean()
    propAssigned2_test = testMetrics2[:, 6].mean()
    propAssigned3_test = testMetrics2[:, 7].mean()
    propAssigned4_test = testMetrics2[:, 8].mean()
    propAssigned5_test = testMetrics2[:, 9].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format(
        recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test,
        mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, mse_test,
        mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        propAssigned1_test, propAssigned2_test, propAssigned3_test,
        propAssigned4_test, propAssigned5_test))

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write(
        "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['mse1'][i]
        h = mainloop.trainlog.monitor['mae1'][i]

        d = mainloop.trainlog.monitor['mse2'][i]
        j = mainloop.trainlog.monitor['mae2'][i]
        e = mainloop.trainlog.monitor['mse3'][i]
        k = mainloop.trainlog.monitor['mae3'][i]
        f = mainloop.trainlog.monitor['mse4'][i]
        l = mainloop.trainlog.monitor['mae4'][i]
        g = mainloop.trainlog.monitor['mse5'][i]
        m = mainloop.trainlog.monitor['mae5'][i]
        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, c, d, e, f, g, h, j, k, l, m))

    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
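
For reference, a self-contained NumPy sketch of drawing one sample per row from a univariate Gaussian mixture -- the assumed behaviour of the GMM_sampleY helper used above (its real implementation lives elsewhere in this project):

import numpy as np

def gmm_sample(mu, sig, coeff, rng=np.random):
    # mu, sig, coeff: (batch, k) component means, std-devs and mixture
    # weights; each row of coeff is assumed to sum to 1 (softmax output)
    batch, k = mu.shape
    out = np.empty((batch, 1), dtype=mu.dtype)
    for n in range(batch):
        j = rng.choice(k, p=coeff[n])                 # pick a component
        out[n, 0] = rng.normal(mu[n, j], sig[n, j])   # sample from it
    return out

mu = np.array([[0.0, 5.0]])
sig = np.array([[0.1, 0.1]])
coeff = np.array([[0.9, 0.1]])
print(gmm_sample(mu, sig, coeff))  # usually near 0, sometimes near 5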
Esempio n. 47
0
    def __init__(
            self,
            glimpse_shape,
            glimpse_times,
            dim_hidden,
            dim_fc,
            dim_out,
            reward_base,
            rng_std=1.0,
            activation=T.tanh,
            bptt_truncate=-1,
            lmbd=0.1  # gdupdate + lmbd*rlupdate
    ):
        if reward_base is None:
            reward_base = np.zeros((glimpse_times)).astype('float32')
            reward_base[-1] = 1.0
        x = T.ftensor3('x')  # N * W * H
        y = T.ivector('y')  # label
        lr = T.fscalar('lr')
        reward_base = theano.shared(name='reward_base',
                                    value=np.array(reward_base).astype(
                                        theano.config.floatX),
                                    borrow=True)  # Time (vector)
        reward_bias = T.fvector('reward_bias')
        rng = MRG_RandomStreams(np.random.randint(9999999))
        #       rng = theano.tensor.shared_randomstreams.RandomStreams(np.random.randint(9999999))

        i = InputLayer(x)
        au = AttentionUnit(x, glimpse_shape, glimpse_times, dim_hidden, rng,
                           rng_std, activation, bptt_truncate)
        #       All hidden states are put into decoder
        #       layers = [i, au, InputLayer(au.output[:,:,:].flatten(2))]
        #       dim_fc = [glimpse_times*dim_hidden] + dim_fc + [dim_out]
        #       Only the last hidden states
        layers = [i, au, InputLayer(au.output[:, -1, :])]
        dim_fc = [dim_hidden] + dim_fc + [dim_out]
        for Idim, Odim in zip(dim_fc[:-1], dim_fc[1:]):
            fc = FullConnectLayer(layers[-1].output, Idim, Odim, activation,
                                  'FC')
            layers.append(fc)
        sm = SoftmaxLayer(layers[-1].output)
        layers.append(sm)

        output = sm.output  # N * classes
        hidoutput = au.output  # N * dim_output
        location = au.location  # N * T * dim_hidden
        prediction = output.argmax(1)  # N

        # calc
        equalvec = T.eq(prediction, y)  # [0, 1, 0, 0, 1 ...]
        correct = T.cast(T.sum(equalvec), 'float32')
        #       noequalvec = T.neq(prediction, y)
        #       nocorrect = T.cast(T.sum(noequalvec), 'float32')
        logLoss = T.log(output)[T.arange(y.shape[0]), y]
        reward_biased = T.outer(equalvec,
                                reward_base) - reward_bias.dimshuffle('x', 0)
        # N * Time
        # (R_t - b_t), where b = E[R]

        # gradient descent
        gdobjective = logLoss.sum() / x.shape[
            0]  # correct * dim_output (only has value on the correctly predicted sample)
        gdparams = reduce(lambda x, y: x + y.params, layers, [])
        gdupdates = map(lambda x: (x, x + lr * T.grad(gdobjective, x)),
                        gdparams)

        # reinforce learning
        rlobjective = (reward_biased.dimshuffle(0, 1, 'x') *
                       T.log(au.location_p)).sum() / x.shape[0]
        # location_p: N * Time * 2
        # location_logp: N * Time
        # reward_biased: N * 2
        rlparams = au.reinforceParams
        rlupdates = map(lambda x: (x, x + lr * lmbd * T.grad(rlobjective, x)),
                        rlparams)

        # how much the mean hidden state changes between consecutive steps
        deltas = T.stack(*[((au.output[:, i, :].mean(0) -
                             au.output[:, i + 1, :].mean(0))**2).sum()
                           for i in xrange(glimpse_times - 1)])
        # N * Time * dim_hidden

        print 'compile step()'
        self.step = theano.function([x, y, lr, reward_bias], [
            gdobjective, rlobjective, correct,
            T.outer(equalvec, reward_base)
        ],
                                    updates=gdupdates + rlupdates)
        #       print 'compile gdstep()'
        #       self.gdstep = theano.function([x, y, lr], [gdobjective, correct, location], updates=gdupdates)
        #       print 'compile rlstep()'
        #       self.rlstep = theano.function([x, y, lr], [rlobjective], updates=rlupdates)
        print 'compile predict()'
        self.predict = theano.function([x], prediction)
        #       print 'compile forward()'
        #       self.forward = theano.function([x], map(lambda x: x.output, layers)) #[layers[-3].output, fc.output])
        #       print 'compile error()'
        #       self.error = theano.function([x, y], gdobjective)
        print 'compile locate()'
        self.locate = theano.function(
            [x],
            [au.location_mean, location])  #[layers[-3].output, fc.output])
        print 'compile debug()'
        self.debug = theano.function([x, y, lr, reward_bias],
                                     [deltas, au.location_p],
                                     on_unused_input='warn')

        # self.xxx
        self.glimpse_times = glimpse_times
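
A small NumPy sketch of the baseline-subtracted REINFORCE objective built above: the log-probabilities of the sampled glimpse locations are weighted by the advantage (R_t - b_t), so locations that led to better-than-baseline rewards are reinforced. Names and shapes here are assumptions, not part of the original class.

import numpy as np

def reinforce_objective(log_p, reward, baseline):
    # log_p:    (N, T) log-probabilities of the sampled actions
    # reward:   (N, T) per-step reward (here: 1 at the last step if correct)
    # baseline: (T,)   running estimate of E[R_t]
    advantage = reward - baseline[None, :]        # (R_t - b_t)
    # maximizing this pushes up log p of actions with positive advantage
    return (advantage * log_p).sum() / log_p.shape[0]

log_p = np.log(np.full((4, 3), 0.5))
reward = np.zeros((4, 3)); reward[:2, -1] = 1.0   # first two samples correct
print(reinforce_objective(log_p, reward, baseline=reward.mean(axis=0)))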
Esempio n. 48
0
    def __init__(self,
                 num_actions,
                 id_num,
                 shared_arr=None,
                 num_moves=None,
                 args=None):
        print "USING OPTION CRITIC"
        self.args = args
        self.id_num = id_num
        self.num_actions = num_actions
        self.num_moves = num_moves
        self.reset_storing()
        self.rng = np.random.RandomState(100 + id_num)
        # input is 8x8
        model_network = [{
            "model_type": "conv",
            "filter_size": [4, 4],
            "pool": [1, 1],
            "stride": [2, 2],
            "out_size": 32,
            "activation": "relu"
        }, {
            "model_type": "conv",
            "filter_size": [3, 3],
            "pool": [1, 1],
            "stride": [2, 2],
            "out_size": 64,
            "activation": "relu"
        }, {
            "model_type": "mlp",
            "out_size": 48,
            "activation": "relu"
        }, {
            "model_type": "mlp",
            "out_size": 32,
            "activation": "relu"
        }]
        out = [None, model_network[-1]["out_size"]]
        self.conv = Model(model_network,
                          input_size=[
                              None, args.concat_frames *
                              (1 if args.grayscale else 3), 8, 8
                          ])
        self.termination_model = Model([{
            "model_type": "mlp",
            "out_size": args.num_options,
            "activation": "sigmoid",
            "W": 0
        }],
                                       input_size=out)
        self.Q_val_model = Model([{
            "model_type": "mlp",
            "out_size": args.num_options,
            "activation": "linear",
            "W": 0
        }],
                                 input_size=out)
        self.options_model = MLP3D(input_size=out[1],
                                   num_options=args.num_options,
                                   out_size=num_actions,
                                   activation="softmax")
        self.params = self.conv.params + self.Q_val_model.params + self.options_model.params + self.termination_model.params
        self.set_rms_shared_weights(shared_arr)

        x = T.ftensor4()
        y = T.fvector()
        a = T.ivector()
        o = T.ivector()
        delib = T.fscalar()

        s = self.conv.apply(x / np.float32(255))
        intra_option_policy = self.options_model.apply(s, o)

        q_vals = self.Q_val_model.apply(s)
        disc_q = theano.gradient.disconnected_grad(q_vals)
        current_option_q = q_vals[T.arange(o.shape[0]), o]
        disc_opt_q = disc_q[T.arange(o.shape[0]), o]
        terms = self.termination_model.apply(s)
        o_term = terms[T.arange(o.shape[0]), o]
        V = T.max(q_vals, axis=1) * (1 - self.args.option_epsilon) + (
            self.args.option_epsilon * T.mean(q_vals, axis=1))
        disc_V = theano.gradient.disconnected_grad(V)

        aggr = T.mean  # T.sum
        log_eps = 0.0001

        critic_cost = aggr(args.critic_coef * 0.5 *
                           T.sqr(y - current_option_q))
        termination_grad = aggr(o_term * ((disc_opt_q - disc_V) + delib))
        entropy = -aggr(
            T.sum(intra_option_policy * T.log(intra_option_policy + log_eps),
                  axis=1)) * args.entropy_reg
        pg = aggr(
            (T.log(intra_option_policy[T.arange(a.shape[0]), a] + log_eps)) *
            (y - disc_opt_q))
        cost = pg + entropy - critic_cost - termination_grad

        grads = T.grad(cost * args.update_freq, self.params)
        # grads = T.grad(cost, self.params)
        updates, grad_rms, self.rms_weights = rmsprop(self.params,
                                                      grads,
                                                      clip=args.clip,
                                                      clip_type=args.clip_type)
        self.share_rms(shared_arr)

        self.get_state = theano.function([x], s, on_unused_input='warn')
        self.get_policy = theano.function([s, o], intra_option_policy)
        self.get_termination = theano.function([x], terms)
        self.get_q = theano.function([x], q_vals)
        self.get_q_from_s = theano.function([s], q_vals)
        self.get_V = theano.function([x], V)

        self.rms_grads = theano.function([x, a, y, o, delib],
                                         grad_rms,
                                         updates=updates,
                                         on_unused_input='warn')
        print "ALL COMPILED"

        if not self.args.testing:
            self.init_tracker()
        self.initialized = False
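
The state value V above mixes greedy and uniform option selection (epsilon-soft options). A minimal NumPy sketch of that expression:

import numpy as np

def option_value(q_vals, option_epsilon):
    # with prob. (1 - eps) the best option is taken, with prob. eps a
    # uniformly random one, so the state value is the weighted mix
    return (q_vals.max(axis=1) * (1.0 - option_epsilon)
            + option_epsilon * q_vals.mean(axis=1))

q = np.array([[1.0, 3.0], [2.0, 2.0]])
print(option_value(q, option_epsilon=0.1))  # [0.9*3 + 0.1*2, 2.0] = [2.9, 2.0]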
Esempio n. 49
0
    def fit(self,
            learning_rate=1e-6,
            momentum=1e-8,
            batch=200,
            activation=T.tanh,
            depth=7):
        self.f = activation

        #define set of inputs and corresponding outputs for supervised learning
        X = [[]]
        Y = [[]]

        #theano input-output vectors
        thX = T.fvector('X')
        thY = T.ivector('Y')  # target indices must be integers for indexing
        thK = T.iscalar('depth')

        #recurrent evaluation step: returns the next pair of hidden/output values
        def recurrence(x_t, h_t1):
            #update the recurrent hidden state
            #h_t = f(Wx*x + Wh*h_t1 + b)
            h_t = self.f(x_t.dot(self.Wx) + h_t1.dot(self.Wh) + self.bh)
            #compute the current output; in this model it is the distribution
            #over the next time step: y_t = f(Wo*h_t + b)
            y_t = self.f(h_t.dot(self.Wo) + self.bo)
            return h_t, y_t

        #define theano scan function for call
        [h, y], _ = th.scan(
            fn=recurrence,
            outputs_info=[self.h0, None],
            sequences=thX,
            n_steps=thK,
        )

        #define the prediction: normalize the outputs with a softmax
        prediction = T.nnet.softmax(y)

        #define the learning model
        #the cost is the usual log-loss (negative log-likelihood)
        cost = -T.mean(T.log(prediction[T.arange(thY.shape[0]), thY]))
        #use theano's grad function for the gradients
        grads = T.grad(cost, self.params)
        #calculate the change of params for momentum
        #init to all zero
        dparams = [theano.shared(p.get_value() * 0) for p in self.params]

        #define the update using gradient descent with momentum
        #i.e. w <- w + momentum * dw - n * grad_w(E)
        #     dw <- momentum * dw - n * grad_w(E)
        updates = [(p, p + momentum * dp - learning_rate * g)
                   for p, dp, g in zip(self.params, dparams, grads)
                   ] + [(dp, momentum * dp - learning_rate * g)
                        for dp, g in zip(dparams, grads)]

        #define the complete training model for theano
        self.predict_op = th.function(inputs=[thX, thK], outputs=prediction)
        self.train_op = th.function(inputs=[thX, thY, thK],  # thK feeds the scan's n_steps
                                    outputs=[cost, prediction, y],
                                    updates=updates)
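
A plain NumPy sketch of the momentum rule encoded by the paired update lists above (the parameter and its velocity move in lock-step):

import numpy as np

def momentum_step(w, dw, grad, learning_rate, momentum):
    # dw <- momentum * dw - lr * grad_w(E)
    # w  <- w + dw      (equivalent to w + momentum*dw - lr*grad)
    dw_new = momentum * dw - learning_rate * grad
    return w + dw_new, dw_new

w, dw = np.array([1.0]), np.array([0.0])
for _ in range(3):
    grad = 2.0 * w   # gradient of E(w) = w**2
    w, dw = momentum_step(w, dw, grad, learning_rate=0.1, momentum=0.9)
print(w)  # decreasing toward the minimum at 0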
Esempio n. 50
0
    def build_train_func(self,
                         solver_mode="sgd",
                         cost_factors=[],
                         use_acc_mode=False,
                         skip_build=False):

        #arguments to function
        logging.info(
            "Building training functions - solver: %s, use_acc_mode: %s" %
            (solver_mode, use_acc_mode))
        iteration = tensor.fscalar()
        learn_rate = tensor.fscalar()
        momentum = tensor.fvector()
        decay = tensor.fscalar()

        #find costs
        self.yt = []
        self.cost_list = []
        self.cost_layers = []
        self.cost_layer_names = []
        for layer in self.layers:
            yt_index = tensor.lvector("target index %i" %
                                      len(self.cost_layers))
            yt_value = tensor.fvector("target value %i" %
                                      len(self.cost_layers))
            cost = layer.cost(yt_index, yt_value)
            if cost is not None:
                self.yt += [yt_index, yt_value]
                self.cost_list.append(cost)
                self.cost_layers.append(layer)
                self.cost_layer_names.append(layer.type_name)

        self.cost_factors = [1.0] * len(self.cost_list) if len(
            cost_factors) == 0 else cost_factors
        assert len(self.cost_factors) == len(
            self.cost_list
        ), "Different number of cost factors (%i) and cost layers (%i)" % (len(
            self.cost_factors), len(self.cost_layers))
        logging.info("Found %i costs in model:" % len(self.cost_layers),
                     list(zip(self.cost_layer_names, self.cost_factors)))

        self.train_cost = tensor.as_tensor_variable(0)
        for i, cost in enumerate(self.cost_list):
            self.train_cost += self.cost_factors[i] * cost

        if self.gradient_clip > 0.0:
            logging.info("Clipping gradient to [%f,%f]" %
                         (-self.gradient_clip, self.gradient_clip))
            self.train_cost = theano.gradient.grad_clip(
                self.train_cost, -self.gradient_clip, self.gradient_clip)
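            # grad_clip does not change the forward value of the cost; the
            # clipping only applies to gradients flowing back through it.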

        #find split points
        split_points = [0]
        self.use_split_mode = False
        for index, layer in enumerate(self.layers):
            if layer.has_split:
                self.use_split_mode = True
                split_points.append(index)
        split_points.append(len(self.layers))

        if self.use_split_mode:
            logging.verbose("Using split mode with split points:",
                            split_points)
            self.func["train_fwd"] = []
            self.func["train_bwd"] = []

        self.updates = []
        for sp in range(len(split_points) - 1):

            logging.info("Building training functions for layers %i-%i" %
                         (split_points[sp], split_points[sp + 1]))

            split_start = self.layers[split_points[sp]] if sp > 0 else None
            split_end = self.layers[split_points[sp + 1]] if (
                sp + 2) < len(split_points) else None
            split_cost = self.train_cost if split_end is None else None
            split_layers = []
            for i, layer in enumerate(self.layers):
                if (i > split_points[sp]) and (i < split_points[sp + 1]):
                    split_layers.append(layer)

            #determine known_grads provided by previous backward passes
            from collections import OrderedDict
            split_known_grads = OrderedDict()
            for i in range(sp + 1, len(split_points) - 1):
                split_known_grads.update(
                    self.layers[split_points[i]].split_known_grads())

            if len(split_known_grads) == 0:
                split_known_grads = None

            #per-parameter update rules for the supported solvers
            def get_sgd_updates(p, g):
                m = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                rho = tensor.switch(tensor.gt(iteration, 0), momentum[0], 0.0)
                m_update = rho * m + (1.0 - rho) * g
                p_update = p - learn_rate * m_update
                return [(p, p_update), (m, m_update)]

            def get_torch_updates(p, g):
                m = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                rho = tensor.switch(tensor.gt(iteration, 0), momentum[0], 0.0)
                m_update = rho * m + g
                p_update = p - learn_rate * (g + momentum[0] * m_update)
                return [(p, p_update), (m, m_update)]

            def get_adam_updates(p, g):
                eps = 1e-8
                m = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                v = theano.shared(numpy.zeros(p.shape.eval(),
                                              dtype=theano.config.floatX),
                                  broadcastable=p.broadcastable,
                                  borrow=True)
                m_update = momentum[0] * m + (1.0 - momentum[0]) * g
                v_update = momentum[1] * v + (1.0 - momentum[1]) * (g * g)
                m_hat = m_update / (1.0 -
                                    tensor.pow(momentum[0], iteration + 1))
                v_hat = v_update / (1.0 -
                                    tensor.pow(momentum[1], iteration + 1))
                p_update = p - learn_rate * m_hat / (tensor.sqrt(v_hat) + eps)
                return [(p, p_update), (m, m_update), (v, v_update)]
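            # Note on the three rules above: get_sgd_updates keeps an
            # exponential moving average of the gradient, get_torch_updates
            # appears to follow torch-style SGD with a Nesterov-like
            # correction (p - lr * (g + momentum * m)), and get_adam_updates
            # applies the standard Adam bias correction through
            # 1 - momentum**(iteration + 1).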

            #append parameter updates
            params = []
            params_decay = []
            for layer in split_layers:
                params += layer.weights()
                params_decay += [True] * len(layer.weights())
                params += layer.biases()
                params_decay += [False] * len(layer.biases())

            #build updates
            print("known grads:", split_known_grads)
            grads = tensor.grad(split_cost,
                                params,
                                known_grads=split_known_grads)
            solver_updates = []
            for p, g, p_decay in zip(params, grads, params_decay):

                #add L2 weight decay if needed
                if p_decay or self.bias_decay:
                    g += decay * p

                if solver_mode == "adam":
                    solver_updates += get_adam_updates(p, g)
                elif solver_mode == "torch" or solver_mode == "nesterov":
                    solver_updates += get_torch_updates(p, g)
                else:
                    solver_updates += get_sgd_updates(p, g)

            #append per layer updates
            local_updates = solver_updates + sum(
                [layer.updates(self.train_cost) for layer in split_layers], [])

            #all updates
            self.updates += local_updates

            #skip building the actual theano functions (useful if you only want the updates)
            if skip_build:
                continue

            global debug_train
            if debug_train:
                logging.warning("WARNING: Debug mode is active!")
                from theano.compile.nanguardmode import NanGuardMode
                debug_mode = theano.compile.MonitorMode(
                    post_func=debug_detect_errors)
            else:
                debug_mode = None

            if self.use_split_mode:

                if split_end is not None:
                    updates = sum(
                        [layer.split_forward() for layer in split_layers], [])
                    updates += split_end.split_forward()

                    print("fwd updates:", updates)
                    f = theano.function([self.input], [],
                                        updates=updates,
                                        givens=[(denet.layer.get_train(),
                                                 tensor.cast(1, 'int8'))],
                                        on_unused_input='ignore',
                                        mode=debug_mode)
                    self.func["train_fwd"].append(f)

                outputs = ([self.train_cost] +
                           self.cost_list) if split_end is None else []
                updates = sum([
                    layer.split_backward(split_cost, split_known_grads)
                    for layer in split_layers
                ], [])
                if split_start is not None:
                    updates += split_start.split_backward(
                        split_cost, split_known_grads)

                print("bwd updates:", updates)
                updates += local_updates
                f = theano.function([
                    denet.layer.get_epoch(), iteration, learn_rate, momentum,
                    decay, self.input
                ] + self.yt,
                                    outputs,
                                    updates=updates,
                                    givens=[(denet.layer.get_train(),
                                             tensor.cast(1, 'int8'))],
                                    on_unused_input='ignore',
                                    mode=debug_mode)
                self.func["train_bwd"].insert(0, f)

            elif use_acc_mode:
                acc_counter = theano.shared(
                    numpy.array(0, dtype=theano.config.floatX))
                begin_updates = [(acc_counter, tensor.zeros_like(acc_counter))]
                step_updates = [(acc_counter, acc_counter + 1)]
                end_updates = []
                self.acc_params = []
                for p_dest, p_src in self.updates:
                    p_acc = theano.shared(numpy.zeros(
                        p_dest.shape.eval(), dtype=theano.config.floatX),
                                          broadcastable=p_dest.broadcastable,
                                          borrow=True)
                    begin_updates.append((p_acc, tensor.zeros_like(p_acc)))
                    step_updates.append((p_acc, p_acc + p_src))
                    end_updates.append((p_dest, p_acc / acc_counter))
                    self.acc_params.append(p_acc)

                logging.info(
                    "Constructing parameter accumulate update functions (solver=%s)"
                    % solver_mode)
                self.func["train_begin"] = theano.function(
                    [], [], updates=begin_updates)
                self.func["train_step"] = theano.function(
                    [
                        denet.layer.get_epoch(), iteration, learn_rate,
                        momentum, decay, self.input
                    ] + self.yt, [self.train_cost] + self.cost_list,
                    updates=step_updates,
                    givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                    on_unused_input='ignore',
                    allow_input_downcast=True,
                    mode=debug_mode)
                self.func["train_end"] = theano.function([], [],
                                                         updates=end_updates)
            else:
                logging.info(
                    "Constructing parameter update function (solver=%s)" %
                    solver_mode)

                #wrap inputs with borrow=True to avoid unnecessary copying
                f_input = theano.In(self.input, borrow=True)
                f_yt = [theano.In(yt, borrow=True) for yt in self.yt]
                self.func["train_step"] = theano.function(
                    [
                        denet.layer.get_epoch(), iteration, learn_rate,
                        momentum, decay, f_input
                    ] + f_yt, [self.train_cost] + self.cost_list,
                    updates=self.updates,
                    givens=[(denet.layer.get_train(), tensor.cast(1, 'int8'))],
                    on_unused_input='ignore',
                    allow_input_downcast=True,
                    mode=debug_mode)

                logging.verbose("Exporting graph...")
                with open("graph.txt", "w") as f:
                    theano.printing.debugprint(self.func["train_step"],
                                               file=f,
                                               print_type=True)
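The use_acc_mode branch above averages the proposed parameter values over several train_step calls before committing them. A minimal sketch of the same begin/step/end pattern in plain NumPy; all names here are illustrative, not from the original:

import numpy as np

acc, counter = None, 0

def train_begin(p):
    global acc, counter
    acc, counter = np.zeros_like(p), 0        # reset the accumulator

def train_step(p_new):
    global acc, counter
    acc, counter = acc + p_new, counter + 1   # accumulate proposed values

def train_end():
    return acc / counter                      # commit the average

p = np.ones(4)
train_begin(p)
train_step(p * 0.9)
train_step(p * 1.1)
print(train_end())   # -> [1. 1. 1. 1.]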
Esempio n. 51
0
def test_GpuCrossentropySoftmaxArgmax1HotWithBias():
    """
    This is basic test for GpuCrossentropySoftmaxArgmax1HotWithBias

    We check that we loop when their is too much threads

    """

    n_in = 1000
    batch_size = 4097
    n_out = 1250

    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        n_in = 4098
        n_out = 4099

    x = T.fmatrix('x')
    y = T.lvector('y')

    b = T.fvector('b')
    #W = T.fmatrix('W')

    #we precompute the dot product with a big shape beforehand so that the test
    #of GpuCrossentropySoftmax1HotWithBiasDx does not fail with the error
    #"the launch timed out and was terminated" on GPU cards that are not
    #powerful enough. We need the big shape to check the corner cases.
    dot_result = T.fmatrix('dot_result')

    # Seed numpy.random with config.unittests.rseed
    utt.seed_rng()

    xx = numpy.asarray(numpy.random.rand(batch_size, n_in),
                       dtype=numpy.float32)
    #yy = numpy.ones((batch_size,), dtype='float32')
    yy = numpy.ones((batch_size, ), dtype='int32')
    b_values = numpy.zeros((n_out, ), dtype='float32')
    W_values = numpy.asarray(numpy.random.rand(n_in, n_out), dtype='float32')

    dot_value = numpy.asarray(numpy.dot(xx, W_values), dtype='float32')
    del W_values
    p_y_given_x = T.nnet.softmax(dot_result + b)
    y_pred = T.argmax(p_y_given_x, axis=-1)
    loss = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
    dW = T.grad(loss, dot_result)
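    #for this loss the gradient w.r.t. the pre-softmax activations is
    #(softmax(dot_result + b) - one_hot(y)) / batch_size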
    classify = theano.function(inputs=[y, b, dot_result],
                               outputs=[loss, y_pred, dW],
                               mode=mode_without_gpu)
    classify_gpu = theano.function(inputs=[y, b, dot_result],
                                   outputs=[loss, y_pred, dW],
                                   mode=mode_with_gpu)
    #theano.printing.debugprint(classify)
    #theano.printing.debugprint(classify_gpu)

    assert any([
        isinstance(node.op, T.nnet.CrossentropySoftmaxArgmax1HotWithBias)
        for node in classify.maker.fgraph.toposort()
    ])
    assert any([
        isinstance(node.op, cuda.nnet.GpuCrossentropySoftmaxArgmax1HotWithBias)
        for node in classify_gpu.maker.fgraph.toposort()
    ])

    out = classify(yy, b_values, dot_value)
    gout = classify_gpu(yy, b_values, dot_value)

    assert len(out) == len(gout) == 3
    assert numpy.allclose(out[0], gout[0])
    assert numpy.allclose(out[2], gout[2],
                          atol=3e-6), numpy.absolute(gout - out).max()
    assert numpy.allclose(out[1],
                          gout[1]), [(id, out[1][id], gout[1][id], val)
                                     for id, val in enumerate(out[1] - gout[1])
                                     if val != 0]
Esempio n. 52
0
    margin = T.scalar('margin')

    loss = mean_loss_kl_div(predictions, targets, margin)
    loss_fun = theano.function([predictions, targets, margin], loss)
    mean_err = loss_fun(test_pred, test_targ, test_margin)

    foreach_prep = foreach(predictions, targets, margin)
    foreach_fun = theano.function([predictions, targets, margin], foreach_prep)
    err_mat = foreach_fun(test_pred, test_targ, test_margin)
    err = err_mat.sum() / ((len(err_mat) - 1) * len(err_mat))

    def loss(predictions, targets, margin, f):
        assert len(predictions) == len(targets)
        L_sum = 0
        for i in range(len(predictions)):
            for j in range(len(predictions)):
                L_sum += f(predictions[i], targets[i], predictions[j],
                           targets[j], margin)
        return L_sum / (2 * len(predictions))

    xp = T.scalar('xp')
    xq = T.scalar('xq')
    p = T.fvector('P')
    q = T.fvector('Q')
    result = loss_with_kl_div(p, xp, q, xq, margin)
    f = theano.function([p, xp, q, xq, margin], result)
    mean_np = loss(test_pred, test_targ, test_margin, f)

    assert (mean_err == err == mean_np)
    print('Run without errors!')
Esempio n. 53
0
def soft_cascade_LR_1LNN(trX1, trY1, teX1, teY1, trX2, teX2, lambda_vector,
                         K1):

    (N, D1) = trX2.shape
    D = trX1.shape[1]
    C = 2
    t1 = ComputeComplexity([D1, C])
    t2 = ComputeComplexity([D, K1, C])

    n_it = 10000
    time1 = np.zeros((len(lambda_vector), 1))
    accuracy1 = np.zeros((len(lambda_vector), 1))
    F1 = np.zeros((len(lambda_vector), 1))
    nnz_first = np.zeros((len(lambda_vector), 1))

    for i, plambda in enumerate(lambda_vector):

        X = T.fmatrix()
        F = T.fmatrix()
        Y = T.fvector()

        w_l = CF.init_weights((D1, ))
        b_l = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                            broadcastable=(True, ))
        # w_l.set_value(np.zeros((D1,)))
        # b_l.set_value(np.zeros((1,)))

        w_h1 = CF.init_weights((D, K1))
        b1 = CF.init_weights((K1, ))
        w_o = CF.init_weights((K1, ))
        bo = theano.shared(CF.floatX(np.random.randn(1) * 0.01),
                           broadcastable=(True, ))

        pygx1 = CF.model00(F, w_l, b_l)
        pygx2 = CF.model3(X, w_h1, w_o, b1, bo, 0, 1)
        pygx_final = pygx1 * pygx2

        yhat1 = (pygx1 > 0.5)
        yhat = (pygx2 > 0.5)

        reg = T.mean(t1 + t2 * pygx1)
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final,
                                                 Y)) + plambda * reg

        params = [w_l, b_l, w_h1, w_o, b1, bo]
        updates = lasagne.updates.rmsprop(cost,
                                          params,
                                          learning_rate=0.001 * 5,
                                          rho=0.9,
                                          epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)

        train = theano.function(inputs=[X, F, Y],
                                outputs=cost,
                                updates=updates,
                                allow_input_downcast=True)
        reg_value = theano.function(inputs=[F],
                                    outputs=reg,
                                    allow_input_downcast=True)

        predict_first = theano.function(inputs=[F],
                                        outputs=yhat1,
                                        allow_input_downcast=True)
        predict_second = theano.function(inputs=[X],
                                         outputs=yhat,
                                         allow_input_downcast=True)

        max_iter = 300
        for j in range(max_iter):
            c = train(trX1, trX2, trY1)
            r = reg_value(trX2)
            print(c - plambda * r, plambda * r)

        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX2)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]

        # check that we get 100 percent recall from the first stage
        inds_true = np.where(teY1 == 1)[0]
        int_result = np.intersect1d(inds_test, inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_test.shape[0], inds_true.shape[0], int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %
              (r1, p1, a1))

        teX11 = teX1[inds_test, :]

        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1

        teY2 = np.zeros(teY1.shape, dtype=int)
        teY2.fill(0)
        teY2[inds_test] = teQ2

        inds_second = np.where(teY2 == 1)[0]
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %
              (inds_second.shape[0], inds_true.shape[0], int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %
              (r2, p2, a2))
        F1[i] = 2 * r2 * p2 / (r2 + p2)
        accuracy1[i] = a2

    return time1, accuracy1, F1, nnz_first
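The regularizer T.mean(t1 + t2 * pygx1) used above is the expected per-example evaluation cost of the cascade: every example pays the first-stage cost t1, and only the fraction forwarded by the first stage (probability pygx1) also pays the second-stage cost t2. A small NumPy illustration with made-up costs and probabilities:

import numpy as np

t1, t2 = 1.0, 50.0                      # per-stage evaluation costs
p1 = np.array([0.9, 0.1, 0.4])          # first-stage pass probabilities
expected_cost = np.mean(t1 + t2 * p1)   # 1.0 + 50.0 * mean(p1)
print(expected_cost)                    # 24.33...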
Esempio n. 54
0
def cascade_three_stage(trX1, trY1, teX1, teY1, trX2, teX2, trX3, teX3, w_h1, w_h2, w_o, b1, b2, bo, v_h1, v_o, c1, co, plambda, a):
    
    (N,D) = trX3.shape
    lambda_vector = plambda
    
    n_it = 10000
    time1 = np.zeros((len(lambda_vector),1))
    accuracy1 = np.zeros((len(lambda_vector),1))
    F1 = np.zeros((len(lambda_vector),1))
    nnz_first = np.zeros((len(lambda_vector),1))
    nnz_second = np.zeros((len(lambda_vector),1))
    
    for i,plambda in enumerate(lambda_vector):
                    
        X = T.fmatrix()
        F = T.fmatrix()
        E = T.fmatrix()
        Y = T.fvector()
               
        w_l = CF.init_weights((D,))
        b_l  = theano.shared(CF.floatX(np.random.randn(1) * 0.01), broadcastable=(True,))        
        w_l.set_value(np.zeros((D,)))    
        b_l.set_value(np.zeros((1,)))               
               
        pygx1 = CF.model00(E, w_l, b_l)
        pygx2 = CF.model3(F, v_h1, v_o, c1, co, 0, 1)
        pygx = CF.model(X, w_h1, w_h2, w_o, b1, b2, bo, 0, 1)
        
        yhat1 = (pygx1 > 0.5)
        yhat2 = (pygx2 > 0.5)
        yhat = (pygx > 0.5)
        
        f = lambda x, a: 1/(1+T.exp(-a*(x-0.5)))
        
        pygx_final = (1-f(pygx1,a))*pygx1 + (1-f(pygx2,a))*f(pygx1,a)*pygx2 + f(pygx1, a)*f(pygx2, a)*pygx
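        # soft routing: examples the first stage scores low (f(pygx1,a) near 0)
        # keep the cheap first-stage prediction; higher-scoring examples are
        # forwarded, with the gates f(.) blending the three stages' outputs.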

        reg = T.mean(f(pygx1,a))  
        cost = T.mean(T.nnet.binary_crossentropy(pygx_final, Y)) + plambda*reg
         
        params = [w_l, b_l]
        updates = lasagne.updates.rmsprop(cost, params, learning_rate=0.5, rho=0.9, epsilon=1e-06)
        # updates = lasagne.updates.adagrad(cost, params, learning_rate=1, epsilon=1e-06)
        
        train = theano.function(inputs=[X, F, E, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        reg_value = theano.function(inputs=[E], outputs=reg, allow_input_downcast=True)
        
        predict_first = theano.function(inputs=[E], outputs=yhat1, allow_input_downcast=True)
        predict_second = theano.function(inputs=[F], outputs=yhat2, allow_input_downcast=True)
        predict_third = theano.function(inputs=[X], outputs=yhat, allow_input_downcast=True)
        
        max_iter = 500
        for j in range(max_iter):
            # c = train(trX1, trY1)
            c = train(trX1, trX2, trX3, trY1) 
            # r = reg_value(trX1)
            r = reg_value(trX3) 
            print(c-plambda*r,plambda*r)
            # cost = train(trX1, trY1)
        
        start1 = time.clock()
        for t in range(n_it):
            teQ1 = predict_first(teX3)
        end1 = time.clock()
        time1[i] = end1 - start1
        inds_test = np.where(teQ1 == 1)[0]
        nnz_first[i] = inds_test.shape[0]

        # check that we get 100 percent recall from the first stage
        inds_true = np.where( teY1 == 1 )[0]
        int_result = np.intersect1d(inds_test,inds_true)
        print("first stage nzs:%d,true nzs:%d,intersection:%d" %(inds_test.shape[0],inds_true.shape[0],int_result.shape[0]))
        r1 = int_result.shape[0] / inds_true.shape[0]
        p1 = int_result.shape[0] / inds_test.shape[0]
        a1 = np.mean(teY1 == teQ1)
        print("first stage: recall = %f, precision = %f, accuracy = %f" %(r1,p1,a1))
        
        teX22 = teX2[inds_test,:]
                
        start1 = time.clock()
        for t in range(n_it):
            teQ2 = predict_second(teX22)
        end1 = time.clock()
        time1[i] += end1 - start1
        inds_test2 = np.where(teQ2 == 1)[0]
        nnz_second[i] = inds_test2.shape[0]
            
        teY2 = np.zeros(teY1.shape,dtype = int)
        teY2.fill(0)
        teY2[inds_test] = teQ2
        
        inds_second = np.where( teY2 == 1 )[0]            
        int_result = np.intersect1d(inds_second, inds_true)
        print("second stage nzs:%d,true nzs:%d,intersection:%d" %(inds_second.shape[0],inds_true.shape[0],int_result.shape[0]))
        r2 = int_result.shape[0] / inds_true.shape[0]
        p2 = int_result.shape[0] / inds_second.shape[0]
        a2 = np.mean(teY1 == teY2)
        print("second stage: recall = %f, precision = %f, accuracy = %f" %(r2,p2,a2))
            
        # teX1 = teX1[inds_test2,:]
        teX11 = teX1[inds_test[inds_test2],:]
            
        start1 = time.clock()
        for t in range(n_it):
            teQ3 = predict_third(teX11)
        end1 = time.clock()
        time1[i] += end1 - start1            
            
        teY3 = np.zeros(teY1.shape,dtype = int)
        teY3.fill(0)
        teY3[inds_test[inds_test2]] = teQ3
        accuracy1[i] = np.mean(teY1 == teY3)    
        
        inds_third = np.where( teY3 == 1 )[0]
        int_result2 = np.intersect1d(inds_third,inds_true)
        print("third stage nzs:%d,true nzs:%d,intersection:%d" %(inds_third.shape[0],inds_true.shape[0],int_result2.shape[0]))
        r3 = int_result2.shape[0] / inds_true.shape[0]
        p3 = int_result2.shape[0] / inds_third.shape[0]
        print("third stage: recall = %f, precision = %f, accuracy = %f" %(r3, p3, accuracy1[i]))
        F1[i] = 2*r3*p3/(r3 + p3)
        
    return time1, accuracy1, F1, nnz_first, nnz_second
Esempio n. 55
0
    def test_softmax_grad(self):
        def cmp(n, m, f, f_gpu):
            data = numpy.arange(n * m, dtype='float32').reshape(n, m)
            gdata = numpy.asarray(data)[:, :, None, None]

            out = f(data)
            gout = numpy.asarray(f_gpu(gdata))[:, :, 0, 0]
            utt.assert_allclose(out, gout)

        x = T.matrix('x', 'float32')
        x_gpu = T.tensor4('x_gpu', 'float32')
        f_z = T.nnet.softmax_op
        f_gpu = dnn.GpuDnnSoftmax('accurate', 'channel')

        # Verify the grad operation
        dims = (2, 3, 4, 5)
        gdata = numpy.arange(numpy.product(dims),
                             dtype='float32').reshape(dims)
        T.verify_grad(f_gpu, [gdata], rng=numpy.random, mode=mode_with_gpu)

        # Verify that the CPU and GPU implementations return the same results
        # up to a tolerance.

        self._test_softmax(x, x_gpu, f_z, f_gpu, cmp)

        self._test_softmax(x, x, f_z, f_z, self._cmp)

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is applied when cudnn is required
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        val = numpy.random.rand(5).astype('float32')
        out_dnn = f(val)
        assert (len(
            [i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)

        # Verify that the SoftmaxGrad -> Gpu[Dnn]SoftmaxGrad
        # optimization is not applied when cudnn is excluded or not
        # available
        mode_wo_cudnn = mode_with_gpu.excluding("cudnn")
        y = T.fvector('y')
        f = theano.function([y],
                            T.grad(T.nnet.softmax(y).mean(), y),
                            mode=mode_wo_cudnn)
        sorted_f = f.maker.fgraph.toposort()
        out_cpu = f(val)
        utt.assert_allclose(out_dnn, out_cpu)
        assert (len(
            [i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 0)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 1)

        # Verify that the SoftmaxGrad -> GpuDnnSoftmaxGrad do not
        # crash with manual graph
        y = T.fvector('y')
        o = theano.tensor.nnet.SoftmaxGrad()(y, y * 2)
        f = theano.function([y], o, mode=mode_with_gpu)
        sorted_f = f.maker.fgraph.toposort()
        assert (len(
            [i for i in sorted_f if isinstance(i.op, self.gpu_grad_op)]) == 1)
        assert (len([
            i for i in sorted_f
            if isinstance(i.op, theano.tensor.nnet.SoftmaxGrad)
        ]) == 0)
Esempio n. 56
0
import theano
import theano.tensor as T
import numpy as np
import odl
import odl.contrib.theano

# --- Wrap ODL operator as Theano operator --- #

# Define ODL operator
matrix = np.array([[1., 2.], [0., 0.], [0., 1.]])
odl_op = odl.MatrixOperator(matrix)

# Define evaluation point
x = [1., 2.]

# Create Theano placeholders
x_theano = T.fvector('x')

# Create Theano layer from ODL operator
odl_op_layer = odl.contrib.theano.TheanoOperator(odl_op)

# Build computation graph
y_theano = odl_op_layer(x_theano)
y_theano_func = theano.function([x_theano], y_theano)

# Evaluate using Theano and compare to odl_op(x)
print('Theano eval    : ', y_theano_func(x))
print('ODL eval       : ', odl_op(x))

# --- Wrap ODL functional as Theano operator --- #

# Define ODL cost and composed functional
Esempio n. 57
0
def evaluate_lenet5(learning_rate=0.1,
                    n_epochs=2000,
                    word_nkerns=500,
                    char_nkerns=100,
                    batch_size=1,
                    window_width=3,
                    emb_size=500,
                    char_emb_size=100,
                    hidden_size=200,
                    margin=0.5,
                    L2_weight=0.0003,
                    update_freq=1,
                    norm_threshold=5.0,
                    max_truncate=40,
                    max_char_len=40,
                    max_des_len=20,
                    max_relation_len=5,
                    max_Q_len=30,
                    train_neg_size=6,
                    neg_all=100,
                    train_size=75893,
                    test_size=19168,
                    mark='_BiasedMaxPool_lr0.1_word500_char100_noDes_ent2.0'
                    ):  #train_size=75909, test_size=17386
    #     maxSentLength=max_truncate+2*(window_width-1)
    model_options = locals().copy()
    print "model options", model_options
    rootPath = '/mounts/data/proj/wenpeng/Dataset/freebase/SimpleQuestions_v2/'
    triple_files = [
        'annotated_fb_data_train.entitylinking.top20_succSet_asInput.txt',
        'annotated_fb_data_test.entitylinking.top20_succSet_asInput.fromMo_FB5M.txt'
    ]

    rng = numpy.random.RandomState(23455)
    word2id, char2id = load_word2id_char2id(mark)
    #     datasets, datasets_test, length_per_example_test, vocab_size, char_size=load_test_or_valid(triple_files[0], triple_files[1], max_char_len, max_des_len, max_relation_len, max_Q_len, train_size, test_size)#max_char_len, max_des_len, max_relation_len, max_Q_len

    datasets_test, length_per_example_test, word2id, char2id = load_test_or_valid(
        triple_files[1], char2id, word2id, max_char_len, max_des_len,
        max_relation_len, max_Q_len, test_size)
    vocab_size = len(word2id)
    char_size = len(char2id)
    print 'vocab_size:', vocab_size, 'char_size:', char_size

    #     train_data=datasets
    #     valid_data=datasets[1]
    test_data = datasets_test
    #     result=(pos_entity_char, pos_entity_des, relations, entity_char_lengths, entity_des_lengths, relation_lengths, mention_char_ids, remainQ_word_ids, mention_char_lens, remainQ_word_lens, entity_scores)
    #
    #     train_pos_entity_char=train_data[0]
    #     train_pos_entity_des=train_data[1]
    #     train_relations=train_data[2]
    #     train_entity_char_lengths=train_data[3]
    #     train_entity_des_lengths=train_data[4]
    #     train_relation_lengths=train_data[5]
    #     train_mention_char_ids=train_data[6]
    #     train_remainQ_word_ids=train_data[7]
    #     train_mention_char_lens=train_data[8]
    #     train_remainQ_word_len=train_data[9]
    #     train_entity_scores=train_data[10]

    test_pos_entity_char = test_data[0]
    #    test_pos_entity_des=test_data[1]
    test_relations = test_data[2]
    test_entity_char_lengths = test_data[3]
    #    test_entity_des_lengths=test_data[4]
    test_relation_lengths = test_data[5]
    test_mention_char_ids = test_data[6]
    test_remainQ_word_ids = test_data[7]
    test_mention_char_lens = test_data[8]
    test_remainQ_word_len = test_data[9]
    test_entity_scores = test_data[10]
    #
    #     test_pos_entity_char=test_data[0]       #matrix, each row for line example, all head and tail entity, iteratively: 40*2*51
    #     test_pos_entity_des=test_data[1]        #matrix, each row for a examle: 20*2*51
    #     test_relations=test_data[2]             #matrix, each row for a example: 5*51
    #     test_entity_char_lengths=test_data[3]   #matrix, each row for a example: 3*2*51  (three valies for one entity)
    #     test_entity_des_lengths=test_data[4]    #matrix, each row for a example: 3*2*51  (three values for one entity)
    #     test_relation_lengths=test_data[5]      #matrix, each row for a example: 3*51
    #     test_mention_char_ids=test_data[6]      #matrix, each row for a mention: 40
    #     test_remainQ_word_ids=test_data[7]      #matrix, each row for a question: 30
    #     test_mention_char_lens=test_data[8]     #matrix, each three values for a mention: 3
    #     test_remainQ_word_len=test_data[9]      #matrix, each three values for a remain question: 3

    #     train_sizes=[len(train_pos_entity_char), len(train_pos_entity_des), len(train_relations), len(train_entity_char_lengths), len(train_entity_des_lengths),\
    #            len(train_relation_lengths), len(train_mention_char_ids), len(train_remainQ_word_ids), len(train_mention_char_lens), len(train_remainQ_word_len), len(train_entity_scores)]
    #     if sum(train_sizes)/len(train_sizes)!=train_size:
    #         print 'weird size:', train_sizes
    #         exit(0)

    test_sizes=[len(test_pos_entity_char), len(test_relations), len(test_entity_char_lengths),\
           len(test_relation_lengths), len(test_mention_char_ids), len(test_remainQ_word_ids), len(test_mention_char_lens), len(test_remainQ_word_len), len(test_entity_scores)]
    if sum(test_sizes) / len(test_sizes) != test_size:
        print 'weird size:', test_sizes
        exit(0)


#     n_train_batches=train_size/batch_size
#     n_test_batches=test_size/batch_size

#     train_batch_start=list(numpy.arange(n_train_batches)*batch_size)
#     test_batch_start=list(numpy.arange(n_test_batches)*batch_size)

#     indices_train_pos_entity_char=pythonList_into_theanoIntMatrix(train_pos_entity_char)
#     indices_train_pos_entity_des=pythonList_into_theanoIntMatrix(train_pos_entity_des)
#     indices_train_relations=pythonList_into_theanoIntMatrix(train_relations)
#     indices_train_entity_char_lengths=pythonList_into_theanoIntMatrix(train_entity_char_lengths)
#     indices_train_entity_des_lengths=pythonList_into_theanoIntMatrix(train_entity_des_lengths)
#     indices_train_relation_lengths=pythonList_into_theanoIntMatrix(train_relation_lengths)
#     indices_train_mention_char_ids=pythonList_into_theanoIntMatrix(train_mention_char_ids)
#     indices_train_remainQ_word_ids=pythonList_into_theanoIntMatrix(train_remainQ_word_ids)
#     indices_train_mention_char_lens=pythonList_into_theanoIntMatrix(train_mention_char_lens)
#     indices_train_remainQ_word_len=pythonList_into_theanoIntMatrix(train_remainQ_word_len)
#     indices_train_entity_scores=pythonList_into_theanoFloatMatrix(train_entity_scores)

#     indices_test_pos_entity_char=pythonList_into_theanoIntMatrix(test_pos_entity_char)
#     indices_test_pos_entity_des=pythonList_into_theanoIntMatrix(test_pos_entity_des)
#     indices_test_relations=pythonList_into_theanoIntMatrix(test_relations)
#     indices_test_entity_char_lengths=pythonList_into_theanoIntMatrix(test_entity_char_lengths)
#     indices_test_entity_des_lengths=pythonList_into_theanoIntMatrix(test_entity_des_lengths)
#     indices_test_relation_lengths=pythonList_into_theanoIntMatrix(test_relation_lengths)
#     indices_test_mention_char_ids=pythonList_into_theanoIntMatrix(test_mention_char_ids)
#     indices_test_remainQ_word_ids=pythonList_into_theanoIntMatrix(test_remainQ_word_ids)
#     indices_test_mention_char_lens=pythonList_into_theanoIntMatrix(test_mention_char_lens)
#     indices_test_remainQ_word_len=pythonList_into_theanoIntMatrix(test_remainQ_word_len)
#     indices_test_entity_scores=pythonList_into_theanoIntMatrix(test_entity_scores)

    rand_values = random_value_normal((vocab_size + 1, emb_size),
                                      theano.config.floatX,
                                      numpy.random.RandomState(1234))
    #     rand_values[0]=numpy.array(numpy.zeros(emb_size),dtype=theano.config.floatX)
    #rand_values[0]=numpy.array([1e-50]*emb_size)
    #     rand_values=load_word2vec_to_init(rand_values, rootPath+'word_emb.txt')
    embeddings = theano.shared(value=rand_values, borrow=True)

    char_rand_values = random_value_normal((char_size + 1, char_emb_size),
                                           theano.config.floatX,
                                           numpy.random.RandomState(1234))
    #     char_rand_values[0]=numpy.array(numpy.zeros(char_emb_size),dtype=theano.config.floatX)
    char_embeddings = theano.shared(value=char_rand_values, borrow=True)

    # allocate symbolic variables for the data
    index = T.iscalar()
    chosed_indices = T.ivector()

    ent_char_ids_M = T.imatrix()
    ent_lens_M = T.imatrix()
    men_char_ids_M = T.imatrix()
    men_lens_M = T.imatrix()
    rel_word_ids_M = T.imatrix()
    rel_word_lens_M = T.imatrix()
    #desH_word_ids_M=T.imatrix()
    #desH_word_lens_M=T.imatrix()
    q_word_ids_M = T.imatrix()
    q_word_lens_M = T.imatrix()
    ent_scores = T.fvector()

    filter_size = (emb_size, window_width)
    char_filter_size = (char_emb_size, window_width)
    #poolsize1=(1, ishape[1]-filter_size[1]+1)
    #     length_after_wideConv=ishape[1]+filter_size[1]-1

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    char_filter_shape = (char_nkerns, 1, char_filter_size[0],
                         char_filter_size[1])
    word_filter_shape = (word_nkerns, 1, filter_size[0], filter_size[1])
    char_conv_W, char_conv_b = create_conv_para(rng,
                                                filter_shape=char_filter_shape)
    q_rel_conv_W, q_rel_conv_b = create_conv_para(
        rng, filter_shape=word_filter_shape)
    #q_desH_conv_W, q_desH_conv_b=create_conv_para(rng, filter_shape=word_filter_shape)
    params = [
        char_embeddings, embeddings, char_conv_W, char_conv_b, q_rel_conv_W,
        q_rel_conv_b
    ]  #, q_desH_conv_W, q_desH_conv_b]
    load_model_from_file(rootPath, params, mark)

    def SimpleQ_matches_Triple(ent_char_ids_f, ent_lens_f, rel_word_ids_f,
                               rel_word_lens_f, men_char_ids_f, q_word_ids_f,
                               men_lens_f, q_word_lens_f):

        #         rng = numpy.random.RandomState(23455)
        ent_char_input = char_embeddings[ent_char_ids_f.flatten()].reshape(
            (batch_size, max_char_len,
             char_emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
        men_char_input = char_embeddings[men_char_ids_f.flatten()].reshape(
            (batch_size, max_char_len,
             char_emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

        rel_word_input = embeddings[rel_word_ids_f.flatten()].reshape(
            (batch_size, max_relation_len,
             emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
        #desH_word_input = embeddings[desH_word_ids_f.flatten()].reshape((batch_size,max_des_len, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

        #         desT_word_input = embeddings[desT_word_ids_f.flatten()].reshape((batch_size,max_des_len, emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)
        q_word_input = embeddings[q_word_ids_f.flatten()].reshape(
            (batch_size, max_Q_len,
             emb_size)).transpose(0, 2, 1).dimshuffle(0, 'x', 1, 2)

        #ent_mention
        ent_char_conv = Conv_with_input_para(rng,
                                             input=ent_char_input,
                                             image_shape=(batch_size, 1,
                                                          char_emb_size,
                                                          max_char_len),
                                             filter_shape=char_filter_shape,
                                             W=char_conv_W,
                                             b=char_conv_b)
        men_char_conv = Conv_with_input_para(rng,
                                             input=men_char_input,
                                             image_shape=(batch_size, 1,
                                                          char_emb_size,
                                                          max_char_len),
                                             filter_shape=char_filter_shape,
                                             W=char_conv_W,
                                             b=char_conv_b)
        #q-rel
        q_rel_conv = Conv_with_input_para(rng,
                                          input=q_word_input,
                                          image_shape=(batch_size, 1, emb_size,
                                                       max_Q_len),
                                          filter_shape=word_filter_shape,
                                          W=q_rel_conv_W,
                                          b=q_rel_conv_b)
        rel_conv = Conv_with_input_para(rng,
                                        input=rel_word_input,
                                        image_shape=(batch_size, 1, emb_size,
                                                     max_relation_len),
                                        filter_shape=word_filter_shape,
                                        W=q_rel_conv_W,
                                        b=q_rel_conv_b)
        #q_desH
        #q_desH_conv = Conv_with_input_para(rng, input=q_word_input,
        #        image_shape=(batch_size, 1, emb_size, max_Q_len),
        #        filter_shape=word_filter_shape, W=q_desH_conv_W, b=q_desH_conv_b)
        #desH_conv = Conv_with_input_para(rng, input=desH_word_input,
        #        image_shape=(batch_size, 1, emb_size, max_des_len),
        #        filter_shape=word_filter_shape, W=q_desH_conv_W, b=q_desH_conv_b)

        ent_conv_pool = Max_Pooling(rng,
                                    input_l=ent_char_conv.output,
                                    left_l=ent_lens_f[0],
                                    right_l=ent_lens_f[2])
        men_conv_pool = Max_Pooling(rng,
                                    input_l=men_char_conv.output,
                                    left_l=men_lens_f[0],
                                    right_l=men_lens_f[2])

        #q_rel_pool=Max_Pooling(rng, input_l=q_rel_conv.output, left_l=q_word_lens_f[0], right_l=q_word_lens_f[2])
        rel_conv_pool = Max_Pooling(rng,
                                    input_l=rel_conv.output,
                                    left_l=rel_word_lens_f[0],
                                    right_l=rel_word_lens_f[2])
        q_rel_pool = Average_Pooling_for_SimpleQA(
            rng,
            input_l=q_rel_conv.output,
            input_r=rel_conv_pool.output_maxpooling,
            left_l=q_word_lens_f[0],
            right_l=q_word_lens_f[2],
            length_l=q_word_lens_f[1] + filter_size[1] - 1,
            dim=max_Q_len + filter_size[1] - 1,
            topk=2)

        #q_desH_pool=Max_Pooling(rng, input_l=q_desH_conv.output, left_l=q_word_lens_f[0], right_l=q_word_lens_f[2])
        #desH_conv_pool=Max_Pooling(rng, input_l=desH_conv.output, left_l=desH_word_lens_f[0], right_l=desH_word_lens_f[2])


        overall_simi=cosine(ent_conv_pool.output_maxpooling, men_conv_pool.output_maxpooling)*0.33333+\
                    cosine(q_rel_pool.output_maxpooling, rel_conv_pool.output_maxpooling)*0.55
        #           0.0*cosine(q_desH_pool.output_maxpooling, desH_conv_pool.output_maxpooling)
        #                     cosine(q_desT_pool.output_maxpooling, desT_conv_pool.output_maxpooling)
        return overall_simi

    simi_list, updates = theano.scan(SimpleQ_matches_Triple,
                                     sequences=[
                                         ent_char_ids_M, ent_lens_M,
                                         rel_word_ids_M, rel_word_lens_M,
                                         men_char_ids_M, q_word_ids_M,
                                         men_lens_M, q_word_lens_M
                                     ])

    simi_list += 0.2 * ent_scores

    posi_simi = simi_list[0]
    nega_simies = simi_list[1:]
    loss_simi_list = T.maximum(
        0.0, margin - posi_simi.reshape((1, 1)) + nega_simies)
    loss_simi = T.sum(loss_simi_list)
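    #hinge ranking loss: every negative candidate whose score comes within
    #`margin` of the positive candidate (index 0) contributes to the loss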

    test_model = theano.function([
        ent_char_ids_M, ent_lens_M, men_char_ids_M, men_lens_M, rel_word_ids_M,
        rel_word_lens_M, q_word_ids_M, q_word_lens_M, ent_scores
    ], [loss_simi, simi_list],
                                 on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... testing'

    start_time = time.clock()
    mid_time = start_time

    epoch = 0

    test_loss = []
    succ = 0
    for i in range(test_size):

        #prepare data
        test_ent_char_ids_M = numpy.asarray(test_pos_entity_char[i],
                                            dtype='int32').reshape(
                                                (length_per_example_test[i],
                                                 max_char_len))
        test_ent_lens_M = numpy.asarray(test_entity_char_lengths[i],
                                        dtype='int32').reshape(
                                            (length_per_example_test[i], 3))
        test_men_char_ids_M = numpy.asarray(test_mention_char_ids[i],
                                            dtype='int32').reshape(
                                                (length_per_example_test[i],
                                                 max_char_len))
        test_men_lens_M = numpy.asarray(test_mention_char_lens[i],
                                        dtype='int32').reshape(
                                            (length_per_example_test[i], 3))
        test_rel_word_ids_M = numpy.asarray(test_relations[i],
                                            dtype='int32').reshape(
                                                (length_per_example_test[i],
                                                 max_relation_len))
        test_rel_word_lens_M = numpy.asarray(test_relation_lengths[i],
                                             dtype='int32').reshape(
                                                 (length_per_example_test[i],
                                                  3))
        #test_desH_word_ids_M =numpy.asarray( test_pos_entity_des[i], dtype='int32').reshape((length_per_example_test[i], max_des_len))
        #test_desH_word_lens_M = numpy.asarray(test_entity_des_lengths[i], dtype='int32').reshape((length_per_example_test[i], 3))
        test_q_word_ids_M = numpy.asarray(test_remainQ_word_ids[i],
                                          dtype='int32').reshape(
                                              (length_per_example_test[i],
                                               max_Q_len))
        test_q_word_lens_M = numpy.asarray(test_remainQ_word_len[i],
                                           dtype='int32').reshape(
                                               (length_per_example_test[i], 3))
        test_ent_scores = numpy.asarray(test_entity_scores[i],
                                        dtype=theano.config.floatX)

        loss_simi_i, simi_list_i = test_model(
            test_ent_char_ids_M, test_ent_lens_M, test_men_char_ids_M,
            test_men_lens_M, test_rel_word_ids_M, test_rel_word_lens_M,
            test_q_word_ids_M, test_q_word_lens_M, test_ent_scores)
        #                     print 'simi_list_i:', simi_list_i[:10]
        test_loss.append(loss_simi_i)
        if len(simi_list_i) == 1 or simi_list_i[0] >= max(simi_list_i[1:]):
            succ += 1
        if i % 1000 == 0:
            print 'testing', i, '...acc:', (succ * 1.0 /
                                            (i + 1)) * (19168 * 1.0 / 21687)
    succ = succ * 100.0 / 21687
    #now, check MAP and MRR
    print 'accu:', succ

    #     store_model_to_file(rootPath, params, succ, mark)

    print 'Epoch ', epoch, 'uses ', (time.clock() - mid_time) / 60.0, 'min'
Esempio n. 58
0
    def ready(self):
	args = self.args
	w_emb_layer = self.w_emb_layer
	c_emb_layer = self.c_emb_layer
	r_emb_layers = self.r_emb_layers
	r_matrix_layers = self.r_matrix_layers	

	char_dim = self.char_dim = args.char_dim
	char_lstm_dim = self.char_lstm_dim = args.char_lstm_dim
	word_dim = self.word_dim = args.word_dim
	word_lstm_dim = self.word_lstm_dim = args.word_lstm_dim
	
	dropout = self.dropout = theano.shared(
                np.float64(args.dropout).astype(theano.config.floatX)
            )

	word_ids = self.word_ids = T.ivector('word_ids')
	char_ids = self.char_ids = T.imatrix('char_ids')
	char_lens = self.char_lens = T.fvector('char_lens')
	char_masks = self.char_masks = T.imatrix('char_masks')
	up_ids = self.up_ids = T.imatrix('up_ids')
	up_rels = self.up_rels = T.imatrix('up_rels')
	up_id_masks = self.up_id_masks = T.imatrix('up_id_masks')
	down_ids = self.down_ids = T.imatrix('down_ids')
	down_rels = self.down_rels = T.imatrix('down_rels')
	down_id_masks = self.down_id_masks = T.imatrix('down_id_masks')
	tag_ids = self.tag_ids = T.ivector('tag_ids')
	
	layers = self.layers = [w_emb_layer, c_emb_layer]
	layers.extend(r_emb_layers)
	layers.extend(r_matrix_layers)	

	inputs = self.inputs = []

	inputs.append(self.word_ids)
	inputs.append(self.char_ids)
	inputs.append(self.char_lens)
	inputs.append(self.char_masks)
	inputs.append(self.up_ids)
	inputs.append(self.up_rels)
	inputs.append(self.up_id_masks)
	inputs.append(self.down_ids)
	inputs.append(self.down_rels)
	inputs.append(self.down_id_masks)
	inputs.append(self.tag_ids)
	wslices = w_emb_layer.forward(word_ids)
	cslices = c_emb_layer.forward(char_ids.ravel())
	cslices = cslices.reshape((char_ids.shape[0], char_ids.shape[1], char_dim))
	cslices = cslices.dimshuffle(1, 0, 2)
	
	bv_ur_slicess = []
        bv_dr_slicess = []
        b_ur_slicess = []
        b_dr_slicess = []
	
	bv_ur_matrixss = []
	bv_dr_matrixss = []
	b_ur_matrixss = []
	b_dr_matrixss = []
	
	for r_matrix_layer in r_matrix_layers:
            bv_ur_matrixs = r_matrix_layer.forward1(up_rels.ravel())
            bv_dr_matrixs = r_matrix_layer.forward1(down_rels.ravel())
            b_ur_matrixs = r_matrix_layer.forward2(up_rels.ravel())
            b_dr_matrixs = r_matrix_layer.forward2(down_rels.ravel())
            bv_ur_matrixss.append(bv_ur_matrixs.reshape((up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            bv_dr_matrixss.append(bv_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1], word_dim, word_dim)))
            b_ur_matrixss.append(b_ur_matrixs.reshape((up_rels.shape[0], up_rels.shape[1], word_dim, word_dim)))
            b_dr_matrixss.append(b_dr_matrixs.reshape((down_rels.shape[0], down_rels.shape[1], word_dim, word_dim)))
	
	for r_emb_layer in r_emb_layers:
            bv_ur_slices = r_emb_layer.forward(up_rels.ravel())
            bv_dr_slices = r_emb_layer.forward(down_rels.ravel())
            b_ur_slices = r_emb_layer.forward2(up_rels.ravel())
            b_dr_slices = r_emb_layer.forward2(down_rels.ravel())
            bv_ur_slicess.append(bv_ur_slices.reshape((up_rels.shape[0], up_rels.shape[1], word_dim)))
            bv_dr_slicess.append(bv_dr_slices.reshape((down_rels.shape[0], down_rels.shape[1], word_dim)))
            b_ur_slicess.append(b_ur_slices.reshape((up_rels.shape[0], up_rels.shape[1], word_dim)))
            b_dr_slicess.append(b_dr_slices.reshape((down_rels.shape[0], down_rels.shape[1], word_dim)))

	char_masks = char_masks.dimshuffle(1, 0)

	prev_output = wslices
	prev_size = word_dim

	if char_dim:
	    layers.append(LSTM(
		n_in = char_dim,
		n_out = char_lstm_dim,
		direction = 'bi' if args.char_bidirect else 'si'	
	    ))
	    prev_output_2 = cslices
	    prev_output_2 = apply_dropout(prev_output_2, dropout, v2 = True)
	    prev_output_2 = layers[-1].forward_all(cslices, char_masks)
	    prev_output_2 = T.sum(prev_output_2, axis = 0)
	    prev_output_2 = prev_output_2 / (1e-6 * T.ones_like(char_lens) + char_lens).dimshuffle(0, 'x')

	    prev_size += char_lstm_dim
	    prev_output = T.concatenate([prev_output, prev_output_2], axis = 1)
	
	prev_output = apply_dropout(prev_output, dropout)
	if args.conv != 0:
	    for i in range(args.clayer):
            	layers.append(GKNNMultiHeadGate(
                        n_in = prev_size,
                        n_out = prev_size,
			n_head = args.head
                        ))
	    	prev_output = layers[-1].forward_all(prev_output, up_ids, up_id_masks, bv_ur_slicess[0], down_ids, down_id_masks, bv_dr_slicess[0])
	    	prev_output = apply_dropout(prev_output, dropout)
	
	
	#prev_size *= 2
	#layers.append(LSTM(
	#    n_in = prev_size,
	#    n_out = word_lstm_dim,
	#    direction = 'bi' if args.word_bidirect else 'si'
	#))
	
	#prev_output = prev_output.dimshuffle(0, 'x', 1)
	#prev_output = layers[-1].forward_all(prev_output)
	#prev_output = prev_output.reshape((prev_output.shape[0], prev_output.shape[-1]))
	
	#prev_size = word_lstm_dim
	
	layers.append(Layer(
	    n_in = prev_size,
	    n_out = args.classes,
	    activation = linear, #ReLU,
	    has_bias = False
	))

	n_tags = args.classes
	s_len = char_ids.shape[0]
	tags_scores = layers[-1].forward(prev_output)
	transitions = shared((n_tags + 2, n_tags + 2), 'transitions')
	small = -1000
        b_s = np.array([[small] * n_tags + [0, small]]).astype(np.float32)
        e_s = np.array([[small] * n_tags + [small, 0]]).astype(np.float32)
        observations = T.concatenate(
            [tags_scores, small * T.ones((s_len, 2))],
            axis=1
        )
	
        observations = T.concatenate(
            [b_s, observations, e_s],
            axis=0
        )

        real_path_score = tags_scores[T.arange(s_len), tag_ids].sum()
	b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
        e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
        padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
	
	pre_ids = T.arange(s_len + 1)
	
	s_ids = T.arange(s_len + 1) + 1
	
        real_path_score += transitions[
           padded_tags_ids[pre_ids],
           padded_tags_ids[s_ids]
        ].sum()
	
	all_paths_scores = CRFForward(observations, transitions)
        self.nll_loss = nll_loss = - (real_path_score - all_paths_scores)
        preds = CRFForward(observations, transitions, viterbi = True,
                        return_alpha = False, return_best_sequence=True)
        
	self.pred = preds[1:-1]
	
	self.l2_sqr = None
        params = self.params = [transitions]
        for layer in layers:
            self.params += layer.params
        for p in self.params:
            if self.l2_sqr is None:
                self.l2_sqr = args.l2_reg * T.sum(p**2)
            else:
                self.l2_sqr += args.l2_reg * T.sum(p**2)

	
	#for l, i in zip(layers[3:], range(len(layers[3:]))):
        for l, i in zip(layers[2+len(r_emb_layers)+len(r_matrix_layers):], range(len(layers[2+len(r_emb_layers)+len(r_matrix_layers):]))):
	    say("layer {}: n_in={}\tn_out={}\n".format(
                    i, l.n_in, l.n_out
                ))

        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                        for x in self.params)
        say("total # parameters: {}\n".format(nparams))
	
	cost = self.nll_loss + self.l2_sqr

	lr_method_name = args.learning
	lr_method_parameters = {}
	lr_method_parameters['lr'] = args.learning_rate
	updates = Optimization(clip=5.0).get_updates(lr_method_name, cost, params, **lr_method_parameters)
	
	f_train = theano.function(
	    	inputs = self.inputs,
		outputs = [cost, nll_loss],
		updates = updates,
		allow_input_downcast = True
	)

	f_eval = theano.function(
		inputs = self.inputs[:-1],
		outputs = self.pred,
		allow_input_downcast = True
	)
	
	return f_train, f_eval
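CRFForward above returns the log-partition function over all tag sequences (and, with viterbi=True, the best-scoring path). A minimal NumPy sketch of the forward recursion such an op presumably implements; obs and trans are illustrative names, not from the original:

import numpy as np

def crf_log_partition(obs, trans):
    # obs: (seq_len, n_tags) emission scores; trans: (n_tags, n_tags) transition scores
    alpha = obs[0]
    for t in range(1, len(obs)):
        # log-sum-exp over the previous tag, for each current tag
        scores = alpha[:, None] + trans + obs[t][None, :]
        m = scores.max(axis=0)
        alpha = m + np.log(np.exp(scores - m).sum(axis=0))
    m = alpha.max()
    return m + np.log(np.exp(alpha - m).sum())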
Esempio n. 59
0
beta = theano.shared(
    numpy.asarray(numpy.random.randn(784, 1), dtype=theano.config.floatX))
py_x = T.nnet.softmax(T.dot(X, beta))
y_pred = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, y))


# energy function for normal distribution with normal momentum
def normal_en(pos, mom):
    total_en = T.dot(pos, pos) / 2 + T.dot(mom, mom) / 2
    f = theano.function([pos, mom], total_en)
    return (f)


beta_0 = T.fvector()
p_0 = T.fvector()
en = lambda beta_0, p_0: T.dot(beta_0, beta_0) * 0.5 + T.dot(p_0, p_0) * 0.5

#en_f = theano.function([],en)


def simulate_dynamics(initial_pos, initial_mom, stepsize, n_steps, energy_fn):
    def leapfrog(pos, mom, step):
        # position update: pos(t + step) from dH/dmom at the current state
        dE_dmom = T.grad(energy_fn(pos, mom), mom)
        new_pos = pos + step * dE_dmom
        # momentum update: mom(t + step) from dH/dpos at the new position
        dE_dpos = T.grad(energy_fn(new_pos, mom), new_pos)
        new_mom = mom - step * dE_dpos
        # scan-style return value (the source snippet is truncated just below)
        return [new_pos, new_mom], {}
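
    # Hypothetical continuation (an assumption, not in the source, which cuts
    # off here): in the standard Theano HMC tutorial the step function is
    # iterated with theano.scan like this.
    (all_pos, all_mom), scan_updates = theano.scan(
        fn=leapfrog,
        outputs_info=[initial_pos, initial_mom],
        non_sequences=[stepsize],
        n_steps=n_steps)
    return all_pos[-1], all_mom[-1]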
Example n. 60
    def __init__(self, config):
        ModelBase.__init__(self)

        self.config = config
        self.verbose = self.config['verbose']
        self.name = 'alexnet'
        batch_size = config['batch_size']
        flag_datalayer = config['use_data_layer']
        lib_conv = config['lib_conv']
        n_softmax_out = config['n_softmax_out']
        # ##################### BUILD NETWORK ##########################
        # allocate symbolic variables for the data
        # 'rand' is a random array used for random cropping/mirroring of data
        x = T.ftensor4('x')
        y = T.lvector('y')
        rand = T.fvector('rand')
        lr = T.scalar('lr')

        if self.verbose: print 'AlexNet 2/16'
        self.layers = []
        params = []
        weight_types = []

        if flag_datalayer:
            data_layer = DataLayer(input=x,
                                   image_shape=(3, 256, 256, batch_size),
                                   cropsize=227,
                                   rand=rand,
                                   mirror=True,
                                   flag_rand=config['rand_crop'])

            layer1_input = data_layer.output
        else:
            layer1_input = x

        convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                        image_shape=(3, 227, 227, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4,
                                        padsize=0,
                                        group=1,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.0,
                                        lrn=True,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

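        # group=2 splits the channels into two independent filter groups (48
        # of the 96 input channels each here), a holdover from AlexNet's
        # original two-GPU layout.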
        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, 27, 27, batch_size),
                                        filter_shape=(96, 5, 5, 256),
                                        convstride=1,
                                        padsize=2,
                                        group=2,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.1,
                                        lrn=True,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, 13, 13, batch_size),
                                        filter_shape=(256, 3, 3, 384),
                                        convstride=1,
                                        padsize=1,
                                        group=1,
                                        poolsize=1,
                                        poolstride=0,
                                        bias_init=0.0,
                                        lrn=False,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 384),
                                        convstride=1,
                                        padsize=1,
                                        group=2,
                                        poolsize=1,
                                        poolstride=0,
                                        bias_init=0.1,
                                        lrn=False,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, 13, 13, batch_size),
                                        filter_shape=(384, 3, 3, 256),
                                        convstride=1,
                                        padsize=1,
                                        group=2,
                                        poolsize=3,
                                        poolstride=2,
                                        bias_init=0.0,
                                        lrn=False,
                                        lib_conv=lib_conv,
                                        verbose=self.verbose)
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

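        # The conv stack keeps activations in (channels, rows, cols, batch)
        # layout, as the image_shape arguments above show; dimshuffle(3, 0, 1, 2)
        # moves the batch axis first, and flatten(..., 2) yields a
        # (batch_size, 256 * 6 * 6) = (batch_size, 9216) matrix for the FC
        # layers.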
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input,
                            n_in=9216,
                            n_out=4096,
                            verbose=self.verbose)
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type

        dropout_layer6 = DropoutLayer(fc_layer6.output,
                                      n_in=4096,
                                      n_out=4096,
                                      verbose=self.verbose)

        fc_layer7 = FCLayer(input=dropout_layer6.output,
                            n_in=4096,
                            n_out=4096,
                            verbose=self.verbose)
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type

        dropout_layer7 = DropoutLayer(fc_layer7.output,
                                      n_in=4096,
                                      n_out=4096,
                                      verbose=self.verbose)

        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output,
                                      n_in=4096,
                                      n_out=n_softmax_out,
                                      verbose=self.verbose)
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

        # #################### NETWORK BUILT #######################
        self.p_y_given_x = softmax_layer8.p_y_given_x
        self.y_pred = softmax_layer8.y_pred

        self.output = self.p_y_given_x

        self.cost = softmax_layer8.negative_log_likelihood(y)
        self.error = softmax_layer8.errors(y)
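        # Top-5 error only makes sense with at least 5 classes; otherwise
        # fall back to top-n_softmax_out.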
        if n_softmax_out < 5:
            self.error_top_5 = softmax_layer8.errors_top_x(y, n_softmax_out)
        else:
            self.error_top_5 = softmax_layer8.errors_top_x(y, 5)
        self.params = params

        # inputs
        self.x = x
        self.y = y
        self.rand = rand
        self.lr = lr
        self.shared_x = theano.shared(
            np.zeros(
                (3, config['input_width'], config['input_height'],
                 config['file_batch_size']),  # for loading large batch
                dtype=theano.config.floatX),
            borrow=True)

        self.shared_y = theano.shared(np.zeros((config['file_batch_size'], ),
                                               dtype=int),
                                      borrow=True)
        self.shared_lr = theano.shared(np.float32(config['learning_rate']))

        # training related
        self.base_lr = np.float32(config['learning_rate'])
        self.step_idx = 0
        self.mu = config['momentum']  # def: 0.9 # momentum
        self.eta = config['weight_decay']  #0.0002 # weight decay
        self.weight_types = weight_types
        self.batch_size = batch_size

        self.grads = T.grad(self.cost, self.params)

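        # A whole "file batch" is staged on the GPU in shared_x / shared_y;
        # subb_ind picks out one sub-batch window of batch_size examples
        # along the batch axis for each training step.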
        subb_ind = T.iscalar('subb')  # sub batch index
        #print self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval()
        self.subb_ind = subb_ind
        self.shared_x_slice = self.shared_x[:, :, :, subb_ind *
                                            self.batch_size:(subb_ind + 1) *
                                            self.batch_size]
        self.shared_y_slice = self.shared_y[subb_ind *
                                            self.batch_size:(subb_ind + 1) *
                                            self.batch_size]