Example #1
        def _recurrence(v_h_, x_h_, v_t_, x_t_, a_t_, is_aggressive):

            state = tt.concatenate([v_h_, x_h_, tt.flatten(v_t_), tt.flatten(x_t_), tt.flatten(a_t_)])

            h0 = tt.dot(state, self.W_a_0) + self.b_a_0
            relu0 = tt.nnet.relu(h0)

            h1 = tt.dot(relu0, self.W_a_1) + self.b_a_1
            relu1 = tt.nnet.relu(h1)

            h2 = tt.dot(relu1, self.W_a_2) + self.b_a_2
            relu2 = tt.nnet.relu(h2)

            a = tt.dot(relu2, self.W_a_c)

            v_h, x_h, v_t, x_t, a_t, cost_transition = _step_state(v_h_, x_h_, v_t_, x_t_, a_t_, a, is_aggressive)

            # cost:

            # 0. smooth acceleration policy
            cost_accel = tt.abs_(a)

            # 1. forcing the host to move forward (until the top point of the roundabout)
            cost_progress = tt.nnet.relu(0.5*self.two_pi_r-x_h)

            # 2. keeping distance from close vehicles
            x_abs_diffs = tt.abs_(x_h - x_t)

            cost_accident =  tt.mean(3*tt.nnet.relu( self.require_distance-x_abs_diffs )) * (x_h > - 0.5*self.host_length) #tt.nnet.sigmoid(x_h + 0.5*self.host_length)

            cost = self.alpha_accel * cost_accel + self.alpha_progress * cost_progress + self.alpha_accident * cost_accident

            return (v_h, x_h, v_t, x_t, a_t, cost, cost_transition), t.scan_module.until(x_h[0]>=0.45*self.two_pi_r)
Example #2
def batch_multicrop(bboxes, frame):
	att_col = img_col
	att_row = img_row

	_cx = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2; cx = (_cx + 1) / 2. * img_col
	_cy = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2; cy = (_cy + 1) / 2. * img_row
	_w = TT.abs_(bboxes[:, :, 3] - bboxes[:, :, 1]) / 2; w = _w * img_col
	_h = TT.abs_(bboxes[:, :, 2] - bboxes[:, :, 0]) / 2; h = _h * img_row

	dx = w / (img_col - 1)
	dy = h / (img_row - 1)

	mx = cx.dimshuffle(0, 1, 'x') + dx.dimshuffle(0, 1, 'x') * (TT.arange(att_col, dtype=T.config.floatX).dimshuffle('x', 'x', 0) - (att_col - 1) / 2.)
	my = cy.dimshuffle(0, 1, 'x') + dy.dimshuffle(0, 1, 'x') * (TT.arange(att_row, dtype=T.config.floatX).dimshuffle('x', 'x', 0) - (att_row - 1) / 2.)

	a = TT.arange(img_col, dtype=T.config.floatX)
	b = TT.arange(img_row, dtype=T.config.floatX)

	# (batch_size, nr_samples, channels, frame_size, att_size)
	ax = TT.maximum(0, 1 - TT.abs_(a.dimshuffle('x', 'x', 'x', 0, 'x') - mx.dimshuffle(0, 1, 'x', 'x', 2)))
	by = TT.maximum(0, 1 - TT.abs_(b.dimshuffle('x', 'x', 'x', 0, 'x') - my.dimshuffle(0, 1, 'x', 'x', 2)))

	def __batch_multicrop_dot(a, b):
		return (a.dimshuffle(0, 1, 2, 3, 4, 'x') * b.dimshuffle(0, 1, 2, 'x', 3, 4)).sum(axis=4)

	crop = __batch_multicrop_dot(by.dimshuffle(0, 1, 2, 4, 3), __batch_multicrop_dot(frame.dimshuffle(0, 'x', 1, 2, 3), ax))
	return crop
Example #3
def smoothL1(x):
    #x is vector of scalars
    lto = T.abs_(x)<1
    gteo = T.abs_(x)>=1
    new_x = T.set_subtensor(x[lto.nonzero()],0.5 * T.square(x[lto.nonzero()]))
    new_x = T.set_subtensor(new_x[gteo.nonzero()], T.abs_(new_x[gteo.nonzero()]) - 0.5)
    return new_x
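# A minimal usage sketch for smoothL1 above (assumes theano and numpy are available;
# the function computes 0.5*x**2 where |x| < 1 and |x| - 0.5 elsewhere):
import numpy as np
import theano
import theano.tensor as T

x = T.dvector('x')
smooth_l1 = theano.function([x], smoothL1(x))
print(smooth_l1(np.array([-2.0, -0.5, 0.0, 0.5, 2.0])))  # -> [1.5, 0.125, 0., 0.125, 1.5]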
Example #4
    def get_cost_updates(self, persistent, k=2, lr=0.01, l1=0., l2=0.01):
        chain_start = persistent
        V_burn_in, updates = theano.scan(fn=self.gibbs_VhV,
                                         outputs_info=[chain_start],
                                         n_steps=k,
                                         name='MultiRTRBM Gibbs Sampler')

        chain_end = V_burn_in[-1]
        # Contrastive Divergence (Variational method Cost) / Approximated
        # likelihood
        L1 = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.Wt))
        L2 = T.sum(self.W**2) + T.sum(self.Wt**2)
        KL_diff = T.mean(self.free_energy_RTRBM(self.input) -
                         self.free_energy_RTRBM(chain_end)) +\
            T.cast(l1, theano.config.floatX) * L1 + \
            T.cast(l2, theano.config.floatX) * L2
        self.gparams = T.grad(KL_diff, self.params,
                              consider_constant=[chain_end])
        for param, gparam in zip(self.params, self.gparams):
            if param in [self.W, self.Wt]:
                updates[param] = param - 0.0001 * gparam
            else:
                updates[param] = param - lr * gparam
        cost, updates = self.get_pseudo_likelihood_cost(updates)

        return cost, updates
Example #5
 def call(self, X):
     if type(X) is not list or len(X) != 2:
         raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X))
         
     frame, position  = X[0], X[1]
     
     # Reshaping the input to exclude the time dimension
     frameShape = K.shape(frame)
     positionShape = K.shape(position)
     (chans, height, width) = frameShape[-3:]
     targetDim = positionShape[-1]
     frame = K.reshape(frame, (-1, chans, height, width))
     position = K.reshape(position, (-1, ) + (targetDim, ))
     
     # Applying the attention
     hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0
     hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0
     position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0)
     position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0)
     position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0)
     rX = Data.linspace(-1.0, 1.0, width)
     rY = Data.linspace(-1.0, 1.0, height)
     FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x'))
     FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x'))
     m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1)
     m = m + self.alpha - THT.gt(m, 0.) * self.alpha
     frame = frame * m.dimshuffle(0, 'x', 1, 2)
     
     # Reshaping the frame to include time dimension
     output = K.reshape(frame, frameShape)
     
     return output
Example #6
 def pass_fn(*inputs):
     ''' 
     Function for scan op. Has to work with variable number of arguments. 
     Input layout: diff, message[message_order[0]], ..., message[message_order[N]], initial_potential[0], ..., initial_potential[M]
     '''
     
     input_messages = {}
     ''' Quick creation of message potential tables by using a shallow copy of existing potential tables'''
     for i,midx in enumerate(message_order):
         input_messages[midx] = first_messages[midx].replace_tensor(inputs[i+1])
     
     off = 1+len(message_order) #  offset into input for the initial potentials
     ipotentials = []
     ''' Create initial potentials from passed inputs'''
     for i, pot in enumerate(mpstate.initial_potentials):
         ipotentials.append(pot.replace_tensor(inputs[off+i]))
     
     ''' Pass messages and calculate next set of messages '''
     (used_message_order, next_messages) = mpstate.pass_messages(input_messages=input_messages, initial_potentials=ipotentials)
     if (convergence_threshold>=0.0): 
         
         ''' Calculate absolute difference between last set of differences and current set for convergence diagnostics'''
         diff = T.sum( T.abs_(next_messages[used_message_order[0]].pt_tensor.flatten() - input_messages[used_message_order[0]].pt_tensor.flatten()))
         for i in range(1, len(used_message_order)):
             diff += T.sum( T.abs_(next_messages[used_message_order[i]].pt_tensor.flatten() - input_messages[used_message_order[i]].pt_tensor.flatten()))
         
         ''' Create result which conforms to the start of the input layout'''
         resvalues = [diff] + [next_messages[midx].pt_tensor for midx in message_order]
         ''' Return updated values plus a convergence criterion'''
         return resvalues, theano.scan_module.until(diff<=convergence_threshold)
     else:
         diff = convergence_criterion
         resvalues = [diff] + [next_messages[midx].pt_tensor for midx in message_order]
         return resvalues
Example #7
    def get_cost_updates(self, x, W, W_prime, b, b_prime, corruption_level, learning_rate, l2reg=0., l1reg=0.):
        """ This function computes the cost and the updates for one trainng
        step of the dA """
        self.x = x
        self.W = W
        self.W_prime = W_prime
        self.b = b
        self.b_prime = b_prime
        self.params = [self.W, self.W_prime, self.b, self.b_prime]
        if corruption_level is None:
            tilde_x = self.x
        else:
            tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y       = self.get_hidden_values( tilde_x)
        z       = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using minibatches,
        #        L will  be a vector, with one entry per example in minibatch
        
        XE = self.x * T.log(z) + (1 - self.x) *  T.log(1-z)
        cost = -T.mean(T.sum(XE, axis=1),axis=0)
        
        if l2reg != 0.:
            cost += l2reg * (T.mean(T.sum(self.W*self.W,1),0) + T.mean(T.sum(self.W_prime*self.W_prime,1),0))
        if l1reg != 0.:
            cost += l1reg * (T.mean(T.sum(T.abs_(y),1),0) + T.mean(T.sum(T.abs_(y),1),0))
        # compute the gradients of the cost of the `dA` with respect
        # to its parameters 
        gparams = T.grad(cost, self.params)
#        # generate the list of updates
#        updates = {}
#        for param, gparam in zip(self.params, gparams):
#            updates[param] = param -  learning_rate*gparam
        updates = [-learning_rate*gparam for gparam in gparams]

        return (cost, updates)
Example #8
    def prepareTraining(self):
        '''
        Prepares the relevant functions
        (details on neural_net_creator's prepareTraining)
        '''
        #loss objective to minimize
        self.prediction = lasagne.layers.get_output(self.network)
        self.prediction=self.prediction[:,0]
        #self.loss = lasagne.objectives.categorical_crossentropy(self.prediction, self.target_var)
        #the loss is now the squared error in the output
        self.loss =  lasagne.objectives.squared_error(self.prediction, self.target_var)
        self.loss = self.loss.mean()

        self.params = lasagne.layers.get_all_params(self.network, trainable=True)
        self.updates = lasagne.updates.nesterov_momentum(
                self.loss, self.params, learning_rate=0.01, momentum=0.9)

        self.test_prediction = lasagne.layers.get_output(self.network, deterministic=True)
        self.test_prediction=self.test_prediction[:,0]
        self.test_loss = lasagne.objectives.squared_error(self.test_prediction, self.target_var)
        self.test_loss = self.test_loss.mean()
        #the accuracy is now the fraction of samples whose prediction is within 0.01 of the target (threshold can be changed)
        self.test_acc = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.01)
                            , dtype=theano.config.floatX)
        self.test_acc2 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.05)
                            , dtype=theano.config.floatX)
        self.test_acc3 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.1)
                            , dtype=theano.config.floatX)

        self.train_fn = theano.function([self.input_var, self.target_var], self.loss, updates=self.updates)

        self.val_fn = theano.function([self.input_var, self.target_var], [self.test_loss,self.test_acc,self.test_acc2,self.test_acc3])

        self.use = theano.function([self.input_var],[self.test_prediction])
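# Standalone sketch of the tolerance-based "accuracy" used above: the fraction of
# predictions that land within a given distance of the target (the names below are
# illustrative, not part of the class):
import numpy as np
import theano
import theano.tensor as T

pred = T.dvector('pred')
target = T.dvector('target')
acc_001 = T.mean(T.le(T.abs_(pred - target), 0.01), dtype=theano.config.floatX)
accuracy = theano.function([pred, target], acc_001)
print(accuracy(np.array([0.0, 0.5, 1.0]),
               np.array([0.005, 0.6, 1.0])))  # -> ~0.667 (two of three within 0.01)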
Example #9
    def criteria(self):
        
        F = T.dot(self.w, self.X)
        Fs = T.sqrt(F**2 + 1e-8)
        L2Fs = (Fs**2).sum(axis=[1])
        L2Fs = T.sqrt(L2Fs)
        NFs = Fs/L2Fs.dimshuffle(0, 'x')
        L2Fn = (NFs**2).sum(axis=[0])
        L2Fn = T.sqrt(L2Fn)
        self.Fhat = NFs/L2Fn.dimshuffle('x', 0)
        
#        self.Fhat = self.feedForward(self.dot())   
        
        F = T.sqrt(T.dot(self.gMat, T.sqr(self.Fhat))) # self.Fhat1)) # self.feedForward(self.dot()
        Fs = T.sqrt(F**2 + 1e-8)
        L2Fs = (Fs**2).sum(axis=[1])
        L2Fs = T.sqrt(L2Fs)
        NFs = Fs/L2Fs.dimshuffle(0, 'x')
        L2Fn = (NFs**2).sum(axis=[0])
        L2Fn = T.sqrt(L2Fn)
        self.gFhat = NFs/L2Fn.dimshuffle('x', 0)
        
#        from connections import distMat
#        x = distMat(self.w.shape[0].eval(), 20)
#        inhibition = T.dot(T.sqr(self.Fhat1.T), x)
#        inhibition = self.Fhat1 * inhibition.T
        
        return T.abs_(self.Fhat) + T.abs_(self.gFhat) #+ T.abs_(inhibition)
Example #10
def crop_attention_bilinear(bbox, frame):
	att = bbox
	frame_col = img_col
	frame_row = img_row

	_cx = (att[1] + att[3]) / 2; cx = (_cx + 1) / 2. * frame_col
	_cy = (att[0] + att[2]) / 2; cy = (_cy + 1) / 2. * frame_row
	_w = TT.abs_(att[3] - att[1]) / 2; w = _w * frame_col
	_h = TT.abs_(att[2] - att[0]) / 2; h = _h * frame_row

	dx = w / (att_col - 1)
	dy = h / (att_row - 1)

	mx = cx + dx * (TT.arange(att_col, dtype=T.config.floatX) - (att_col - 1) / 2.)
	my = cy + dy * (TT.arange(att_row, dtype=T.config.floatX) - (att_row - 1) / 2.)

	a = TT.arange(frame_col, dtype=T.config.floatX)
	b = TT.arange(frame_row, dtype=T.config.floatX)

	ax = TT.maximum(0, 1 - TT.abs_(a.dimshuffle(0, 'x') - mx.dimshuffle('x', 0)))
	by = TT.maximum(0, 1 - TT.abs_(b.dimshuffle(0, 'x') - my.dimshuffle('x', 0)))

	bilin = TT.dot(by.T, TT.dot(frame, ax))

	return bilin
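# A minimal usage sketch for crop_attention_bilinear above. The function reads the
# module-level globals img_row/img_col/att_row/att_col and the aliases TT (theano.tensor)
# and T (theano); the values chosen here are assumptions for a toy run:
import numpy as np
import theano
import theano.tensor as TT
T = theano                  # the snippet above uses T.config.floatX, i.e. T = theano

img_row, img_col = 32, 32   # source frame size (assumption)
att_row, att_col = 8, 8     # crop size (assumption)

bbox = TT.vector('bbox')    # [y0, x0, y1, x1] in normalized [-1, 1] coordinates
frame = TT.matrix('frame')  # a single-channel (img_row, img_col) frame
crop_fn = theano.function([bbox, frame], crop_attention_bilinear(bbox, frame))

crop = crop_fn(np.array([-0.5, -0.5, 0.5, 0.5], dtype=theano.config.floatX),
               np.random.rand(img_row, img_col).astype(theano.config.floatX))
print(crop.shape)           # -> (att_row, att_col) == (8, 8)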
Example #11
    def attention_gate(self, facts, memory, question):
        # TODO: for the first iteration question and memory are the same so
        # we can speedup the computation

        # facts is (num_batch * fact_length * memory_dim)
        # questions is (num_batch * memory_dim)
        # memory is (num_batch * memory_dim)
        # attention_gates must be (fact_length * nb_batch * 1)

        # Compute z (num_batch * fact_length * (7*memory_dim + 2))

        # Dimshuffle facts to get a shape of
        # (fact_length * num_batch * memory_dim)
        facts = facts.dimshuffle(1, 0, 2)

        # Pad questions and memory to be of shape
        # (_ * num_batch * memory_dim)
        memory = T.shape_padleft(memory)
        question = T.shape_padleft(question)

        to_concatenate = list()
        to_concatenate.extend([facts, memory, question])
        to_concatenate.extend([facts * question, facts * memory])
        to_concatenate.extend([T.abs_(facts - question),
                               T.abs_(facts - memory)])

        # z = concatenate(to_concatenate, axis=2)

        # TODO: to be continued for the moment just return ones
        return T.ones((facts.shape[1], facts.shape[0], 1))
Example #12
    def theano_setup(self):
    
        W = T.dmatrix('W')
        b = T.dvector('b')
        c = T.dvector('c')
        x = T.dmatrix('x')
    
        s = T.dot(x, W) + c
        # h = 1 / (1 + T.exp(-s))
        # h = T.nnet.sigmoid(s)
        h = T.tanh(s)
        # r = T.dot(h,W.T) + b
        # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b))
        ract = T.dot(h,W.T) + b
        r = self.output_scaling_factor * T.tanh(ract)
    
        #g  = function([W,b,c,x], h)
        #f  = function([W,b,c,h], r)
        #fg = function([W,b,c,x], r)
    
        # Another variable to be able to call a function
        # with a noisy x and compare it to a reference x.
        y = T.dmatrix('y')

        all_losses = ((r - y)**2)
        loss = T.sum(all_losses)
        #loss = ((r - y)**2).sum()
        
        self.theano_encode_decode = function([W,b,c,x], r)
        self.theano_all_losses = function([W,b,c,x,y], [all_losses, T.abs_(s), T.abs_(ract)])
        self.theano_gradients = function([W,b,c,x,y], [T.grad(loss, W), T.grad(loss, b), T.grad(loss, c)])
Example #13
def power_pool_2d(x, ds, p=3, b=0):
    n_batch, n_ch, s0, s1 = x.shape
    d0, d1 = ds
    c = tt.ones((s0, s1))

    # sum elements in regions
    y = tt.abs_(x[:, :, 0::d0, 0::d1])**p
    d = c[0::d0, 0::d1].copy()
    for i in range(0, d0):
        for j in range(0, d1):
            if i != 0 or j != 0:
                ni = (s0 - i - 1) // d0 + 1
                nj = (s1 - j - 1) // d1 + 1
                xij = tt.abs_(x[:, :, i::d0, j::d1])**p
                y = tt.inc_subtensor(y[:, :, :ni, :nj], xij)
                d = tt.inc_subtensor(d[:ni, :nj], c[i::d0, j::d1])

    # divide by number of elements
    y /= d
    y += b**p

    # take root
    y = y**(1. / p)

    return y
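# A minimal usage sketch for power_pool_2d above (assumes `import theano` and
# `import theano.tensor as tt`; pools an 8x8 map over non-overlapping 2x2 regions):
import numpy as np
import theano
import theano.tensor as tt

x = tt.tensor4('x')                        # (batch, channels, rows, cols)
pool = theano.function([x], power_pool_2d(x, ds=(2, 2), p=3))
out = pool(np.random.rand(1, 1, 8, 8).astype(theano.config.floatX))
print(out.shape)                           # -> (1, 1, 4, 4)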
Example #14
 def forward_jacobian_log_det(self, x):
     if x.ndim == 1:
         return tt.log(tt.abs_(self.diag_weights)).sum()
     elif x.ndim == 2:
         return x.shape[0] * tt.log(tt.abs_(self.diag_weights)).sum()
     else:
         raise ValueError('x must be one or two dimensional.')
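# Standalone sketch of the same quantity for a diagonal linear map y = diag(d) x:
# log|det J| = sum(log|d_i|), scaled by the number of rows for a 2-D batch (the
# variable names below are assumptions, not the class's own attributes):
import numpy as np
import theano
import theano.tensor as tt

diag_weights = tt.dvector('diag_weights')
x = tt.dmatrix('x')                                   # one input per row
log_det = x.shape[0] * tt.log(tt.abs_(diag_weights)).sum()
jac_log_det = theano.function([diag_weights, x], log_det)
print(jac_log_det(np.array([2.0, -3.0]), np.zeros((4, 2))))  # -> 4*(log 2 + log 3) ~= 7.167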
Example #15
def relevance_conv_a_b_abs(inputs, weights, out_relevances, a, b, bias=None):
    assert a is not None
    assert b is not None
    assert a - b == 1
    weights_plus = weights * T.gt(weights, 0)
    weights_neg = weights * T.lt(weights, 0)

    plus_norm = conv2d(T.abs_(inputs), weights_plus)
    # stabilize, prevent division by 0
    eps = 1e-4
    plus_norm += T.eq(plus_norm, 0) * eps
    plus_rel_normed = out_relevances / plus_norm
    in_rel_plus = conv2d(plus_rel_normed, weights_plus.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full")
    in_rel_plus *= T.abs_(inputs)

    # minuses to get positive outputs, since will be subtracted
    # at end of function
    neg_norm = -conv2d(T.abs_(inputs), weights_neg)
    neg_norm += T.eq(neg_norm, 0) * eps
    neg_rel_normed = out_relevances / neg_norm
    in_rel_neg = -conv2d(neg_rel_normed, weights_neg.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full")
    in_rel_neg *= T.abs_(inputs)

    in_relevance = a * in_rel_plus - b * in_rel_neg
    return in_relevance
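# A minimal usage sketch for relevance_conv_a_b_abs above (assumption: conv2d is
# theano.tensor.nnet.conv2d; a=2, b=1 satisfies the a - b == 1 check):
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d

inputs = T.tensor4('inputs')             # (batch, in_channels, height, width)
weights = T.tensor4('weights')           # (out_channels, in_channels, kh, kw)
out_rel = T.tensor4('out_relevances')    # shape of the conv layer's output
in_rel = relevance_conv_a_b_abs(inputs, weights, out_rel, a=2, b=1)
redistribute = theano.function([inputs, weights, out_rel], in_rel)

floatX = theano.config.floatX
print(redistribute(np.random.rand(1, 1, 5, 5).astype(floatX),
                   np.random.rand(2, 1, 3, 3).astype(floatX),
                   np.random.rand(1, 2, 3, 3).astype(floatX)).shape)  # -> (1, 1, 5, 5)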
Example #16
    def __init__(self, rng, input1, input2, n_in1, n_in2, n_hidden_layers, d_hidden, W1=None, W2=None):
        self.input1 = input1
        self.input2 = input2
        
        CouplingFunc = WarpNetwork(rng, input1, n_hidden_layers, d_hidden, n_in1, n_in2)  
        
        if W1 is None:
            bin = numpy.sqrt(6. / (n_in1 + n_in1))
            W1_values = numpy.identity(n_in1, dtype=theano.config.floatX)            
            W1 = theano.shared(value=W1_values, name='W1')

        if W2 is None:
            bin = numpy.sqrt(6. / (n_in2 + n_in2))
            W2_values = numpy.identity(n_in2, dtype=theano.config.floatX)
            W2 = theano.shared(value=W2_values, name='W2')

        V1u = T.triu(W1)
        V1l = T.tril(W1)
        V1l = T.extra_ops.fill_diagonal(V1l, 1.)
        V1 = T.dot(V1u, V1l) 
            
        V2u = T.triu(W2)
        V2l = T.tril(W2)
        V2l = T.extra_ops.fill_diagonal(V2l, 1.)
        V2 = T.dot(V2u, V2l) 
            
        self.output1 = T.dot(input1, V1)
        self.output2 = T.dot(input2, V2) + CouplingFunc.output

        self.log_jacobian = T.log(T.abs_(T.nlinalg.ExtractDiag()(V1u))).sum() \
            + T.log(T.abs_(T.nlinalg.ExtractDiag()(V2u))).sum() 

        self.params = CouplingFunc.params
Example #17
    def init_param_updates(self, layer, parameter):
        step = self.variables.step

        parameter_shape = T.shape(parameter).eval()
        prev_delta = theano.shared(
            name="{}/prev-delta".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )
        prev_gradient = theano.shared(
            name="{}/prev-grad".format(parameter.name),
            value=asfloat(np.zeros(parameter_shape)),
        )

        gradient = T.grad(self.variables.error_func, wrt=parameter)
        grad_delta = T.abs_(prev_gradient - gradient)

        parameter_delta = ifelse(
            T.eq(self.variables.epoch, 1),
            gradient,
            T.clip(
                T.abs_(prev_delta) * gradient / grad_delta,
                -self.upper_bound,
                self.upper_bound
            )
        )
        return [
            (parameter, parameter - step * parameter_delta),
            (prev_gradient, gradient),
            (prev_delta, parameter_delta),
        ]
Example #18
    def _calc_regularization_cost(self):
        """Calculate the regularization cost given the weight decay parameters.

        Only the parameters will be considered that are stored in the set
        self.regularize. We need to handle it manually in this class, because
        the weight matrices contain bias columns, which should not be considered
        in regularization computation. Therefore, do not!!! add W1 and W2 to
        self.regularize

        Returns
        -------
        theano variable
            regularization cost depending on the parameters to be regularized
            and the weight decay parameters for L1 and L2 regularization.
        """
        cost = super(SLmNce, self)._calc_regularization_cost()
        l1_cost = T.sum(T.abs_(self.W1[:, :-1]))
        l1_cost += T.sum(T.abs_(self.W2[:, :-1]))
        l2_cost = T.sum(T.sqr(self.W1[:, :-1]))
        l2_cost += T.sum(T.sqr(self.W2[:, :-1]))

        if self.l1_weight != 0:
            cost += self.l1_weight * l1_cost

        if self.l2_weight != 0:
            cost += self.l2_weight * l2_cost

        return cost
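# Standalone sketch of the penalty described in the docstring: L1/L2 over a weight
# matrix while skipping its trailing bias column (illustrative names only):
import numpy as np
import theano
import theano.tensor as T

W = T.dmatrix('W')                       # last column holds the bias
l1_cost = T.sum(T.abs_(W[:, :-1]))
l2_cost = T.sum(T.sqr(W[:, :-1]))
penalty = theano.function([W], [l1_cost, l2_cost])
print(penalty(np.array([[1., -2., 5.],
                        [3., -4., 7.]])))  # -> [10.0, 30.0]; the bias column is ignored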
Example #19
	def update_params(self, x1, x2, lrate):
		
		#this function samples from the joint posterior and performs
		# a step of gradient ascent on the log-likelihood
		
		sp=self.get_prediction(self.s_past)
		
		sp_big=T.reshape(T.extra_ops.repeat(sp,self.nsamps,axis=1).T,(self.ns, self.npcl*self.nsamps))
		
		#s2_idxs=self.sample_multinomial_vec(self.weights_now,4)
		bsamp=self.theano_rng.multinomial(pvals=T.extra_ops.repeat(T.reshape(self.weights_now,(1,self.npcl)),self.nsamps,axis=0))
		s2_idxs=T.dot(self.idx_vec,bsamp.T)
		
		s2_samps=self.s_now[s2_idxs] #ns by nsamps
		
		s2_big=T.extra_ops.repeat(s2_samps,self.npcl,axis=0).T #ns by npcl*nsamps
		
		diffs=T.sum(T.abs_(sp_big-s2_big)/self.br,axis=0)
		#diffs=T.sum(T.abs_(sp_big-s2_big),axis=0)
		probs_unnorm=self.weights_past*T.exp(-T.reshape(diffs,(self.nsamps,self.npcl)))
		
		#s1_idxs=self.sample_multinomial_mat(probs_unnorm,4)
		s1_idxs=T.dot(self.idx_vec,self.theano_rng.multinomial(pvals=probs_unnorm).T)
		s1_samps=self.s_past[s1_idxs]
		
		x2_recons=T.dot(self.W, s2_samps.T)
		
		s_pred = self.get_prediction(s1_samps)
		
		sterm=-T.mean(T.sum(T.abs_((s2_samps-s_pred)/self.b),axis=1)) - T.sum(T.log(self.b))
		
		#xterm1=-T.mean(T.sum((x1_recons-T.reshape(x1,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2))
		xterm2=-T.mean(T.sum((x2_recons-T.reshape(x2,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2))
		
		
		energy = xterm2 + sterm 
		
		learning_params=[self.params[i] for i in range(len(self.params)) if self.rel_lrates[i]!=0.0]
		learning_rel_lrates=[self.rel_lrates[i] for i in range(len(self.params)) if self.rel_lrates[i]!=0.0]
		gparams=T.grad(energy, learning_params, consider_constant=[s1_samps, s2_samps])
		
		updates={}
		
		# constructs the update dictionary
		for gparam, param, rel_lr in zip(gparams, learning_params, learning_rel_lrates):
			#gnat=T.dot(param, T.dot(param.T,param))
			if param==self.M:
				#I do this so the derivative of M doesn't depend on the sparsity parameters
				updates[param] = T.cast(param + gparam*T.reshape(self.b,(1,self.ns))*lrate*rel_lr,'float32')
				#updates[param] = T.cast(param + gparam*lrate*rel_lr,'float32')
			elif param==self.b:
				updates[param] = T.cast(param + gparam*T.reshape(1.0/self.b,(1,self.ns))*lrate*rel_lr,'float32')
			else:
				updates[param] = T.cast(param + gparam*lrate*rel_lr,'float32')
		
		newW=updates[self.W]
		updates[self.W]=newW/T.sqrt(T.sum(newW**2,axis=0))
		
		return energy, updates
Example #20
def kulczynski3_theano(X, W, b=None):
    """
    GMean of precision and recall
    """
    XW = T.dot(X, W.T)
    XX = T.abs_(X).sum(axis=1).reshape((-1, 1))
    WW = T.abs_(W).sum(axis=1).reshape((1, -1))
    return T.sqrt((XW / XX) * (XW / WW))
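# A minimal usage sketch for kulczynski3_theano above (assumes binary 0/1 rows, so the
# two ratios XW/XX and XW/WW are overlap fractions and the result is their geometric mean):
import numpy as np
import theano
import theano.tensor as T

X = T.dmatrix('X')
W = T.dmatrix('W')
kulczynski3 = theano.function([X, W], kulczynski3_theano(X, W))
x = np.array([[1., 1., 0., 0.]])
w = np.array([[1., 0., 1., 0.]])
print(kulczynski3(x, w))  # -> [[0.5]] == sqrt((1/2) * (1/2))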
Example #21
 def version1(self):
     
     Fhat = self.feedForward(self.dot())
     latFhat = T.dot(T.abs_(Fhat.T), self.distMat.T)
     latFhat = T.dot(latFhat, T.abs_(Fhat))
     self.latFhat = T.diagonal(latFhat)
     
     return self.latFhat
Example #22
	def define_loss(self):

		self.pred_func = - TT.sum(TT.abs_(self.e[self.rows,:] + self.r[self.cols,:] - self.e[self.tubes,:]),1)

		self.loss = TT.maximum( 0, self.margin + TT.sum(TT.abs_(self.e[self.rows[:self.batch_size],:] + self.r[self.cols[:self.batch_size],:] - self.e[self.tubes[:self.batch_size],:]),1) \
			- (1.0/self.neg_ratio) * TT.sum(TT.sum(TT.abs_(self.e[self.rows[self.batch_size:],:] + self.r[self.cols[self.batch_size:],:] - self.e[self.tubes[self.batch_size:],:]),1).reshape((int(self.batch_size),int(self.neg_ratio))),1) ).mean()

		self.regul_func = 0 
Example #23
def test_grad_clip():
    W = T.fmatrix()
    t = 2.
    y = T.switch(T.abs_(W) > t, t / T.abs_(W) * W, W)

    f = theano.function(inputs=[W], outputs=[y])
    w = [[1, -3], [-4, 1]]
    print f(w)
Example #24
	def new_attn_step(self,c_t,g_tm1,m_im1,q):
		cWq = T.stack([T.dot(T.dot(c_t, self.Wb), q)])
		cWm = T.stack([T.dot(T.dot(c_t, self.Wb), m_im1)])
		z = T.concatenate([c_t,m_im1,q,c_t*q,c_t*m_im1,T.abs_(c_t-q),T.abs_(c_t-m_im1),cWq,cWm],axis=0)
		l_1 = T.dot(self.W1, z) + self.b1
		l_1 = T.tanh(l_1)
		l_2 = T.dot(self.W2,l_1) + self.b2
		return l_2[0]
Example #25
 def mapped_log_density_theano(self, y):
     n_in_bounds = (tt.abs_(y) < self.b).sum()
     n_dim = y.shape[0]
     x = self.inverse_theano(y[(tt.abs_(y) >= self.b).nonzero()])
     return ((n_in_bounds - n_dim) * tt.log(np.pi * 2) / 2. -
             n_in_bounds * tt.log(2) +
             0.5 * (tt.log(self.beta**2 - 4 * self.alpha *
                    (self.gamma - tt.abs_(x))) - x**2).sum()
             )
Example #26
 def new_attention_step(self, ct, prev_g, mem, q_q):
     cWq = T.dot(T.ones((1, self.batch_size), dtype=floatX), T.dot(T.dot(ct.T, self.W_b), q_q) * T.eye(n=self.batch_size, m=self.batch_size, dtype=floatX))
     cWm = T.dot(T.ones((1, self.batch_size), dtype=floatX), T.dot(T.dot(ct.T, self.W_b), mem) * T.eye(n=self.batch_size, m=self.batch_size, dtype=floatX))
     z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem, T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm], axis=0)
     
     l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x')
     l_1 = T.tanh(l_1)
     l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x')
     G = T.nnet.sigmoid(l_2)[0]
     return G
Example #27
 def errors(self, y):
     
     # check if y has same dimension of y_pred
     if y.ndim != self.y_pred.ndim:
         raise TypeError('y should have the same shape as self.y_pred', ('y', y.type, 'y_pred', self.y_pred.type))
     # check if y is of the correct datatype
     if y.dtype.startswith('int'):
         return T.mean(T.abs_(y - self.y_pred))
     else:
         return T.mean(T.abs_(y - self.y_pred))
Example #28
 def new_attention_step(self, ct, prev_g, mem, q_q):
     cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)])
     cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)])
     z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem, T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm])
     
     l_1 = T.dot(self.W_1, z) + self.b_1
     l_1 = T.tanh(l_1)
     l_2 = T.dot(self.W_2, l_1) + self.b_2
     G = T.nnet.sigmoid(l_2)[0]
     return G
Example #29
    def theano_setup(self):
    
        # The matrices Wb and Wc were originally tied.
        # Because of that, I decided to keep Wb and Wc with
        # the same shape (instead of being transposed) to
        # avoid disturbing the code as much as possible.

        Wb = T.dmatrix('Wb')
        Wc = T.dmatrix('Wc')
        b = T.dvector('b')
        c = T.dvector('c')
        x = T.dmatrix('x')
    
        s = T.dot(x, Wc) + c
        # h = 1 / (1 + T.exp(-s))
        # h = T.nnet.sigmoid(s)
        h = T.tanh(s)
        # r = T.dot(h,W.T) + b
        # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b))
        ract = T.dot(h, Wb.T) + b
        r = self.output_scaling_factor * T.tanh(ract)
    
        # Another variable to be able to call a function
        # with a noisy x and compare it to a reference x.
        y = T.dmatrix('y')

        loss = ((r - y)**2)
        sum_loss = T.sum(loss)
        
        # theano_encode_decode : vectorial function in argument X.
        # theano_loss : vectorial function in argument X.
        # theano_gradients : returns triplet of gradients, each of
        #                    which involves the all data X summed
        #                    so it's not a "vectorial" function.

        self.theano_encode_decode = function([Wb,Wc,b,c,x], r)
        self.theano_loss = function([Wb,Wc,b,c,x,y], [loss, T.abs_(s), T.abs_(ract)])
        self.theano_gradients = function([Wb,Wc,b,c,x,y],
                                         [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc),
                                          T.grad(sum_loss, b),  T.grad(sum_loss, c)])

        # other useful theano functions for the experiments that involve
        # adding noise to the hidden states
        self.theano_encode = function([Wc,c,x], h)
        self.theano_decode = function([Wb,b,h], r)

        # A non-vectorial implementation of the jacobian
        # of the encoder. Meant to be used with only one x
        # at a time, returning a matrix.
        jacob_x = T.dvector('jacob_x')
        jacob_c = T.dvector('jacob_c')
        jacob_Wc = T.dmatrix('jacob_Wc')
        jacob_s = T.dot(jacob_x, jacob_Wc) + jacob_c
        jacob_h = T.tanh(jacob_s)
        self.theano_encoder_jacobian_single = function([jacob_Wc,jacob_c,jacob_x], gradient.jacobian(jacob_h,jacob_x,consider_constant=[jacob_Wc,jacob_c]))
Example #30
def get_model(input_var, target_var, multiply_var):

    # input layer with unspecified batch size
    layer_input     = InputLayer(shape=(None, 30, 80, 80), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var)
    layer_0         = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3))

    # Z-score?

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_1         = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_2         = batch_norm(Conv3DDNNLayer(incoming=layer_1, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_3         = MaxPool3DDNNLayer(layer_2, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_4         = DropoutLayer(layer_3, p=0.25)

    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_5         = batch_norm(Conv3DDNNLayer(incoming=layer_4, num_filters=32, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_6         = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=32, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_7         = MaxPool3DDNNLayer(layer_6, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_8         = DropoutLayer(layer_7, p=0.25)
    
    # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer
    layer_5         = batch_norm(Conv3DDNNLayer(incoming=layer_8, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_6         = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_7         = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify))
    layer_8         = MaxPool3DDNNLayer(layer_7, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1))
    layer_9         = DropoutLayer(layer_8, p=0.25)

    # LSTM
    layer         = DimshuffleLayer(layer_9, (0,2,1,3,4))
#    layer_prediction  = LSTMLayer(layer, num_units=2, only_return_final=True, learn_init=True, cell=Gate(linear))
    layer = LSTMLayer(layer, num_units=2, only_return_final=True, learn_init=True)
    layer_prediction = DenseLayer(layer, 2, nonlinearity=linear)

    # Output Layer
    # layer_hidden         = DenseLayer(layer_flatten, 500, nonlinearity=linear)
    # layer_prediction     = DenseLayer(layer_hidden, 2, nonlinearity=linear)

    # Loss
    prediction           = get_output(layer_prediction) / multiply_var**2
    loss                 = T.abs_(prediction - target_var)
    loss                 = loss.mean()

    #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum
    params               = get_all_params(layer_prediction, trainable=True)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network, disabling dropout layers.
    test_prediction      = get_output(layer_prediction, deterministic=True) / multiply_var**2
    test_loss            = T.abs_(test_prediction - target_var)
    test_loss            = test_loss.mean()

    # crps estimate
    crps                 = T.abs_(test_prediction - target_var).mean()/600
    
    return test_prediction, crps, loss, params
Example #31
def compute_norms(array, norm_axes=None):
    """ Compute incoming weight vector norms.

    Parameters
    ----------
    array : numpy array or Theano expression
        Weight or bias.
    norm_axes : sequence (list or tuple)
        The axes over which to compute the norm.  This overrides the
        default norm axes defined for the number of dimensions
        in `array`. When this is not specified and `array` is a 2D array,
        this is set to `(0,)`. If `array` is a 3D, 4D or 5D array, it is
        set to a tuple listing all axes but axis 0. The former default is
        useful for working with dense layers, the latter is useful for 1D,
        2D and 3D convolutional layers.
        Finally, in case `array` is a vector, `norm_axes` is set to an empty
        tuple, and this function will simply return the absolute value for
        each element. This is useful when the function is applied to all
        parameters of the network, including the bias, without distinction.
        (Optional)

    Returns
    -------
    norms : 1D array or Theano vector (1D)
        1D array or Theano vector of incoming weight/bias vector norms.

    Examples
    --------
    >>> array = np.random.randn(100, 200)
    >>> norms = compute_norms(array)
    >>> norms.shape
    (200,)

    >>> norms = compute_norms(array, norm_axes=(1,))
    >>> norms.shape
    (100,)
    """

    # Check if supported type
    if not isinstance(array, theano.Variable) and \
       not isinstance(array, np.ndarray):
        raise RuntimeError("Unsupported type {}. "
                           "Only theano variables and numpy arrays "
                           "are supported".format(type(array)))

    # Compute default axes to sum over
    ndim = array.ndim
    if norm_axes is not None:
        sum_over = tuple(norm_axes)
    elif ndim == 1:  # For Biases that are in 1d (e.g. b of DenseLayer)
        sum_over = ()
    elif ndim == 2:  # DenseLayer
        sum_over = (0, )
    elif ndim in [3, 4, 5]:  # Conv{1,2,3}DLayer
        sum_over = tuple(range(1, ndim))
    else:
        raise ValueError("Unsupported tensor dimensionality {}. "
                         "Must specify `norm_axes`".format(array.ndim))

    # Run numpy or Theano norm computation
    if isinstance(array, theano.Variable):
        # Apply theano version if it is a theano variable
        if len(sum_over) == 0:
            norms = T.abs_(array)  # abs if we have nothing to sum over
        else:
            norms = T.sqrt(T.sum(array**2, axis=sum_over))
    elif isinstance(array, np.ndarray):
        # Apply the numpy version if ndarray
        if len(sum_over) == 0:
            norms = abs(array)  # abs if we have nothing to sum over
        else:
            norms = np.sqrt(np.sum(array**2, axis=sum_over))

    return norms
Example #32
if use.load:
    args["W"], args["b"] = load_params()

layers.append(LogRegr(out, **args))
"""
layers[-1] : softmax layer
layers[-2] : hidden layer (video if late fusion)
layers[-3] : hidden layer (trajectory, only if late fusion)
"""

# cost function
cost = layers[-1].negative_log_likelihood(y)

if reg.L1_vid > 0 or reg.L2_vid > 0:
    # L1 and L2 regularization
    L1 = T.abs_(layers[-2].W).sum() + T.abs_(layers[-1].W).sum()
    L2 = (layers[-2].W**2).sum() + (layers[-1].W**2).sum()

    cost += reg.L1_vid * L1 + reg.L2_vid * L2

if net.fusion == "late":
    L1_traj = T.abs_(layers[-3].W).sum()
    L2_traj = (layers[-3].W**2).sum()
    cost += reg.L1_traj * L1_traj + reg.L2_traj * L2_traj

# function computing the number of errors
errors = layers[-1].errors(y)

# gradient descent
# ------------------------------------------------------------------------------
Example #33
def SGMGNHT_2(tparams, cost, inps, ntrain, lr, iterations, rho=0.9, epsilon=1e-6, resamp = 50, clip_norm=1):
    """ Additional parameters """
    mom_tparams = OrderedDict()
    xi_tparams = OrderedDict()
    #rng = np.random.RandomState(3435)
    #+ rng.normal(0,1,p0.shape())
    for k, p0 in tparams.iteritems():
        mom_tparams[k] = theano.shared(p0.get_value() * 0. +1e-1, name='%s_mom'%k) 
        xi_tparams[k] = theano.shared(p0.get_value() * 0. + 10.0, name='%s_xi'%k) 
    
    #a = theano.shared(numpy_floatX(2.))
    # m = theano.shared(numpy_floatX(1.))
    # c = theano.shared(numpy_floatX(1.))
    # sigma_p = theano.shared(numpy_floatX(10.))
    # sigma_xi = theano.shared(numpy_floatX(0.01))
    # sigma_theta = theano.shared(numpy_floatX(0.1))
    # gamma = theano.shared(numpy_floatX(1.))
    
    m = theano.shared(numpy_floatX(1.))
    c = theano.shared(numpy_floatX(3.))
    sigma_p = theano.shared(numpy_floatX(0.01))
    sigma_mom = theano.shared(numpy_floatX(10.))
    sigma_xi = theano.shared(numpy_floatX(0.01))
    gamma = theano.shared(numpy_floatX(1.0))
    
    logger = logging.getLogger('eval_ptb_sgmgnht')
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler('eval_ptb_sgmgnht.log')
    logger.info('a = 1, m {} c {} s_p{} s_mom{} s_xi{} g_xi{}'.format( m.get_value(), c.get_value(), sigma_p.get_value(), sigma_mom.get_value(), sigma_xi.get_value(), gamma.get_value()))
    
    p = tensor.vector('p', dtype='float32')
    
    """ default: lr=0.001 """
    
    trng = RandomStreams(123)
    
    grads = tensor.grad(cost, tparams.values())
    
    # clip norm
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    if tensor.ge(norm, clip_norm):
        grads = [g*clip_norm/norm for g in grads]
        
    gshared = [theano.shared(p0.get_value() * 0., name='%s_grad'%k) 
                for k, p0 in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)     
    
    updates = []
 
    for p, mom, xi, g in zip(tparams.values(),mom_tparams.values(),xi_tparams.values(), gshared):
        
        g_f = (tensor.sqrt(tensor.abs_(mom+1e-100)))/m
        K_f = g_f + 4/c/(1 + tensor.exp(c*g_f))
        
        psi_f_1 = -1 + 2/( 1 + tensor.exp(-c*g_f))
        f1_f_1 = 1/2.0/m**2 *psi_f_1**2 /g_f*tensor.sgn(mom)
        #f1_f_1 = 1/2.0/m*psi_f_1**2* tensor.abs_(mom+1e-100)**(-1/2) *tensor.sgn(mom)
        psi_grad_f_1 = 2*c*tensor.exp(- c*g_f)/(1 + tensor.exp(-c*g_f))**2
        f3_f_1 =  f1_f_1**2 - 1/2.0/m**2 * psi_f_1 * psi_grad_f_1 / tensor.abs_(mom) + 1/4.0/m * psi_f_1**2 * (tensor.abs_(mom+1e-100)**(-1.5))
        
        
#        psi_f = (tensor.exp(c*g_f) - 1)/(tensor.exp(c*g_f) + 1)
#        f1_f = 1/2/m*psi_f**2 * (tensor.abs_(mom+1e-100)**(-1/2))*tensor.sgn(mom)
#        psi_grad_f = 2*c*tensor.exp(c*g_f)/(tensor.exp(c*g_f) + 1)**2
#        f3_f =  f1_f**2 - c/2/m**2 * psi_f * psi_grad_f / tensor.abs_(mom) + 1/4/m * psi_f**2 * (tensor.abs_(mom+1e-100)**(-3/2))
 
#        temp_f1 = tensor.switch(tensor.ge(g_f,0), f1_f_1, f1_f)
#        temp_f3 = tensor.switch(tensor.ge(g_f,0), f3_f_1, f3_f)       

        temp_f1 = f1_f_1
        temp_f3 = f3_f_1
        
        noise_p = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')
        noise_mom = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')
        noise_xi = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')

        # generata gamma(a,2): N(0,1)^2 = gamma(1/2,2)
        noise_temp = tensor.zeros(p.get_value().shape)
        for aa in xrange(4):
            this_noise = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32')
            noise_temp = tensor.inc_subtensor(noise_temp[:], this_noise**2)
        randmg = (noise_temp*m/2)**2*tensor.sgn(trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32'))    

        updated_p = p +  temp_f1 * lr - g * lr * ntrain * sigma_p + tensor.sqrt(2*sigma_p*lr) * noise_p
        updated_mom = (mom - temp_f1* xi *lr  - g * lr * ntrain + tensor.sqrt(2*sigma_mom*lr) * noise_mom)* (1-tensor.eq(tensor.mod(iterations,resamp),0)) + randmg * tensor.eq(tensor.mod(iterations,resamp),0)
        #updated_mom = mom - temp_f1* xi *lr  - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p
        temp_xi = trng.normal(p.get_value().shape, avg = sigma_mom, std = tensor.sqrt(sigma_xi/2) , dtype='float32')
        updated_xi = (xi + temp_f3* gamma * lr - (xi - sigma_mom)*sigma_xi/(gamma+1e-10)*lr + tensor.sqrt(2*sigma_xi*lr) * noise_xi) * (1-tensor.eq(tensor.mod(iterations,resamp),resamp/2)) + temp_xi * tensor.eq(tensor.mod(iterations,resamp),resamp/2)

        updates.append((p, updated_p))
        updates.append((mom, updated_mom))
        updates.append((xi, updated_xi))
    
    f_update = theano.function([lr,ntrain,iterations], [p,mom,xi], updates=updates)
    #f_params = theano.function([], [a, m, c, mom.shape])
    return f_grad_shared, f_update
Example #34
 def l1(self, s1w, s2w):
     return -T.abs_(s1w - s2w)
Example #35
def run_mlp(step, momentum, decay, n_hidden_full, n_hidden_conv,
            hidden_full_transfers, hidden_conv_transfers, filter_shapes,
            pool_size, par_std, batch_size, opt, L2, counter, X, Z, TX, TZ,
            image_height, image_width, nouts):

    print step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, image_height, image_width, nouts
    seed = 3453
    np.random.seed(seed)
    batch_size = batch_size
    #max_iter = max_passes * X.shape[ 0] / batch_size
    max_iter = 25000000
    n_report = X.shape[0] / batch_size
    weights = []
    #input_size = len(X[0])
    #Normalize
    mean = X.mean(axis=0)
    std = (X - mean).std()
    X = (X - mean) / std
    TX = (TX - mean) / std

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    optimizer = opt, {'step_rate': step, 'momentum': momentum, 'decay': decay}

    typ = 'Lenet'

    if typ == 'Lenet':
        m = Lenet(image_height,
                  image_width,
                  1,
                  X,
                  Z,
                  n_hiddens_conv=n_hidden_conv,
                  filter_shapes=filter_shapes,
                  pool_shapes=pool_size,
                  n_hiddens_full=n_hidden_full,
                  n_output=nouts,
                  hidden_transfers_conv=hidden_conv_transfers,
                  hidden_transfers_full=hidden_full_transfers,
                  out_transfer='identity',
                  loss='squared',
                  optimizer=optimizer,
                  batch_size=batch_size,
                  max_iter=max_iter)

    elif typ == 'SimpleCnn2d':
        m = SimpleCnn2d(2099, [400, 100],
                        1,
                        X,
                        Z,
                        TX,
                        TZ,
                        hidden_transfers=['tanh', 'tanh'],
                        out_transfer='identity',
                        loss='squared',
                        p_dropout_inpt=.1,
                        p_dropout_hiddens=.2,
                        optimizer=optimizer,
                        batch_size=batch_size,
                        max_iter=max_iter)

    climin.initialize.randomize_normal(m.parameters.data, 0, par_std)
    #m.parameters.data[...] = np.random.normal(0, 0.01, m.parameters.data.shape)

    # Transform the test data
    #TX = m.transformedData(TX)
    m.init_weights()
    TX = np.array([TX for _ in range(10)]).mean(axis=0)
    print TX.shape

    losses = []
    print 'max iter', max_iter

    X, Z, TX, TZ = [
        breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ)
    ]

    for layer in m.lenet.mlp.layers:
        weights.append(m.parameters[layer.weights])

    weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum()
                    #+ (weights[2]**2).sum()
                    )

    weight_decay /= m.exprs['inpt'].shape[0]
    m.exprs['true_loss'] = m.exprs['loss']
    c_wd = L2
    m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay

    mae = T.abs_((m.exprs['output'] * np.std(train_labels) +
                  np.mean(train_labels)) - m.exprs['target']).mean()
    f_mae = m.function(['inpt', 'target'], mae)

    rmse = T.sqrt(
        T.square((m.exprs['output'] * np.std(train_labels) +
                  np.mean(train_labels)) - m.exprs['target']).mean())
    f_rmse = m.function(['inpt', 'target'], rmse)

    start = time.time()
    # Set up a nice printout.
    keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test'
    max_len = max(len(i) for i in keys)
    header = '\t'.join(i for i in keys)
    print header
    print '-' * len(header)
    results = open('result_hp.txt', 'a')
    results.write(header + '\n')
    results.write('-' * len(header) + '\n')
    results.close()

    EXP_DIR = os.getcwd()
    base_path = os.path.join(EXP_DIR, "pars_hp" + str(counter) + ".pkl")
    n_iter = 0

    if os.path.isfile(base_path):
        with open("pars_hp" + str(counter) + ".pkl", 'rb') as tp:
            n_iter, best_pars = cp.load(tp)
            m.parameters.data[...] = best_pars

    for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)):
        if info['n_iter'] % n_report != 0:
            continue
        passed = time.time() - start
        losses.append((info['loss'], info['val_loss']))
        info.update({
            'time': passed,
            'mae_train': f_mae(X, train_labels),
            'rmse_train': f_rmse(X, train_labels),
            'mae_test': f_mae(TX, test_labels),
            'rmse_test': f_rmse(TX, test_labels)
        })

        info['n_iter'] += n_iter

        row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info
        results = open('result_hp.txt', 'a')
        print row
        results.write(row + '\n')
        results.close()
        with open("pars_hp" + str(counter) + ".pkl", 'wb') as fp:
            cp.dump((info['n_iter'], info['best_pars']), fp)
        with open("hps" + str(counter) + ".pkl", 'wb') as tp:
            cp.dump((step, momentum, decay, n_hidden_full, n_hidden_conv,
                     hidden_full_transfers, hidden_conv_transfers,
                     filter_shapes, pool_size, par_std, batch_size, opt, L2,
                     counter, info['n_iter']), tp)

    m.parameters.data[...] = info['best_pars']
    cp.dump(info['best_pars'], open('best_pars.pkl', 'wb'))

    Y = m.predict(X)
    TY = m.predict(TX)

    output_train = Y * np.std(train_labels) + np.mean(train_labels)
    output_test = TY * np.std(train_labels) + np.mean(train_labels)

    print 'TRAINING SET\n'
    print('MAE:  %5.2f kcal/mol' %
          np.abs(output_train - train_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' %
          np.square(output_train - train_labels).mean(axis=0)**.5)

    print 'TESTING SET\n'
    print('MAE:  %5.2f kcal/mol' %
          np.abs(output_test - test_labels).mean(axis=0))
    print('RMSE: %5.2f kcal/mol' %
          np.square(output_test - test_labels).mean(axis=0)**.5)

    mae_train = np.abs(output_train - train_labels).mean(axis=0)
    rmse_train = np.square(output_train - train_labels).mean(axis=0)**.5
    mae_test = np.abs(output_test - test_labels).mean(axis=0)
    rmse_test = np.square(output_test - test_labels).mean(axis=0)**.5

    results = open('result.txt', 'a')
    results.write('Training set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" % mae_train)
    results.write('\nRMSE:\n')
    results.write("%5.2f" % rmse_train)
    results.write('\nTesting set:\n')
    results.write('MAE:\n')
    results.write("%5.2f" % mae_test)
    results.write('\nRMSE:\n')
    results.write("%5.2f" % rmse_test)

    results.close()
Example #36
def do_regression(
        num_epochs=60,  # No. of epochs to train
        init_file=None,  # Saved parameters to initialise training
        epoch_size=680780,  # Whole dataset size
        valid_size=34848,
        train_batch_multiple=10637,  # No. of minibatches per batch
        valid_batch_multiple=1089,  # No. of minibatches per batch
        train_minibatch_size=64,
        valid_minibatch_size=32,
        eval_multiple=50,  # No. of minibatches to ave. in report
        save_model=True,
        input_width=19,
        rng_seed=100009,
        cross_val=0,  # Cross-validation subset label
        dataver=1,  # Label for different runs/architectures/etc
        rate_init=1.0,
        rate_decay=0.999983):

    ###################################################
    ################# 0. User inputs ##################
    ###################################################
    for i in range(1, len(sys.argv)):
        if sys.argv[i].startswith('-'):
            option = sys.argv[i][1:]
            if option == 'i': init_file = sys.argv[i + 1]
            elif option[0:2] == 'v=': dataver = int(option[2:])
            elif option[0:3] == 'cv=': cross_val = int(option[3:])
            elif option[0:3] == 'rs=': rng_seed = int(option[3:])
            elif option[0:3] == 'ri=': rate_init = np.float32(option[3:])
            elif option[0:3] == 'rd=': rate_decay = np.float32(option[3:])

    print("Running with dataver %s" % (dataver))
    print("Running with cross_val %s" % (cross_val))

    ###################################################
    ############# 1. Housekeeping values ##############
    ###################################################
    # Batch size is possibly not equal to epoch size due to memory limits
    train_batch_size = train_batch_multiple * train_minibatch_size
    assert epoch_size >= train_batch_size

    # Number of times we expect the training/validation generator to be called
    max_train_gen_calls = (num_epochs * epoch_size) // train_batch_size

    # Number of evaluations (total minibatches / eval_multiple)
    num_eval = max_train_gen_calls * train_batch_multiple / eval_multiple

    ###################################################
    ###### 2. Define model and theano variables #######
    ###################################################
    if rng_seed is not None:
        print("Setting RandomState with seed=%i" % (rng_seed))
        rng = np.random.RandomState(rng_seed)
        set_rng(rng)

    print("Defining variables...")
    index = T.lscalar()  # Minibatch index
    x = T.tensor3('x')  # Inputs
    y = T.fvector('y')  # Target

    print("Defining model...")
    network_0 = build_1Dregression_v1(input_var=x,
                                      input_width=input_width,
                                      nin_units=12,
                                      h_num_units=[64, 128, 256, 128, 64],
                                      h_grad_clip=1.0,
                                      output_width=1)

    if init_file is not None:
        print("Loading initial model parametrs...")
        init_model = np.load(init_file)
        init_params = init_model[init_model.files[0]]
        LL.set_all_param_values([network_0], init_params)

    ###################################################
    ################ 3. Import data ###################
    ###################################################
    # Loading data generation model parameters
    print("Defining shared variables...")
    train_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    train_set_x = theano.shared(np.zeros((1, 1, 1),
                                         dtype=theano.config.floatX),
                                borrow=True)

    valid_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX),
                                borrow=True)
    valid_set_x = theano.shared(np.zeros((1, 1, 1),
                                         dtype=theano.config.floatX),
                                borrow=True)

    # Validation data (pick a single augmented instance, rand0 here)
    print("Creating validation data...")
    chunk_valid_data = np.load(
        "./valid/data_valid_augmented_cv%s_t%s_rand0.npy" %
        (cross_val, input_width)).astype(theano.config.floatX)
    chunk_valid_answers = np.load("./valid/data_valid_expected_cv%s.npy" %
                                  (cross_val)).astype(theano.config.floatX)

    print "chunk_valid_answers.shape", chunk_valid_answers.shape
    print("Assigning validation data...")
    valid_set_y.set_value(chunk_valid_answers[:])
    valid_set_x.set_value(chunk_valid_data.transpose(0, 2, 1))

    # Create output directory
    if not os.path.exists("output_cv%s_v%s" % (cross_val, dataver)):
        os.makedirs("output_cv%s_v%s" % (cross_val, dataver))

    ###################################################
    ########### 4. Create Loss expressions ############
    ###################################################
    print("Defining loss expressions...")
    prediction_0 = LL.get_output(network_0)
    train_loss = aggregate(T.abs_(prediction_0 - y.dimshuffle(0, 'x')))

    valid_prediction_0 = LL.get_output(network_0, deterministic=True)
    valid_loss = aggregate(T.abs_(valid_prediction_0 - y.dimshuffle(0, 'x')))

    ###################################################
    ############ 5. Define update method  #############
    ###################################################
    print("Defining update choices...")
    params = LL.get_all_params(network_0, trainable=True)
    learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX)

    updates = lasagne.updates.adadelta(train_loss,
                                       params,
                                       learning_rate=learn_rate)

    ###################################################
    ######### 6. Define train/valid functions #########
    ###################################################
    print("Defining theano functions...")
    train_model = theano.function(
        [index, learn_rate],
        train_loss,
        updates=updates,
        givens={
            x:
            train_set_x[(index * train_minibatch_size):((index + 1) *
                                                        train_minibatch_size)],
            y:
            train_set_y[(index * train_minibatch_size):((index + 1) *
                                                        train_minibatch_size)]
        })

    validate_model = theano.function(
        [index],
        valid_loss,
        givens={
            x:
            valid_set_x[index * valid_minibatch_size:(index + 1) *
                        valid_minibatch_size],
            y:
            valid_set_y[index * valid_minibatch_size:(index + 1) *
                        valid_minibatch_size]
        })

    ###################################################
    ################ 7. Begin training ################
    ###################################################
    print("Begin training...")
    sys.stdout.flush()

    cum_iterations = 0
    this_train_loss = 0.0
    this_valid_loss = 0.0
    best_valid_loss = np.inf
    best_iter = 0

    train_eval_scores = np.empty(num_eval)
    valid_eval_scores = np.empty(num_eval)
    eval_index = 0
    aug_index = 0

    for batch in xrange(max_train_gen_calls):
        start_time = time.time()
        chunk_train_data = np.load(
            "./train/data_train_augmented_cv%s_t%s_rand%s.npy" %
            (cross_val, input_width, aug_index)).astype(theano.config.floatX)
        chunk_train_answers = np.load("./train/data_train_expected_cv%s.npy" %
                                      (cross_val)).astype(theano.config.floatX)

        train_set_y.set_value(chunk_train_answers[:])
        train_set_x.set_value(chunk_train_data.transpose(0, 2, 1))

        # Iterate over minibatches in each batch
        for mini_index in xrange(train_batch_multiple):
            this_rate = np.float32(rate_init * (rate_decay**cum_iterations))
            this_train_loss += train_model(mini_index, this_rate)
            cum_iterations += 1

            # Report loss
            if (cum_iterations % eval_multiple == 0):
                this_train_loss = this_train_loss / eval_multiple
                this_valid_loss = np.mean(
                    [validate_model(i) for i in xrange(valid_batch_multiple)])
                train_eval_scores[eval_index] = this_train_loss
                valid_eval_scores[eval_index] = this_valid_loss

                # Save report every five evaluations
                if ((eval_index + 1) % 5 == 0):
                    np.savetxt("output_cv%s_v%s/training_scores.txt" %
                               (cross_val, dataver),
                               train_eval_scores,
                               fmt="%.5f")
                    np.savetxt("output_cv%s_v%s/validation_scores.txt" %
                               (cross_val, dataver),
                               valid_eval_scores,
                               fmt="%.5f")
                    np.savetxt("output_cv%s_v%s/last_learn_rate.txt" %
                               (cross_val, dataver), [np.array(this_rate)],
                               fmt="%.5f")

                # Save model if best validation score
                if (this_valid_loss < best_valid_loss):
                    best_valid_loss = this_valid_loss
                    best_iter = cum_iterations - 1

                    if save_model:
                        np.savez(
                            "output_cv%s_v%s/model.npz" % (cross_val, dataver),
                            LL.get_all_param_values(network_0))

                # Reset evaluation reports
                eval_index += 1
                this_train_loss = 0.0
                this_valid_loss = 0.0

        aug_index += 1

        end_time = time.time()
        print("Computing time for batch %d: %f" %
              (batch, end_time - start_time))

    print("Best validation loss %f after %d epochs" %
          (best_valid_loss, (best_iter * train_minibatch_size // epoch_size)))

    del train_set_x, train_set_y, valid_set_x, valid_set_y
    gc.collect()

    return None
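# A hedged sketch, not part of the original script: restoring the parameters
# saved above with np.savez into the same (re-built) network_0 architecture.
# allow_pickle is needed because np.savez stores the list of arrays as an
# object array.
import numpy as np
import lasagne.layers as LL

saved = np.load("output_cv%s_v%s/model.npz" % (cross_val, dataver),
                allow_pickle=True)
LL.set_all_param_values(network_0, list(saved['arr_0']))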
Example #37
0
 def _step(i, t, s):
     t *= (i - b) * value / i
     step = t / (a + i)
     s += step
     return ((t, s), until(tt.abs_(step) < threshold))
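# A hedged sketch (the scalars a, b, value and threshold here are assumptions,
# not taken from the example): a step like _step above is normally driven by
# theano.scan over a bounded index range, with the `until` condition stopping
# the loop early once the series term falls below the threshold.
import theano
import theano.tensor as tt

a, b, value, threshold = tt.dscalars('a', 'b', 'value', 'threshold')

def _series_step(i, t, s):
    t *= (i - b) * value / i
    step = t / (a + i)
    s += step
    return [t, s], theano.scan_module.until(tt.abs_(step) < threshold)

(terms, partial_sums), _ = theano.scan(
    _series_step,
    sequences=tt.arange(1, 200, dtype='float64'),
    outputs_info=[tt.ones_like(value), tt.zeros_like(value)])

series = theano.function([a, b, value, threshold], partial_sums[-1])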
Example #38
0
def SGMGHMC_old(tparams, cost, inps, ntrain, lr, iterations, rho=0.9, epsilon=1e-6, a_i = 2, clip_norm=5):
    """ Additional parameters """
    mom_tparams = OrderedDict()
    xi_tparams = OrderedDict()
    for k, p0 in tparams.iteritems():
        mom_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_mom'%k) 
        xi_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_xi'%k) 
    
    a = theano.shared(numpy_floatX(1.))
    m = theano.shared(numpy_floatX(1.))
    c = theano.shared(numpy_floatX(5.))
    sigma_p = theano.shared(numpy_floatX(10.))
    sigma_xi = theano.shared(numpy_floatX(1.))
    gamma_xi = theano.shared(numpy_floatX(0.001))
    logger = logging.getLogger('eval_ptb_sgmgnht')
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler('eval_ptb_sgmgnht.log')
    logger.info('a {} m {} c {} s_p{} s_xi{} g_xi{}'.format(a.get_value(), m.get_value(), c.get_value(), sigma_p.get_value(), sigma_xi.get_value(), gamma_xi.get_value()))
    
    p = tensor.vector('p', dtype='float32')
    
    """ default: lr=0.001 """
    
    trng = RandomStreams(123)
    
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    # Clip by global norm symbolically: a Python `if` on a symbolic comparison
    # does not branch when the graph is built, so use tensor.switch instead.
    grads = [tensor.switch(tensor.ge(norm, clip_norm), g * clip_norm / norm, g)
             for g in grads]
        
    gshared = [theano.shared(p0.get_value() * 0., name='%s_grad'%k) 
                for k, p0 in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)     
    
    updates = []
 
    for p, mom, xi, g in zip(tparams.values(),mom_tparams.values(),xi_tparams.values(), gshared):
        
        g_f = tensor.sgn(mom)/m*(tensor.abs_(mom)**(1/a))
        K_f = -g_f + 2/c*(c*g_f + tensor.log(1+tensor.exp(-c*g_f)))
        
        psi_f_1 = (1- tensor.exp(-c*g_f) )/( 1 + tensor.exp(-c*g_f) )
        f1_f_1 = 1/m/a*psi_f_1*(tensor.abs_(mom+1e-100)**(1/a-1))
        psi_grad_f_1 = 2*c*tensor.exp(- c*g_f)/(1 + tensor.exp(-c*g_f))**2
        f3_f_1 =  1/m**2/a**2*(psi_f_1**2-psi_grad_f_1)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f_1*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2)
        
        psi_f = (tensor.exp(c*g_f) - 1)/(tensor.exp(c*g_f) + 1)
        f1_f = 1/m/a*psi_f*(tensor.abs_(mom+1e-100)**(1/a-1))
        psi_grad_f = 2*c*tensor.exp(c*g_f)/(tensor.exp(c*g_f) + 1)**2
        f3_f =  1/m**2/a**2*(psi_f**2-psi_grad_f)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2)
 
        temp_f1 = tensor.switch(tensor.ge(g_f,0), f1_f_1, f1_f)
        temp_f3 = tensor.switch(tensor.ge(g_f,0), f3_f_1, f3_f)       


        noise_p = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')
        noise_xi = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')
        # generata gamma(a,2): N(0,1)^2 = gamma(1/2,2)
        noise_temp = tensor.zeros(p.get_value().shape)
        for aa in xrange(a_i*2):
            this_noise = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32')
            noise_temp = tensor.inc_subtensor(noise_temp[:], this_noise**2)
        randmg = (noise_temp*m/2)**a*tensor.sgn(trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32'))    

        
        updated_p = p + temp_f1 * lr
        updated_mom = ((mom - temp_f1 * xi * lr - g * lr * ntrain
                        + tensor.sqrt(2 * sigma_p * lr) * noise_p)
                       * (1 - tensor.eq(tensor.mod(iterations, 100), 0))
                       + randmg * tensor.eq(tensor.mod(iterations, 100), 0))
        #updated_mom = mom - temp_f1* xi *lr  - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p
        temp_xi = trng.normal(p.get_value().shape, avg = sigma_p, std = tensor.sqrt(sigma_xi/2) , dtype='float32')
        updated_xi = (xi + temp_f3* sigma_xi * lr - (xi - sigma_p)*gamma_xi*lr + tensor.sqrt(2*sigma_xi*gamma_xi*lr) * noise_xi) * (1-tensor.eq(tensor.mod(iterations,100),50)) + temp_xi * tensor.eq(tensor.mod(iterations,100),50)
        

        updates.append((p, updated_p))
        updates.append((mom, updated_mom))
        updates.append((xi, updated_xi))
    
    f_update = theano.function([lr,ntrain,iterations], [p,mom,xi], updates=updates)
    #f_params = theano.function([], [a, m, c, mom.shape])
    return f_grad_shared, f_update
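# A hedged usage sketch with hypothetical names (tparams, cost, x, y,
# minibatches, n_examples): lr, ntrain and iterations must be symbolic
# scalars because the returned f_update is compiled over them.
lr = tensor.scalar('lr')
ntrain = tensor.scalar('ntrain')
iterations = tensor.scalar('iterations')
f_grad_shared, f_update = SGMGHMC_old(tparams, cost, [x, y],
                                      ntrain, lr, iterations)

for it, (xb, yb) in enumerate(minibatches):
    batch_cost = f_grad_shared(xb, yb)   # fills the shared gradient slots
    f_update(1e-3, n_examples, it)       # applies one SGMGHMC parameter update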
Example #39
0
def mae_clip(y_true, y_pred):
    """Return the MAE with clipping to provide resistance to outliers"""
    CLIP_VALUE = 6
    return T.clip(T.abs_(y_true - y_pred), 0, CLIP_VALUE).mean(axis=-1)
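# A quick numeric check of mae_clip (standalone sketch): a 100-unit error is
# clipped to CLIP_VALUE=6 before averaging, so one outlier no longer
# dominates the row mean.
import numpy as np
import theano
import theano.tensor as T

y_true, y_pred = T.matrices('y_true', 'y_pred')
clipped_mae = theano.function([y_true, y_pred], mae_clip(y_true, y_pred))

yt = np.zeros((1, 3)).astype(theano.config.floatX)
yp = np.array([[1., 2., 100.]]).astype(theano.config.floatX)
print(clipped_mae(yt, yp))   # [3.] instead of the unclipped mean of ~34.3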
Example #40
0
# BEGAN variables
Disc_output_real = lasagne.layers.get_output(Disc_out_layer,inputs=input_var)
Disc_output_fake = lasagne.layers.get_output(Disc_out_layer,
                                              inputs=lasagne.layers.get_output(Gen_out_layer,
                                                                               inputs=noise_var))
Gen_output = lasagne.layers.get_output(Gen_out_layer,inputs=noise_var)

# Classifier variables
NetB_all_input = T.concatenate([lasagne.layers.get_output(Gen_out_layer,inputs=noise_var),
                                input_var], axis=0)

NetB_output_all = T.add(lasagne.layers.get_output(networkBOut,inputs=NetB_all_input),np.finfo(np.float32).eps)

# BEGAN losses
Disc_loss_real = T.abs_(Disc_output_real - input_var)
Disc_loss_real = Disc_loss_real.mean()

Disc_loss_fake = T.abs_(Disc_output_fake - Gen_output)
Disc_loss_fake = Disc_loss_fake.mean()

Gen_loss = T.abs_(Disc_output_fake - Gen_output)
Gen_loss = Gen_loss.mean()

# Classifier losses
#NetB_loss_fake = lasagne.objectives.categorical_crossentropy(NetB_output_fake,targets_fake)
#NetB_loss_fake = NetB_loss_fake.mean()

NetB_loss_all = lasagne.objectives.binary_crossentropy(NetB_output_all,target_varB)
NetB_loss_all = NetB_loss_all.mean()
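# A hedged sketch, not part of the snippet above, of how BEGAN typically ties
# these terms together: the discriminator minimises L_real - k_t * L_fake,
# the generator minimises its reconstruction term, and the scalar k_t is
# nudged towards the equilibrium gamma * L_real = L_fake. The gamma and
# lambda_k values are assumptions.
import numpy as np
import theano

gamma = 0.5
lambda_k = 0.001
k_t = theano.shared(np.float32(0.0), name='k_t')

Disc_loss = Disc_loss_real - k_t * Disc_loss_fake
balance = gamma * Disc_loss_real - Gen_loss
k_t_new = T.clip(k_t + lambda_k * balance, 0.0, 1.0)  # add (k_t, k_t_new) to the D updates
convergence_measure = Disc_loss_real + T.abs_(balance)  # BEGAN's global convergence measure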
Example #41
0
def student_t_likelihood(
    new_cases_inferred,
    pr_beta_sigma_obs=30,
    nu=4,
    offset_sigma=1,
    model=None,
    data_obs=None,
    name_student_t="_new_cases_studentT",
    name_sigma_obs="sigma_obs"
):
    """
        Set the likelihood to apply to the model observations (`model.new_cases_obs`)
        We assume a student-t distribution, the mean of the distribution matches `new_cases_inferred` as provided.

        Parameters
        ----------
        new_cases_inferred : array
            One or two dimensional array.
            If 2 dimensional, the first dimension is time and the second
            indexes the regions/countries.

        pr_beta_sigma_obs : float

        nu : float
            How flat the tail of the distribution is. A larger nu should make the model
            more robust to outliers

        offset_sigma : float

        model:
            The model on which we want to add the distribution

        data_obs : array
            The data that is observed. By default it is ``model.new_cases_obs``

        name_student_t :
            The name under which the studentT distribution is saved in the trace.

        name_sigma_obs :
            The name under which the distribution of the observable error is saved in the trace

        Returns
        -------
        None

        TODO
        ----
        #@jonas, can we make it more clear that this whole stuff gets attached to the
        # model? like the with model as context...
        #@jonas doc description for sigma parameters

    """

    model = modelcontext(model)

    len_sigma_obs = () if model.sim_ndim == 1 else model.sim_shape[1]
    sigma_obs = pm.HalfCauchy(name_sigma_obs, beta=pr_beta_sigma_obs, shape=len_sigma_obs)

    if data_obs is None:
        data_obs = model.new_cases_obs

    pm.StudentT(
        name=name_student_t,
        nu=nu,
        mu=new_cases_inferred[: len(data_obs)],
        sigma=tt.abs_(new_cases_inferred[: len(data_obs)] + offset_sigma) ** 0.5
        * sigma_obs,  # offset and tt.abs to avoid nans
        observed=data_obs,
    )
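# A hedged usage sketch with a hypothetical model name: because the function
# calls modelcontext(model), it can be handed the model explicitly or be
# called inside the usual `with model:` block.
with cov19_model:  # a model exposing new_cases_obs, sim_ndim and sim_shape
    student_t_likelihood(new_cases_inferred=new_cases_inferred, nu=4)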
Example #42
0
def main(args):
    
    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_dis1-nosch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args['save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k']) #a mixture of K Gaussian functions
    lr = float(args['lr'])
    typeLoad = int(args['typeLoad'])
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 150#250
    x2s_dim = 100#250
    y2s_dim = 100
    z2s_dim = 100#150
    target_dim = k#x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(data_path, windows, appliances,numApps=flgAgg, period=period,
                                              n_steps= n_steps, stride_train = stride_train, stride_test = stride_test,
                                              trainPer=0.6, valPer=0.2, testPer=0.2, typeLoad=typeLoad,
                                              flgAggSumScaled = 1, flgFilterZeros = 1)
    print(reader.stdTrain, reader.meanTrain)
    instancesPlot = {0:[4], 2:[5]} #for now use hard coded instancesPlot for kelly sampling

    train_data = Dataport(name='train',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         inputX=Xtrain,
                         labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(name='valid',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         X_mean=X_mean,
                         X_std=X_std,
                         inputX=Xval,
                         labels = yval)

    test_data = Dataport(name='valid',
                         prep='normalize',
                         cond=True,# False
                         #path=data_path,
                         X_mean=X_mean,
                         X_std=X_std,
                         inputX=Xtest,
                         labels = ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y , y_mask = train_data.theano_vars()
    scheduleSamplingMask  = T.fvector('schedMask')
    
    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    pickelModel = '/home/gissella/Documents/Research/Disaggregation/PecanStreet-dataport/VRNN_theano_version/output/gmmAE/18-05-30_16-27_app6/dp_dis1-sch_1_best.pkl'
    fmodel = open(pickelModel, 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    #define layers
    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu = mainloop.model.nodes[11]
    theta_sig = mainloop.model.nodes[12]
    coeff = mainloop.model.nodes[13]


    nodes = [rnn,
             x_1, y_1, z_1, #dissag_pred,
             phi_1, phi_mu, phi_sig,
             prior_1, prior_mu, prior_sig,
             theta_1, theta_mu, theta_sig, coeff]#, corr, binary

    params = mainloop.model.params

    """params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)"""

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_val(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t,s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)

        pred_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t)
        pred_1_t = y_1.fprop([pred_t], params)
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred_t#, y_pred
        #corr_temp, binary_temp
    ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val,  theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val, prediction_val), updates_val) =\
        theano.scan(fn=inner_fn_val,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0)


    x_shape = x.shape

    ######################## TEST (GENERATION) TIME
    prediction_val.name = 'generated__'+str(flgAgg)
    mse_val = T.mean((prediction_val - y)**2) # As axis = None is calculated for all
    mae_val = T.mean( T.abs_(prediction_val - y) )

    mse_val.name = 'mse_val'
    mae_val.name = 'mae_val'
    pred_in_val = y.reshape((y.shape[0]*y.shape[1],-1))

    theta_mu_in_val = theta_mu_temp_val.reshape((x_shape[0]*x_shape[1], -1))
    theta_sig_in_val = theta_sig_temp_val.reshape((x_shape[0]*x_shape[1], -1))
    coeff_in_val = coeff_temp_val.reshape((x_shape[0]*x_shape[1], -1))

    recon_val = GMM(pred_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out_val'

    recon_term_val= recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term_val'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes


    data=Iterator(test_data, batch_size)

    test_fn = theano.function(inputs=[x, y],#[x, y],
                              allow_input_downcast=True,
                              outputs=[prediction_val, recon_term_val, mse_val, mae_val]#prediction_val, mse_val, mae_val
                              ,updates=updates_val#, allow_input_downcast=True, on_unused_input='ignore'
                              )
    testOutput = []
    numBatchTest = 0
    for batch in data:
      outputGeneration = test_fn(batch[0], batch[2])#(20, 220, 1)
      testOutput.append(outputGeneration[1:])
      # outputGeneration[0].shape #(20, 220, 40)
      #if (numBatchTest<5):

      '''
      plt.figure(1)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_z_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(2)
      plt.plot(np.transpose(outputGeneration[1],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_s_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(3)
      plt.plot(np.transpose(outputGeneration[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_theta_0-4".format(numBatchTest))
      plt.clf()
      '''
      plt.figure(4)
      plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
      plt.plot(np.transpose(batch[2],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_RealAndPred_0-4".format(numBatchTest))
      plt.clf()

      plt.figure(4)
      plt.plot(np.transpose(batch[0],[1,0,2])[4])
      plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
      plt.clf()
      numBatchTest+=1

    testOutput = np.asarray(testOutput)
    print(testOutput.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test =  testOutput[:, 1].mean()
    mae_test =  testOutput[:, 2].mean()
    #mseUnNorm_test =  testOutput[:, 3].mean()
    #maeUnNorm_test =  testOutput[:, 4].mean()

    fLog = open(save_path+'/output.csv', 'w')
    fLog.write(str(lr_iterations)+"\n")
    fLog.write(str(windows)+"\n")
    fLog.write("logTest,mseTest,maeTest, mseTestUnNorm, maeTestUnNorm\n")
    fLog.write("{},{},{}\n".format(recon_test,mse_test,mae_test))
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim))
    header = "epoch,log,kl,mse,mae\n"
    fLog.write(header)
    for i , item in enumerate(mainloop.trainlog.monitor['recon_term']):
      f = mainloop.trainlog.monitor['epoch'][i]
      a = mainloop.trainlog.monitor['recon_term'][i]
      b = mainloop.trainlog.monitor['kl_term'][i]
      d = mainloop.trainlog.monitor['mse'][i]
      e = mainloop.trainlog.monitor['mae'][i]
      fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f,a,b,d,e))
Example #43
0
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'dp_dis1-nosch_%d' % trial
    channel_name = 'mae'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    typeLoad = int(args['typeLoad'])
    debug = int(args['debug'])
    kSchedSamp = int(args['kSchedSamp'])
    typeActivFunc = args['typeActivFunc']

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150
    p_z_dim = 150
    p_x_dim = 150  #250
    x2s_dim = 100  #250
    y2s_dim = 100
    z2s_dim = 100  #150
    target_dim = k  #x_dim #(x_dim-1)*k

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        trainPer=0.6,
        valPer=0.2,
        testPer=0.2,
        typeLoad=typeLoad,
        flgAggSumScaled=1,
        flgFilterZeros=1)
    print(reader.stdTrain, reader.meanTrain)
    instancesPlot = {
        0: [4],
        2: [5]
    }  #for now use hard coded instancesPlot for kelly sampling

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')
    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1', 'y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1', 's_tm1'],
                                  parent_dim=[x2s_dim, rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit=typeActivFunc,
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    def inner_fn_train(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn_train,
                    sequences=[x_1_temp, y_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0

    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'

    if (flgAgg == -1):
        prediction.name = 'x_reconstructed'
        mse = T.mean((prediction - x)**2)  # CHECK RESHAPE with an assertion
        mae = T.mean(T.abs_(prediction - x))  # T.abs_ (T.abs does not exist)
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = x.reshape((x.shape[0] * x.shape[1], -1))  # x_shape is defined later
    else:
        prediction.name = 'pred_' + str(flgAgg)
        mse = T.mean(
            (prediction - y)**2)  # As axis = None is calculated for all
        mae = T.mean(T.abs_(prediction - y))
        mse.name = 'mse'
        mae.name = 'mae'
        pred_in = y.reshape((y.shape[0] * y.shape[1], -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(
        pred_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse, mae, theta_mu_temp,
                prediction
            ],
            indexSep=5,
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[recon_term, kl_term, nll_upper_bound, mse, mae],
        n_steps=n_steps,
        extension=extension,
        lr_iterations=lr_iterations,
        k_speedOfconvergence=kSchedSamp)
    mainloop.run()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    header = "epoch,log,kl,mse,mae\n"
    fLog.write(header)
    for i, item in enumerate(mainloop.trainlog.monitor['recon_term']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e))
Example #44
0
def log_abs_det_T(W):
    # TODO option for stable way
    return T.log(T.abs_(T.nlinalg.det(W)))
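# A hedged sketch of the "stable way" flagged in the TODO: |det(W)| equals the
# product of W's singular values, so summing their logs avoids the overflow/
# underflow that forming det(W) directly can hit. If the expression must stay
# differentiable, check that the installed Theano provides a gradient for the
# SVD op before swapping this in.
def log_abs_det_T_stable(W):
    s = T.nlinalg.svd(W, compute_uv=False)  # singular values of W
    return T.sum(T.log(s))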
Example #45
0
    def ready(self):
        encoder = self.encoder
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = embedding_layer.vocab_map["<padding>"]

        dropout = self.dropout = encoder.dropout

        # len*batch
        x = self.x = encoder.x
        z = self.z = encoder.z

        n_d = args.hidden_dimension
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation,
                    order=args.order)
            elif layer_type == "lstm":
                l = LSTM(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation)
            layers.append(l)

        # len * batch
        #masks = T.cast(T.neq(x, padding_id), theano.config.floatX)
        masks = T.cast(T.neq(x, padding_id), "int8").dimshuffle((0, 1, "x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)

        flipped_embs = embs[::-1]

        # len*bacth*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = Layer(n_in=size,
                                                 n_out=1,
                                                 activation=sigmoid)

        # len*batch*1
        probs = output_layer.forward(h_final)

        # len*batch
        probs2 = probs.reshape(x.shape)
        self.MRG_rng = MRG_RandomStreams()
        z_pred = self.z_pred = T.cast(
            self.MRG_rng.binomial(size=probs2.shape, p=probs2), "int8")

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        self.z_pred = theano.gradient.disconnected_grad(z_pred)

        z2 = z.dimshuffle((0, 1, "x"))
        logpz = -T.nnet.binary_crossentropy(probs, z2) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        zdiff = T.sum(T.abs_(z[1:] - z[:-1]),
                      axis=0,
                      dtype=theano.config.floatX)

        loss_mat = encoder.loss_mat
        if args.aspect < 0:
            loss_vec = T.mean(loss_mat, axis=1)
        else:
            assert args.aspect < self.nclasses
            loss_vec = loss_mat[:, args.aspect]
        self.loss_vec = loss_vec

        coherent_factor = args.sparsity * args.coherent
        loss = self.loss = T.mean(loss_vec)
        sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \
                                             T.mean(zdiff) * coherent_factor
        cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor
        cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0))
        self.obj = T.mean(cost_vec)

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg

        cost = self.cost = cost_logpz * 10 + l2_cost
        print "cost.dtype", cost.dtype

        self.cost_e = loss * 10 + encoder.l2_cost
    def __init__(self,
                 numTruncate=20,
                 numHidden=500,
                 inputsSize=[576],
                 outputsSize=[1, 4]):
        ####################################
        #       Create model               #
        ####################################

        # Create tensor variables to store input / output data
        FeaturesXGt = T.matrix('FeaturesXGt', dtype='float32')
        FeaturesX = T.tensor3('FeaturesX')
        TargetY = T.tensor3('PredY')
        BboxY = T.tensor3('BboxY')
        C = T.vector('C', dtype='float32')
        S = T.vector('S', dtype='float32')
        BoxsVariances = T.matrix('BoxsVariances')
        RatioPosNeg = T.scalar('RatioPosNeg')

        # Create shared variable for input
        net = LSTMNet()
        net.NetName = 'LSTMTrackingNet'

        # Input
        # net.Layer['input']                 = InputLayer(net, X)
        net.LayerOpts['lstm_num_truncate'] = numTruncate
        # net.LayerOpts['reshape_new_shape'] = (net.LayerOpts['lstm_num_truncate'], 576)          # TODO: Need to set this size later
        # net.Layer['input_2d']              = ReshapeLayer(net, net.Layer['input'].Output)

        # Setting LSTM architecture
        net.LayerOpts['lstm_num_hidden'] = numHidden
        net.LayerOpts['lstm_inputs_size'] = inputsSize
        net.LayerOpts['lstm_outputs_size'] = outputsSize

        # Truncate lstm model
        currentC = C
        currentS = S
        preds = []
        bboxs = []
        predictLayers = []
        for truncId in range(net.LayerOpts['lstm_num_truncate']):
            # Create LSTM layer
            currentInput = FeaturesXGt[truncId]
            net.Layer['lstm_truncid_%d' % (truncId)] = LSTMLayer(
                net, currentInput, currentC, currentS)
            net.LayerOpts['lstm_params'] = net.Layer['lstm_truncid_%d' %
                                                     (truncId)].Params

            # Predict next position based on current state
            currentInput = FeaturesX[truncId]
            tempLayer = LSTMLayer(net, currentInput, currentC, currentS)
            predictLayers.append(tempLayer)
            pred = SigmoidLayer(tempLayer.Output[0]).Output
            bbox = tempLayer.Output[1]
            preds.append(pred)
            bboxs.append(bbox)

            # Update stateS and stateC
            currentC = net.Layer['lstm_truncid_%d' % (truncId)].C
            currentS = net.Layer['lstm_truncid_%d' % (truncId)].S
        lastS = currentS
        lastC = currentC
        self.Net = net

        # Calculate cost function
        # Confidence loss
        cost = 0
        costPos = 0
        costLoc = 0
        costNeg = 0

        k0 = None
        k1 = None
        k2 = None
        k3 = None
        k4 = None
        for truncId in range(net.LayerOpts['lstm_num_truncate']):
            pred = preds[truncId]
            bbox = bboxs[truncId]
            target = TargetY[truncId]
            bboxgt = BboxY[truncId]

            numFeaturesPerIm = pred.shape[0]
            numAnchorBoxPerLoc = pred.shape[1]

            pred = pred.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 1))
            target = target.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 1))
            bbox = bbox.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 4))
            bbox = bbox / BoxsVariances
            bboxgt = bboxgt.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 4))

            allLocCost = T.sum(T.abs_(bbox - bboxgt), axis=1,
                               keepdims=True) * target

            allConfPosCost = -target * T.log(pred)
            allConfNegCost = -(1 - target) * T.log(1 - pred)

            allPosCost = allConfPosCost + allLocCost * 0
            allNegCost = allConfNegCost

            allPosCostSum = T.sum(allPosCost, axis=1)
            allNegCostSum = T.sum(allNegCost, axis=1)

            sortedPosCostIdx = T.argsort(allPosCostSum, axis=0)
            sortedNegCostIdx = T.argsort(allNegCostSum, axis=0)

            sortedPosCost = allPosCostSum[sortedPosCostIdx]
            sortedNegCost = allNegCostSum[sortedNegCostIdx]

            if k0 == None:
                k0 = target
            if k1 == None:
                k1 = allLocCost
            if k2 == None:
                k2 = pred
            if k3 == None:
                k3 = sortedPosCostIdx
            if k4 == None:
                k4 = sortedNegCostIdx

            numMax = T.sum(T.neq(sortedPosCost, 0))
            # numNegMax = T.cast(T.floor(T.minimum(T.maximum(numMax * RatioPosNeg, 2), 300)), dtype = 'int32')
            numNegMax = T.cast(T.floor(numMax * RatioPosNeg), dtype='int32')

            top2PosCost = sortedPosCost[-numMax:]
            top6NegCost = sortedNegCost[-numNegMax:]

            layerCost = (T.sum(top2PosCost) + T.sum(top6NegCost)) / numMax
            cost = cost + layerCost

            costPos = costPos + pred[sortedPosCostIdx[-numMax:]].mean()
            costLoc = costLoc + allLocCost.sum() / numMax
            costNeg = costNeg + pred[sortedNegCostIdx[-numNegMax:]].mean()

        cost = cost / net.LayerOpts['lstm_num_truncate']
        costPos = costPos / net.LayerOpts['lstm_num_truncate']
        costLoc = costLoc / net.LayerOpts['lstm_num_truncate']
        costNeg = costNeg / net.LayerOpts['lstm_num_truncate']

        # Create update function
        params = self.Net.Layer['lstm_truncid_0'].Params
        grads = T.grad(cost, params)
        updates = AdamGDUpdate(net, params=params, grads=grads).Updates

        # Train function
        self.TrainFunc = theano.function(inputs=[
            FeaturesXGt, FeaturesX, TargetY, BboxY, S, C, BoxsVariances,
            RatioPosNeg
        ],
                                         updates=updates,
                                         outputs=[
                                             cost, lastS, lastC, costPos,
                                             costLoc, costNeg, k0, k1, k2, k3,
                                             k4
                                         ])

        self.PredFunc = theano.function(inputs=[FeaturesX, S, C],
                                        outputs=[preds[0], bboxs[0]])

        nextS = self.Net.Layer['lstm_truncid_0'].S
        nextC = self.Net.Layer['lstm_truncid_0'].C
        self.NextState = theano.function(inputs=[FeaturesXGt, S, C],
                                         outputs=[nextS, nextC])
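# A hedged usage sketch (hypothetical array names; the enclosing class is
# assumed to be called LSTMTrackingNet, per net.NetName above): the compiled
# functions carry the LSTM state (S, C) across truncated windows.
tracker = LSTMTrackingNet()
outs = tracker.TrainFunc(featuresXGt, featuresX, targetY, bboxY,
                         S, C, boxsVariances, ratioPosNeg)
cost, S, C = outs[0], outs[1], outs[2]            # updated state feeds the next window
probs, boxes = tracker.PredFunc(featuresX, S, C)  # per-anchor confidence and box offsets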
def main(args):

    #theano.optimizer='fast_compile'
    #theano.config.exception_verbosity='high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'nll_upper_bound'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    flgMSE = int(args['flgMSE'])

    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps  # int(args['stride_test'])

    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    flgAgg = int(args['flgAgg'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    debug = int(args['debug'])
    num_sequences_per_batch = int(args['numSequences'])  #based on appliance
    typeLoad = int(args['typeLoad'])
    target_inclusion_prob = float(args['target_inclusion_prob'])
    n_steps_val = n_steps

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 150  #150
    p_z_dim = 150  #150
    p_x_dim = 250  #250
    x2s_dim = 250  #250
    z2s_dim = 150  #150
    target_dim = k  #x_dim #(x_dim-1)*k
    '''
    f = open(sample_path+'vrnn_gmm_1_best.pkl', 'rb')
    mainloop = cPickle.load(f)
    f.close()
    '''

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale(
        data_path,
        windows,
        appliances,
        numApps=flgAgg,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        typeLoad=typeLoad,
        seq_per_batch=num_sequences_per_batch,
        target_inclusion_prob=target_inclusion_prob)

    instancesPlot = {
        0: [4]
    }  #for now use hard coded instancesPlot for kelly sampling
    if (typeLoad == 0):  #original split according time
        instancesPlot = reader.build_dict_instances_plot(
            listDates, batch_size, Xval.shape[0])

    train_data = UKdale(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=ytrain,
        labels=Xtrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = UKdale(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=yval,
        labels=Xval)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')

    x.name = 'x_original'
    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1'],
               parent_dim=[x2s_dim, z2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=init_W,
               init_U=init_U,
               init_b=init_b)
    '''
    dissag_pred = FullyConnectedLayer(name='disag_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=num_apps,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)
    '''
    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1'],
                                parent_dim=[x2s_dim, rnn_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['s_tm1'],
                                  parent_dim=[rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu = FullyConnectedLayer(name='theta_mu',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig = FullyConnectedLayer(name='theta_sig',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff = FullyConnectedLayer(name='coeff',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    corr = FullyConnectedLayer(name='corr',
                               parent=['theta_1'],
                               parent_dim=[p_x_dim],
                               nout=k,
                               unit='tanh',
                               init_W=init_W,
                               init_b=init_b)

    binary = FullyConnectedLayer(name='binary',
                                 parent=['theta_1'],
                                 parent_dim=[p_x_dim],
                                 nout=1,
                                 unit='sigmoid',
                                 init_W=init_W,
                                 init_b=init_b)

    nodes = [
        rnn,
        x_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu,
        theta_sig,
        coeff
    ]  #, corr, binary

    params = OrderedDict()

    for node in nodes:
        if node.initialize() is not None:
            params.update(node.initialize())

    params = init_tparams(params)

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)

    def inner_val_fn(s_tm1):
        '''
        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)
        '''
        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(prior_mu_t, prior_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)
        coeff_t = coeff.fprop([theta_1_t], params)

        pred_t = GMM_sample(theta_mu_t, theta_sig_t,
                            coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        pred_1_t = x_1.fprop([pred_t], params)

        s_t = rnn.fprop([[pred_1_t, z_1_t], [s_tm1]], params)

        return s_t, pred_t, z_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t
        # prior_mu_temp_val, prior_sig_temp_val
    ((s_temp_val, prediction_val, z_t_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val), updates_val) =\
        theano.scan(fn=inner_val_fn, n_steps=n_steps_val,  # n_steps_val already has 1 subtracted when predicting the next step
                    outputs_info=[s_0, None, None,  None, None, None, None])

    for k, v in updates_val.iteritems():
        k.default_update = v

    def inner_fn(x_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu_t = theta_mu.fprop([theta_1_t], params)
        theta_sig_t = theta_sig.fprop([theta_1_t], params)

        coeff_t = coeff.fprop([theta_1_t], params)
        #corr_t = corr.fprop([theta_1_t], params)
        #binary_t = binary.fprop([theta_1_t], params)

        pred = GMM_sample(theta_mu_t, theta_sig_t,
                          coeff_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)
        s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred  #, y_pred
        #corr_temp, binary_temp
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp],
                    outputs_info=[s_0, None, None, None, None, None, None,  None, None, None, None, None])

    for k, v in updates.iteritems():
        k.default_update = v

    s_temp = concatenate(
        [s_0[None, :, :], s_temp[:-1]], axis=0
    )  # prepend s_0 and drop the last state, so index t holds the state before step t
    '''
    theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params)
    theta_mu_temp = theta_mu.fprop([theta_1_temp], params)
    theta_sig_temp = theta_sig.fprop([theta_1_temp], params)
    coeff_temp = coeff.fprop([theta_1_temp], params)
    corr_temp = corr.fprop([theta_1_temp], params)
    binary_temp = binary.fprop([theta_1_temp], params)
    '''

    s_temp.name = 'h_1'  #gisse
    z_1_temp.name = 'z_1'  #gisse
    z_t_temp.name = 'z'
    theta_mu_temp.name = 'theta_mu_temp'
    theta_sig_temp.name = 'theta_sig_temp'
    coeff_temp.name = 'coeff'

    prediction.name = 'pred_' + str(flgAgg)
    mse = T.mean((prediction - x)**2)  # axis=None: mean over all elements
    mae = T.mean(T.abs_(prediction - x))
    mse.name = 'mse'
    mae.name = 'mae'
    x_in = x.reshape((batch_size * n_steps, -1))

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp,
                                 prior_sig_temp)

    x_shape = x.shape

    theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1))
    theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1))
    coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1))
    #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1))
    #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1))

    recon = GMM(
        x_in, theta_mu_in, theta_sig_in, coeff_in
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'

    #recon = recon * mask

    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask

    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    nll_upper_bound = recon_term + kl_term  #+ mse
    if (flgMSE):
        nll_upper_bound = nll_upper_bound + mse
    nll_upper_bound.name = 'nll_upper_bound'

    ############## TEST  ###############
    theta_mu_in_val = theta_mu_temp_val.reshape((batch_size * n_steps, -1))
    theta_sig_in_val = theta_sig_temp_val.reshape((batch_size * n_steps, -1))
    coeff_in_val = coeff_temp_val.reshape((batch_size * n_steps, -1))

    pred_in = prediction_val.reshape((batch_size * n_steps, -1))
    recon_val = GMM(
        pred_in, theta_mu_in_val, theta_sig_in_val, coeff_in_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((batch_size, n_steps))
    recon_val.name = 'gmm_out_val'

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)

    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            ddout=[
                nll_upper_bound, recon_term, kl_term, mse, mae, theta_mu_temp,
                prediction
            ],
            indexSep=5,
            indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp),
                            (3, prediction)],
            instancesPlot=instancesPlot,  #{0:[4,20],2:[5,10]},#, 80,150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}

    mainloop = Training(name=pkl_name,
                        data=Iterator(train_data, batch_size),
                        model=model,
                        optimizer=optimizer,
                        cost=nll_upper_bound,
                        outputs=[nll_upper_bound],
                        n_steps=n_steps,
                        extension=extension,
                        lr_iterations=lr_iterations)
    mainloop.run()

    test_fn = theano.function(
        inputs=[],
        outputs=[prediction_val, recon_val],
        updates=
        updates_val  #, allow_input_downcast=True, on_unused_input='ignore'
    )

    outputGeneration = test_fn()

    #{0:[4,20], 2:[5,10]}
    '''
    plt.figure(1)
    plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4])
    plt.savefig(save_path+"/vrnn_dis_generated_z_0-4")

    plt.figure(2)
    plt.plot(np.transpose(outputGeneration[1],[1,0,2])[4])
    plt.savefig(save_path+"/vrnn_dis_generated_s_0-4")

    plt.figure(3)
    plt.plot(np.transpose(outputGeneration[2],[1,0,2])[4])
    plt.savefig(save_path+"/vrnn_dis_generated_theta_0-4")
    '''
    plt.figure(1)
    plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4])
    plt.savefig(save_path + "/vrnn_dis_generated_pred_4.ps")

    plt.figure(2)
    plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[10])
    plt.savefig(save_path + "/vrnn_dis_generated_pred_10.ps")

    plt.figure(3)
    plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[20])
    plt.savefig(save_path + "/vrnn_dis_generated_pred_20.ps")

    testLogLike = np.asarray(outputGeneration[1]).mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write("Test-log-likelihood\n")
    fLog.write("{}\n".format(testLogLike))
    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                         z2s_dim))
    #fLog.write("{}\n".format(outputGeneration[0]))#logLIkelihood in the test set
    fLog.write("epoch,log,kl,mse,mae\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        f = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        d = mainloop.trainlog.monitor['mse'][i]
        e = mainloop.trainlog.monitor['mae'][i]
        fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e))

    f = open(save_path + '/outputRealGeneration.pkl', 'wb')
    pickle.dump(outputGeneration, f, -1)
    f.close()
Example #48
0
    def __init__(self, nwk, x=None, y=None, u=None, v=None, *arg, **kwd):
        """
        : -------- parameters -------- :
        nwk: an expression builder for the neural network to be trained,
        could be a Nnt object.

        x: the inputs, with the first dimension standing for sample units.
        If unspecified, the trainer will try to evaluate the entry point and
        cache the result as source data.

        y: the labels, with the first dimension standing for sample units.
        If unspecified, a semi-unsupervised training is assumed, with the
        labels identical to the inputs.

        u: the validation data inputs
        v: the validation data labels

        : -------- kwd: keywords -------- :
        ** bsz: batch size.
        ** lrt: learning rate.
        ** lmb: weight decay factor, the lambda
        ** dpc: dot product cap, preventing numerical explosion

        ** err: expression builder for the computation of training error
        between the network output {pred} and the label {y}. the expression
        must evaluate to a scalar.

        ** reg: expression builder for the weight penalty computed on the
        vector of parameters {vhat}; the expression must evaluate to a
        scalar.

        ** mmt: momentum of the trainer

        ** vdr: validation disruption rate
        ** hte: the halting training error.
        """
        # numpy random number generator
        seed = kwd.pop('seed', None)
        nrng = kwd.pop('nrng', np.random.RandomState(seed))
        trng = kwd.pop('trng', RandomStreams(nrng.randint(0x7FFFFFFF)))

        # private members
        self.__seed__ = seed
        self.__nrng__ = nrng
        self.__trng__ = trng

        # expression of error and regulator terms
        err = getattr(exb, kwd.get('err', 'CE'))
        reg = getattr(exb, kwd.get('reg', 'L1'))

        # the validation disruption
        self.vdr = S(kwd.get('vdr'), 'VDR')

        # the denoising
        self.dns = S(kwd.get('dns'), 'DNS')

        # the halting rules, and halting status
        self.hte = kwd.get('hte', 1e-3)  # 1. low training error
        self.hgd = kwd.get('htg', 1e-7)  # 2. low gradient
        self.hlr = kwd.get('hlr', 1e-7)  # 3. low learning rate
        self.hvp = kwd.get('hvp', 100)  # 4. out of validation patience
        self.hlt = 0

        # current epoch index, use int64
        ep = kwd.get('ep', 0)
        self.ep = S(ep, 'EP')

        # training batch size, use int64
        bsz = kwd.get('bsz', 20)
        self.bsz = S(bsz, 'BSZ')

        # current batch index, use int64
        self.bt = S(0, 'BT')

        # momentum, make sure it is a sane value
        mmt = kwd.get('mmt', .0)
        self.mmt = S(mmt, 'MMT')

        # learning rate
        lrt = kwd.get('lrt', 0.01)  # learning rate
        acc = kwd.get('acc', 1.02)  # acceleration
        dec = kwd.get('dec', 0.95)  # deceleration
        self.lrt = S(lrt, 'LRT', 'f')
        self.acc = S(acc, 'ACC', 'f')
        self.dec = S(dec, 'DEC', 'f')

        # weight decay, lambda
        lmd = kwd.get('lmd', .0)
        self.lmd = S(lmd, 'LMD', 'f')

        # the neural network
        self.nwk = nwk

        # inputs and labels, for modeling and validation
        x = S(np.zeros((bsz * 2, nwk.dim[0]), 'f') if x is None else x)
        y = x if y is None else S(y)
        u = x if u is None else S(u)
        v = y if v is None else S(v)
        self.x, self.y, self.u, self.v = x, y, u, v

        # -------- construct trainer function -------- *
        # 1) symbolic expressions
        x = T.tensor(name='x', dtype=x.dtype, broadcastable=x.broadcastable)
        y = T.tensor(name='y', dtype=y.dtype, broadcastable=y.broadcastable)
        u = T.tensor(name='u', dtype=u.dtype, broadcastable=u.broadcastable)
        v = T.tensor(name='v', dtype=v.dtype, broadcastable=v.broadcastable)

        # prediction
        pred = nwk(x)  # generic

        # mean correlation between testing outcome {v} and that predicted
        # from testing input {u}.
        vcor = exb.mcr(nwk(x), y)

        # list of symbolic parameters to be tuned
        pars = parms(pred)

        # unlist symbolic weights into a vector
        vwgt = T.concatenate([p.flatten() for p in pars if p.name == 'w'])

        # symbolic batch cost, which is the mean training error over all
        # observations and sub attributes.

        # The observations are indexed by the first dimension of y; the last
        # dimension indexes the data entries for each observation,
        # e.g. voxels in an MRI region, and SNPs in a gene.
        # The objective function, err, returns a scalar training loss; it
        # can be the L1 or L2 norm, or CE.
        erro = err(pred, y).mean()

        # the sum of weights calculated for weight decay.
        wsum = reg(vwgt)
        cost = erro + wsum * self.lmd

        # symbolic gradient of cost WRT parameters
        grad = T.grad(cost, pars)
        gvec = T.concatenate([g.flatten() for g in grad])
        gabs = T.abs_(gvec)
        gsup = T.max(gabs)

        # trainer control
        nwep = ((self.bt + 1) * self.bsz) // self.x.shape[-2]  # new epoch?

        # 2) define updates after each batch training
        up = []

        # update parameters using gradient descent, and momentum
        for p, g in zip(pars, grad):
            # initialize accumulated gradient
            # NOTE: p.eval() causes mayhem!!
            h = S(np.zeros_like(p.get_value()))

            # accumulate gradient, partially historical (due to the momentum),
            # partially novel
            up.append((h, self.mmt * h + (1 - self.mmt) * g))

            # update parameters by stepping down the accumulated gradient
            up.append((p, p - self.lrt * h))

        # update batch and epoch index
        up.append((self.bt, (self.bt + 1) * (1 - nwep)))
        up.append((self.ep, self.ep + nwep))

        # 3) the trainer functions
        # expression of batch and whole data feed:
        _ = T.arange((self.bt + 0) * self.bsz, (self.bt + 1) * self.bsz)

        # enable denoise training
        if self.dns:
            msk = self.__trng__.binomial(self.y.shape, 1, self.dns, dtype=FX)
            msk = 1 - msk
            bts = {
                x: self.x.take(_, 0, 'wrap'),
                y: self.y.take(_, 0, 'wrap') * msk.take(_, -2, 'wrap')
            }
            dts = {x: self.x, y: self.y * msk}
        else:
            bts = {x: self.x.take(_, 0, 'wrap'), y: self.y.take(_, 0, 'wrap')}
            dts = {x: self.x, y: self.y}

        # each invocation sends one batch of training examples to the network,
        # calculates the total cost and tunes the parameters by gradient descent.
        self.step = F([], cost, name="step", givens=bts, updates=up)

        # training error, training cost
        self.terr = F([], erro, name="terr", givens=dts)
        self.tcst = F([], cost, name="tcst", givens=dts)

        # weights, and parameters
        self.wsum = F([], wsum, name="wsum")
        self.gsup = F([], gsup, name="gsup", givens=dts)

        # * -------- done with trainer functions -------- *

        # * -------- validation functions -------- *
        # enable binary validation disruption?
        if self.vdr:
            _ = self.__trng__.binomial(self.v.shape, 1, self.vdr, dtype=FX)
            vts = {x: self.u, y: (self.v + _) % C(2.0, FX)}
        else:
            vts = {x: self.u, y: self.v}
        self.verr = F([], erro, name="verr", givens=vts)
        # validation correlation performance
        self.vcor = F([], vcor, name="vcor", givens=vts)

        # * ---------- logging and recording ---------- *
        hd, skip = [], ['step', 'gvec']
        for k, v in vars(self).items():
            if k.startswith('__') or k in skip:
                continue
            # possible theano shared cpu variable
            if isinstance(v, type(self.lmd)) and v.ndim < 1:
                hd.append((k, v.get_value))
            # possible theano shared gpu variable
            if isinstance(v, type(self.ep)) and v.ndim < 1:
                hd.append((k, v.get_value))
            if isinstance(v, type(self.step)):
                hd.append((k, v))
            if isinstance(v, float) or isinstance(v, int):
                hd.append((k, v))

        self.__head__ = hd
        self.__time__ = .0

        # the initial record, and history
        self.__hist__ = [self.__rpt__()]
        self.__mver__ = self.__hist__[-1]  # min verr
        self.__mter__ = self.__hist__[-1]  # min terr

        # printing format
        self.__pfmt__ = (
            '{ep:04d}: {tcst:.1e} = {terr:.1e} + {lmd:.1e}*{wsum:.1e}'
            '|{verr:.1e}, {gsup:.2e}, {vcor:.2e}, {lrt:.2e}')
Example #49
0
 def log_normal(self,x, mean, std, eps=0.0):
     """computes log-proba of normal distribution"""
     std += eps
     return - 0.5 * np.log(2 * np.pi) - T.log(T.abs_(std)) - (x - mean) ** 2 / (2 * std ** 2)
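As a quick sanity check (not part of the original example), the same Gaussian log-density can be evaluated with plain NumPy; the names below are illustrative only.

import numpy as np

def log_normal_np(x, mean, std, eps=0.0):
    # numeric twin of the symbolic expression above
    std = std + eps
    return -0.5 * np.log(2 * np.pi) - np.log(np.abs(std)) - (x - mean) ** 2 / (2 * std ** 2)

print(log_normal_np(0.3, 0.0, 1.0))  # ~ -0.964, the standard-normal log-density at 0.3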
Example #50
0
w_h2 = theano.shared(np.random.randn(no_hidden1, 20)*.01, floatX)
b_h2 = theano.shared(np.random.randn(20)*0.01, floatX)

w_o = theano.shared(np.random.randn(20)*.01, floatX)
b_o = theano.shared(np.random.randn()*.01, floatX)

# learning rate
alpha = theano.shared(learning_rate, floatX)

#Define mathematical expression:
h1_out = T.nnet.sigmoid(T.dot(x, w_h1) + b_h1)
h2_out = T.nnet.sigmoid(T.dot(h1_out,w_h2) + b_h2)

y = T.dot(h2_out, w_o) + b_o # 4-layer

cost = T.abs_(T.mean(T.sqr(d - y)))
accuracy = T.mean(d - y)

#define gradients
dw_o, db_o, dw_h1, db_h1, dw_h2, db_h2 = T.grad(cost, [w_o, b_o, w_h1, b_h1, w_h2, b_h2]) # 4-layer 

train = theano.function(
        inputs = [x, d],
        outputs = cost,
        updates = [[w_o, w_o - alpha*dw_o],
                   [b_o, b_o - alpha*db_o],
                   [w_h1, w_h1 - alpha*dw_h1],
                   [b_h1, b_h1 - alpha*db_h1],
                   [w_h2, w_h2 - alpha*dw_h2],
                   [b_h2, b_h2 - alpha*db_h2]],
        allow_input_downcast=True)
Example #51
0
def SGMGHMC_p(tparams, cost, inps, ntrain, lr, rho=0.9, epsilon=1e-6, clip_norm=0.1):
    """ Additional parameters """
    mom_tparams = OrderedDict()
    xi_tparams = OrderedDict()
    for k, p0 in tparams.iteritems():
        mom_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_mom'%k) 
        xi_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_xi'%k) 
    
    a = theano.shared(numpy_floatX(2.))
    m_org = theano.shared(numpy_floatX(5.))
    c = theano.shared(numpy_floatX(5.))
    sigma_p = theano.shared(numpy_floatX(10.))
    sigma_xi = theano.shared(numpy_floatX(0.001))
    gamma_xi = theano.shared(numpy_floatX(1))
    logger = logging.getLogger('eval_ptb_sgmgnht')
    logger.setLevel(logging.INFO)
    fh = logging.FileHandler('eval_ptb_sgmgnht.log')
    logger.info('a {} m {} c {} s_p{} s_xi{} g_xi{}'.format(a.get_value(), m_org.get_value(), c.get_value(), sigma_p.get_value(), sigma_xi.get_value(), gamma_xi.get_value()))
    
    p = tensor.vector('p', dtype='float32')
    
    """ default: lr=0.001 """
    
    trng = RandomStreams(123)
    
    grads = tensor.grad(cost, tparams.values())
    norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads]))
    # clip the gradients symbolically; a Python `if` cannot branch on a symbolic comparison
    grads = [tensor.switch(tensor.ge(norm, clip_norm), g * clip_norm / norm, g)
             for g in grads]
        
    gshared = [theano.shared(p0.get_value() * 0., name='%s_grad'%k) 
                for k, p0 in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]
    f_grad_shared = theano.function(inps, cost, updates=gsup)     
    
    updates = []
    # reset mom
    # counter = theano.shared(numpy_floatX(0.))
    # updates.append((counter,counter+1))
 
    for p, mom, xi, g in zip(tparams.values(),mom_tparams.values(),xi_tparams.values(), gshared):
        # rms prop
        t = theano.shared(p.get_value() * 0.)
        t_new = rho * t + (1-rho) * g**2
        updates.append((t, t_new))

        m = (tensor.sqrt(t_new) + 1e-10)
        m = m/tensor.max(m)*m_org
        #m = tensor.switch(tensor.ge(m,1*m_org), 1*m_org, m)
        m = tensor.switch(tensor.le(m,m_org*0.01), m_org*0.01, m)
        
        g_f = tensor.sgn(mom)/m*(tensor.abs_(mom)**(1/a))
        K_f = -g_f + 2/c*(c*g_f + tensor.log(1+tensor.exp(-c*g_f)))
        
        psi_f_1 = (1- tensor.exp(-c*g_f) )/( 1 + tensor.exp(-c*g_f) )
        f1_f_1 = 1/m/a*psi_f_1*(tensor.abs_(mom+1e-100)**(1/a-1))
        psi_grad_f_1 = 2*c*tensor.exp(- c*g_f)/(1 + tensor.exp(-c*g_f))**2
        f3_f_1 =  1/m**2/a**2*(psi_f_1**2-psi_grad_f_1)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f_1*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2)
        
        psi_f = (tensor.exp(c*g_f) - 1)/(tensor.exp(c*g_f) + 1)
        f1_f = 1/m/a*psi_f*(tensor.abs_(mom+1e-100)**(1/a-1))
        psi_grad_f = 2*c*tensor.exp(c*g_f)/(tensor.exp(c*g_f) + 1)**2
        f3_f =  1/m**2/a**2*(psi_f**2-psi_grad_f)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2)
 
        temp_f1 = tensor.switch(tensor.ge(g_f,0), f1_f_1, f1_f)
        temp_f3 = tensor.switch(tensor.ge(g_f,0), f3_f_1, f3_f)       


        noise_p = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')
        noise_xi = trng.normal(p.get_value().shape, avg = 0.0, std = 1., 
                          dtype='float32')
        
        #lr_new = 1 / tensor.sqrt(tensor.abs_(temp_f1)) * lr 
        lr_new = lr
        updated_p = p +  temp_f1 * lr_new
        #updated_mom = (mom - temp_f1* xi *lr  - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p)* (1-tensor.eq(tensor.mod(iterations,100),0)) + randmg * tensor.eq(tensor.mod(iterations,100),0)
        updated_mom = mom - 1.2*temp_f1* xi *lr_new  - g * lr_new * ntrain + tensor.sqrt(2*sigma_p*lr_new) * noise_p
        updated_xi = xi + temp_f3* sigma_xi * lr_new - (xi - sigma_p)*gamma_xi*lr_new + tensor.sqrt(2*sigma_xi*gamma_xi*lr_new) * noise_xi 
        

        updates.append((p, updated_p))
            
        updates.append((mom, updated_mom))
        updates.append((xi, updated_xi))
    
    f_update = theano.function([lr,ntrain], [p,mom,m], updates=updates)
    
    return f_grad_shared, f_update
def laplace_loss(percept_length, sigma_min, y_true, y_pred):
    sigmas_pred = sigma_min + y_pred[:, percept_length:]
    mus_pred = y_pred[:, :percept_length]
    return T.mean((T.abs_(y_true - mus_pred)) / sigmas_pred +
                  T.log(sigmas_pred))
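A hypothetical usage sketch (the shapes and values below are assumptions, not from the original source): y_pred packs the predicted means and the raw sigma offsets side by side along the feature axis.

import numpy as np
import theano
import theano.tensor as T

y_true = T.matrix('y_true')
y_pred = T.matrix('y_pred')
# two perceptual features, so y_pred has 4 columns: [mu1, mu2, sig1_raw, sig2_raw]
loss = laplace_loss(percept_length=2, sigma_min=0.1, y_true=y_true, y_pred=y_pred)
f = theano.function([y_true, y_pred], loss)

yt = np.zeros((1, 2), dtype=theano.config.floatX)
yp = np.array([[0.5, -0.5, 0.0, 0.0]], dtype=theano.config.floatX)
print(f(yt, yp))  # mean(|0 - mu| / 0.1 + log(0.1)) ~ 2.697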
Example #53
0
def mean_absolute_error(y_true, y_pred, weight=None):
    if weight is not None:
        return T.abs_(
            weight.reshape((weight.shape[0], 1)) * (y_pred - y_true)).mean()
    else:
        return T.abs_(y_pred - y_true).mean()
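A small assumed usage example (not from the original source): compile the weighted variant and compare it with the hand-computed value.

import numpy as np
import theano
import theano.tensor as T

y_true = T.matrix('y_true')
y_pred = T.matrix('y_pred')
weight = T.vector('weight')
f = theano.function([y_true, y_pred, weight],
                    mean_absolute_error(y_true, y_pred, weight))

yt = np.array([[0., 1.], [2., 3.]], dtype=theano.config.floatX)
yp = np.array([[0.5, 1.], [1., 4.]], dtype=theano.config.floatX)
w = np.array([1., 2.], dtype=theano.config.floatX)
print(f(yt, yp, w))  # mean(|w[:, None] * (yp - yt)|) = mean([0.5, 0, 2, 2]) = 1.125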
Example #54
0
def sd2rho(sd):
    """
    `sd -> rho` theano converter
    :math:`mu + sd*e = mu + log(1+exp(rho))*e`"""
    return tt.log(tt.exp(tt.abs_(sd)) - 1.)
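A quick assumed check (not part of the original snippet): since rho is mapped back to sd via a softplus, as in the docstring's `mu + log(1+exp(rho))*e`, applying that softplus to sd2rho(sd) should recover |sd|.

import numpy as np
import theano
import theano.tensor as tt

sd = tt.vector('sd')
recovered = tt.log1p(tt.exp(sd2rho(sd)))  # softplus(sd2rho(sd)) == |sd|
f = theano.function([sd], recovered)
print(f(np.array([0.1, 1.0, 3.0], dtype=theano.config.floatX)))  # ~ [0.1, 1.0, 3.0]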
Example #55
0
    def ready(self):
        embedding_layer = self.embedding_layer
        args = self.args
        padding_id = self.padding_id
        weights = self.weights

        dropout = self.dropout = theano.shared(
            np.float64(args.dropout).astype(theano.config.floatX))

        # len*batch
        x = self.x = T.imatrix()

        n_d = args.hidden_dim2
        n_e = embedding_layer.n_d
        activation = get_activation_by_name(args.activation)

        layers = self.layers = []
        layer_type = args.layer.lower()
        for i in xrange(2):
            if layer_type == "rcnn":
                l = RCNN(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation,
                    order=args.order)
            elif layer_type == "lstm":
                l = LSTM(
                    n_in=n_e,  # if i == 0 else n_d,
                    n_out=n_d,
                    activation=activation)
            layers.append(l)

        # len * batch
        masks = T.cast(T.neq(x, padding_id), "float32")

        #masks = masks.dimshuffle((0,1,"x"))

        # (len*batch)*n_e
        embs = embedding_layer.forward(x.ravel())
        if weights is not None:
            embs_w = weights[x.ravel()].dimshuffle((0, 'x'))
            embs = embs * embs_w

        # len*batch*n_e
        embs = embs.reshape((x.shape[0], x.shape[1], n_e))
        embs = apply_dropout(embs, dropout)
        self.word_embs = embs
        flipped_embs = embs[::-1]

        # len*bacth*n_d
        h1 = layers[0].forward_all(embs)
        h2 = layers[1].forward_all(flipped_embs)
        h_final = T.concatenate([h1, h2[::-1]], axis=2)
        h_final = apply_dropout(h_final, dropout)
        size = n_d * 2

        output_layer = self.output_layer = ZLayer(n_in=size,
                                                  n_hidden=n_d,
                                                  activation=activation)

        # sample z given text (i.e. x)
        z_pred, sample_updates = output_layer.sample_all(h_final)

        # we are computing approximated gradient by sampling z;
        # so should mark sampled z not part of the gradient propagation path
        #
        z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred)
        self.sample_updates = sample_updates
        print "z_pred", z_pred.ndim

        self.p1 = T.sum(masks * z_pred) / (T.sum(masks) + 1e-8)

        # len*batch*1
        probs = output_layer.forward_all(h_final, z_pred)
        print "probs", probs.ndim

        logpz = -T.nnet.binary_crossentropy(probs, z_pred) * masks
        logpz = self.logpz = logpz.reshape(x.shape)
        probs = self.probs = probs.reshape(x.shape)

        # batch
        z = z_pred
        self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX)
        self.zdiff = T.sum(T.abs_(z[1:] - z[:-1]),
                           axis=0,
                           dtype=theano.config.floatX)

        params = self.params = []
        for l in layers + [output_layer]:
            for p in l.params:
                params.append(p)
        nparams = sum(len(x.get_value(borrow=True).ravel()) \
                                        for x in params)
        say("total # parameters: {}\n".format(nparams))

        l2_cost = None
        for p in params:
            if l2_cost is None:
                l2_cost = T.sum(p**2)
            else:
                l2_cost = l2_cost + T.sum(p**2)
        l2_cost = l2_cost * args.l2_reg
        self.l2_cost = l2_cost
Example #56
0
def standard_laplace(x):
    return np.log(0.5) - T.abs_(x)
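For reference (an assumed check, not from the source): this formula matches SciPy's log-density for a standard Laplace distribution (loc=0, scale=1).

import numpy as np
from scipy import stats

x = np.array([-2.0, 0.0, 1.5])
print(np.log(0.5) - np.abs(x))   # same formula in NumPy: [-2.693, -0.693, -2.193]
print(stats.laplace.logpdf(x))   # SciPy agrees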
Example #57
0
def normal(x, mean, sd):
    # C is a constant defined elsewhere in the source module (presumably -0.5 * log(2 * pi))
    return C - T.log(T.abs_(sd)) - ((x - mean)**2 / (2 * sd**2))
def abs(x):
    return T.abs_(x)
                                    context_name=ctx_name)()
            mode = get_mode('FAST_RUN').including('gpuarray')
            f = theano.function([guard_in],
                                op(guard_in),
                                mode=mode,
                                profile=False)
            result.cache[key] = f
        return f(inp)

    result.cache = dict()
    return result


f_gpua_min = f_compute(T.min)
f_gpua_max = f_compute(T.max)
f_gpua_absmax = f_compute(lambda x: T.max(T.abs_(x)))


class NanGuardMode(Mode):
    """
    A Theano compilation Mode that makes the compiled function automatically
    detect NaNs and Infs and detect an error if they occur.

    Parameters
    ----------
    nan_is_error : bool
        If True, raise an error anytime a NaN is encountered.
    inf_is_error : bool
        If True, raise an error anytime an Inf is encountered.  Note that some
        pylearn2 modules currently use np.inf as a default value (e.g.
        mlp.max_pool) and these will cause an error if inf_is_error is True.
def main(args):

    theano.optimizer = 'fast_compile'
    theano.config.exception_verbosity = 'high'

    trial = int(args['trial'])
    pkl_name = 'vrnn_gmm_%d' % trial
    channel_name = 'mse'

    data_path = args['data_path']
    save_path = args[
        'save_path']  #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M")
    period = int(args['period'])
    n_steps = int(args['n_steps'])
    stride_train = int(args['stride_train'])
    stride_test = n_steps
    typeLoad = int(args['typeLoad'])

    flgMSE = int(args['flgMSE'])
    monitoring_freq = int(args['monitoring_freq'])
    epoch = int(args['epoch'])
    batch_size = int(args['batch_size'])
    x_dim = int(args['x_dim'])
    y_dim = int(args['y_dim'])
    z_dim = int(args['z_dim'])
    rnn_dim = int(args['rnn_dim'])
    k = int(args['num_k'])  #a mixture of K Gaussian functions
    lr = float(args['lr'])
    origLR = lr
    debug = int(args['debug'])

    print "trial no. %d" % trial
    print "batch size %d" % batch_size
    print "learning rate %f" % lr
    print "saving pkl file '%s'" % pkl_name
    print "to the save path '%s'" % save_path

    q_z_dim = 350
    p_z_dim = 400
    p_x_dim = 450
    x2s_dim = 400
    y2s_dim = 200
    z2s_dim = 350
    target_dim = k  # As different appliances are separated in theta_mu1, theta_mu2, etc., each one is created from k different Gaussians

    model = Model()
    Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(
        data_path,
        windows,
        appliances,
        numApps=-1,
        period=period,
        n_steps=n_steps,
        stride_train=stride_train,
        stride_test=stride_test,
        flgAggSumScaled=1,
        flgFilterZeros=1,
        typeLoad=typeLoad)

    instancesPlot = {0: [10]}
    #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0])

    train_data = Dataport(
        name='train',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        inputX=Xtrain,
        labels=ytrain)

    X_mean = train_data.X_mean
    X_std = train_data.X_std

    valid_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xval,
        labels=yval)

    test_data = Dataport(
        name='valid',
        prep='normalize',
        cond=True,  # False
        #path=data_path,
        X_mean=X_mean,
        X_std=X_std,
        inputX=Xtest,
        labels=ytest)

    init_W = InitCell('rand')
    init_U = InitCell('ortho')
    init_b = InitCell('zeros')
    init_b_sig = InitCell('const', mean=0.6)

    x, mask, y, y_mask = train_data.theano_vars()
    scheduleSamplingMask = T.fvector('schedMask')
    x.name = 'x_original'

    if debug:
        x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32)
        temp = np.ones((15, batch_size), dtype=np.float32)
        temp[:, -2:] = 0.
        mask.tag.test_value = temp
    """rnn = LSTM(name='rnn',
               parent=['x_1', 'z_1', 'y_1'],
               parent_dim=[x2s_dim, z2s_dim, y2s_dim],
               nout=rnn_dim,
               unit='tanh',
               init_W=mainloop.model.nodes[0].init_W,
               init_U=mainloop.model.nodes[0].init_U,
               init_b=mainloop.model.nodes[0].init_b)

    x_1 = FullyConnectedLayer(name='x_1',
                              parent=['x_t'],
                              parent_dim=[x_dim],
                              nout=x2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    y_1 = FullyConnectedLayer(name='y_1',
                              parent=['y_t'],
                              parent_dim=[y_dim],
                              nout=y2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    z_1 = FullyConnectedLayer(name='z_1',
                              parent=['z_t'],
                              parent_dim=[z_dim],
                              nout=z2s_dim,
                              unit='relu',
                              init_W=init_W,
                              init_b=init_b)

    phi_1 = FullyConnectedLayer(name='phi_1',
                                parent=['x_1', 's_tm1','y_1'],
                                parent_dim=[x2s_dim, rnn_dim, y2s_dim],
                                nout=q_z_dim,
                                unit='relu',
                                init_W=init_W,
                                init_b=init_b)

    phi_mu = FullyConnectedLayer(name='phi_mu',
                                 parent=['phi_1'],
                                 parent_dim=[q_z_dim],
                                 nout=z_dim,
                                 unit='linear',
                                 init_W=init_W,
                                 init_b=init_b)

    phi_sig = FullyConnectedLayer(name='phi_sig',
                                  parent=['phi_1'],
                                  parent_dim=[q_z_dim],
                                  nout=z_dim,
                                  unit='softplus',
                                  cons=1e-4,
                                  init_W=init_W,
                                  init_b=init_b_sig)

    prior_1 = FullyConnectedLayer(name='prior_1',
                                  parent=['x_1','s_tm1'],
                                  parent_dim=[x2s_dim,rnn_dim],
                                  nout=p_z_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    prior_mu = FullyConnectedLayer(name='prior_mu',
                                   parent=['prior_1'],
                                   parent_dim=[p_z_dim],
                                   nout=z_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    prior_sig = FullyConnectedLayer(name='prior_sig',
                                    parent=['prior_1'],
                                    parent_dim=[p_z_dim],
                                    nout=z_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_1 = FullyConnectedLayer(name='theta_1',
                                  parent=['z_1', 's_tm1'],
                                  parent_dim=[z2s_dim, rnn_dim],
                                  nout=p_x_dim,
                                  unit='relu',
                                  init_W=init_W,
                                  init_b=init_b)

    theta_mu1 = FullyConnectedLayer(name='theta_mu1',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_mu2 = FullyConnectedLayer(name='theta_mu2',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_mu3 = FullyConnectedLayer(name='theta_mu3',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_mu4 = FullyConnectedLayer(name='theta_mu4',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_mu5 = FullyConnectedLayer(name='theta_mu5',
                                   parent=['theta_1'],
                                   parent_dim=[p_x_dim],
                                   nout=target_dim,
                                   unit='linear',
                                   init_W=init_W,
                                   init_b=init_b)

    theta_sig1 = FullyConnectedLayer(name='theta_sig1',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_sig2 = FullyConnectedLayer(name='theta_sig2',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_sig3 = FullyConnectedLayer(name='theta_sig3',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_sig4 = FullyConnectedLayer(name='theta_sig4',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    theta_sig5 = FullyConnectedLayer(name='theta_sig5',
                                    parent=['theta_1'],
                                    parent_dim=[p_x_dim],
                                    nout=target_dim,
                                    unit='softplus',
                                    cons=1e-4,
                                    init_W=init_W,
                                    init_b=init_b_sig)

    coeff1 = FullyConnectedLayer(name='coeff1',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    coeff2 = FullyConnectedLayer(name='coeff2',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    coeff3 = FullyConnectedLayer(name='coeff3',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)
 
    coeff4 = FullyConnectedLayer(name='coeff4',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)

    coeff5 = FullyConnectedLayer(name='coeff5',
                                parent=['theta_1'],
                                parent_dim=[p_x_dim],
                                nout=k,
                                unit='softmax',
                                init_W=init_W,
                                init_b=init_b)"""

    #from experiment 18-05-31_18-48
    fmodel = open('disall.pkl', 'rb')
    mainloop = cPickle.load(fmodel)
    fmodel.close()

    #define layers
    rnn = mainloop.model.nodes[0]
    x_1 = mainloop.model.nodes[1]
    y_1 = mainloop.model.nodes[2]
    z_1 = mainloop.model.nodes[3]
    phi_1 = mainloop.model.nodes[4]
    phi_mu = mainloop.model.nodes[5]
    phi_sig = mainloop.model.nodes[6]
    prior_1 = mainloop.model.nodes[7]
    prior_mu = mainloop.model.nodes[8]
    prior_sig = mainloop.model.nodes[9]
    theta_1 = mainloop.model.nodes[10]
    theta_mu1 = mainloop.model.nodes[11]
    theta_sig1 = mainloop.model.nodes[12]
    coeff1 = mainloop.model.nodes[13]

    nodes = [
        rnn,
        x_1,
        y_1,
        z_1,  #dissag_pred,
        phi_1,
        phi_mu,
        phi_sig,
        prior_1,
        prior_mu,
        prior_sig,
        theta_1,
        theta_mu1,
        theta_sig1,
        coeff1
    ]

    params = mainloop.model.params

    dynamicOutput = [None, None, None, None, None, None, None, None]
    #dynamicOutput_val = [None, None, None, None, None, None,None,  None, None]
    if (y_dim > 1):
        theta_mu2 = mainloop.model.nodes[14]
        theta_sig2 = mainloop.model.nodes[15]
        coeff2 = mainloop.model.nodes[16]
        nodes = nodes + [theta_mu2, theta_sig2, coeff2]
        dynamicOutput = dynamicOutput + [None, None, None, None
                                         ]  #mu, sig, coef and pred
    if (y_dim > 2):
        theta_mu3 = mainloop.model.nodes[17]
        theta_sig3 = mainloop.model.nodes[18]
        coeff3 = mainloop.model.nodes[19]
        nodes = nodes + [theta_mu3, theta_sig3, coeff3]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 3):
        theta_mu4 = mainloop.model.nodes[20]
        theta_sig4 = mainloop.model.nodes[21]
        coeff4 = mainloop.model.nodes[22]
        nodes = nodes + [theta_mu4, theta_sig4, coeff4]
        dynamicOutput = dynamicOutput + [None, None, None, None]
    if (y_dim > 4):
        theta_mu5 = mainloop.model.nodes[23]
        theta_sig5 = mainloop.model.nodes[24]
        coeff5 = mainloop.model.nodes[25]
        nodes = nodes + [theta_mu5, theta_sig5, coeff5]
        dynamicOutput = dynamicOutput + [None, None, None, None]

    s_0 = rnn.get_init_state(batch_size)

    x_1_temp = x_1.fprop([x], params)
    y_1_temp = y_1.fprop([y], params)

    output_fn = [s_0] + dynamicOutput
    output_fn_val = [s_0] + dynamicOutput[2:]
    print(len(output_fn), len(output_fn_val))

    def inner_fn_test(x_t, s_tm1):

        prior_1_t = prior_1.fprop([x_t, s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(
            prior_mu_t, prior_sig_t
        )  # in the original code this is a Gaussian sample; GMM sampling is used for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(
            theta_mu1_t, theta_sig1_t,
            coeff1_t)  #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim > 1):
            theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
            theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
            coeff2_t = coeff2.fprop([theta_1_t], params)
            y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
            y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1)
            tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t,
                                       y_pred2)

        if (y_dim > 2):
            theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
            theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
            coeff3_t = coeff3.fprop([theta_1_t], params)
            y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
            y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1)
            tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t,
                                       y_pred3)

        if (y_dim > 3):
            theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
            theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
            coeff4_t = coeff4.fprop([theta_1_t], params)
            y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
            y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1)
            tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t,
                                       y_pred4)

        if (y_dim > 4):
            theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
            theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
            coeff5_t = coeff5.fprop([theta_1_t], params)
            y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
            y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1)
            tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t,
                                       y_pred5)

        pred_1_t = y_1.fprop([y_pred1], params)
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t, ) + tupleMulti
        #corr_temp, binary_temp

    (restResults_val, updates_val) = theano.scan(fn=inner_fn_test,
                                                 sequences=[x_1_temp],
                                                 outputs_info=output_fn_val)

    for k, v in updates_val.iteritems():
        k.default_update = v
    """def inner_fn(x_t, y_t, s_tm1):

        phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params)
        phi_mu_t = phi_mu.fprop([phi_1_t], params)
        phi_sig_t = phi_sig.fprop([phi_1_t], params)

        prior_1_t = prior_1.fprop([x_t,s_tm1], params)
        prior_mu_t = prior_mu.fprop([prior_1_t], params)
        prior_sig_t = prior_sig.fprop([prior_1_t], params)

        z_t = Gaussian_sample(phi_mu_t, phi_sig_t)#in the original code it is gaussian. GMM is for the generation
        z_1_t = z_1.fprop([z_t], params)

        theta_1_t = theta_1.fprop([z_1_t, s_tm1], params)
        theta_mu1_t = theta_mu1.fprop([theta_1_t], params)
        theta_sig1_t = theta_sig1.fprop([theta_1_t], params)
        coeff1_t = coeff1.fprop([theta_1_t], params)

        y_pred1 = GMM_sampleY(theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t)

        tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1

        if (y_dim>1):
          theta_mu2_t = theta_mu2.fprop([theta_1_t], params)
          theta_sig2_t = theta_sig2.fprop([theta_1_t], params)
          coeff2_t = coeff2.fprop([theta_1_t], params)
          y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t)
          tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2)

        if (y_dim>2):
          theta_mu3_t = theta_mu3.fprop([theta_1_t], params)
          theta_sig3_t = theta_sig3.fprop([theta_1_t], params)
          coeff3_t = coeff3.fprop([theta_1_t], params)
          y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t)
          tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3)

        if (y_dim>3):
          theta_mu4_t = theta_mu4.fprop([theta_1_t], params)
          theta_sig4_t = theta_sig4.fprop([theta_1_t], params)
          coeff4_t = coeff4.fprop([theta_1_t], params)
          y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t)
          tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4)

        if (y_dim>4):
          theta_mu5_t = theta_mu5.fprop([theta_1_t], params)
          theta_sig5_t = theta_sig5.fprop([theta_1_t], params)
          coeff5_t = coeff5.fprop([theta_1_t], params)
          y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t)
          tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5)
        
        #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 )
        s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params)
        #y_pred = dissag_pred.fprop([s_t], params)

        return (s_t,)+tupleMulti
        #corr_temp, binary_temp
    (restResults, updates) = theano.scan(fn=inner_fn, sequences=[x_1_temp, y_1_temp],
                            outputs_info=output_fn )
    '''
    ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, 
      theta_mu1_temp, theta_sig1_temp, coeff1_temp, theta_mu2_temp, theta_sig2_temp, coeff2_temp, 
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, theta_mu4_temp, theta_sig4_temp, coeff4_temp,
      theta_mu5_temp, theta_sig5_temp, coeff5_temp, 
      y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp, y_pred5_temp), updates) =\
        theano.scan(fn=inner_fn,
                    sequences=[x_1_temp, y_1_temp],
                    outputs_info=[s_0,  None, None, None, None, None, None, None, None,None,  None, None, 
                                  None, None, None, None, None, None, None, None,
                                  None, None, None, None, None, None, None, None])
    '''
    s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = restResults[:9]
    restResults = restResults[9:]

    for k, v in updates.iteritems():
        k.default_update = v

    #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0


    theta_mu1_temp.name = 'theta_mu1'
    theta_sig1_temp.name = 'theta_sig1'
    coeff1_temp.name = 'coeff1'
    y_pred1_temp.name = 'disaggregation1'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    mse1 = T.mean((y_pred1_temp - y[:,:,0].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    mae1 = T.mean( T.abs_(y_pred1_temp - y[:,:,0].reshape((y.shape[0],y.shape[1],1))) )
    mse1.name = 'mse1'
    mae1.name = 'mae1'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)"""

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0] * x_shape[1], -1))
    y_in = y.reshape((y_shape[0] * y_shape[1], -1))
    """theta_mu1_in = theta_mu1_temp.reshape((x_shape[0]*x_shape[1], -1))
    theta_sig1_in = theta_sig1_temp.reshape((x_shape[0]*x_shape[1], -1))
    coeff1_in = coeff1_temp.reshape((x_shape[0]*x_shape[1], -1))


    ddoutMSEA = []
    ddoutYpreds = [y_pred1_temp]
    
    indexSepDynamic = 6 # plus 1 for TOTAMSE

    
    #totaMSE = T.copy(mse1)
    mse2 = T.zeros((1,))
    mae2 = T.zeros((1,))
    mse3 = T.zeros((1,))
    mae3 = T.zeros((1,))
    mse4 = T.zeros((1,))
    mae4 = T.zeros((1,))
    mse5 = T.zeros((1,))
    mae5 = T.zeros((1,))
    
    if (y_dim>1):
      theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[:4]
      restResults = restResults[4:]
      theta_mu2_temp.name = 'theta_mu2'
      theta_sig2_temp.name = 'theta_sig2'
      coeff2_temp.name = 'coeff2'
      y_pred2_temp.name = 'disaggregation2'
      mse2 = T.mean((y_pred2_temp - y[:,:,1].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      mae2 = T.mean( T.abs_(y_pred2_temp - y[:,:,1].reshape((y.shape[0],y.shape[1],1))) )
      mse2.name = 'mse2'
      mae2.name = 'mae2'

      theta_mu2_in = theta_mu2_temp.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig2_in = theta_sig2_temp.reshape((x_shape[0]*x_shape[1], -1))
      coeff2_in = coeff2_temp.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM = theta_mu2_in, theta_sig2_in, coeff2_in

      ddoutMSEA = ddoutMSEA + [mse2, mae2]
      ddoutYpreds = ddoutYpreds + [y_pred2_temp]
      #totaMSE+=mse2
      indexSepDynamic +=2

    if (y_dim>2):
      theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[:4]
      restResults = restResults[4:]
      theta_mu3_temp.name = 'theta_mu3'
      theta_sig3_temp.name = 'theta_sig3'
      coeff3_temp.name = 'coeff3'
      y_pred3_temp.name = 'disaggregation3'
      mse3 = T.mean((y_pred3_temp - y[:,:,2].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      mae3 = T.mean( T.abs_(y_pred3_temp - y[:,:,2].reshape((y.shape[0],y.shape[1],1))) )
      mse3.name = 'mse3'
      mae3.name = 'mae3'

      theta_mu3_in = theta_mu3_temp.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig3_in = theta_sig3_temp.reshape((x_shape[0]*x_shape[1], -1))
      coeff3_in = coeff3_temp.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in)
      ddoutMSEA = ddoutMSEA + [mse3, mae3]
      ddoutYpreds = ddoutYpreds + [y_pred3_temp]
      #totaMSE+=mse3
      indexSepDynamic +=2

    if (y_dim>3):
      theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[:4]
      restResults = restResults[4:]
      theta_mu4_temp.name = 'theta_mu4'
      theta_sig4_temp.name = 'theta_sig4'
      coeff4_temp.name = 'coeff4'
      y_pred4_temp.name = 'disaggregation4'
      mse4 = T.mean((y_pred4_temp - y[:,:,3].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      mae4 = T.mean( T.abs_(y_pred4_temp - y[:,:,3].reshape((y.shape[0],y.shape[1],1))) )
      mse4.name = 'mse4'
      mae4.name = 'mae4'

      theta_mu4_in = theta_mu4_temp.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig4_in = theta_sig4_temp.reshape((x_shape[0]*x_shape[1], -1))
      coeff4_in = coeff4_temp.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in)
      ddoutMSEA = ddoutMSEA + [mse4, mae4]
      ddoutYpreds = ddoutYpreds + [y_pred4_temp]
      #totaMSE+=mse4
      indexSepDynamic +=2

    if (y_dim>4):
      theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp = restResults[:4]
      restResults = restResults[4:]
      theta_mu5_temp.name = 'theta_mu5'
      theta_sig5_temp.name = 'theta_sig5'
      coeff5_temp.name = 'coeff5'
      y_pred5_temp.name = 'disaggregation5'
      mse5 = T.mean((y_pred5_temp - y[:,:,4].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
      mae5 = T.mean( T.abs_(y_pred5_temp - y[:,:,4].reshape((y.shape[0],y.shape[1],1))) )
      mse5.name = 'mse5'
      mae5.name = 'mae5'

      theta_mu5_in = theta_mu5_temp.reshape((x_shape[0]*x_shape[1], -1))
      theta_sig5_in = theta_sig5_temp.reshape((x_shape[0]*x_shape[1], -1))
      coeff5_in = coeff5_temp.reshape((x_shape[0]*x_shape[1], -1))

      argsGMM = argsGMM + (theta_mu5_in, theta_sig5_in, coeff5_in)
      ddoutMSEA = ddoutMSEA + [mse5, mae5]
      ddoutYpreds = ddoutYpreds + [y_pred5_temp]
      #totaMSE+=mse5
      indexSepDynamic +=2

    totaMSE = (mse1+mse2+mse3+mse4+mse5)/y_dim
    totaMSE.name = 'mse'

    kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)

    x_shape = x.shape
    y_shape = y.shape
    x_in = x.reshape((x_shape[0]*x_shape[1], -1))
    y_in = y.reshape((y_shape[0]*y_shape[1], -1))
    

    recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon = recon.reshape((x_shape[0], x_shape[1]))
    recon.name = 'gmm_out'"""
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    """recon_term = recon.sum(axis=0).mean()
    recon_term = recon.sum(axis=0).mean()
    recon_term.name = 'recon_term'

    #kl_temp = kl_temp * mask
    
    kl_term = kl_temp.sum(axis=0).mean()
    kl_term.name = 'kl_term'

    #nll_upper_bound_0 = recon_term + kl_term
    #nll_upper_bound_0.name = 'nll_upper_bound_0'
    if (flgMSE==1):
      nll_upper_bound =  recon_term + kl_term + totaMSE
    else:
      nll_upper_bound =  recon_term + kl_term
    nll_upper_bound.name = 'nll_upper_bound'"""

    ######################## TEST (GENERATION) TIME
    s_temp_val, prior_mu_temp_val, prior_sig_temp_val, \
      theta_mu1_temp_val, theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val = restResults_val[:7]
    restResults_val = restResults_val[7:]

    s_temp_val = concatenate(
        [s_0[None, :, :], s_temp_val[:-1]], axis=0
    )  # shift the state sequence one step: prepend the initial state s_0 and drop the last state, so step t sees the state produced at step t-1

    theta_mu1_temp_val.name = 'theta_mu1_val'
    theta_sig1_temp_val.name = 'theta_sig1_val'
    coeff1_temp_val.name = 'coeff1_val'
    y_pred1_temp_val.name = 'disaggregation1_val'

    #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1)
    # mean over all elements (axis=None)
    mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1)))**2)
    mae1_val = T.mean(T.abs_(y_pred1_temp_val - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1))))

    # NeuralNILM relative error in total energy: (sum_output - sum_target) / max(sum_output, sum_target)
    totPred = T.sum(y_pred1_temp_val)
    totReal = T.sum(y[:, :, 0])
    relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal)
    propAssigned1_val = 1 - T.sum(
        T.abs_(y_pred1_temp_val - y[:, :, 0].reshape(
            (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))
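
    # Hedged NumPy sketch (not part of the model graph): the same two NeuralNILM-style
    # metrics as above, written out so their meaning is easy to check. `pred`, `target`
    # and `agg` are hypothetical arrays for one appliance and for the aggregate input;
    # `agg` plays the role of x in the 2 * T.sum(x) denominator.
    import numpy as np

    def _np_relative_error_in_total_energy(pred, target):
        # (sum_output - sum_target) / max(sum_output, sum_target)
        tot_pred, tot_real = pred.sum(), target.sum()
        return (tot_pred - tot_real) / max(tot_pred, tot_real)

    def _np_proportion_of_energy_correctly_assigned(pred, target, agg):
        # 1 - sum(|pred - target|) / (2 * total aggregate energy)
        return 1.0 - np.abs(pred - target).sum() / (2.0 * agg.sum())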

    #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0]
    #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0]

    #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all
    #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1))))
    mse1_val.name = 'mse1_val'
    mae1_val.name = 'mae1_val'

    theta_mu1_in_val = theta_mu1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    theta_sig1_in_val = theta_sig1_temp_val.reshape(
        (x_shape[0] * x_shape[1], -1))
    coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1))

    ddoutMSEA_val = []
    ddoutYpreds_val = [y_pred1_temp_val]
    totaMSE_val = mse1_val
    totaMAE_val = mae1_val
    indexSepDynamic_val = 5
    prediction_val = y_pred1_temp_val

    # Initialize per-appliance metric placeholders so test_fn always has these outputs;
    # they are overwritten below for the appliances that actually exist (y_dim > 1..4)
    mse2_val = T.zeros((1, ))
    mae2_val = T.zeros((1, ))
    mse3_val = T.zeros((1, ))
    mae3_val = T.zeros((1, ))
    mse4_val = T.zeros((1, ))
    mae4_val = T.zeros((1, ))
    mse5_val = T.zeros((1, ))
    mae5_val = T.zeros((1, ))

    relErr2_val = T.zeros((1, ))
    relErr3_val = T.zeros((1, ))
    relErr4_val = T.zeros((1, ))
    relErr5_val = T.zeros((1, ))
    propAssigned2_val = T.zeros((1, ))
    propAssigned3_val = T.zeros((1, ))
    propAssigned4_val = T.zeros((1, ))
    propAssigned5_val = T.zeros((1, ))

    if (y_dim > 1):
        theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu2_temp_val.name = 'theta_mu2_val'
        theta_sig2_temp_val.name = 'theta_sig2_val'
        coeff2_temp_val.name = 'coeff2_val'
        y_pred2_temp_val.name = 'disaggregation2_val'
        # mean over all elements (axis=None)
        mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1)))**2)
        mae2_val = T.mean(T.abs_(y_pred2_temp_val - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred2_temp_val)
        totReal = T.sum(y[:, :, 1])
        relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned2_val = 1 - T.sum(
            T.abs_(y_pred2_temp_val - y[:, :, 1].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse2_val.name = 'mse2_val'
        mae2_val.name = 'mae2_val'

        theta_mu2_in_val = theta_mu2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig2_in_val = theta_sig2_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val

        ddoutMSEA_val = ddoutMSEA_val + [mse2_val, mae2_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred2_temp_val]
        totaMSE_val += mse2_val
        totaMAE_val += mae2_val
        indexSepDynamic_val += 2

        prediction_val = T.concatenate([prediction_val, y_pred2_temp_val],
                                       axis=2)

    if (y_dim > 2):
        theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu3_temp_val.name = 'theta_mu3_val'
        theta_sig3_temp_val.name = 'theta_sig3_val'
        coeff3_temp_val.name = 'coeff3_val'
        y_pred3_temp_val.name = 'disaggregation3_val'
        # mean over all elements (axis=None)
        mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1)))**2)
        mae3_val = T.mean(T.abs_(y_pred3_temp_val - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred3_temp_val)
        totReal = T.sum(y[:, :, 2])
        relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned3_val = 1 - T.sum(
            T.abs_(y_pred3_temp_val - y[:, :, 2].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse3_val.name = 'mse3_val'
        mae3_val.name = 'mae3_val'

        theta_mu3_in_val = theta_mu3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig3_in_val = theta_sig3_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu3_in_val, theta_sig3_in_val,
                                     coeff3_in_val)
        ddoutMSEA_val = ddoutMSEA_val + [mse3_val, mae3_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred3_temp_val]
        totaMSE_val += mse3_val
        totaMAE_val += mae3_val
        indexSepDynamic_val += 2

        prediction_val = T.concatenate([prediction_val, y_pred3_temp_val],
                                       axis=2)

    if (y_dim > 3):
        theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu4_temp_val.name = 'theta_mu4_val'
        theta_sig4_temp_val.name = 'theta_sig4_val'
        coeff4_temp_val.name = 'coeff4_val'
        y_pred4_temp_val.name = 'disaggregation4_val'
        # mean over all elements (axis=None)
        mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1)))**2)
        mae4_val = T.mean(T.abs_(y_pred4_temp_val - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred4_temp_val)
        totReal = T.sum(y[:, :, 3])
        relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned4_val = 1 - T.sum(
            T.abs_(y_pred4_temp_val - y[:, :, 3].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse4_val.name = 'mse4_val'
        mae4_val.name = 'mae4_val'

        theta_mu4_in_val = theta_mu4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig4_in_val = theta_sig4_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu4_in_val, theta_sig4_in_val,
                                     coeff4_in_val)
        ddoutMSEA_val = ddoutMSEA_val + [mse4_val, mae4_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred4_temp_val]
        totaMSE_val += mse4_val
        totaMAE_val += mae4_val
        indexSepDynamic_val += 2
        prediction_val = T.concatenate([prediction_val, y_pred4_temp_val],
                                       axis=2)

    if (y_dim > 4):
        theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val = restResults_val[:4]
        restResults_val = restResults_val[4:]
        theta_mu5_temp_val.name = 'theta_mu5_val'
        theta_sig5_temp_val.name = 'theta_sig5_val'
        coeff5_temp_val.name = 'coeff5_val'
        y_pred5_temp_val.name = 'disaggregation5_val'
        # mean over all elements (axis=None)
        mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1)))**2)
        mae5_val = T.mean(T.abs_(y_pred5_temp_val - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1))))

        totPred = T.sum(y_pred5_temp_val)
        totReal = T.sum(y[:, :, 4])
        relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal)
        propAssigned5_val = 1 - T.sum(
            T.abs_(y_pred5_temp_val - y[:, :, 4].reshape(
                (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x))

        mse5_val.name = 'mse5_val'
        mae5_val.name = 'mae5_val'

        theta_mu5_in_val = theta_mu5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        theta_sig5_in_val = theta_sig5_temp_val.reshape(
            (x_shape[0] * x_shape[1], -1))
        coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1))

        argsGMM_val = argsGMM_val + (theta_mu5_in_val, theta_sig5_in_val,
                                     coeff5_in_val)
        ddoutMSEA_val = ddoutMSEA_val + [mse5_val, mae5_val]
        ddoutYpreds_val = ddoutYpreds_val + [y_pred5_temp_val]
        totaMSE_val += mse5_val
        totaMAE_val += mae5_val
        indexSepDynamic_val += 2
        prediction_val = T.concatenate([prediction_val, y_pred5_temp_val],
                                       axis=2)
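
    # Hedged sketch (hypothetical helper, not used by the original code): the five nearly
    # identical blocks above all compute the same per-appliance statistics; the pattern is
    # the following, where y_pred is (time, batch, 1), target is the matching y[:, :, k]
    # column, and x is the aggregate input whose total energy normalises the assignment.
    def _appliance_metrics(y_pred, target, x):
        target = target.reshape((target.shape[0], target.shape[1], 1))
        mse = T.mean((y_pred - target) ** 2)
        mae = T.mean(T.abs_(y_pred - target))
        tot_pred, tot_real = T.sum(y_pred), T.sum(target)
        rel_err = (tot_pred - tot_real) / T.maximum(tot_pred, tot_real)
        prop_assigned = 1 - T.sum(T.abs_(y_pred - target)) / (2 * T.sum(x))
        return mse, mae, rel_err, prop_assigned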

    recon_val = GMMdisagMulti(
        y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val,
        *argsGMM_val
    )  # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in)
    recon_val = recon_val.reshape((x_shape[0], x_shape[1]))
    recon_val.name = 'gmm_out'
    totaMSE_val = totaMSE_val / y_dim
    totaMAE_val = totaMAE_val / y_dim
    '''
    recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in)
    recon5 = recon.reshape((x_shape[0], x_shape[1]))    
    '''
    recon_term_val = recon_val.sum(axis=0).mean()
    recon_term_val.name = 'recon_term'
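
    # Hedged illustration of what the reconstruction term is doing (GMMdisagMulti itself
    # is defined elsewhere; this NumPy diagonal-Gaussian mixture negative log-likelihood
    # for a single target column is only an assumption about its general shape):
    # mu and sig are (N, K), coeff is (N, K) with rows summing to 1, t is (N,).
    def _np_gmm_nll(t, mu, sig, coeff, eps=1e-8):
        log_comp = -0.5 * ((t[:, None] - mu) / sig) ** 2 - np.log(sig) - 0.5 * np.log(2 * np.pi)
        # mixture likelihood per sample, then negative log
        return -np.log((coeff * np.exp(log_comp)).sum(axis=1) + eps)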

    ######################

    model.inputs = [x, mask, y, y_mask, scheduleSamplingMask]
    model.params = params
    model.nodes = nodes

    optimizer = Adam(lr=lr)
    header = "epoch,log,kl,nll_upper_bound,mse,mae\n"
    extension = [
        GradientClipping(batch_size=batch_size),
        EpochCount(epoch, save_path, header),
        Monitoring(
            freq=monitoring_freq,
            #ddout=[nll_upper_bound, recon_term, totaMSE, kl_term, mse1, mae1]+ddoutMSEA+ddoutYpreds ,
            #indexSep=indexSepDynamic,
            indexDDoutPlot=[13],  # indices of ddout entries to plot
            #, (6,y_pred_temp)
            instancesPlot=instancesPlot,  #0-150
            data=[Iterator(valid_data, batch_size)],
            savedFolder=save_path),
        Picklize(freq=monitoring_freq, path=save_path),
        EarlyStopping(freq=monitoring_freq,
                      path=save_path,
                      channel=channel_name),
        WeightNorm()
    ]

    lr_iterations = {0: lr}
    '''
    mainloop = Training(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[nll_upper_bound],
        n_steps = n_steps,
        extension=extension,
        lr_iterations=lr_iterations
    )
    '''
    """mainloop.restore(
        name=pkl_name,
        data=Iterator(train_data, batch_size),
        model=model,
        optimizer=optimizer,
        cost=nll_upper_bound,
        outputs=[nll_upper_bound],
        n_steps = n_steps,
        extension=extension,
        lr_iterations=lr_iterations
    )

    mainloop.run()"""

    data = Iterator(test_data, batch_size)

    test_fn = theano.function(
        inputs=[x, y],
        allow_input_downcast=True,
        outputs=[
            prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val,
            mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val,
            mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val,
            relErr3_val, relErr4_val, relErr5_val, propAssigned1_val,
            propAssigned2_val, propAssigned3_val, propAssigned4_val,
            propAssigned5_val
        ],
        updates=updates_val)
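    # test_fn returns 24 outputs: index 0 is the stacked prediction; indices 1-13 are
    # recon_term_val, totaMSE_val, totaMAE_val and the five per-appliance mse/mae values;
    # indices 14-23 are the relative-error and proportion-assigned metrics. The slices
    # [1:14] and [14:] below split each batch's results accordingly.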
    testOutput = []
    testMetrics2 = []
    numBatchTest = 0
    for batch in data:
        outputGeneration = test_fn(batch[0], batch[2])
        testOutput.append(outputGeneration[1:14])
        testMetrics2.append(outputGeneration[14:])
        #{0:[4,20], 2:[5,10]}
        #if (numBatchTest==0):

        plt.figure(1)
        plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4])  # swap time/batch axes; plot sequence index 4
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(2)
        plt.plot(np.transpose(batch[2], [1, 0, 2])[4])
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest))
        plt.clf()

        plt.figure(3)
        plt.plot(np.transpose(batch[0], [1, 0, 2])[4])  # swap time/batch axes; plot sequence index 4
        plt.savefig(save_path +
                    "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest))
        plt.clf()
        numBatchTest += 1

    testOutput = np.asarray(testOutput)
    testMetrics2 = np.asarray(testMetrics2)
    print(testOutput.shape)
    print(testMetrics2.shape)
    recon_test = testOutput[:, 0].mean()
    mse_test = testOutput[:, 1].mean()
    mae_test = testOutput[:, 2].mean()
    mse1_test = testOutput[:, 3].mean()
    mae1_test = testOutput[:, 8].mean()
    mse2_test = testOutput[:, 4].mean()
    mae2_test = testOutput[:, 9].mean()
    mse3_test = testOutput[:, 5].mean()
    mae3_test = testOutput[:, 10].mean()
    mse4_test = testOutput[:, 6].mean()
    mae4_test = testOutput[:, 11].mean()
    mse5_test = testOutput[:, 7].mean()
    mae5_test = testOutput[:, 12].mean()

    relErr1_test = testMetrics2[:, 0].mean()
    relErr2_test = testMetrics2[:, 1].mean()
    relErr3_test = testMetrics2[:, 2].mean()
    relErr4_test = testMetrics2[:, 3].mean()
    relErr5_test = testMetrics2[:, 4].mean()

    propAssigned1_test = testMetrics2[:, 5].mean()
    propAssigned2_test = testMetrics2[:, 6].mean()
    propAssigned3_test = testMetrics2[:, 7].mean()
    propAssigned4_test = testMetrics2[:, 8].mean()
    propAssigned5_test = testMetrics2[:, 9].mean()

    fLog = open(save_path + '/output.csv', 'w')
    fLog.write(str(lr_iterations) + "\n")
    fLog.write(str(appliances) + "\n")
    fLog.write(str(windows) + "\n")
    fLog.write(
        "logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format(
        recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test,
        mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, mse_test,
        mae_test))
    fLog.write(
        "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n"
    )
    fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format(
        relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test,
        propAssigned1_test, propAssigned2_test, propAssigned3_test,
        propAssigned4_test, propAssigned5_test))

    fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n")
    fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim,
                                            y2s_dim, z2s_dim))
    fLog.write(
        "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n")
    for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']):
        d, e, f, g, j, k, l, m = 0, 0, 0, 0, 0, 0, 0, 0
        ep = mainloop.trainlog.monitor['epoch'][i]
        a = mainloop.trainlog.monitor['recon_term'][i]
        b = mainloop.trainlog.monitor['kl_term'][i]
        c = mainloop.trainlog.monitor['mse1'][i]
        h = mainloop.trainlog.monitor['mae1'][i]

        if (y_dim > 1):
            d = mainloop.trainlog.monitor['mse2'][i]
            j = mainloop.trainlog.monitor['mae2'][i]
        if (y_dim > 2):
            e = mainloop.trainlog.monitor['mse3'][i]
            k = mainloop.trainlog.monitor['mae3'][i]
        if (y_dim > 3):
            f = mainloop.trainlog.monitor['mse4'][i]
            l = mainloop.trainlog.monitor['mae4'][i]
        if (y_dim > 4):
            g = mainloop.trainlog.monitor['mse5'][i]
            m = mainloop.trainlog.monitor['mae5'][i]
        fLog.write(
            "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n"
            .format(ep, a, b, c, d, e, f, g, h, j, k, l, m))

    with open(save_path + '/outputRealGeneration.pkl', 'wb') as f:
        pickle.dump(outputGeneration, f, -1)