def _recurrence(v_h_, x_h_, v_t_, x_t_, a_t_, is_aggressive): state = tt.concatenate([v_h_, x_h_, tt.flatten(v_t_), tt.flatten(x_t_), tt.flatten(a_t_)]) h0 = tt.dot(state, self.W_a_0) + self.b_a_0 relu0 = tt.nnet.relu(h0) h1 = tt.dot(relu0, self.W_a_1) + self.b_a_1 relu1 = tt.nnet.relu(h1) h2 = tt.dot(relu1, self.W_a_2) + self.b_a_2 relu2 = tt.nnet.relu(h2) a = tt.dot(relu2, self.W_a_c) v_h, x_h, v_t, x_t, a_t, cost_transition = _step_state(v_h_, x_h_, v_t_, x_t_, a_t_, a, is_aggressive) # cost: # 0. smooth acceleration policy cost_accel = tt.abs_(a) # 1. forcing the host to move forward (until the top point of the roundabout) cost_progress = tt.nnet.relu(0.5*self.two_pi_r-x_h) # 2. keeping distance from close vehicles x_abs_diffs = tt.abs_(x_h - x_t) cost_accident = tt.mean(3*tt.nnet.relu( self.require_distance-x_abs_diffs )) * (x_h > - 0.5*self.host_length) #tt.nnet.sigmoid(x_h + 0.5*self.host_length) cost = self.alpha_accel * cost_accel + self.alpha_progress * cost_progress + self.alpha_accident * cost_accident return (v_h, x_h, v_t, x_t, a_t, cost, cost_transition), t.scan_module.until(x_h[0]>=0.45*self.two_pi_r)
def batch_multicrop(bboxes, frame): att_col = img_col att_row = img_row _cx = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2; cx = (_cx + 1) / 2. * img_col _cy = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2; cy = (_cy + 1) / 2. * img_row _w = TT.abs_(bboxes[:, :, 3] - bboxes[:, :, 1]) / 2; w = _w * img_col _h = TT.abs_(bboxes[:, :, 2] - bboxes[:, :, 0]) / 2; h = _h * img_row dx = w / (img_col - 1) dy = h / (img_row - 1) mx = cx.dimshuffle(0, 1, 'x') + dx.dimshuffle(0, 1, 'x') * (TT.arange(att_col, dtype=T.config.floatX).dimshuffle('x', 'x', 0) - (att_col - 1) / 2.) my = cy.dimshuffle(0, 1, 'x') + dy.dimshuffle(0, 1, 'x') * (TT.arange(att_row, dtype=T.config.floatX).dimshuffle('x', 'x', 0) - (att_row - 1) / 2.) a = TT.arange(img_col, dtype=T.config.floatX) b = TT.arange(img_row, dtype=T.config.floatX) # (batch_size, nr_samples, channels, frame_size, att_size) ax = TT.maximum(0, 1 - TT.abs_(a.dimshuffle('x', 'x', 'x', 0, 'x') - mx.dimshuffle(0, 1, 'x', 'x', 2))) by = TT.maximum(0, 1 - TT.abs_(b.dimshuffle('x', 'x', 'x', 0, 'x') - my.dimshuffle(0, 1, 'x', 'x', 2))) def __batch_multicrop_dot(a, b): return (a.dimshuffle(0, 1, 2, 3, 4, 'x') * b.dimshuffle(0, 1, 2, 'x', 3, 4)).sum(axis=4) crop = __batch_multicrop_dot(by.dimshuffle(0, 1, 2, 4, 3), __batch_multicrop_dot(frame.dimshuffle(0, 'x', 1, 2, 3), ax)) return crop
def smoothL1(x): #x is vector of scalars lto = T.abs_(x)<1 gteo = T.abs_(x)>=1 new_x = T.set_subtensor(x[lto.nonzero()],0.5 * T.square(x[lto.nonzero()])) new_x = T.set_subtensor(new_x[gteo.nonzero()], T.abs_(new_x[gteo.nonzero()]) - 0.5) return new_x
def get_cost_updates(self, persistant, k=2, lr=0.01, l1=0., l2=0.01): chain_start = persistant V_burn_in, updates = theano.scan(fn=self.gibbs_VhV, outputs_info=[chain_start], n_steps=k, name='MultiRTRBM Gibbs Smapler') chain_end = V_burn_in[-1] # Contrastive Divergence (Variational method Cost)/ Approxiamted # likelihood L1 = T.sum(T.abs_(self.W)) + T.sum(T.abs_(self.Wt)) L2 = T.sum(self.W**2) + T.sum(self.Wt**2) KL_diff = T.mean(self.free_energy_RTRBM(self.input) - self.free_energy_RTRBM(chain_end)) +\ T.cast(l1, theano.config.floatX) * L1 + \ T.cast(l2, theano.config.floatX) * L2 self.gparams = T.grad(KL_diff, self.params, consider_constant=[chain_end]) for param, gparam in zip(self.params, self.gparams): if param in [self.W, self.Wt]: updates[param] = param - 0.0001 * gparam else: updates[param] = param - lr * gparam cost, updates = self.get_pseudo_likelihood_cost(updates) return cost, updates
def call(self, X): if type(X) is not list or len(X) != 2: raise Exception("SquareAttention must be called on a list of two tensors. Got: " + str(X)) frame, position = X[0], X[1] # Reshaping the input to exclude the time dimension frameShape = K.shape(frame) positionShape = K.shape(position) (chans, height, width) = frameShape[-3:] targetDim = positionShape[-1] frame = K.reshape(frame, (-1, chans, height, width)) position = K.reshape(position, (-1, ) + (targetDim, )) # Applying the attention hw = THT.abs_(position[:, 2] - position[:, 0]) * self.scale / 2.0 hh = THT.abs_(position[:, 3] - position[:, 1]) * self.scale / 2.0 position = THT.maximum(THT.set_subtensor(position[:, 0], position[:, 0] - hw), -1.0) position = THT.minimum(THT.set_subtensor(position[:, 2], position[:, 2] + hw), 1.0) position = THT.maximum(THT.set_subtensor(position[:, 1], position[:, 1] - hh), -1.0) position = THT.minimum(THT.set_subtensor(position[:, 3], position[:, 3] + hh), 1.0) rX = Data.linspace(-1.0, 1.0, width) rY = Data.linspace(-1.0, 1.0, height) FX = THT.gt(rX, position[:,0].dimshuffle(0,'x')) * THT.le(rX, position[:,2].dimshuffle(0,'x')) FY = THT.gt(rY, position[:,1].dimshuffle(0,'x')) * THT.le(rY, position[:,3].dimshuffle(0,'x')) m = FY.dimshuffle(0, 1, 'x') * FX.dimshuffle(0, 'x', 1) m = m + self.alpha - THT.gt(m, 0.) * self.alpha frame = frame * m.dimshuffle(0, 'x', 1, 2) # Reshaping the frame to include time dimension output = K.reshape(frame, frameShape) return output
def pass_fn(*inputs): ''' Function for scan op. Has to work with variable number of arguments. Input layout: diff, message[message_order[0]], ..., message[message_order[N], initial_potential[0], ..., initial_potential[M] ''' input_messages = {} ''' Quick creation of message potential tables by using a shallow copy of existing potential tables''' for i,midx in enumerate(message_order): input_messages[midx] = first_messages[midx].replace_tensor(inputs[i+1]) off = 1+len(message_order) # offset into input for the initial potentials ipotentials = [] ''' Create initial potentials from passed inputs''' for i, pot in enumerate(mpstate.initial_potentials): ipotentials.append(pot.replace_tensor(inputs[off+i])) ''' Pass messages and calculate next set of messages ''' (used_message_order, next_messages) = mpstate.pass_messages(input_messages=input_messages, initial_potentials=ipotentials) if (convergence_threshold>=0.0): ''' Calculate absolute difference between last set of differences and current set for convergence diagnostics''' diff = T.sum( T.abs_(next_messages[used_message_order[0]].pt_tensor.flatten() - input_messages[used_message_order[0]].pt_tensor.flatten())) for i in range(1, len(used_message_order)): diff += T.sum( T.abs_(next_messages[used_message_order[i]].pt_tensor.flatten() - input_messages[used_message_order[i]].pt_tensor.flatten())) ''' Create result which conforms to the start of the input layout''' resvalues = [diff] + [next_messages[midx].pt_tensor for midx in message_order] ''' Return updated values plus a convergence criterion''' return resvalues, theano.scan_module.until(diff<=convergence_threshold) else: diff = convergence_criterion resvalues = [diff] + [next_messages[midx].pt_tensor for midx in message_order] return resvalues
def get_cost_updates(self, x, W, W_prime, b, b_prime, corruption_level, learning_rate, l2reg=0., l1reg=0.): """ This function computes the cost and the updates for one trainng step of the dA """ self.x = x self.W = W self.W_prime = W_prime self.b = b self.b_prime = b_prime self.params = [self.W, self.W_prime, self.b, self.b_prime] if corruption_level == None: tilde_x = self.x else: tilde_x = self.get_corrupted_input(self.x, corruption_level) y = self.get_hidden_values( tilde_x) z = self.get_reconstructed_input(y) # note : we sum over the size of a datapoint; if we are using minibatches, # L will be a vector, with one entry per example in minibatch XE = self.x * T.log(z) + (1 - self.x) * T.log(1-z) cost = -T.mean(T.sum(XE, axis=1),axis=0) if l2reg != 0.: cost += l2reg * (T.mean(T.sum(self.W*self.W,1),0) + T.mean(T.sum(self.W_prime*self.W_prime,1),0)) if l1reg != 0.: cost += l1reg * (T.mean(T.sum(T.abs_(y),1),0) + T.mean(T.sum(T.abs_(y),1),0)) # compute the gradients of the cost of the `dA` with respect # to its parameters gparams = T.grad(cost, self.params) # # generate the list of updates # updates = {} # for param, gparam in zip(self.params, gparams): # updates[param] = param - learning_rate*gparam updates = [-learning_rate*gparam for gparam in gparams] return (cost, updates)
def prepareTraining(self): ''' Prepares the relevant functions (details on neural_net_creator's prepareTraining) ''' #loss objective to minimize self.prediction = lasagne.layers.get_output(self.network) self.prediction=self.prediction[:,0] #self.loss = lasagne.objectives.categorical_crossentropy(self.prediction, self.target_var) #the loss is now the squared error in the output self.loss = lasagne.objectives.squared_error(self.prediction, self.target_var) self.loss = self.loss.mean() self.params = lasagne.layers.get_all_params(self.network, trainable=True) self.updates = lasagne.updates.nesterov_momentum( self.loss, self.params, learning_rate=0.01, momentum=0.9) self.test_prediction = lasagne.layers.get_output(self.network, deterministic=True) self.test_prediction=self.test_prediction[:,0] self.test_loss = lasagne.objectives.squared_error(self.test_prediction, self.target_var) self.test_loss = self.test_loss.mean() #the accuracy is now the number of sample that achieve a 0.01 precision (can be changed) self.test_acc = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.01) , dtype=theano.config.floatX) self.test_acc2 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.05) , dtype=theano.config.floatX) self.test_acc3 = T.mean(T.le(T.abs_(T.sub(self.test_prediction,self.target_var)),0.1) , dtype=theano.config.floatX) self.train_fn = theano.function([self.input_var, self.target_var], self.loss, updates=self.updates) self.val_fn = theano.function([self.input_var, self.target_var], [self.test_loss,self.test_acc,self.test_acc2,self.test_acc3]) self.use = theano.function([self.input_var],[self.test_prediction])
def criteria(self): F = T.dot(self.w, self.X) Fs = T.sqrt(F**2 + 1e-8) L2Fs = (Fs**2).sum(axis=[1]) L2Fs = T.sqrt(L2Fs) NFs = Fs/L2Fs.dimshuffle(0, 'x') L2Fn = (NFs**2).sum(axis=[0]) L2Fn = T.sqrt(L2Fn) self.Fhat = NFs/L2Fn.dimshuffle('x', 0) # self.Fhat = self.feedForward(self.dot()) F = T.sqrt(T.dot(self.gMat, T.sqr(self.Fhat))) # self.Fhat1)) # self.feedForward(self.dot() Fs = T.sqrt(F**2 + 1e-8) L2Fs = (Fs**2).sum(axis=[1]) L2Fs = T.sqrt(L2Fs) NFs = Fs/L2Fs.dimshuffle(0, 'x') L2Fn = (NFs**2).sum(axis=[0]) L2Fn = T.sqrt(L2Fn) self.gFhat = NFs/L2Fn.dimshuffle('x', 0) # from connections import distMat # x = distMat(self.w.shape[0].eval(), 20) # inhibition = T.dot(T.sqr(self.Fhat1.T), x) # inhibition = self.Fhat1 * inhibition.T return T.abs_(self.Fhat) + T.abs_(self.gFhat) #+ T.abs_(inhibition)
def crop_attention_bilinear(bbox, frame): att = bbox frame_col = img_col frame_row = img_row _cx = (att[1] + att[3]) / 2; cx = (_cx + 1) / 2. * frame_col _cy = (att[0] + att[2]) / 2; cy = (_cy + 1) / 2. * frame_row _w = TT.abs_(att[3] - att[1]) / 2; w = _w * frame_col _h = TT.abs_(att[2] - att[0]) / 2; h = _h * frame_row dx = w / (att_col - 1) dy = h / (att_row - 1) mx = cx + dx * (TT.arange(att_col, dtype=T.config.floatX) - (att_col - 1) / 2.) my = cy + dy * (TT.arange(att_row, dtype=T.config.floatX) - (att_row - 1) / 2.) a = TT.arange(frame_col, dtype=T.config.floatX) b = TT.arange(frame_row, dtype=T.config.floatX) ax = TT.maximum(0, 1 - TT.abs_(a.dimshuffle(0, 'x') - mx.dimshuffle('x', 0))) by = TT.maximum(0, 1 - TT.abs_(b.dimshuffle(0, 'x') - my.dimshuffle('x', 0))) bilin = TT.dot(by.T, TT.dot(frame, ax)) return bilin
def attention_gate(self, facts, memory, question): # TODO: for the first iteration question and memory are the same so # we can speedup the computation # facts is (num_batch * fact_length * memory_dim) # questions is (num_batch * memory_dim) # memory is (num_batch * memory_dim) # attention_gates must be (fact_length * nb_batch * 1) # Compute z (num_batch * fact_length * (7*memory_dim + 2)) # Dimshuffle facts to get a shape of # (fact_length * num_batch * memory_dim) facts = facts.dimshuffle(1, 0, 2) # Pad questions and memory to be of shape # (_ * num_batch * memory_dim) memory = T.shape_padleft(memory) question = T.shape_padleft(question) to_concatenate = list() to_concatenate.extend([facts, memory, question]) to_concatenate.extend([facts * question, facts * memory]) to_concatenate.extend([T.abs_(facts - question), T.abs_(facts - memory)]) # z = concatenate(to_concatenate, axis=2) # TODO: to be continued for the moment just return ones return T.ones((facts.shape[1], facts.shape[0], 1))
def theano_setup(self): W = T.dmatrix('W') b = T.dvector('b') c = T.dvector('c') x = T.dmatrix('x') s = T.dot(x, W) + c # h = 1 / (1 + T.exp(-s)) # h = T.nnet.sigmoid(s) h = T.tanh(s) # r = T.dot(h,W.T) + b # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b)) ract = T.dot(h,W.T) + b r = self.output_scaling_factor * T.tanh(ract) #g = function([W,b,c,x], h) #f = function([W,b,c,h], r) #fg = function([W,b,c,x], r) # Another variable to be able to call a function # with a noisy x and compare it to a reference x. y = T.dmatrix('y') all_losses = ((r - y)**2) loss = T.sum(all_losses) #loss = ((r - y)**2).sum() self.theano_encode_decode = function([W,b,c,x], r) self.theano_all_losses = function([W,b,c,x,y], [all_losses, T.abs_(s), T.abs_(ract)]) self.theano_gradients = function([W,b,c,x,y], [T.grad(loss, W), T.grad(loss, b), T.grad(loss, c)])
def power_pool_2d(x, ds, p=3, b=0): n_batch, n_ch, s0, s1 = x.shape d0, d1 = ds c = tt.ones((s0, s1)) # sum elements in regions y = tt.abs_(x[:, :, 0::d0, 0::d1])**p d = c[0::d0, 0::d1].copy() for i in range(0, d0): for j in range(0, d1): if i != 0 or j != 0: ni = (s0 - i - 1) / d0 + 1 nj = (s1 - j - 1) / d1 + 1 xij = tt.abs_(x[:, :, i::d0, j::d1])**p y = tt.inc_subtensor(y[:, :, :ni, :nj], xij) d = tt.inc_subtensor(d[:ni, :nj], c[i::d0, j::d1]) # divide by number of elements y /= d y += b**p # take root y = y**(1. / p) return y
def forward_jacobian_log_det(self, x): if x.ndim == 1: return tt.log(tt.abs_(self.diag_weights)).sum() elif x.ndim == 2: return x.shape[0] * tt.log(tt.abs_(self.diag_weights)).sum() else: raise ValueError('x must be one or two dimensional.')
def relevance_conv_a_b_abs(inputs, weights, out_relevances, a, b, bias=None): assert a is not None assert b is not None assert a - b == 1 weights_plus = weights * T.gt(weights, 0) weights_neg = weights * T.lt(weights, 0) plus_norm = conv2d(T.abs_(inputs), weights_plus) # stabilize, prevent division by 0 eps = 1e-4 plus_norm += T.eq(plus_norm, 0) * eps plus_rel_normed = out_relevances / plus_norm in_rel_plus = conv2d(plus_rel_normed, weights_plus.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full") in_rel_plus *= T.abs_(inputs) # minuses to get positive outputs, since will be subtracted # at end of function neg_norm = -conv2d(T.abs_(inputs), weights_neg) neg_norm += T.eq(neg_norm, 0) * eps neg_rel_normed = out_relevances / neg_norm in_rel_neg = -conv2d(neg_rel_normed, weights_neg.dimshuffle(1, 0, 2, 3)[:, :, ::-1, ::-1], border_mode="full") in_rel_neg *= T.abs_(inputs) in_relevance = a * in_rel_plus - b * in_rel_neg return in_relevance
def __init__(self, rng, input1, input2, n_in1, n_in2, n_hidden_layers, d_hidden, W1=None, W2=None): self.input1 = input1 self.input2 = input2 CouplingFunc = WarpNetwork(rng, input1, n_hidden_layers, d_hidden, n_in1, n_in2) if W1 is None: bin = numpy.sqrt(6. / (n_in1 + n_in1)) W1_values = numpy.identity(n_in1, dtype=theano.config.floatX) W1 = theano.shared(value=W1_values, name='W1') if W2 is None: bin = numpy.sqrt(6. / (n_in2 + n_in2)) W2_values = numpy.identity(n_in2, dtype=theano.config.floatX) W2 = theano.shared(value=W2_values, name='W2') V1u = T.triu(W1) V1l = T.tril(W1) V1l = T.extra_ops.fill_diagonal(V1l, 1.) V1 = T.dot(V1u, V1l) V2u = T.triu(W2) V2l = T.tril(W2) V2l = T.extra_ops.fill_diagonal(V2l, 1.) V2 = T.dot(V2u, V2l) self.output1 = T.dot(input1, V1) self.output2 = T.dot(input2, V2) + CouplingFunc.output self.log_jacobian = T.log(T.abs_(T.nlinalg.ExtractDiag()(V1u))).sum() \ + T.log(T.abs_(T.nlinalg.ExtractDiag()(V2u))).sum() self.params = CouplingFunc.params
def init_param_updates(self, layer, parameter): step = self.variables.step parameter_shape = T.shape(parameter).eval() prev_delta = theano.shared( name="{}/prev-delta".format(parameter.name), value=asfloat(np.zeros(parameter_shape)), ) prev_gradient = theano.shared( name="{}/prev-grad".format(parameter.name), value=asfloat(np.zeros(parameter_shape)), ) gradient = T.grad(self.variables.error_func, wrt=parameter) grad_delta = T.abs_(prev_gradient - gradient) parameter_delta = ifelse( T.eq(self.variables.epoch, 1), gradient, T.clip( T.abs_(prev_delta) * gradient / grad_delta, -self.upper_bound, self.upper_bound ) ) return [ (parameter, parameter - step * parameter_delta), (prev_gradient, gradient), (prev_delta, parameter_delta), ]
def _calc_regularization_cost(self): """Calculate the regularization cost given the weight decay parameters. Only the parameters will be considered that are stored in the set self.regularize. We need to handle it manually in this class, because the weight matrices contain bias columns, which should not be considered in regularization computation. Therefore, do not!!! add W1 and W2 to self.regularize Returns ------- theano variable regularization cost depending on the parameters to be regularized and the weight decay parameters for L1 and L2 regularization. """ cost = super(SLmNce, self)._calc_regularization_cost() l1_cost = T.sum(T.abs_(self.W1[:, :-1])) l1_cost += T.sum(T.abs_(self.W2[:, :-1])) l2_cost = T.sum(T.sqr(self.W1[:, :-1])) l2_cost += T.sum(T.sqr(self.W2[:, :-1])) if self.l1_weight != 0: cost += self.l1_weight * l1_cost if self.l2_weight != 0: cost += self.l2_weight * l2_cost return cost
def update_params(self, x1, x2, lrate): #this function samples from the joint posterior and performs # a step of gradient ascent on the log-likelihood sp=self.get_prediction(self.s_past) sp_big=T.reshape(T.extra_ops.repeat(sp,self.nsamps,axis=1).T,(self.ns, self.npcl*self.nsamps)) #s2_idxs=self.sample_multinomial_vec(self.weights_now,4) bsamp=self.theano_rng.multinomial(pvals=T.extra_ops.repeat(T.reshape(self.weights_now,(1,self.npcl)),self.nsamps,axis=0)) s2_idxs=T.dot(self.idx_vec,bsamp.T) s2_samps=self.s_now[s2_idxs] #ns by nsamps s2_big=T.extra_ops.repeat(s2_samps,self.npcl,axis=0).T #ns by npcl*nsamps diffs=T.sum(T.abs_(sp_big-s2_big)/self.br,axis=0) #diffs=T.sum(T.abs_(sp_big-s2_big),axis=0) probs_unnorm=self.weights_past*T.exp(-T.reshape(diffs,(self.nsamps,self.npcl))) #s1_idxs=self.sample_multinomial_mat(probs_unnorm,4) s1_idxs=T.dot(self.idx_vec,self.theano_rng.multinomial(pvals=probs_unnorm).T) s1_samps=self.s_past[s1_idxs] x2_recons=T.dot(self.W, s2_samps.T) s_pred = self.get_prediction(s1_samps) sterm=-T.mean(T.sum(T.abs_((s2_samps-s_pred)/self.b),axis=1)) - T.sum(T.log(self.b)) #xterm1=-T.mean(T.sum((x1_recons-T.reshape(x1,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2)) xterm2=-T.mean(T.sum((x2_recons-T.reshape(x2,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2)) energy = xterm2 + sterm learning_params=[self.params[i] for i in range(len(self.params)) if self.rel_lrates[i]!=0.0] learning_rel_lrates=[self.rel_lrates[i] for i in range(len(self.params)) if self.rel_lrates[i]!=0.0] gparams=T.grad(energy, learning_params, consider_constant=[s1_samps, s2_samps]) updates={} # constructs the update dictionary for gparam, param, rel_lr in zip(gparams, learning_params, learning_rel_lrates): #gnat=T.dot(param, T.dot(param.T,param)) if param==self.M: #I do this so the derivative of M doesn't depend on the sparsity parameters updates[param] = T.cast(param + gparam*T.reshape(self.b,(1,self.ns))*lrate*rel_lr,'float32') #updates[param] = T.cast(param + gparam*lrate*rel_lr,'float32') elif param==self.b: updates[param] = T.cast(param + gparam*T.reshape(1.0/self.b,(1,self.ns))*lrate*rel_lr,'float32') else: updates[param] = T.cast(param + gparam*lrate*rel_lr,'float32') newW=updates[self.W] updates[self.W]=newW/T.sqrt(T.sum(newW**2,axis=0)) return energy, updates
def kulczynski3_theano(X, W, b=None): """ GMean of precision and recall """ XW = T.dot(X, W.T) XX = T.abs_(X).sum(axis=1).reshape((-1, 1)) WW = T.abs_(W).sum(axis=1).reshape((1, -1)) return T.sqrt((XW / XX) * (XW / WW))
def version1(self): Fhat = self.feedForward(self.dot()) latFhat = T.dot(T.abs_(Fhat.T), self.distMat.T) latFhat = T.dot(latFhat, T.abs_(Fhat)) self.latFhat = T.diagonal(latFhat) return self.latFhat
def define_loss(self): self.pred_func = - TT.sum(TT.abs_(self.e[self.rows,:] + self.r[self.cols,:] - self.e[self.tubes,:]),1) self.loss = TT.maximum( 0, self.margin + TT.sum(TT.abs_(self.e[self.rows[:self.batch_size],:] + self.r[self.cols[:self.batch_size],:] - self.e[self.tubes[:self.batch_size],:]),1) \ - (1.0/self.neg_ratio) * TT.sum(TT.sum(TT.abs_(self.e[self.rows[self.batch_size:],:] + self.r[self.cols[self.batch_size:],:] - self.e[self.tubes[self.batch_size:],:]),1).reshape((int(self.batch_size),int(self.neg_ratio))),1) ).mean() self.regul_func = 0
def test_grad_clip(): W = T.fmatrix() t = 2. y = T.switch(T.abs_(W) > t, t / T.abs_(W) * W, W) f = theano.function(inputs=[W], outputs=[y]) w = [[1, -3], [-4, 1]] print f(w)
def new_attn_step(self,c_t,g_tm1,m_im1,q): cWq = T.stack([T.dot(T.dot(c_t, self.Wb), q)]) cWm = T.stack([T.dot(T.dot(c_t, self.Wb), m_im1)]) z = T.concatenate([c_t,m_im1,q,c_t*q,c_t*m_im1,T.abs_(c_t-q),T.abs_(c_t-m_im1),cWq,cWm],axis=0) l_1 = T.dot(self.W1, z) + self.b1 l_1 = T.tanh(l_1) l_2 = T.dot(self.W2,l_1) + self.b2 return l_2[0]
def mapped_log_density_theano(self, y): n_in_bounds = (tt.abs_(y) < self.b).sum() n_dim = y.shape[0] x = self.inverse_theano(y[(tt.abs_(y) >= self.b).nonzero()]) return ((n_in_bounds - n_dim) * tt.log(np.pi * 2) / 2. - n_in_bounds * tt.log(2) + 0.5 * (tt.log(self.beta**2 - 4 * self.alpha * (self.gamma - tt.abs_(x))) - x**2).sum() )
def new_attention_step(self, ct, prev_g, mem, q_q): cWq = T.dot(T.ones((1, self.batch_size), dtype=floatX), T.dot(T.dot(ct.T, self.W_b), q_q) * T.eye(n=self.batch_size, m=self.batch_size, dtype=floatX)) cWm = T.dot(T.ones((1, self.batch_size), dtype=floatX), T.dot(T.dot(ct.T, self.W_b), mem) * T.eye(n=self.batch_size, m=self.batch_size, dtype=floatX)) z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem, T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm], axis=0) l_1 = T.dot(self.W_1, z) + self.b_1.dimshuffle(0, 'x') l_1 = T.tanh(l_1) l_2 = T.dot(self.W_2, l_1) + self.b_2.dimshuffle(0, 'x') G = T.nnet.sigmoid(l_2)[0] return G
def errors(self, y): # check if y has same dimension of y_pred if y.ndim != self.y_pred.ndim: raise TypeError('y should have the same shape as self.y_pred', ('y', y.type, 'y_pred', self.y_pred.type)) # check if y is of the correct datatype if y.dtype.startswith('int'): return T.mean(T.abs_(y - self.y_pred)) else: return T.mean(T.abs_(y - self.y_pred))
def new_attention_step(self, ct, prev_g, mem, q_q): cWq = T.stack([T.dot(T.dot(ct, self.W_b), q_q)]) cWm = T.stack([T.dot(T.dot(ct, self.W_b), mem)]) z = T.concatenate([ct, mem, q_q, ct * q_q, ct * mem, T.abs_(ct - q_q), T.abs_(ct - mem), cWq, cWm]) l_1 = T.dot(self.W_1, z) + self.b_1 l_1 = T.tanh(l_1) l_2 = T.dot(self.W_2, l_1) + self.b_2 G = T.nnet.sigmoid(l_2)[0] return G
def theano_setup(self): # The matrices Wb and Wc were originally tied. # Because of that, I decided to keep Wb and Wc with # the same shape (instead of being transposed) to # avoid disturbing the code as much as possible. Wb = T.dmatrix('Wb') Wc = T.dmatrix('Wc') b = T.dvector('b') c = T.dvector('c') x = T.dmatrix('x') s = T.dot(x, Wc) + c # h = 1 / (1 + T.exp(-s)) # h = T.nnet.sigmoid(s) h = T.tanh(s) # r = T.dot(h,W.T) + b # r = theano.printing.Print("r=")(2*T.tanh(T.dot(h,W.T) + b)) ract = T.dot(h, Wb.T) + b r = self.output_scaling_factor * T.tanh(ract) # Another variable to be able to call a function # with a noisy x and compare it to a reference x. y = T.dmatrix('y') loss = ((r - y)**2) sum_loss = T.sum(loss) # theano_encode_decode : vectorial function in argument X. # theano_loss : vectorial function in argument X. # theano_gradients : returns triplet of gradients, each of # which involves the all data X summed # so it's not a "vectorial" function. self.theano_encode_decode = function([Wb,Wc,b,c,x], r) self.theano_loss = function([Wb,Wc,b,c,x,y], [loss, T.abs_(s), T.abs_(ract)]) self.theano_gradients = function([Wb,Wc,b,c,x,y], [T.grad(sum_loss, Wb), T.grad(sum_loss, Wc), T.grad(sum_loss, b), T.grad(sum_loss, c)]) # other useful theano functions for the experiments that involve # adding noise to the hidden states self.theano_encode = function([Wc,c,x], h) self.theano_decode = function([Wb,b,h], r) # A non-vectorial implementation of the jacobian # of the encoder. Meant to be used with only one x # at a time, returning a matrix. jacob_x = T.dvector('jacob_x') jacob_c = T.dvector('jacob_c') jacob_Wc = T.dmatrix('jacob_Wc') jacob_s = T.dot(jacob_x, jacob_Wc) + jacob_c jacob_h = T.tanh(jacob_s) self.theano_encoder_jacobian_single = function([jacob_Wc,jacob_c,jacob_x], gradient.jacobian(jacob_h,jacob_x,consider_constant=[jacob_Wc,jacob_c]))
def get_model(input_var, target_var, multiply_var): # input layer with unspecified batch size layer_input = InputLayer(shape=(None, 30, 80, 80), input_var=input_var) #InputLayer(shape=(None, 1, 30, 64, 64), input_var=input_var) layer_0 = DimshuffleLayer(layer_input, (0, 'x', 1, 2, 3)) # Z-score? # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer layer_1 = batch_norm(Conv3DDNNLayer(incoming=layer_0, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_2 = batch_norm(Conv3DDNNLayer(incoming=layer_1, num_filters=16, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_3 = MaxPool3DDNNLayer(layer_2, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1)) layer_4 = DropoutLayer(layer_3, p=0.25) # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer layer_5 = batch_norm(Conv3DDNNLayer(incoming=layer_4, num_filters=32, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_6 = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=32, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_7 = MaxPool3DDNNLayer(layer_6, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1)) layer_8 = DropoutLayer(layer_7, p=0.25) # Convolution then batchNormalisation then activation layer, then zero padding layer followed by a dropout layer layer_5 = batch_norm(Conv3DDNNLayer(incoming=layer_8, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_6 = batch_norm(Conv3DDNNLayer(incoming=layer_5, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_7 = batch_norm(Conv3DDNNLayer(incoming=layer_6, num_filters=64, filter_size=(3,3,3), stride=(1,1,1), pad='same', nonlinearity=leaky_rectify)) layer_8 = MaxPool3DDNNLayer(layer_7, pool_size=(2, 2, 2), stride=(2, 2, 2), pad=(1, 1, 1)) layer_9 = DropoutLayer(layer_8, p=0.25) # LSTM layer = DimshuffleLayer(layer_9, (0,2,1,3,4)) # layer_prediction = LSTMLayer(layer, num_units=2, only_return_final=True, learn_init=True, cell=Gate(linear)) layer = LSTMLayer(layer, num_units=2, only_return_final=True, learn_init=True) layer_prediction = DenseLayer(layer, 2, nonlinearity=linear) # Output Layer # layer_hidden = DenseLayer(layer_flatten, 500, nonlinearity=linear) # layer_prediction = DenseLayer(layer_hidden, 2, nonlinearity=linear) # Loss prediction = get_output(layer_prediction) / multiply_var**2 loss = T.abs_(prediction - target_var) loss = loss.mean() #Updates : Stochastic Gradient Descent (SGD) with Nesterov momentum params = get_all_params(layer_prediction, trainable=True) # Create a loss expression for validation/testing. The crucial difference # here is that we do a deterministic forward pass through the network, disabling dropout layers. test_prediction = get_output(layer_prediction, deterministic=True) / multiply_var**2 test_loss = T.abs_(test_prediction - target_var) test_loss = test_loss.mean() # crps estimate crps = T.abs_(test_prediction - target_var).mean()/600 return test_prediction, crps, loss, params
def compute_norms(array, norm_axes=None): """ Compute incoming weight vector norms. Parameters ---------- array : numpy array or Theano expression Weight or bias. norm_axes : sequence (list or tuple) The axes over which to compute the norm. This overrides the default norm axes defined for the number of dimensions in `array`. When this is not specified and `array` is a 2D array, this is set to `(0,)`. If `array` is a 3D, 4D or 5D array, it is set to a tuple listing all axes but axis 0. The former default is useful for working with dense layers, the latter is useful for 1D, 2D and 3D convolutional layers. Finally, in case `array` is a vector, `norm_axes` is set to an empty tuple, and this function will simply return the absolute value for each element. This is useful when the function is applied to all parameters of the network, including the bias, without distinction. (Optional) Returns ------- norms : 1D array or Theano vector (1D) 1D array or Theano vector of incoming weight/bias vector norms. Examples -------- >>> array = np.random.randn(100, 200) >>> norms = compute_norms(array) >>> norms.shape (200,) >>> norms = compute_norms(array, norm_axes=(1,)) >>> norms.shape (100,) """ # Check if supported type if not isinstance(array, theano.Variable) and \ not isinstance(array, np.ndarray): raise RuntimeError("Unsupported type {}. " "Only theano variables and numpy arrays " "are supported".format(type(array))) # Compute default axes to sum over ndim = array.ndim if norm_axes is not None: sum_over = tuple(norm_axes) elif ndim == 1: # For Biases that are in 1d (e.g. b of DenseLayer) sum_over = () elif ndim == 2: # DenseLayer sum_over = (0, ) elif ndim in [3, 4, 5]: # Conv{1,2,3}DLayer sum_over = tuple(range(1, ndim)) else: raise ValueError("Unsupported tensor dimensionality {}. " "Must specify `norm_axes`".format(array.ndim)) # Run numpy or Theano norm computation if isinstance(array, theano.Variable): # Apply theano version if it is a theano variable if len(sum_over) == 0: norms = T.abs_(array) # abs if we have nothing to sum over else: norms = T.sqrt(T.sum(array**2, axis=sum_over)) elif isinstance(array, np.ndarray): # Apply the numpy version if ndarray if len(sum_over) == 0: norms = abs(array) # abs if we have nothing to sum over else: norms = np.sqrt(np.sum(array**2, axis=sum_over)) return norms
if use.load: args["W"], args["b"] = load_params() layers.append(LogRegr(out, **args)) """ layers[-1] : softmax layer layers[-2] : hidden layer (video if late fusion) layers[-3] : hidden layer (trajectory, only if late fusion) """ # cost function cost = layers[-1].negative_log_likelihood(y) if reg.L1_vid > 0 or reg.L2_vid > 0: # L1 and L2 regularization L1 = T.abs_(layers[-2].W).sum() + T.abs_(layers[-1].W).sum() L2 = (layers[-2].W**2).sum() + (layers[-1].W**2).sum() cost += reg.L1_vid * L1 + reg.L2_vid * L2 if net.fusion == "late": L1_traj = T.abs_(layers[-3].W).sum() L2_traj = (layers[-3].W**2).sum() cost += reg.L1_traj * L1_traj + reg.L2_traj * L2_traj # function computing the number of errors errors = layers[-1].errors(y) # gradient descent # ------------------------------------------------------------------------------
def SGMGNHT_2(tparams, cost, inps, ntrain, lr, iterations, rho=0.9, epsilon=1e-6, resamp = 50, clip_norm=1): """ Additional parameters """ mom_tparams = OrderedDict() xi_tparams = OrderedDict() #rng = np.random.RandomState(3435) #+ rng.normal(0,1,p0.shape()) for k, p0 in tparams.iteritems(): mom_tparams[k] = theano.shared(p0.get_value() * 0. +1e-1, name='%s_mom'%k) xi_tparams[k] = theano.shared(p0.get_value() * 0. + 10.0, name='%s_xi'%k) #a = theano.shared(numpy_floatX(2.)) # m = theano.shared(numpy_floatX(1.)) # c = theano.shared(numpy_floatX(1.)) # sigma_p = theano.shared(numpy_floatX(10.)) # sigma_xi = theano.shared(numpy_floatX(0.01)) # sigma_theta = theano.shared(numpy_floatX(0.1)) # gamma = theano.shared(numpy_floatX(1.)) m = theano.shared(numpy_floatX(1.)) c = theano.shared(numpy_floatX(3.)) sigma_p = theano.shared(numpy_floatX(0.01)) sigma_mom = theano.shared(numpy_floatX(10.)) sigma_xi = theano.shared(numpy_floatX(0.01)) gamma = theano.shared(numpy_floatX(1.0)) logger = logging.getLogger('eval_ptb_sgmgnht') logger.setLevel(logging.INFO) fh = logging.FileHandler('eval_ptb_sgmgnht.log') logger.info('a = 1, m {} c {} s_p{} s_mom{} s_xi{} g_xi{}'.format( m.get_value(), c.get_value(), sigma_p.get_value(), sigma_mom.get_value(), sigma_xi.get_value(), gamma.get_value())) p = tensor.vector('p', dtype='float32') """ default: lr=0.001 """ trng = RandomStreams(123) grads = tensor.grad(cost, tparams.values()) # clip norm norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads])) if tensor.ge(norm, clip_norm): grads = [g*clip_norm/norm for g in grads] gshared = [theano.shared(p0.get_value() * 0., name='%s_grad'%k) for k, p0 in tparams.iteritems()] gsup = [(gs, g) for gs, g in zip(gshared, grads)] f_grad_shared = theano.function(inps, cost, updates=gsup) updates = [] for p, mom, xi, g in zip(tparams.values(),mom_tparams.values(),xi_tparams.values(), gshared): g_f = (tensor.sqrt(tensor.abs_(mom+1e-100)))/m K_f = g_f + 4/c/(1 + tensor.exp(c*g_f)) psi_f_1 = -1 + 2/( 1 + tensor.exp(-c*g_f)) f1_f_1 = 1/2.0/m**2 *psi_f_1**2 /g_f*tensor.sgn(mom) #f1_f_1 = 1/2.0/m*psi_f_1**2* tensor.abs_(mom+1e-100)**(-1/2) *tensor.sgn(mom) psi_grad_f_1 = 2*c*tensor.exp(- c*g_f)/(1 + tensor.exp(-c*g_f))**2 f3_f_1 = f1_f_1**2 - 1/2.0/m**2 * psi_f_1 * psi_grad_f_1 / tensor.abs_(mom) + 1/4.0/m * psi_f_1**2 * (tensor.abs_(mom+1e-100)**(-1.5)) # psi_f = (tensor.exp(c*g_f) - 1)/(tensor.exp(c*g_f) + 1) # f1_f = 1/2/m*psi_f**2 * (tensor.abs_(mom+1e-100)**(-1/2))*tensor.sgn(mom) # psi_grad_f = 2*c*tensor.exp(c*g_f)/(tensor.exp(c*g_f) + 1)**2 # f3_f = f1_f**2 - c/2/m**2 * psi_f * psi_grad_f / tensor.abs_(mom) + 1/4/m * psi_f**2 * (tensor.abs_(mom+1e-100)**(-3/2)) # temp_f1 = tensor.switch(tensor.ge(g_f,0), f1_f_1, f1_f) # temp_f3 = tensor.switch(tensor.ge(g_f,0), f3_f_1, f3_f) temp_f1 = f1_f_1 temp_f3 = f3_f_1 noise_p = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') noise_mom = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') noise_xi = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') # generata gamma(a,2): N(0,1)^2 = gamma(1/2,2) noise_temp = tensor.zeros(p.get_value().shape) for aa in xrange(4): this_noise = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') noise_temp = tensor.inc_subtensor(noise_temp[:], this_noise**2) randmg = (noise_temp*m/2)**2*tensor.sgn(trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32')) updated_p = p + temp_f1 * lr - g * lr * ntrain * sigma_p + tensor.sqrt(2*sigma_p*lr) * noise_p updated_mom = 
(mom - temp_f1* xi *lr - g * lr * ntrain + tensor.sqrt(2*sigma_mom*lr) * noise_mom)* (1-tensor.eq(tensor.mod(iterations,resamp),0)) + randmg * tensor.eq(tensor.mod(iterations,resamp),0) #updated_mom = mom - temp_f1* xi *lr - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p temp_xi = trng.normal(p.get_value().shape, avg = sigma_mom, std = tensor.sqrt(sigma_xi/2) , dtype='float32') updated_xi = (xi + temp_f3* gamma * lr - (xi - sigma_mom)*sigma_xi/(gamma+1e-10)*lr + tensor.sqrt(2*sigma_xi*lr) * noise_xi) * (1-tensor.eq(tensor.mod(iterations,resamp),resamp/2)) + temp_xi * tensor.eq(tensor.mod(iterations,resamp),resamp/2) updates.append((p, updated_p)) updates.append((mom, updated_mom)) updates.append((xi, updated_xi)) f_update = theano.function([lr,ntrain,iterations], [p,mom,xi], updates=updates) #f_params = theano.function([], [a, m, c, mom.shape]) return f_grad_shared, f_update
def l1(self, s1w, s2w): return -T.abs_(s1w - s2w)
def run_mlp(step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, X, Z, TX, TZ, image_height, image_width, nouts): print step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, image_height, image_width, nouts seed = 3453 np.random.seed(seed) batch_size = batch_size #max_iter = max_passes * X.shape[ 0] / batch_size max_iter = 25000000 n_report = X.shape[0] / batch_size weights = [] #input_size = len(X[0]) #Normalize mean = X.mean(axis=0) std = (X - mean).std() X = (X - mean) / std TX = (TX - mean) / std stop = climin.stops.AfterNIterations(max_iter) pause = climin.stops.ModuloNIterations(n_report) optimizer = opt, {'step_rate': step, 'momentum': momentum, 'decay': decay} typ = 'Lenet' if typ == 'Lenet': m = Lenet(image_height, image_width, 1, X, Z, n_hiddens_conv=n_hidden_conv, filter_shapes=filter_shapes, pool_shapes=pool_size, n_hiddens_full=n_hidden_full, n_output=nouts, hidden_transfers_conv=hidden_conv_transfers, hidden_transfers_full=hidden_full_transfers, out_transfer='identity', loss='squared', optimizer=optimizer, batch_size=batch_size, max_iter=max_iter) elif typ == 'SimpleCnn2d': m = SimpleCnn2d(2099, [400, 100], 1, X, Z, TX, TZ, hidden_transfers=['tanh', 'tanh'], out_transfer='identity', loss='squared', p_dropout_inpt=.1, p_dropout_hiddens=.2, optimizer=optimizer, batch_size=batch_size, max_iter=max_iter) climin.initialize.randomize_normal(m.parameters.data, 0, par_std) #m.parameters.data[...] = np.random.normal(0, 0.01, m.parameters.data.shape) # Transform the test data #TX = m.transformedData(TX) m.init_weights() TX = np.array([TX for _ in range(10)]).mean(axis=0) print TX.shape losses = [] print 'max iter', max_iter X, Z, TX, TZ = [ breze.learn.base.cast_array_to_local_type(i) for i in (X, Z, TX, TZ) ] for layer in m.lenet.mlp.layers: weights.append(m.parameters[layer.weights]) weight_decay = ((weights[0]**2).sum() + (weights[1]**2).sum() #+ (weights[2]**2).sum() ) weight_decay /= m.exprs['inpt'].shape[0] m.exprs['true_loss'] = m.exprs['loss'] c_wd = L2 m.exprs['loss'] = m.exprs['loss'] + c_wd * weight_decay mae = T.abs_((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels)) - m.exprs['target']).mean() f_mae = m.function(['inpt', 'target'], mae) rmse = T.sqrt( T.square((m.exprs['output'] * np.std(train_labels) + np.mean(train_labels)) - m.exprs['target']).mean()) f_rmse = m.function(['inpt', 'target'], rmse) start = time.time() # Set up a nice printout. keys = '#', 'seconds', 'loss', 'val loss', 'mae_train', 'rmse_train', 'mae_test', 'rmse_test' max_len = max(len(i) for i in keys) header = '\t'.join(i for i in keys) print header print '-' * len(header) results = open('result_hp.txt', 'a') results.write(header + '\n') results.write('-' * len(header) + '\n') results.close() EXP_DIR = os.getcwd() base_path = os.path.join(EXP_DIR, "pars_hp" + str(counter) + ".pkl") n_iter = 0 if os.path.isfile(base_path): with open("pars_hp" + str(counter) + ".pkl", 'rb') as tp: n_iter, best_pars = cp.load(tp) m.parameters.data[...] 
= best_pars for i, info in enumerate(m.powerfit((X, Z), (TX, TZ), stop, pause)): if info['n_iter'] % n_report != 0: continue passed = time.time() - start losses.append((info['loss'], info['val_loss'])) info.update({ 'time': passed, 'mae_train': f_mae(X, train_labels), 'rmse_train': f_rmse(X, train_labels), 'mae_test': f_mae(TX, test_labels), 'rmse_test': f_rmse(TX, test_labels) }) info['n_iter'] += n_iter row = '%(n_iter)i\t%(time)g\t%(loss)f\t%(val_loss)f\t%(mae_train)g\t%(rmse_train)g\t%(mae_test)g\t%(rmse_test)g' % info results = open('result_hp.txt', 'a') print row results.write(row + '\n') results.close() with open("pars_hp" + str(counter) + ".pkl", 'wb') as fp: cp.dump((info['n_iter'], info['best_pars']), fp) with open("hps" + str(counter) + ".pkl", 'wb') as tp: cp.dump((step, momentum, decay, n_hidden_full, n_hidden_conv, hidden_full_transfers, hidden_conv_transfers, filter_shapes, pool_size, par_std, batch_size, opt, L2, counter, info['n_iter']), tp) m.parameters.data[...] = info['best_pars'] cp.dump(info['best_pars'], open('best_pars.pkl', 'wb')) Y = m.predict(X) TY = m.predict(TX) output_train = Y * np.std(train_labels) + np.mean(train_labels) output_test = TY * np.std(train_labels) + np.mean(train_labels) print 'TRAINING SET\n' print('MAE: %5.2f kcal/mol' % np.abs(output_train - train_labels).mean(axis=0)) print('RMSE: %5.2f kcal/mol' % np.square(output_train - train_labels).mean(axis=0)**.5) print 'TESTING SET\n' print('MAE: %5.2f kcal/mol' % np.abs(output_test - test_labels).mean(axis=0)) print('RMSE: %5.2f kcal/mol' % np.square(output_test - test_labels).mean(axis=0)**.5) mae_train = np.abs(output_train - train_labels).mean(axis=0) rmse_train = np.square(output_train - train_labels).mean(axis=0)**.5 mae_test = np.abs(output_test - test_labels).mean(axis=0) rmse_test = np.square(output_test - test_labels).mean(axis=0)**.5 results = open('result.txt', 'a') results.write('Training set:\n') results.write('MAE:\n') results.write("%5.2f" % mae_train) results.write('\nRMSE:\n') results.write("%5.2f" % rmse_train) results.write('\nTesting set:\n') results.write('MAE:\n') results.write("%5.2f" % mae_test) results.write('\nRMSE:\n') results.write("%5.2f" % rmse_test) results.close()
def do_regression( num_epochs=60, # No. of epochs to train init_file=None, # Saved parameters to initialise training epoch_size=680780, # Whole dataset size valid_size=34848, train_batch_multiple=10637, # No. of minibatches per batch valid_batch_multiple=1089, # No. of minibatches per batch train_minibatch_size=64, valid_minibatch_size=32, eval_multiple=50, # No. of minibatches to ave. in report save_model=True, input_width=19, rng_seed=100009, cross_val=0, # Cross-validation subset label dataver=1, # Label for different runs/architectures/etc rate_init=1.0, rate_decay=0.999983): ################################################### ################# 0. User inputs ################## ################################################### for i in range(1, len(sys.argv)): if sys.argv[i].startswith('-'): option = sys.argv[i][1:] if option == 'i': init_file = sys.argv[i + 1] elif option[0:2] == 'v=': dataver = int(option[2:]) elif option[0:3] == 'cv=': cross_val = int(option[3:]) elif option[0:3] == 'rs=': rng_seed = int(option[3:]) elif option[0:3] == 'ri=': rate_init = np.float32(option[3:]) elif option[0:3] == 'rd=': rate_decay = np.float32(option[3:]) print("Running with dataver %s" % (dataver)) print("Running with cross_val %s" % (cross_val)) ################################################### ############# 1. Housekeeping values ############## ################################################### # Batch size is possibly not equal to epoch size due to memory limits train_batch_size = train_batch_multiple * train_minibatch_size assert epoch_size >= train_batch_size # Number of times we expect the training/validation generator to be called max_train_gen_calls = (num_epochs * epoch_size) // train_batch_size # Number of evaluations (total minibatches / eval_multiple) num_eval = max_train_gen_calls * train_batch_multiple / eval_multiple ################################################### ###### 2. Define model and theano variables ####### ################################################### if rng_seed is not None: print("Setting RandomState with seed=%i" % (rng_seed)) rng = np.random.RandomState(rng_seed) set_rng(rng) print("Defining variables...") index = T.lscalar() # Minibatch index x = T.tensor3('x') # Inputs y = T.fvector('y') # Target print("Defining model...") network_0 = build_1Dregression_v1(input_var=x, input_width=input_width, nin_units=12, h_num_units=[64, 128, 256, 128, 64], h_grad_clip=1.0, output_width=1) if init_file is not None: print("Loading initial model parametrs...") init_model = np.load(init_file) init_params = init_model[init_model.files[0]] LL.set_all_param_values([network_0], init_params) ################################################### ################ 3. 
Import data ################### ################################################### # Loading data generation model parameters print("Defining shared variables...") train_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX), borrow=True) train_set_x = theano.shared(np.zeros((1, 1, 1), dtype=theano.config.floatX), borrow=True) valid_set_y = theano.shared(np.zeros(1, dtype=theano.config.floatX), borrow=True) valid_set_x = theano.shared(np.zeros((1, 1, 1), dtype=theano.config.floatX), borrow=True) # Validation data (pick a single augmented instance, rand0 here) print("Creating validation data...") chunk_valid_data = np.load( "./valid/data_valid_augmented_cv%s_t%s_rand0.npy" % (cross_val, input_width)).astype(theano.config.floatX) chunk_valid_answers = np.load("./valid/data_valid_expected_cv%s.npy" % (cross_val)).astype(theano.config.floatX) print "chunk_valid_answers.shape", chunk_valid_answers.shape print("Assigning validation data...") valid_set_y.set_value(chunk_valid_answers[:]) valid_set_x.set_value(chunk_valid_data.transpose(0, 2, 1)) # Create output directory if not os.path.exists("output_cv%s_v%s" % (cross_val, dataver)): os.makedirs("output_cv%s_v%s" % (cross_val, dataver)) ################################################### ########### 4. Create Loss expressions ############ ################################################### print("Defining loss expressions...") prediction_0 = LL.get_output(network_0) train_loss = aggregate(T.abs_(prediction_0 - y.dimshuffle(0, 'x'))) valid_prediction_0 = LL.get_output(network_0, deterministic=True) valid_loss = aggregate(T.abs_(valid_prediction_0 - y.dimshuffle(0, 'x'))) ################################################### ############ 5. Define update method ############# ################################################### print("Defining update choices...") params = LL.get_all_params(network_0, trainable=True) learn_rate = T.scalar('learn_rate', dtype=theano.config.floatX) updates = lasagne.updates.adadelta(train_loss, params, learning_rate=learn_rate) ################################################### ######### 6. Define train/valid functions ######### ################################################### print("Defining theano functions...") train_model = theano.function( [index, learn_rate], train_loss, updates=updates, givens={ x: train_set_x[(index * train_minibatch_size):((index + 1) * train_minibatch_size)], y: train_set_y[(index * train_minibatch_size):((index + 1) * train_minibatch_size)] }) validate_model = theano.function( [index], valid_loss, givens={ x: valid_set_x[index * valid_minibatch_size:(index + 1) * valid_minibatch_size], y: valid_set_y[index * valid_minibatch_size:(index + 1) * valid_minibatch_size] }) ################################################### ################ 7. 
Begin training ################ ################################################### print("Begin training...") sys.stdout.flush() cum_iterations = 0 this_train_loss = 0.0 this_valid_loss = 0.0 best_valid_loss = np.inf best_iter = 0 train_eval_scores = np.empty(num_eval) valid_eval_scores = np.empty(num_eval) eval_index = 0 aug_index = 0 for batch in xrange(max_train_gen_calls): start_time = time.time() chunk_train_data = np.load( "./train/data_train_augmented_cv%s_t%s_rand%s.npy" % (cross_val, input_width, aug_index)).astype(theano.config.floatX) chunk_train_answers = np.load("./train/data_train_expected_cv%s.npy" % (cross_val)).astype(theano.config.floatX) train_set_y.set_value(chunk_train_answers[:]) train_set_x.set_value(chunk_train_data.transpose(0, 2, 1)) # Iterate over minibatches in each batch for mini_index in xrange(train_batch_multiple): this_rate = np.float32(rate_init * (rate_decay**cum_iterations)) this_train_loss += train_model(mini_index, this_rate) cum_iterations += 1 # Report loss if (cum_iterations % eval_multiple == 0): this_train_loss = this_train_loss / eval_multiple this_valid_loss = np.mean( [validate_model(i) for i in xrange(valid_batch_multiple)]) train_eval_scores[eval_index] = this_train_loss valid_eval_scores[eval_index] = this_valid_loss # Save report every five evaluations if ((eval_index + 1) % 5 == 0): np.savetxt("output_cv%s_v%s/training_scores.txt" % (cross_val, dataver), train_eval_scores, fmt="%.5f") np.savetxt("output_cv%s_v%s/validation_scores.txt" % (cross_val, dataver), valid_eval_scores, fmt="%.5f") np.savetxt("output_cv%s_v%s/last_learn_rate.txt" % (cross_val, dataver), [np.array(this_rate)], fmt="%.5f") # Save model if best validation score if (this_valid_loss < best_valid_loss): best_valid_loss = this_valid_loss best_iter = cum_iterations - 1 if save_model: np.savez( "output_cv%s_v%s/model.npz" % (cross_val, dataver), LL.get_all_param_values(network_0)) # Reset evaluation reports eval_index += 1 this_train_loss = 0.0 this_valid_loss = 0.0 aug_index += 1 end_time = time.time() print("Computing time for batch %d: %f" % (batch, end_time - start_time)) print("Best validation loss %f after %d epochs" % (best_valid_loss, (best_iter * train_minibatch_size // epoch_size))) del train_set_x, train_set_y, valid_set_x, valid_set_y gc.collect() return None
def _step(i, t, s): t *= (i - b) * value / i step = t / (a + i) s += step return ((t, s), until(tt.abs_(step) < threshold))
def SGMGHMC_old(tparams, cost, inps, ntrain, lr, iterations, rho=0.9, epsilon=1e-6, a_i = 2, clip_norm=5): """ Additional parameters """ mom_tparams = OrderedDict() xi_tparams = OrderedDict() for k, p0 in tparams.iteritems(): mom_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_mom'%k) xi_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_xi'%k) a = theano.shared(numpy_floatX(1.)) m = theano.shared(numpy_floatX(1.)) c = theano.shared(numpy_floatX(5.)) sigma_p = theano.shared(numpy_floatX(10.)) sigma_xi = theano.shared(numpy_floatX(1.)) gamma_xi = theano.shared(numpy_floatX(0.001)) logger = logging.getLogger('eval_ptb_sgmgnht') logger.setLevel(logging.INFO) fh = logging.FileHandler('eval_ptb_sgmgnht.log') logger.info('a {} m {} c {} s_p{} s_xi{} g_xi{}'.format(a.get_value(), m.get_value(), c.get_value(), sigma_p.get_value(), sigma_xi.get_value(), gamma_xi.get_value())) p = tensor.vector('p', dtype='float32') """ default: lr=0.001 """ trng = RandomStreams(123) grads = tensor.grad(cost, tparams.values()) norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads])) if tensor.ge(norm, clip_norm): grads = [g*clip_norm/norm for g in grads] gshared = [theano.shared(p0.get_value() * 0., name='%s_grad'%k) for k, p0 in tparams.iteritems()] gsup = [(gs, g) for gs, g in zip(gshared, grads)] f_grad_shared = theano.function(inps, cost, updates=gsup) updates = [] for p, mom, xi, g in zip(tparams.values(),mom_tparams.values(),xi_tparams.values(), gshared): g_f = tensor.sgn(mom)/m*(tensor.abs_(mom)**(1/a)) K_f = -g_f + 2/c*(c*g_f + tensor.log(1+tensor.exp(-c*g_f))) psi_f_1 = (1- tensor.exp(-c*g_f) )/( 1 + tensor.exp(-c*g_f) ) f1_f_1 = 1/m/a*psi_f_1*(tensor.abs_(mom+1e-100)**(1/a-1)) psi_grad_f_1 = 2*c*tensor.exp(- c*g_f)/(1 + tensor.exp(-c*g_f))**2 f3_f_1 = 1/m**2/a**2*(psi_f_1**2-psi_grad_f_1)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f_1*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2) psi_f = (tensor.exp(c*g_f) - 1)/(tensor.exp(c*g_f) + 1) f1_f = 1/m/a*psi_f*(tensor.abs_(mom+1e-100)**(1/a-1)) psi_grad_f = 2*c*tensor.exp(c*g_f)/(tensor.exp(c*g_f) + 1)**2 f3_f = 1/m**2/a**2*(psi_f**2-psi_grad_f)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2) temp_f1 = tensor.switch(tensor.ge(g_f,0), f1_f_1, f1_f) temp_f3 = tensor.switch(tensor.ge(g_f,0), f3_f_1, f3_f) noise_p = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') noise_xi = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') # generata gamma(a,2): N(0,1)^2 = gamma(1/2,2) noise_temp = tensor.zeros(p.get_value().shape) for aa in xrange(a_i*2): this_noise = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') noise_temp = tensor.inc_subtensor(noise_temp[:], this_noise**2) randmg = (noise_temp*m/2)**a*tensor.sgn(trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32')) updated_p = p + temp_f1 * lr updated_mom = (mom - temp_f1* xi *lr - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p)* (1-tensor.eq(tensor.mod(iterations,100),0)) + randmg * tensor.eq(tensor.mod(iterations,100),0) #updated_mom = mom - temp_f1* xi *lr - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p temp_xi = trng.normal(p.get_value().shape, avg = sigma_p, std = tensor.sqrt(sigma_xi/2) , dtype='float32') updated_xi = (xi + temp_f3* sigma_xi * lr - (xi - sigma_p)*gamma_xi*lr + tensor.sqrt(2*sigma_xi*gamma_xi*lr) * noise_xi) * (1-tensor.eq(tensor.mod(iterations,100),50)) + temp_xi * tensor.eq(tensor.mod(iterations,100),50) 
updates.append((p, updated_p)) updates.append((mom, updated_mom)) updates.append((xi, updated_xi)) f_update = theano.function([lr,ntrain,iterations], [p,mom,xi], updates=updates) #f_params = theano.function([], [a, m, c, mom.shape]) return f_grad_shared, f_update
def mae_clip(y_true, y_pred): """Return the MAE with clipping to provide resistance to outliers""" CLIP_VALUE = 6 return T.clip(T.abs_(y_true - y_pred), 0, CLIP_VALUE).mean(axis=-1)
# BEGAN variables Disc_output_real = lasagne.layers.get_output(Disc_out_layer,inputs=input_var) Disc_output_fake = lasagne.layers.get_output(Disc_out_layer, inputs=lasagne.layers.get_output(Gen_out_layer, inputs=noise_var)) Gen_output = lasagne.layers.get_output(Gen_out_layer,inputs=noise_var) # Classifier variables NetB_all_input = T.concatenate([lasagne.layers.get_output(Gen_out_layer,inputs=noise_var), input_var], axis=0) NetB_output_all = T.add(lasagne.layers.get_output(networkBOut,inputs=NetB_all_input),np.finfo(np.float32).eps) # BEGAN losses Disc_loss_real = T.abs_(Disc_output_real - input_var) Disc_loss_real = Disc_loss_real.mean() Disc_loss_fake = T.abs_(Disc_output_fake - Gen_output) Disc_loss_fake = Disc_loss_fake.mean() Gen_loss = T.abs_(Disc_output_fake - Gen_output) Gen_loss = Gen_loss.mean() # Classifier losses #NetB_loss_fake = lasagne.objectives.categorical_crossentropy(NetB_output_fake,targets_fake) #NetB_loss_fake = NetB_loss_fake.mean() NetB_loss_all = lasagne.objectives.binary_crossentropy(NetB_output_all,target_varB) NetB_loss_all = NetB_loss_all.mean()
def student_t_likelihood( new_cases_inferred, pr_beta_sigma_obs=30, nu=4, offset_sigma=1, model=None, data_obs=None, name_student_t="_new_cases_studentT", name_sigma_obs="sigma_obs" ): """ Set the likelihood to apply to the model observations (`model.new_cases_obs`) We assume a student-t distribution, the mean of the distribution matches `new_cases_inferred` as provided. Parameters ---------- new_cases_inferred : array One or two dimensonal array. If 2 dimensional, the first dimension is time and the second are the regions/countries pr_beta_sigma_obs : float nu : float How flat the tail of the distribution is. Larger nu should make the model more robust to outliers offset_sigma : float model: The model on which we want to add the distribution data_obs : array The data that is observed. By default it is ``model.new_cases_ob`` name_student_t : The name under which the studentT distribution is saved in the trace. name_sigma_obs : The name under which the distribution of the observable error is saved in the trace Returns ------- None TODO ---- #@jonas, can we make it more clear that this whole stuff gets attached to the # model? like the with model as context... #@jonas doc description for sigma parameters """ model = modelcontext(model) len_sigma_obs = () if model.sim_ndim == 1 else model.sim_shape[1] sigma_obs = pm.HalfCauchy(name_sigma_obs, beta=pr_beta_sigma_obs, shape=len_sigma_obs) if data_obs is None: data_obs = model.new_cases_obs pm.StudentT( name=name_student_t, nu=nu, mu=new_cases_inferred[: len(data_obs)], sigma=tt.abs_(new_cases_inferred[: len(data_obs)] + offset_sigma) ** 0.5 * sigma_obs, # offset and tt.abs to avoid nans observed=data_obs, )
def main(args): #theano.optimizer='fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'dp_dis1-nosch_%d' % trial channel_name = 'mae' data_path = args['data_path'] save_path = args['save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps# int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) typeLoad = int(args['typeLoad']) debug = int(args['debug']) kSchedSamp = int(args['kSchedSamp']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 p_z_dim = 150 p_x_dim = 150#250 x2s_dim = 100#250 y2s_dim = 100 z2s_dim = 100#150 target_dim = k#x_dim #(x_dim-1)*k model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport(data_path, windows, appliances,numApps=flgAgg, period=period, n_steps= n_steps, stride_train = stride_train, stride_test = stride_test, trainPer=0.6, valPer=0.2, testPer=0.2, typeLoad=typeLoad, flgAggSumScaled = 1, flgFilterZeros = 1) print(reader.stdTrain, reader.meanTrain) instancesPlot = {0:[4], 2:[5]} #for now use hard coded instancesPlot for kelly sampling train_data = Dataport(name='train', prep='normalize', cond=True,# False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = Dataport(name='valid', prep='normalize', cond=True,# False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels = yval) test_data = Dataport(name='valid', prep='normalize', cond=True,# False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xtest, labels = ytest) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y , y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp pickelModel = '/home/gissella/Documents/Research/Disaggregation/PecanStreet-dataport/VRNN_theano_version/output/gmmAE/18-05-30_16-27_app6/dp_dis1-sch_1_best.pkl' fmodel = open(pickelModel, 'rb') mainloop = cPickle.load(fmodel) fmodel.close() #define layers rnn = mainloop.model.nodes[0] x_1 = mainloop.model.nodes[1] y_1 = mainloop.model.nodes[2] z_1 = mainloop.model.nodes[3] phi_1 = mainloop.model.nodes[4] phi_mu = mainloop.model.nodes[5] phi_sig = mainloop.model.nodes[6] prior_1 = mainloop.model.nodes[7] prior_mu = mainloop.model.nodes[8] prior_sig = mainloop.model.nodes[9] theta_1 = mainloop.model.nodes[10] theta_mu = mainloop.model.nodes[11] theta_sig = mainloop.model.nodes[12] coeff = mainloop.model.nodes[13] nodes = [rnn, x_1, y_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff]#, corr, binary params = mainloop.model.params """params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params)""" s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) y_1_temp = y_1.fprop([y], params) def inner_fn_val(x_t, s_tm1): prior_1_t = prior_1.fprop([x_t,s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(prior_mu_t, prior_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) pred_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) pred_1_t = y_1.fprop([pred_t], params) s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred_t#, y_pred #corr_temp, binary_temp ((s_temp_val, prior_mu_temp_val, prior_sig_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val, prediction_val), updates_val) =\ theano.scan(fn=inner_fn_val, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None]) for k, v in updates_val.iteritems(): k.default_update = v s_temp_val = concatenate([s_0[None, :, :], s_temp_val[:-1]], axis=0) x_shape = x.shape ######################## TEST (GENERATION) TIME prediction_val.name = 'generated__'+str(flgAgg) mse_val = T.mean((prediction_val - y)**2) # As axis = None is calculated for all mae_val = T.mean( T.abs_(prediction_val - y) ) mse_val.name = 'mse_val' mae_val.name = 'mae_val' pred_in_val = y.reshape((y.shape[0]*y.shape[1],-1)) theta_mu_in_val = theta_mu_temp_val.reshape((x_shape[0]*x_shape[1], -1)) theta_sig_in_val = theta_sig_temp_val.reshape((x_shape[0]*x_shape[1], -1)) coeff_in_val = coeff_temp_val.reshape((x_shape[0]*x_shape[1], -1)) recon_val = GMM(pred_in_val, theta_mu_in_val, theta_sig_in_val, coeff_in_val)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon_val = recon_val.reshape((x_shape[0], x_shape[1])) recon_val.name = 'gmm_out_val' recon_term_val= recon_val.sum(axis=0).mean() recon_term_val.name = 'recon_term_val' model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes data=Iterator(test_data, batch_size) test_fn = theano.function(inputs=[x, y],#[x, y], allow_input_downcast=True, outputs=[prediction_val, recon_term_val, mse_val, mae_val]#prediction_val, mse_val, mae_val 
,updates=updates_val#, allow_input_downcast=True, on_unused_input='ignore' ) testOutput = [] numBatchTest = 0 for batch in data: outputGeneration = test_fn(batch[0], batch[2])#(20, 220, 1) testOutput.append(outputGeneration[1:]) # outputGeneration[0].shape #(20, 220, 40) #if (numBatchTest<5): ''' plt.figure(1) plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_z_0-4".format(numBatchTest)) plt.clf() plt.figure(2) plt.plot(np.transpose(outputGeneration[1],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_s_0-4".format(numBatchTest)) plt.clf() plt.figure(3) plt.plot(np.transpose(outputGeneration[2],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_theta_0-4".format(numBatchTest)) plt.clf() ''' plt.figure(4) plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4]) plt.plot(np.transpose(batch[2],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_RealAndPred_0-4".format(numBatchTest)) plt.clf() plt.figure(4) plt.plot(np.transpose(batch[0],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest)) plt.clf() numBatchTest+=1 testOutput = np.asarray(testOutput) print(testOutput.shape) recon_test = testOutput[:, 0].mean() mse_test = testOutput[:, 1].mean() mae_test = testOutput[:, 2].mean() #mseUnNorm_test = testOutput[:, 3].mean() #maeUnNorm_test = testOutput[:, 4].mean() fLog = open(save_path+'/output.csv', 'w') fLog.write(str(lr_iterations)+"\n") fLog.write(str(windows)+"\n") fLog.write("logTest,mseTest,maeTest, mseTestUnNorm, maeTestUnNorm\n") fLog.write("{},{},{}\n".format(recon_test,mse_test,mae_test)) fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{},{}\n".format(q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim)) header = "epoch,log,kl,mse,mae\n" fLog.write(header) for i , item in enumerate(mainloop.trainlog.monitor['recon_term']): f = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f,a,b,d,e))
def main(args): #theano.optimizer='fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'dp_dis1-nosch_%d' % trial channel_name = 'mae' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps # int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) typeLoad = int(args['typeLoad']) debug = int(args['debug']) kSchedSamp = int(args['kSchedSamp']) typeActivFunc = args['typeActivFunc'] print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 p_z_dim = 150 p_x_dim = 150 #250 x2s_dim = 100 #250 y2s_dim = 100 z2s_dim = 100 #150 target_dim = k #x_dim #(x_dim-1)*k model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport( data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, trainPer=0.6, valPer=0.2, testPer=0.2, typeLoad=typeLoad, flgAggSumScaled=1, flgFilterZeros=1) print(reader.stdTrain, reader.meanTrain) instancesPlot = { 0: [4], 2: [5] } #for now use hard coded instancesPlot for kelly sampling train_data = Dataport( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = Dataport( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) test_data = Dataport( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xtest, labels=ytest) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) y_1 = FullyConnectedLayer(name='y_1', parent=['y_t'], parent_dim=[y_dim], nout=y2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1', 'y_1'], parent_dim=[x2s_dim, z2s_dim, y_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1', 'y_1'], parent_dim=[x2s_dim, rnn_dim, y2s_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit=typeActivFunc, init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, y_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) y_1_temp = y_1.fprop([y], params) def inner_fn_train(x_t, y_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([x_t, s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) 
#Gaussian_sample(theta_mu_t, theta_sig_t) s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu_t, theta_sig_t, coeff_t, pred #, y_pred #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\ theano.scan(fn=inner_fn_train, sequences=[x_1_temp, y_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0 theta_mu_temp.name = 'theta_mu_temp' theta_sig_temp.name = 'theta_sig_temp' coeff_temp.name = 'coeff' if (flgAgg == -1): prediction.name = 'x_reconstructed' mse = T.mean((prediction - x)**2) # CHECK RESHAPE with an assertion mae = T.mean(T.abs(prediction - x)) mse.name = 'mse' pred_in = x.reshape((x_shape[0] * x_shape[1], -1)) else: prediction.name = 'pred_' + str(flgAgg) mse = T.mean( (prediction - y)**2) # As axis = None is calculated for all mae = T.mean(T.abs_(prediction - y)) mse.name = 'mse' mae.name = 'mae' pred_in = y.reshape((y.shape[0] * y.shape[1], -1)) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMM( pred_in, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term #+ mse if (flgMSE): nll_upper_bound = nll_upper_bound + mse nll_upper_bound.name = 'nll_upper_bound' model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, mse, mae, theta_mu_temp, prediction ], indexSep=5, instancesPlot=instancesPlot, #{0:[4,20],2:[5,10]},#, 80,150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr} mainloop = Training( name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[recon_term, kl_term, nll_upper_bound, mse, mae], n_steps=n_steps, extension=extension, lr_iterations=lr_iterations, k_speedOfconvergence=kSchedSamp) mainloop.run() fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(windows) + "\n") fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, y2s_dim, z2s_dim)) header = "epoch,log,kl,mse,mae\n" fLog.write(header) for i, item in 
enumerate(mainloop.trainlog.monitor['recon_term']): f = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e))
def log_abs_det_T(W):
    # TODO option for stable way
    return T.log(T.abs_(T.nlinalg.det(W)))
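# Illustrative check (added; not from the source): the same quantity in NumPy,
# plus the numerically stabler slogdet route that the TODO presumably refers to.
import numpy as np

W_np = np.array([[2.0, 1.0], [0.5, 3.0]])
naive = np.log(np.abs(np.linalg.det(W_np)))
sign, logdet = np.linalg.slogdet(W_np)  # stable even when det() under/overflows
assert np.allclose(naive, logdet)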
def ready(self): encoder = self.encoder embedding_layer = self.embedding_layer args = self.args padding_id = embedding_layer.vocab_map["<padding>"] dropout = self.dropout = encoder.dropout # len*batch x = self.x = encoder.x z = self.z = encoder.z n_d = args.hidden_dimension n_e = embedding_layer.n_d activation = get_activation_by_name(args.activation) layers = self.layers = [] layer_type = args.layer.lower() for i in xrange(2): if layer_type == "rcnn": l = RCNN( n_in=n_e, # if i == 0 else n_d, n_out=n_d, activation=activation, order=args.order) elif layer_type == "lstm": l = LSTM( n_in=n_e, # if i == 0 else n_d, n_out=n_d, activation=activation) layers.append(l) # len * batch #masks = T.cast(T.neq(x, padding_id), theano.config.floatX) masks = T.cast(T.neq(x, padding_id), "int8").dimshuffle((0, 1, "x")) # (len*batch)*n_e embs = embedding_layer.forward(x.ravel()) # len*batch*n_e embs = embs.reshape((x.shape[0], x.shape[1], n_e)) embs = apply_dropout(embs, dropout) flipped_embs = embs[::-1] # len*bacth*n_d h1 = layers[0].forward_all(embs) h2 = layers[1].forward_all(flipped_embs) h_final = T.concatenate([h1, h2[::-1]], axis=2) h_final = apply_dropout(h_final, dropout) size = n_d * 2 output_layer = self.output_layer = Layer(n_in=size, n_out=1, activation=sigmoid) # len*batch*1 probs = output_layer.forward(h_final) # len*batch probs2 = probs.reshape(x.shape) self.MRG_rng = MRG_RandomStreams() z_pred = self.z_pred = T.cast( self.MRG_rng.binomial(size=probs2.shape, p=probs2), "int8") # we are computing approximated gradient by sampling z; # so should mark sampled z not part of the gradient propagation path # self.z_pred = theano.gradient.disconnected_grad(z_pred) z2 = z.dimshuffle((0, 1, "x")) logpz = -T.nnet.binary_crossentropy(probs, z2) * masks logpz = self.logpz = logpz.reshape(x.shape) probs = self.probs = probs.reshape(x.shape) # batch zsum = T.sum(z, axis=0, dtype=theano.config.floatX) zdiff = T.sum(T.abs_(z[1:] - z[:-1]), axis=0, dtype=theano.config.floatX) loss_mat = encoder.loss_mat if args.aspect < 0: loss_vec = T.mean(loss_mat, axis=1) else: assert args.aspect < self.nclasses loss_vec = loss_mat[:, args.aspect] self.loss_vec = loss_vec coherent_factor = args.sparsity * args.coherent loss = self.loss = T.mean(loss_vec) sparsity_cost = self.sparsity_cost = T.mean(zsum) * args.sparsity + \ T.mean(zdiff) * coherent_factor cost_vec = loss_vec + zsum * args.sparsity + zdiff * coherent_factor cost_logpz = T.mean(cost_vec * T.sum(logpz, axis=0)) self.obj = T.mean(cost_vec) params = self.params = [] for l in layers + [output_layer]: for p in l.params: params.append(p) nparams = sum(len(x.get_value(borrow=True).ravel()) \ for x in params) say("total # parameters: {}\n".format(nparams)) l2_cost = None for p in params: if l2_cost is None: l2_cost = T.sum(p**2) else: l2_cost = l2_cost + T.sum(p**2) l2_cost = l2_cost * args.l2_reg cost = self.cost = cost_logpz * 10 + l2_cost print "cost.dtype", cost.dtype self.cost_e = loss * 10 + encoder.l2_cost
def __init__(self, numTruncate=20, numHidden=500, inputsSize=[576], outputsSize=[1, 4]): #################################### # Create model # #################################### # Create tensor variables to store input / output data FeaturesXGt = T.matrix('FeaturesXGt', dtype='float32') FeaturesX = T.tensor3('FeaturesX') TargetY = T.tensor3('PredY') BboxY = T.tensor3('BboxY') C = T.vector('C', dtype='float32') S = T.vector('S', dtype='float32') BoxsVariances = T.matrix('BoxsVariances') RatioPosNeg = T.scalar('RatioPosNeg') # Create shared variable for input net = LSTMNet() net.NetName = 'LSTMTrackingNet' # Input # net.Layer['input'] = InputLayer(net, X) net.LayerOpts['lstm_num_truncate'] = numTruncate # net.LayerOpts['reshape_new_shape'] = (net.LayerOpts['lstm_num_truncate'], 576) # TODO: Need to set this size later # net.Layer['input_2d'] = ReshapeLayer(net, net.Layer['input'].Output) # Setting LSTM architecture net.LayerOpts['lstm_num_hidden'] = numHidden net.LayerOpts['lstm_inputs_size'] = inputsSize net.LayerOpts['lstm_outputs_size'] = outputsSize # Truncate lstm model currentC = C currentS = S preds = [] bboxs = [] predictLayers = [] for truncId in range(net.LayerOpts['lstm_num_truncate']): # Create LSTM layer currentInput = FeaturesXGt[truncId] net.Layer['lstm_truncid_%d' % (truncId)] = LSTMLayer( net, currentInput, currentC, currentS) net.LayerOpts['lstm_params'] = net.Layer['lstm_truncid_%d' % (truncId)].Params # Predict next position based on current state currentInput = FeaturesX[truncId] tempLayer = LSTMLayer(net, currentInput, currentC, currentS) predictLayers.append(tempLayer) pred = SigmoidLayer(tempLayer.Output[0]).Output bbox = tempLayer.Output[1] preds.append(pred) bboxs.append(bbox) # Update stateS and stateC currentC = net.Layer['lstm_truncid_%d' % (truncId)].C currentS = net.Layer['lstm_truncid_%d' % (truncId)].S lastS = currentS lastC = currentC self.Net = net # Calculate cost function # Confidence loss cost = 0 costPos = 0 costLoc = 0 costNeg = 0 k0 = None k1 = None k2 = None k3 = None k4 = None for truncId in range(net.LayerOpts['lstm_num_truncate']): pred = preds[truncId] bbox = bboxs[truncId] target = TargetY[truncId] bboxgt = BboxY[truncId] numFeaturesPerIm = pred.shape[0] numAnchorBoxPerLoc = pred.shape[1] pred = pred.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 1)) target = target.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 1)) bbox = bbox.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 4)) bbox = bbox / BoxsVariances bboxgt = bboxgt.reshape((numFeaturesPerIm * numAnchorBoxPerLoc, 4)) allLocCost = T.sum(T.abs_(bbox - bboxgt), axis=1, keepdims=True) * target allConfPosCost = -target * T.log(pred) allConfNegCost = -(1 - target) * T.log(1 - pred) allPosCost = allConfPosCost + allLocCost * 0 allNegCost = allConfNegCost allPosCostSum = T.sum(allPosCost, axis=1) allNegCostSum = T.sum(allNegCost, axis=1) sortedPosCostIdx = T.argsort(allPosCostSum, axis=0) sortedNegCostIdx = T.argsort(allNegCostSum, axis=0) sortedPosCost = allPosCostSum[sortedPosCostIdx] sortedNegCost = allNegCostSum[sortedNegCostIdx] if k0 == None: k0 = target if k1 == None: k1 = allLocCost if k2 == None: k2 = pred if k3 == None: k3 = sortedPosCostIdx if k4 == None: k4 = sortedNegCostIdx numMax = T.sum(T.neq(sortedPosCost, 0)) # numNegMax = T.cast(T.floor(T.minimum(T.maximum(numMax * RatioPosNeg, 2), 300)), dtype = 'int32') numNegMax = T.cast(T.floor(numMax * RatioPosNeg), dtype='int32') top2PosCost = sortedPosCost[-numMax:] top6NegCost = sortedNegCost[-numNegMax:] layerCost = 
(T.sum(top2PosCost) + T.sum(top6NegCost)) / numMax cost = cost + layerCost costPos = costPos + pred[sortedPosCostIdx[-numMax:]].mean() costLoc = costLoc + allLocCost.sum() / numMax costNeg = costNeg + pred[sortedNegCostIdx[-numNegMax:]].mean() cost = cost / net.LayerOpts['lstm_num_truncate'] costPos = costPos / net.LayerOpts['lstm_num_truncate'] costLoc = costLoc / net.LayerOpts['lstm_num_truncate'] costNeg = costNeg / net.LayerOpts['lstm_num_truncate'] # Create update function params = self.Net.Layer['lstm_truncid_0'].Params grads = T.grad(cost, params) updates = AdamGDUpdate(net, params=params, grads=grads).Updates # Train function self.TrainFunc = theano.function(inputs=[ FeaturesXGt, FeaturesX, TargetY, BboxY, S, C, BoxsVariances, RatioPosNeg ], updates=updates, outputs=[ cost, lastS, lastC, costPos, costLoc, costNeg, k0, k1, k2, k3, k4 ]) self.PredFunc = theano.function(inputs=[FeaturesX, S, C], outputs=[preds[0], bboxs[0]]) nextS = self.Net.Layer['lstm_truncid_0'].S nextC = self.Net.Layer['lstm_truncid_0'].C self.NextState = theano.function(inputs=[FeaturesXGt, S, C], outputs=[nextS, nextC])
def main(args): #theano.optimizer='fast_compile' #theano.config.exception_verbosity='high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'nll_upper_bound' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/gmm/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") flgMSE = int(args['flgMSE']) period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps # int(args['stride_test']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) flgAgg = int(args['flgAgg']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) debug = int(args['debug']) num_sequences_per_batch = int(args['numSequences']) #based on appliance typeLoad = int(args['typeLoad']) target_inclusion_prob = float(args['target_inclusion_prob']) n_steps_val = n_steps print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 150 #150 p_z_dim = 150 #150 p_x_dim = 250 #250 x2s_dim = 250 #250 z2s_dim = 150 #150 target_dim = k #x_dim #(x_dim-1)*k ''' f = open(sample_path+'vrnn_gmm_1_best.pkl', 'rb') mainloop = cPickle.load(f) f.close() ''' model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_ukdale( data_path, windows, appliances, numApps=flgAgg, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, flgAggSumScaled=1, flgFilterZeros=1, typeLoad=typeLoad, seq_per_batch=num_sequences_per_batch, target_inclusion_prob=target_inclusion_prob) instancesPlot = { 0: [4] } #for now use hard coded instancesPlot for kelly sampling if (typeLoad == 0): #original split according time instancesPlot = reader.build_dict_instances_plot( listDates, batch_size, Xval.shape[0]) train_data = UKdale( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=ytrain, labels=Xtrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = UKdale( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=yval, labels=Xval) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) rnn = LSTM(name='rnn', parent=['x_1', 'z_1'], parent_dim=[x2s_dim, z2s_dim], nout=rnn_dim, unit='tanh', init_W=init_W, init_U=init_U, init_b=init_b) ''' dissag_pred = FullyConnectedLayer(name='disag_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=num_apps, unit='relu', init_W=init_W, init_b=init_b) ''' phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1'], parent_dim=[x2s_dim, rnn_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['s_tm1'], parent_dim=[rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu = FullyConnectedLayer(name='theta_mu', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig = FullyConnectedLayer(name='theta_sig', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff = FullyConnectedLayer(name='coeff', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) corr = FullyConnectedLayer(name='corr', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='tanh', init_W=init_W, init_b=init_b) binary = FullyConnectedLayer(name='binary', parent=['theta_1'], parent_dim=[p_x_dim], nout=1, unit='sigmoid', init_W=init_W, init_b=init_b) nodes = [ rnn, x_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu, theta_sig, coeff ] #, corr, binary params = OrderedDict() for node in nodes: if node.initialize() is not None: params.update(node.initialize()) params = init_tparams(params) s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) def inner_val_fn(s_tm1): ''' phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) ''' prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(prior_mu_t, prior_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) pred_t = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) pred_1_t = x_1.fprop([pred_t], params) s_t = rnn.fprop([[pred_1_t, z_1_t], [s_tm1]], params) return s_t, pred_t, z_t, 
theta_1_t, theta_mu_t, theta_sig_t, coeff_t # prior_mu_temp_val, prior_sig_temp_val ((s_temp_val, prediction_val, z_t_temp_val, theta_1_temp_val, theta_mu_temp_val, theta_sig_temp_val, coeff_temp_val), updates_val) =\ theano.scan(fn=inner_val_fn , n_steps=n_steps_val, #already 1 subtracted if doing next step outputs_info=[s_0, None, None, None, None, None, None]) for k, v in updates_val.iteritems(): k.default_update = v def inner_fn(x_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t) z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu_t = theta_mu.fprop([theta_1_t], params) theta_sig_t = theta_sig.fprop([theta_1_t], params) coeff_t = coeff.fprop([theta_1_t], params) #corr_t = corr.fprop([theta_1_t], params) #binary_t = binary.fprop([theta_1_t], params) pred = GMM_sample(theta_mu_t, theta_sig_t, coeff_t) #Gaussian_sample(theta_mu_t, theta_sig_t) s_t = rnn.fprop([[x_t, z_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return s_t, phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, z_t, z_1_t, theta_1_t, theta_mu_t, theta_sig_t, coeff_t, pred #, y_pred #corr_temp, binary_temp ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu_temp, theta_sig_temp, coeff_temp, prediction), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None, None, None, None]) for k, v in updates.iteritems(): k.default_update = v s_temp = concatenate( [s_0[None, :, :], s_temp[:-1]], axis=0 ) # seems like this is for creating an additional dimension to s_0 ''' theta_1_temp = theta_1.fprop([z_1_temp, s_temp], params) theta_mu_temp = theta_mu.fprop([theta_1_temp], params) theta_sig_temp = theta_sig.fprop([theta_1_temp], params) coeff_temp = coeff.fprop([theta_1_temp], params) corr_temp = corr.fprop([theta_1_temp], params) binary_temp = binary.fprop([theta_1_temp], params) ''' s_temp.name = 'h_1' #gisse z_1_temp.name = 'z_1' #gisse z_t_temp.name = 'z' theta_mu_temp.name = 'theta_mu_temp' theta_sig_temp.name = 'theta_sig_temp' coeff_temp.name = 'coeff' prediction.name = 'pred_' + str(flgAgg) mse = T.mean((prediction - x)**2) # As axis = None is calculated for all mae = T.mean(T.abs_(prediction - x)) mse.name = 'mse' mae.name = 'mae' x_in = x.reshape((batch_size * n_steps, -1)) kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape theta_mu_in = theta_mu_temp.reshape((x_shape[0] * x_shape[1], -1)) theta_sig_in = theta_sig_temp.reshape((x_shape[0] * x_shape[1], -1)) coeff_in = coeff_temp.reshape((x_shape[0] * x_shape[1], -1)) #corr_in = corr_temp.reshape((x_shape[0]*x_shape[1], -1)) #binary_in = binary_temp.reshape((x_shape[0]*x_shape[1], -1)) recon = GMM( x_in, theta_mu_in, theta_sig_in, coeff_in ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out' #recon = recon * mask recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' nll_upper_bound = recon_term + kl_term #+ mse if (flgMSE): nll_upper_bound = nll_upper_bound + mse 
nll_upper_bound.name = 'nll_upper_bound' ############## TEST ############### theta_mu_in_val = theta_mu_temp_val.reshape((batch_size * n_steps, -1)) theta_sig_in_val = theta_sig_temp_val.reshape((batch_size * n_steps, -1)) coeff_in_val = coeff_temp_val.reshape((batch_size * n_steps, -1)) pred_in = prediction_val.reshape((batch_size * n_steps, -1)) recon_val = GMM( pred_in, theta_mu_in_val, theta_sig_in_val, coeff_in_val ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon_val = recon_val.reshape((batch_size, n_steps)) recon_val.name = 'gmm_out_val' model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, ddout=[ nll_upper_bound, recon_term, kl_term, mse, mae, theta_mu_temp, prediction ], indexSep=5, indexDDoutPlot=[(0, theta_mu_temp), (2, z_t_temp), (3, prediction)], instancesPlot=instancesPlot, #{0:[4,20],2:[5,10]},#, 80,150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr} mainloop = Training(name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], n_steps=n_steps, extension=extension, lr_iterations=lr_iterations) mainloop.run() test_fn = theano.function( inputs=[], outputs=[prediction_val, recon_val], updates= updates_val #, allow_input_downcast=True, on_unused_input='ignore' ) outputGeneration = test_fn() #{0:[4,20], 2:[5,10]} ''' plt.figure(1) plt.plot(np.transpose(outputGeneration[0],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated_z_0-4") plt.figure(2) plt.plot(np.transpose(outputGeneration[1],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated_s_0-4") plt.figure(3) plt.plot(np.transpose(outputGeneration[2],[1,0,2])[4]) plt.savefig(save_path+"/vrnn_dis_generated_theta_0-4") ''' plt.figure(1) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4]) plt.savefig(save_path + "/vrnn_dis_generated_pred_4.ps") plt.figure(2) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[10]) plt.savefig(save_path + "/vrnn_dis_generated_pred_10.ps") plt.figure(3) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[20]) plt.savefig(save_path + "/vrnn_dis_generated_pred_20.ps") testLogLike = np.asarray(outputGeneration[1]).mean() fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(windows) + "\n") fLog.write("Test-log-likelihood\n") fLog.write("{}\n".format(testLogLike)) fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, z2s_dim)) #fLog.write("{}\n".format(outputGeneration[0]))#logLIkelihood in the test set fLog.write("epoch,log,kl,mse,mae\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): f = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] d = mainloop.trainlog.monitor['mse'][i] e = mainloop.trainlog.monitor['mae'][i] fLog.write("{:d},{:.2f},{:.2f},{:.3f},{:.3f}\n".format(f, a, b, d, e)) f = open(save_path + '/outputRealGeneration.pkl', 'wb') pickle.dump(outputGeneration, f, -1) f.close()
def __init__(self, nwk, x=None, y=None, u=None, v=None, *arg, **kwd): """ : -------- parameters -------- : nwk: an expression builder for the neural network to be trained, could be a Nnt object. x: the inputs, with the first dimension standing for sample units. If unspecified, the trainer will try to evaluate the entry point and cache the result as source data. y: the labels, with the first dimension standing for sample units. if unspecified, a simi-unsupervied training is assumed as the labels will be identical to the inputs. u: the valication data inputs v: the validation data labels : -------- kwd: keywords -------- : ** bsz: batch size. ** lrt: learning rate. ** lmb: weight decay factor, the lambda ** dpc: dot product cap, preventing numerical explosion ** err: expression builder for the computation of training error between the network output {pred} and the label {y}. the expression must evaluate to a scalar. ** reg: expression builder for the computation of weight panalize the vector of parameters {vhat}, the expression must evaluate to a scalar. ** mmt: momentom of the trainer ** vdr: validation disruption rate ** hte: the halting training error. """ # numpy random number generator seed = kwd.pop('seed', None) nrng = kwd.pop('nrng', np.random.RandomState(seed)) trng = kwd.pop('trng', RandomStreams(nrng.randint(0x7FFFFFFF))) # private members self.__seed__ = seed self.__nrng__ = nrng self.__trng__ = trng # expression of error and regulator terms err = getattr(exb, kwd.get('err', 'CE')) reg = getattr(exb, kwd.get('reg', 'L1')) # the validation disruption self.vdr = S(kwd.get('vdr'), 'VDR') # the denoising self.dns = S(kwd.get('dns'), 'DNS') # the halting rules, and halting status self.hte = kwd.get('hte', 1e-3) # 1. low training error self.hgd = kwd.get('htg', 1e-7) # 2. low gradient self.hlr = kwd.get('hlr', 1e-7) # 3. low learning rate self.hvp = kwd.get('hvp', 100) # 4. out of validation patients self.hlt = 0 # current epoch index, use int64 ep = kwd.get('ep', 0) self.ep = S(ep, 'EP') # training batch ppsize, use int64 bsz = kwd.get('bsz', 20) self.bsz = S(bsz, 'BSZ') # current batch index, use int64 self.bt = S(0, 'BT') # momentumn, make sure momentum is a sane value mmt = kwd.get('mmt', .0) self.mmt = S(mmt, 'MMT') # learning rate lrt = kwd.get('lrt', 0.01) # learning rate acc = kwd.get('acc', 1.02) # acceleration dec = kwd.get('dec', 0.95) # deceleration self.lrt = S(lrt, 'LRT', 'f') self.acc = S(acc, 'ACC', 'f') self.dec = S(dec, 'DEC', 'f') # weight decay, lambda lmd = kwd.get('lmd', .0) self.lmd = S(lmd, 'LMD', 'f') # the neural network self.nwk = nwk # inputs and labels, for modeling and validation x = S(np.zeros((bsz * 2, nwk.dim[0]), 'f') if x is None else x) y = x if y is None else S(y) u = x if u is None else S(u) v = y if v is None else S(v) self.x, self.y, self.u, self.v = x, y, u, v # -------- construct trainer function -------- * # 1) symbolic expressions x = T.tensor(name='x', dtype=x.dtype, broadcastable=x.broadcastable) y = T.tensor(name='y', dtype=y.dtype, broadcastable=y.broadcastable) u = T.tensor(name='u', dtype=u.dtype, broadcastable=u.broadcastable) v = T.tensor(name='v', dtype=v.dtype, broadcastable=v.broadcastable) # prediction pred = nwk(x) # generic # mean correlation between testing outcome {v} and that predicted # from testing input {u}. 
vcor = exb.mcr(nwk(x), y) # list of symbolic parameters to be tuned pars = parms(pred) # unlist symbolic weights into a vector vwgt = T.concatenate([p.flatten() for p in pars if p.name == 'w']) # symbolic batch cost, which is the mean trainning erro over all # observations and sub attributes. # The observations are indexed by the first dimension of y, the last # dimension indices data entries for each observation, # e.g. voxels in an MRI region, and SNPs in a gene. # The objective function, err, returns a scalar of training loss, it # can be the L1, L2 norm and CE. erro = err(pred, y).mean() # the sum of weights calculated for weight decay. wsum = reg(vwgt) cost = erro + wsum * self.lmd # symbolic gradient of cost WRT parameters grad = T.grad(cost, pars) gvec = T.concatenate([g.flatten() for g in grad]) gabs = T.abs_(gvec) gsup = T.max(gabs) # trainer control nwep = ((self.bt + 1) * self.bsz) // self.x.shape[-2] # new epoch? # 2) define updates after each batch training up = [] # update parameters using gradiant decent, and momentum for p, g in zip(pars, grad): # initialize accumulated gradient # NOTE: p.eval() causes mehem!! h = S(np.zeros_like(p.get_value())) # accumulate gradient, partially historical (due to the momentum), # partially noval up.append((h, self.mmt * h + (1 - self.mmt) * g)) # update parameters by stepping down the accumulated gradient up.append((p, p - self.lrt * h)) # update batch and eqoch index up.append((self.bt, (self.bt + 1) * (1 - nwep))) up.append((self.ep, self.ep + nwep)) # 3) the trainer functions # expression of batch and whole data feed: _ = T.arange((self.bt + 0) * self.bsz, (self.bt + 1) * self.bsz) # enable denoise training if self.dns: msk = self.__trng__.binomial(self.y.shape, 1, self.dns, dtype=FX) msk = 1 - msk bts = { x: self.x.take(_, 0, 'wrap'), y: self.y.take(_, 0, 'wrap') * msk.take(_, -2, 'wrap') } dts = {x: self.x, y: self.y * msk} else: bts = {x: self.x.take(_, 0, 'wrap'), y: self.y.take(_, 0, 'wrap')} dts = {x: self.x, y: self.y} # each invocation sends one batch of training examples to the network, # calculate total cost and tune the parameters by gradient decent. self.step = F([], cost, name="step", givens=bts, updates=up) # training error, training cost self.terr = F([], erro, name="terr", givens=dts) self.tcst = F([], cost, name="tcst", givens=dts) # weights, and parameters self.wsum = F([], wsum, name="wsum") self.gsup = F([], gsup, name="gsup", givens=dts) # * -------- done with trainer functions -------- * # * -------- validation functions -------- * # enable validation binary disruption (binary)? 
if self.vdr: _ = self.__trng__.binomial(self.v.shape, 1, self.vdr, dtype=FX) vts = {x: self.u, y: (self.v + _) % C(2.0, FX)} else: vts = {x: self.u, y: self.v} self.verr = F([], erro, name="verr", givens=vts) # validation correlation performance self.vcor = F([], vcor, name="vcor", givens=vts) # * ---------- logging and recording ---------- * hd, skip = [], ['step', 'gvec'] for k, v in vars(self).items(): if k.startswith('__') or k in skip: continue # possible theano shared cpu variable if isinstance(v, type(self.lmd)) and v.ndim < 1: hd.append((k, v.get_value)) # possible theano shared gpu variable if isinstance(v, type(self.ep)) and v.ndim < 1: hd.append((k, v.get_value)) if isinstance(v, type(self.step)): hd.append((k, v)) if isinstance(v, float) or isinstance(v, int): hd.append((k, v)) self.__head__ = hd self.__time__ = .0 # the initial record, and history self.__hist__ = [self.__rpt__()] self.__mver__ = self.__hist__[-1] # min verr self.__mter__ = self.__hist__[-1] # min terr # printing format self.__pfmt__ = ( '{ep:04d}: {tcst:.1e} = {terr:.1e} + {lmd:.1e}*{wsum:.1e}' '|{verr:.1e}, {gsup:.2e}, {vcor:.2e}, {lrt:.2e}')
def log_normal(self, x, mean, std, eps=0.0):
    """computes log-proba of normal distribution"""
    std += eps
    return -0.5 * np.log(2 * np.pi) - T.log(T.abs_(std)) - (x - mean) ** 2 / (2 * std ** 2)
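# Cross-check (added; not from the source): the expression above is the standard
# Gaussian log-density; verified here against SciPy on concrete numbers.
import numpy as np
from scipy import stats

x_np, mean_np, std_np = 1.3, 0.5, 2.0
manual = (-0.5 * np.log(2 * np.pi) - np.log(abs(std_np))
          - (x_np - mean_np) ** 2 / (2 * std_np ** 2))
assert np.isclose(manual, stats.norm.logpdf(x_np, loc=mean_np, scale=std_np))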
w_h2 = theano.shared(np.random.randn(no_hidden1, 20)*.01, floatX)
b_h2 = theano.shared(np.random.randn(20)*0.01, floatX)
w_o = theano.shared(np.random.randn(20)*.01, floatX)
b_o = theano.shared(np.random.randn()*.01, floatX)

# learning rate
alpha = theano.shared(learning_rate, floatX)

# Define mathematical expressions:
h1_out = T.nnet.sigmoid(T.dot(x, w_h1) + b_h1)
h2_out = T.nnet.sigmoid(T.dot(h1_out, w_h2) + b_h2)
y = T.dot(h2_out, w_o) + b_o  # 4-layer

cost = T.abs_(T.mean(T.sqr(d - y)))
accuracy = T.mean(d - y)

# define gradients
dw_o, db_o, dw_h1, db_h1, dw_h2, db_h2 = T.grad(
    cost, [w_o, b_o, w_h1, b_h1, w_h2, b_h2])  # 4-layer

train = theano.function(
    inputs=[x, d],
    outputs=cost,
    updates=[[w_o, w_o - alpha*dw_o],
             [b_o, b_o - alpha*db_o],
             [w_h1, w_h1 - alpha*dw_h1],
             [b_h1, b_h1 - alpha*db_h1],
             [w_h2, w_h2 - alpha*dw_h2],
             [b_h2, b_h2 - alpha*db_h2]],
    allow_input_downcast=True
)
def SGMGHMC_p(tparams, cost, inps, ntrain, lr, rho=0.9, epsilon=1e-6, clip_norm=0.1): """ Additional parameters """ mom_tparams = OrderedDict() xi_tparams = OrderedDict() for k, p0 in tparams.iteritems(): mom_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_mom'%k) xi_tparams[k] = theano.shared(p0.get_value() * 0. + 1e-10, name='%s_xi'%k) a = theano.shared(numpy_floatX(2.)) m_org = theano.shared(numpy_floatX(5.)) c = theano.shared(numpy_floatX(5.)) sigma_p = theano.shared(numpy_floatX(10.)) sigma_xi = theano.shared(numpy_floatX(0.001)) gamma_xi = theano.shared(numpy_floatX(1)) logger = logging.getLogger('eval_ptb_sgmgnht') logger.setLevel(logging.INFO) fh = logging.FileHandler('eval_ptb_sgmgnht.log') logger.info('a {} m {} c {} s_p{} s_xi{} g_xi{}'.format(a.get_value(), m_org.get_value(), c.get_value(), sigma_p.get_value(), sigma_xi.get_value(), gamma_xi.get_value())) p = tensor.vector('p', dtype='float32') """ default: lr=0.001 """ trng = RandomStreams(123) grads = tensor.grad(cost, tparams.values()) norm = tensor.sqrt(sum([tensor.sum(g**2) for g in grads])) if tensor.ge(norm, clip_norm): grads = [g*clip_norm/norm for g in grads] gshared = [theano.shared(p0.get_value() * 0., name='%s_grad'%k) for k, p0 in tparams.iteritems()] gsup = [(gs, g) for gs, g in zip(gshared, grads)] f_grad_shared = theano.function(inps, cost, updates=gsup) updates = [] # reset mom # counter = theano.shared(numpy_floatX(0.)) # updates.append((counter,counter+1)) for p, mom, xi, g in zip(tparams.values(),mom_tparams.values(),xi_tparams.values(), gshared): #rms prop t = theano.shared(p.get_value() * 0.) t_new = rho * t + (1-rho) * g**2 updates.append((t, t_new)) m = (tensor.sqrt(t_new) + 1e-10) m = m/tensor.max(m)*m_org #m = tensor.switch(tensor.ge(m,1*m_org), 1*m_org, m) m = tensor.switch(tensor.le(m,m_org*0.01), m_org*0.01, m) g_f = tensor.sgn(mom)/m*(tensor.abs_(mom)**(1/a)) K_f = -g_f + 2/c*(c*g_f + tensor.log(1+tensor.exp(-c*g_f))) psi_f_1 = (1- tensor.exp(-c*g_f) )/( 1 + tensor.exp(-c*g_f) ) f1_f_1 = 1/m/a*psi_f_1*(tensor.abs_(mom+1e-100)**(1/a-1)) psi_grad_f_1 = 2*c*tensor.exp(- c*g_f)/(1 + tensor.exp(-c*g_f))**2 f3_f_1 = 1/m**2/a**2*(psi_f_1**2-psi_grad_f_1)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f_1*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2) psi_f = (tensor.exp(c*g_f) - 1)/(tensor.exp(c*g_f) + 1) f1_f = 1/m/a*psi_f*(tensor.abs_(mom+1e-100)**(1/a-1)) psi_grad_f = 2*c*tensor.exp(c*g_f)/(tensor.exp(c*g_f) + 1)**2 f3_f = 1/m**2/a**2*(psi_f**2-psi_grad_f)*tensor.abs_(mom+1e-100)**(2/a-2) - (1/a-1)/m/a*psi_f*tensor.sgn(mom)*tensor.abs_(mom+1e-100)**(1/a-2) temp_f1 = tensor.switch(tensor.ge(g_f,0), f1_f_1, f1_f) temp_f3 = tensor.switch(tensor.ge(g_f,0), f3_f_1, f3_f) noise_p = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') noise_xi = trng.normal(p.get_value().shape, avg = 0.0, std = 1., dtype='float32') #lr_new = 1 / tensor.sqrt(tensor.abs_(temp_f1)) * lr lr_new = lr updated_p = p + temp_f1 * lr_new #updated_mom = (mom - temp_f1* xi *lr - g * lr * ntrain + tensor.sqrt(2*sigma_p*lr) * noise_p)* (1-tensor.eq(tensor.mod(iterations,100),0)) + randmg * tensor.eq(tensor.mod(iterations,100),0) updated_mom = mom - 1.2*temp_f1* xi *lr_new - g * lr_new * ntrain + tensor.sqrt(2*sigma_p*lr_new) * noise_p updated_xi = xi + temp_f3* sigma_xi * lr_new - (xi - sigma_p)*gamma_xi*lr_new + tensor.sqrt(2*sigma_xi*gamma_xi*lr_new) * noise_xi updates.append((p, updated_p)) updates.append((mom, updated_mom)) updates.append((xi, updated_xi)) f_update = theano.function([lr,ntrain], 
[p,mom,m], updates=updates) return f_grad_shared, f_update
def laplace_loss(percept_length, sigma_min, y_true, y_pred):
    sigmas_pred = sigma_min + y_pred[:, percept_length:]
    mus_pred = y_pred[:, :percept_length]
    return T.mean((T.abs_(y_true - mus_pred)) / sigmas_pred + T.log(sigmas_pred))
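# NumPy sketch (added; not from the source): the loss above is the Laplace
# negative log-likelihood up to the constant log(2); the network output is split
# into means and (shifted) scales along the feature axis. Values below are made up.
import numpy as np

def laplace_loss_np(percept_length, sigma_min, y_true, y_pred):
    sigmas = sigma_min + y_pred[:, percept_length:]
    mus = y_pred[:, :percept_length]
    return np.mean(np.abs(y_true - mus) / sigmas + np.log(sigmas))

y_pred_np = np.array([[0.1, 0.2, 0.5, 0.5]])  # two means followed by two raw scales
y_true_np = np.array([[0.0, 0.3]])
print(laplace_loss_np(2, 1e-3, y_true_np, y_pred_np))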
def mean_absolute_error(y_true, y_pred, weight=None):
    if weight is not None:
        return T.abs_(weight.reshape((weight.shape[0], 1)) * (y_pred - y_true)).mean()
    else:
        return T.abs_(y_pred - y_true).mean()
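# NumPy equivalent (added; not from the source): `weight` rescales each sample's
# error before the absolute value and mean are taken, mirroring the branch above.
import numpy as np

def mean_absolute_error_np(y_true, y_pred, weight=None):
    if weight is not None:
        return np.abs(weight.reshape((weight.shape[0], 1)) * (y_pred - y_true)).mean()
    return np.abs(y_pred - y_true).mean()

y_true_np = np.array([[1.0, 2.0], [3.0, 4.0]])
y_pred_np = np.array([[1.5, 2.0], [2.0, 4.5]])
print(mean_absolute_error_np(y_true_np, y_pred_np),
      mean_absolute_error_np(y_true_np, y_pred_np, weight=np.array([1.0, 2.0])))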
def sd2rho(sd):
    """`sd -> rho` theano converter
    :math:`mu + sd*e = mu + log(1+exp(rho))*e`"""
    return tt.log(tt.exp(tt.abs_(sd)) - 1.)
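# Round-trip check (added; not from the source): sd2rho is the inverse of the
# softplus parameterisation sd = log(1 + exp(rho)) referenced in the docstring.
import numpy as np

def sd2rho_np(sd):
    return np.log(np.exp(np.abs(sd)) - 1.0)

def rho2sd_np(rho):
    return np.log1p(np.exp(rho))

sd_np = np.array([0.1, 1.0, 3.0])
assert np.allclose(rho2sd_np(sd2rho_np(sd_np)), sd_np)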
def ready(self): embedding_layer = self.embedding_layer args = self.args padding_id = self.padding_id weights = self.weights dropout = self.dropout = theano.shared( np.float64(args.dropout).astype(theano.config.floatX)) # len*batch x = self.x = T.imatrix() n_d = args.hidden_dim2 n_e = embedding_layer.n_d activation = get_activation_by_name(args.activation) layers = self.layers = [] layer_type = args.layer.lower() for i in xrange(2): if layer_type == "rcnn": l = RCNN( n_in=n_e, # if i == 0 else n_d, n_out=n_d, activation=activation, order=args.order) elif layer_type == "lstm": l = LSTM( n_in=n_e, # if i == 0 else n_d, n_out=n_d, activation=activation) layers.append(l) # len * batch masks = T.cast(T.neq(x, padding_id), "float32") #masks = masks.dimshuffle((0,1,"x")) # (len*batch)*n_e embs = embedding_layer.forward(x.ravel()) if weights is not None: embs_w = weights[x.ravel()].dimshuffle((0, 'x')) embs = embs * embs_w # len*batch*n_e embs = embs.reshape((x.shape[0], x.shape[1], n_e)) embs = apply_dropout(embs, dropout) self.word_embs = embs flipped_embs = embs[::-1] # len*bacth*n_d h1 = layers[0].forward_all(embs) h2 = layers[1].forward_all(flipped_embs) h_final = T.concatenate([h1, h2[::-1]], axis=2) h_final = apply_dropout(h_final, dropout) size = n_d * 2 output_layer = self.output_layer = ZLayer(n_in=size, n_hidden=n_d, activation=activation) # sample z given text (i.e. x) z_pred, sample_updates = output_layer.sample_all(h_final) # we are computing approximated gradient by sampling z; # so should mark sampled z not part of the gradient propagation path # z_pred = self.z_pred = theano.gradient.disconnected_grad(z_pred) self.sample_updates = sample_updates print "z_pred", z_pred.ndim self.p1 = T.sum(masks * z_pred) / (T.sum(masks) + 1e-8) # len*batch*1 probs = output_layer.forward_all(h_final, z_pred) print "probs", probs.ndim logpz = -T.nnet.binary_crossentropy(probs, z_pred) * masks logpz = self.logpz = logpz.reshape(x.shape) probs = self.probs = probs.reshape(x.shape) # batch z = z_pred self.zsum = T.sum(z, axis=0, dtype=theano.config.floatX) self.zdiff = T.sum(T.abs_(z[1:] - z[:-1]), axis=0, dtype=theano.config.floatX) params = self.params = [] for l in layers + [output_layer]: for p in l.params: params.append(p) nparams = sum(len(x.get_value(borrow=True).ravel()) \ for x in params) say("total # parameters: {}\n".format(nparams)) l2_cost = None for p in params: if l2_cost is None: l2_cost = T.sum(p**2) else: l2_cost = l2_cost + T.sum(p**2) l2_cost = l2_cost * args.l2_reg self.l2_cost = l2_cost
def standard_laplace(x): return np.log(0.5) - T.abs_(x)
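# Cross-check (added; not from the source): log(0.5) - |x| is exactly the
# log-density of a standard Laplace(0, 1) variable.
import numpy as np
from scipy import stats

x_np = np.array([-2.0, 0.0, 1.5])
assert np.allclose(np.log(0.5) - np.abs(x_np), stats.laplace.logpdf(x_np))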
def normal(x, mean, sd): return C - T.log(T.abs_(sd)) - ((x - mean)**2 / (2 * sd**2))
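# Cross-check (added; not from the source): assuming the module-level constant
# C equals -0.5*log(2*pi) (it is not shown in this snippet), the expression above
# is the Gaussian log-density.
import numpy as np
from scipy import stats

C_np = -0.5 * np.log(2 * np.pi)
x_np, mean_np, sd_np = 0.7, 0.0, 1.5
assert np.isclose(C_np - np.log(abs(sd_np)) - (x_np - mean_np) ** 2 / (2 * sd_np ** 2),
                  stats.norm.logpdf(x_np, loc=mean_np, scale=sd_np))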
def abs(x): return T.abs_(x)
context_name=ctx_name)() mode = get_mode('FAST_RUN').including('gpuarray') f = theano.function([guard_in], op(guard_in), mode=mode, profile=False) result.cache[key] = f return f(inp) result.cache = dict() return result f_gpua_min = f_compute(T.min) f_gpua_max = f_compute(T.max) f_gpua_absmax = f_compute(lambda x: T.max(T.abs_(x))) class NanGuardMode(Mode): """ A Theano compilation Mode that makes the compiled function automatically detect NaNs and Infs and detect an error if they occur. Parameters ---------- nan_is_error : bool If True, raise an error anytime a NaN is encountered. inf_is_error : bool If True, raise an error anytime an Inf is encountered. Note that some pylearn2 modules currently use np.inf as a default value (e.g. mlp.max_pool) and these will cause an error if inf_is_error is True.
def main(args): theano.optimizer = 'fast_compile' theano.config.exception_verbosity = 'high' trial = int(args['trial']) pkl_name = 'vrnn_gmm_%d' % trial channel_name = 'mse' data_path = args['data_path'] save_path = args[ 'save_path'] #+'/aggVSdisag_distrib/'+datetime.datetime.now().strftime("%y-%m-%d_%H-%M") period = int(args['period']) n_steps = int(args['n_steps']) stride_train = int(args['stride_train']) stride_test = n_steps typeLoad = int(args['typeLoad']) flgMSE = int(args['flgMSE']) monitoring_freq = int(args['monitoring_freq']) epoch = int(args['epoch']) batch_size = int(args['batch_size']) x_dim = int(args['x_dim']) y_dim = int(args['y_dim']) z_dim = int(args['z_dim']) rnn_dim = int(args['rnn_dim']) k = int(args['num_k']) #a mixture of K Gaussian functions lr = float(args['lr']) origLR = lr debug = int(args['debug']) print "trial no. %d" % trial print "batch size %d" % batch_size print "learning rate %f" % lr print "saving pkl file '%s'" % pkl_name print "to the save path '%s'" % save_path q_z_dim = 350 p_z_dim = 400 p_x_dim = 450 x2s_dim = 400 y2s_dim = 200 z2s_dim = 350 target_dim = k # As different appliances are separeted in theta_mu1, theta_mu2, etc... each one is just created from k different Gaussians model = Model() Xtrain, ytrain, Xval, yval, Xtest, ytest, reader = fetch_dataport( data_path, windows, appliances, numApps=-1, period=period, n_steps=n_steps, stride_train=stride_train, stride_test=stride_test, flgAggSumScaled=1, flgFilterZeros=1, typeLoad=typeLoad) instancesPlot = {0: [10]} #instancesPlot = reader.build_dict_instances_plot(listDates, batch_size, Xval.shape[0]) train_data = Dataport( name='train', prep='normalize', cond=True, # False #path=data_path, inputX=Xtrain, labels=ytrain) X_mean = train_data.X_mean X_std = train_data.X_std valid_data = Dataport( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xval, labels=yval) test_data = Dataport( name='valid', prep='normalize', cond=True, # False #path=data_path, X_mean=X_mean, X_std=X_std, inputX=Xtest, labels=ytest) init_W = InitCell('rand') init_U = InitCell('ortho') init_b = InitCell('zeros') init_b_sig = InitCell('const', mean=0.6) x, mask, y, y_mask = train_data.theano_vars() scheduleSamplingMask = T.fvector('schedMask') x.name = 'x_original' if debug: x.tag.test_value = np.zeros((15, batch_size, x_dim), dtype=np.float32) temp = np.ones((15, batch_size), dtype=np.float32) temp[:, -2:] = 0. 
mask.tag.test_value = temp """rnn = LSTM(name='rnn', parent=['x_1', 'z_1', 'y_1'], parent_dim=[x2s_dim, z2s_dim, y2s_dim], nout=rnn_dim, unit='tanh', init_W=mainloop.model.nodes[0].init_W, init_U=mainloop.model.nodes[0].init_U, init_b=mainloop.model.nodes[0].init_b) x_1 = FullyConnectedLayer(name='x_1', parent=['x_t'], parent_dim=[x_dim], nout=x2s_dim, unit='relu', init_W=init_W, init_b=init_b) y_1 = FullyConnectedLayer(name='y_1', parent=['y_t'], parent_dim=[y_dim], nout=y2s_dim, unit='relu', init_W=init_W, init_b=init_b) z_1 = FullyConnectedLayer(name='z_1', parent=['z_t'], parent_dim=[z_dim], nout=z2s_dim, unit='relu', init_W=init_W, init_b=init_b) phi_1 = FullyConnectedLayer(name='phi_1', parent=['x_1', 's_tm1','y_1'], parent_dim=[x2s_dim, rnn_dim, y2s_dim], nout=q_z_dim, unit='relu', init_W=init_W, init_b=init_b) phi_mu = FullyConnectedLayer(name='phi_mu', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) phi_sig = FullyConnectedLayer(name='phi_sig', parent=['phi_1'], parent_dim=[q_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) prior_1 = FullyConnectedLayer(name='prior_1', parent=['x_1','s_tm1'], parent_dim=[x2s_dim,rnn_dim], nout=p_z_dim, unit='relu', init_W=init_W, init_b=init_b) prior_mu = FullyConnectedLayer(name='prior_mu', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='linear', init_W=init_W, init_b=init_b) prior_sig = FullyConnectedLayer(name='prior_sig', parent=['prior_1'], parent_dim=[p_z_dim], nout=z_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_1 = FullyConnectedLayer(name='theta_1', parent=['z_1', 's_tm1'], parent_dim=[z2s_dim, rnn_dim], nout=p_x_dim, unit='relu', init_W=init_W, init_b=init_b) theta_mu1 = FullyConnectedLayer(name='theta_mu1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu2 = FullyConnectedLayer(name='theta_mu2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu3 = FullyConnectedLayer(name='theta_mu3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu4 = FullyConnectedLayer(name='theta_mu4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_mu5 = FullyConnectedLayer(name='theta_mu5', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='linear', init_W=init_W, init_b=init_b) theta_sig1 = FullyConnectedLayer(name='theta_sig1', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig2 = FullyConnectedLayer(name='theta_sig2', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig3 = FullyConnectedLayer(name='theta_sig3', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig4 = FullyConnectedLayer(name='theta_sig4', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) theta_sig5 = FullyConnectedLayer(name='theta_sig5', parent=['theta_1'], parent_dim=[p_x_dim], nout=target_dim, unit='softplus', cons=1e-4, init_W=init_W, init_b=init_b_sig) coeff1 = FullyConnectedLayer(name='coeff1', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff2 = 
FullyConnectedLayer(name='coeff2', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff3 = FullyConnectedLayer(name='coeff3', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff4 = FullyConnectedLayer(name='coeff4', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b) coeff5 = FullyConnectedLayer(name='coeff5', parent=['theta_1'], parent_dim=[p_x_dim], nout=k, unit='softmax', init_W=init_W, init_b=init_b)""" #from experiment 18-05-31_18-48 fmodel = open('disall.pkl', 'rb') mainloop = cPickle.load(fmodel) fmodel.close() #define layers rnn = mainloop.model.nodes[0] x_1 = mainloop.model.nodes[1] y_1 = mainloop.model.nodes[2] z_1 = mainloop.model.nodes[3] phi_1 = mainloop.model.nodes[4] phi_mu = mainloop.model.nodes[5] phi_sig = mainloop.model.nodes[6] prior_1 = mainloop.model.nodes[7] prior_mu = mainloop.model.nodes[8] prior_sig = mainloop.model.nodes[9] theta_1 = mainloop.model.nodes[10] theta_mu1 = mainloop.model.nodes[11] theta_sig1 = mainloop.model.nodes[12] coeff1 = mainloop.model.nodes[13] nodes = [ rnn, x_1, y_1, z_1, #dissag_pred, phi_1, phi_mu, phi_sig, prior_1, prior_mu, prior_sig, theta_1, theta_mu1, theta_sig1, coeff1 ] params = mainloop.model.params dynamicOutput = [None, None, None, None, None, None, None, None] #dynamicOutput_val = [None, None, None, None, None, None,None, None, None] if (y_dim > 1): theta_mu2 = mainloop.model.nodes[14] theta_sig2 = mainloop.model.nodes[15] coeff2 = mainloop.model.nodes[16] nodes = nodes + [theta_mu2, theta_sig2, coeff2] dynamicOutput = dynamicOutput + [None, None, None, None ] #mu, sig, coef and pred if (y_dim > 2): theta_mu3 = mainloop.model.nodes[17] theta_sig3 = mainloop.model.nodes[18] coeff3 = mainloop.model.nodes[19] nodes = nodes + [theta_mu3, theta_sig3, coeff3] dynamicOutput = dynamicOutput + [None, None, None, None] if (y_dim > 3): theta_mu4 = mainloop.model.nodes[20] theta_sig4 = mainloop.model.nodes[21] coeff4 = mainloop.model.nodes[22] nodes = nodes + [theta_mu4, theta_sig4, coeff4] dynamicOutput = dynamicOutput + [None, None, None, None] if (y_dim > 4): theta_mu5 = mainloop.model.nodes[23] theta_sig5 = mainloop.model.nodes[24] coeff5 = mainloop.model.nodes[25] nodes = nodes + [theta_mu5, theta_sig5, coeff5] dynamicOutput = dynamicOutput + [None, None, None, None] s_0 = rnn.get_init_state(batch_size) x_1_temp = x_1.fprop([x], params) y_1_temp = y_1.fprop([y], params) output_fn = [s_0] + dynamicOutput output_fn_val = [s_0] + dynamicOutput[2:] print(len(output_fn), len(output_fn_val)) def inner_fn_test(x_t, s_tm1): prior_1_t = prior_1.fprop([x_t, s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample( prior_mu_t, prior_sig_t ) #in the original code it is gaussian. 
GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu1_t = theta_mu1.fprop([theta_1_t], params) theta_sig1_t = theta_sig1.fprop([theta_1_t], params) coeff1_t = coeff1.fprop([theta_1_t], params) y_pred1 = GMM_sampleY( theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t) tupleMulti = prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1 if (y_dim > 1): theta_mu2_t = theta_mu2.fprop([theta_1_t], params) theta_sig2_t = theta_sig2.fprop([theta_1_t], params) coeff2_t = coeff2.fprop([theta_1_t], params) y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t) y_pred1 = T.concatenate([y_pred1, y_pred2], axis=1) tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2) if (y_dim > 2): theta_mu3_t = theta_mu3.fprop([theta_1_t], params) theta_sig3_t = theta_sig3.fprop([theta_1_t], params) coeff3_t = coeff3.fprop([theta_1_t], params) y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t) y_pred1 = T.concatenate([y_pred1, y_pred3], axis=1) tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3) if (y_dim > 3): theta_mu4_t = theta_mu4.fprop([theta_1_t], params) theta_sig4_t = theta_sig4.fprop([theta_1_t], params) coeff4_t = coeff4.fprop([theta_1_t], params) y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t) y_pred1 = T.concatenate([y_pred1, y_pred4], axis=1) tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4) if (y_dim > 4): theta_mu5_t = theta_mu5.fprop([theta_1_t], params) theta_sig5_t = theta_sig5.fprop([theta_1_t], params) coeff5_t = coeff5.fprop([theta_1_t], params) y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t) y_pred1 = T.concatenate([y_pred1, y_pred5], axis=1) tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5) pred_1_t = y_1.fprop([y_pred1], params) #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 ) s_t = rnn.fprop([[x_t, z_1_t, pred_1_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return (s_t, ) + tupleMulti #corr_temp, binary_temp (restResults_val, updates_val) = theano.scan(fn=inner_fn_test, sequences=[x_1_temp], outputs_info=output_fn_val) for k, v in updates_val.iteritems(): k.default_update = v """def inner_fn(x_t, y_t, s_tm1): phi_1_t = phi_1.fprop([x_t, s_tm1, y_t], params) phi_mu_t = phi_mu.fprop([phi_1_t], params) phi_sig_t = phi_sig.fprop([phi_1_t], params) prior_1_t = prior_1.fprop([x_t,s_tm1], params) prior_mu_t = prior_mu.fprop([prior_1_t], params) prior_sig_t = prior_sig.fprop([prior_1_t], params) z_t = Gaussian_sample(phi_mu_t, phi_sig_t)#in the original code it is gaussian. 
GMM is for the generation z_1_t = z_1.fprop([z_t], params) theta_1_t = theta_1.fprop([z_1_t, s_tm1], params) theta_mu1_t = theta_mu1.fprop([theta_1_t], params) theta_sig1_t = theta_sig1.fprop([theta_1_t], params) coeff1_t = coeff1.fprop([theta_1_t], params) y_pred1 = GMM_sampleY(theta_mu1_t, theta_sig1_t, coeff1_t) #Gaussian_sample(theta_mu_t, theta_sig_t) tupleMulti = phi_mu_t, phi_sig_t, prior_mu_t, prior_sig_t, theta_mu1_t, theta_sig1_t, coeff1_t, y_pred1 if (y_dim>1): theta_mu2_t = theta_mu2.fprop([theta_1_t], params) theta_sig2_t = theta_sig2.fprop([theta_1_t], params) coeff2_t = coeff2.fprop([theta_1_t], params) y_pred2 = GMM_sampleY(theta_mu2_t, theta_sig2_t, coeff2_t) tupleMulti = tupleMulti + (theta_mu2_t, theta_sig2_t, coeff2_t, y_pred2) if (y_dim>2): theta_mu3_t = theta_mu3.fprop([theta_1_t], params) theta_sig3_t = theta_sig3.fprop([theta_1_t], params) coeff3_t = coeff3.fprop([theta_1_t], params) y_pred3 = GMM_sampleY(theta_mu3_t, theta_sig3_t, coeff3_t) tupleMulti = tupleMulti + (theta_mu3_t, theta_sig3_t, coeff3_t, y_pred3) if (y_dim>3): theta_mu4_t = theta_mu4.fprop([theta_1_t], params) theta_sig4_t = theta_sig4.fprop([theta_1_t], params) coeff4_t = coeff4.fprop([theta_1_t], params) y_pred4 = GMM_sampleY(theta_mu4_t, theta_sig4_t, coeff4_t) tupleMulti = tupleMulti + (theta_mu4_t, theta_sig4_t, coeff4_t, y_pred4) if (y_dim>4): theta_mu5_t = theta_mu5.fprop([theta_1_t], params) theta_sig5_t = theta_sig5.fprop([theta_1_t], params) coeff5_t = coeff5.fprop([theta_1_t], params) y_pred5 = GMM_sampleY(theta_mu5_t, theta_sig5_t, coeff5_t) tupleMulti = tupleMulti + (theta_mu5_t, theta_sig5_t, coeff5_t, y_pred5) #y_pred = [GMM_sampleY(theta_mu_t[i], theta_sig_t[i], coeff_t[i]) for i in range(y_dim)]#T.stack([y_pred1,y_pred2],axis = 0 ) s_t = rnn.fprop([[x_t, z_1_t, y_t], [s_tm1]], params) #y_pred = dissag_pred.fprop([s_t], params) return (s_t,)+tupleMulti #corr_temp, binary_temp (restResults, updates) = theano.scan(fn=inner_fn, sequences=[x_1_temp, y_1_temp], outputs_info=output_fn ) ''' ((s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,z_t_temp, z_1_temp, theta_1_temp, theta_mu1_temp, theta_sig1_temp, coeff1_temp, theta_mu2_temp, theta_sig2_temp, coeff2_temp, theta_mu3_temp, theta_sig3_temp, coeff3_temp, theta_mu4_temp, theta_sig4_temp, coeff4_temp, theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred1_temp, y_pred2_temp, y_pred3_temp, y_pred4_temp, y_pred5_temp), updates) =\ theano.scan(fn=inner_fn, sequences=[x_1_temp, y_1_temp], outputs_info=[s_0, None, None, None, None, None, None, None, None,None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]) ''' s_temp, phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp,theta_mu1_temp, theta_sig1_temp, coeff1_temp, y_pred1_temp = restResults[:9] restResults = restResults[9:] for k, v in updates.iteritems(): k.default_update = v #s_temp = concatenate([s_0[None, :, :], s_temp[:-1]], axis=0)# seems like this is for creating an additional dimension to s_0 theta_mu1_temp.name = 'theta_mu1' theta_sig1_temp.name = 'theta_sig1' coeff1_temp.name = 'coeff1' y_pred1_temp.name = 'disaggregation1' #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse1 = T.mean((y_pred1_temp - y[:,:,0].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all mae1 = T.mean( T.abs_(y_pred1_temp - y[:,:,0].reshape((y.shape[0],y.shape[1],1))) ) mse1.name = 'mse1' mae1.name = 'mae1' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp)""" 
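    # ------------------------------------------------------------------
    # Illustrative sketch (an assumption, not the library implementation):
    # GMM_sampleY above draws one appliance value per time step from a
    # K-component Gaussian mixture, given per-step means, scales and mixing
    # coefficients. A minimal NumPy version of that sampling step, under the
    # hypothetical name gmm_sample_sketch, could look like this:
    def gmm_sample_sketch(mu, sig, coeff, rng=np.random):
        """mu, sig, coeff: (batch, K) arrays; returns (batch, 1) samples."""
        batch, K = coeff.shape
        out = np.empty((batch, 1))
        for i in range(batch):
            # pick a mixture component according to the (normalised) coefficients
            comp = rng.choice(K, p=coeff[i] / coeff[i].sum())
            # then sample from the selected Gaussian component
            out[i, 0] = mu[i, comp] + sig[i, comp] * rng.randn()
        return out
    # ------------------------------------------------------------------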
x_shape = x.shape y_shape = y.shape x_in = x.reshape((x_shape[0] * x_shape[1], -1)) y_in = y.reshape((y_shape[0] * y_shape[1], -1)) """theta_mu1_in = theta_mu1_temp.reshape((x_shape[0]*x_shape[1], -1)) theta_sig1_in = theta_sig1_temp.reshape((x_shape[0]*x_shape[1], -1)) coeff1_in = coeff1_temp.reshape((x_shape[0]*x_shape[1], -1)) ddoutMSEA = [] ddoutYpreds = [y_pred1_temp] indexSepDynamic = 6 # plus 1 for TOTAMSE #totaMSE = T.copy(mse1) mse2 = T.zeros((1,)) mae2 = T.zeros((1,)) mse3 = T.zeros((1,)) mae3 = T.zeros((1,)) mse4 = T.zeros((1,)) mae4 = T.zeros((1,)) mse5 = T.zeros((1,)) mae5 = T.zeros((1,)) if (y_dim>1): theta_mu2_temp, theta_sig2_temp, coeff2_temp, y_pred2_temp = restResults[:4] restResults = restResults[4:] theta_mu2_temp.name = 'theta_mu2' theta_sig2_temp.name = 'theta_sig2' coeff2_temp.name = 'coeff2' y_pred2_temp.name = 'disaggregation2' mse2 = T.mean((y_pred2_temp - y[:,:,1].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all mae2 = T.mean( T.abs_(y_pred2_temp - y[:,:,1].reshape((y.shape[0],y.shape[1],1))) ) mse2.name = 'mse2' mae2.name = 'mae2' theta_mu2_in = theta_mu2_temp.reshape((x_shape[0]*x_shape[1], -1)) theta_sig2_in = theta_sig2_temp.reshape((x_shape[0]*x_shape[1], -1)) coeff2_in = coeff2_temp.reshape((x_shape[0]*x_shape[1], -1)) argsGMM = theta_mu2_in, theta_sig2_in, coeff2_in ddoutMSEA = ddoutMSEA + [mse2, mae2] ddoutYpreds = ddoutYpreds + [y_pred2_temp] #totaMSE+=mse2 indexSepDynamic +=2 if (y_dim>2): theta_mu3_temp, theta_sig3_temp, coeff3_temp, y_pred3_temp = restResults[:4] restResults = restResults[4:] theta_mu3_temp.name = 'theta_mu3' theta_sig3_temp.name = 'theta_sig3' coeff3_temp.name = 'coeff3' y_pred3_temp.name = 'disaggregation3' mse3 = T.mean((y_pred3_temp - y[:,:,2].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all mae3 = T.mean( T.abs_(y_pred3_temp - y[:,:,2].reshape((y.shape[0],y.shape[1],1))) ) mse3.name = 'mse3' mae3.name = 'mae3' theta_mu3_in = theta_mu3_temp.reshape((x_shape[0]*x_shape[1], -1)) theta_sig3_in = theta_sig3_temp.reshape((x_shape[0]*x_shape[1], -1)) coeff3_in = coeff3_temp.reshape((x_shape[0]*x_shape[1], -1)) argsGMM = argsGMM + (theta_mu3_in, theta_sig3_in, coeff3_in) ddoutMSEA = ddoutMSEA + [mse3, mae3] ddoutYpreds = ddoutYpreds + [y_pred3_temp] #totaMSE+=mse3 indexSepDynamic +=2 if (y_dim>3): theta_mu4_temp, theta_sig4_temp, coeff4_temp, y_pred4_temp = restResults[:4] restResults = restResults[4:] theta_mu4_temp.name = 'theta_mu4' theta_sig4_temp.name = 'theta_sig4' coeff4_temp.name = 'coeff4' y_pred4_temp.name = 'disaggregation4' mse4 = T.mean((y_pred4_temp - y[:,:,3].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all mae4 = T.mean( T.abs_(y_pred4_temp - y[:,:,3].reshape((y.shape[0],y.shape[1],1))) ) mse4.name = 'mse4' mae4.name = 'mae4' theta_mu4_in = theta_mu4_temp.reshape((x_shape[0]*x_shape[1], -1)) theta_sig4_in = theta_sig4_temp.reshape((x_shape[0]*x_shape[1], -1)) coeff4_in = coeff4_temp.reshape((x_shape[0]*x_shape[1], -1)) argsGMM = argsGMM + (theta_mu4_in, theta_sig4_in, coeff4_in) ddoutMSEA = ddoutMSEA + [mse4, mae4] ddoutYpreds = ddoutYpreds + [y_pred4_temp] #totaMSE+=mse4 indexSepDynamic +=2 if (y_dim>4): theta_mu5_temp, theta_sig5_temp, coeff5_temp, y_pred5_temp = restResults[:4] restResults = restResults[4:] theta_mu5_temp.name = 'theta_mu5' theta_sig5_temp.name = 'theta_sig5' coeff5_temp.name = 'coeff5' y_pred5_temp.name = 'disaggregation5' mse5 = T.mean((y_pred5_temp - y[:,:,4].reshape((y.shape[0],y.shape[1],1)))**2) # As axis = 
None is calculated for all mae5 = T.mean( T.abs_(y_pred5_temp - y[:,:,4].reshape((y.shape[0],y.shape[1],1))) ) mse5.name = 'mse5' mae5.name = 'mae5' theta_mu5_in = theta_mu5_temp.reshape((x_shape[0]*x_shape[1], -1)) theta_sig5_in = theta_sig5_temp.reshape((x_shape[0]*x_shape[1], -1)) coeff5_in = coeff5_temp.reshape((x_shape[0]*x_shape[1], -1)) argsGMM = argsGMM + (theta_mu5_in, theta_sig5_in, coeff5_in) ddoutMSEA = ddoutMSEA + [mse5, mae5] ddoutYpreds = ddoutYpreds + [y_pred5_temp] #totaMSE+=mse5 indexSepDynamic +=2 totaMSE = (mse1+mse2+mse3+mse4+mse5)/y_dim totaMSE.name = 'mse' kl_temp = KLGaussianGaussian(phi_mu_temp, phi_sig_temp, prior_mu_temp, prior_sig_temp) x_shape = x.shape y_shape = y.shape x_in = x.reshape((x_shape[0]*x_shape[1], -1)) y_in = y.reshape((y_shape[0]*y_shape[1], -1)) recon = GMMdisagMulti(y_dim, y_in, theta_mu1_in, theta_sig1_in, coeff1_in, *argsGMM)# BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon = recon.reshape((x_shape[0], x_shape[1])) recon.name = 'gmm_out'""" ''' recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in) recon5 = recon.reshape((x_shape[0], x_shape[1])) ''' """recon_term = recon.sum(axis=0).mean() recon_term = recon.sum(axis=0).mean() recon_term.name = 'recon_term' #kl_temp = kl_temp * mask kl_term = kl_temp.sum(axis=0).mean() kl_term.name = 'kl_term' #nll_upper_bound_0 = recon_term + kl_term #nll_upper_bound_0.name = 'nll_upper_bound_0' if (flgMSE==1): nll_upper_bound = recon_term + kl_term + totaMSE else: nll_upper_bound = recon_term + kl_term nll_upper_bound.name = 'nll_upper_bound'""" ######################## TEST (GENERATION) TIME s_temp_val, prior_mu_temp_val, prior_sig_temp_val, \ theta_mu1_temp_val, theta_sig1_temp_val, coeff1_temp_val, y_pred1_temp_val = restResults_val[:7] restResults_val = restResults_val[7:] s_temp_val = concatenate( [s_0[None, :, :], s_temp_val[:-1]], axis=0 ) # seems like this is for creating an additional dimension to s_0 theta_mu1_temp_val.name = 'theta_mu1_val' theta_sig1_temp_val.name = 'theta_sig1_val' coeff1_temp_val.name = 'coeff1_val' y_pred1_temp_val.name = 'disaggregation1_val' #[:,:,flgAgg].reshape((y.shape[0],y.shape[1],1) mse1_val = T.mean((y_pred1_temp_val - y[:, :, 0].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae1_val = T.mean( T.abs_(y_pred1_temp_val - y[:, :, 0].reshape((y.shape[0], y.shape[1], 1)))) #NEURALNILM #(sum_output - sum_target) / max(sum_output, sum_target)) totPred = T.sum(y_pred1_temp_val) totReal = T.sum(y[:, :, 0]) relErr1_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned1_val = 1 - T.sum( T.abs_(y_pred1_temp_val - y[:, :, 0].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) #y_unNormalize = (y[:,:,0] * reader.stdTraining[0]) + reader.meanTraining[0] #y_pred1_temp_val = (y_pred1_temp_val * reader.stdTraining[0]) + reader.meanTraining[0] #mse1_valUnNorm = T.mean((y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))**2) # As axis = None is calculated for all #mae1_valUnNorm = T.mean( T.abs_(y_pred1_temp_val - y_unNormalize.reshape((y.shape[0],y.shape[1],1)))) mse1_val.name = 'mse1_val' mae1_val.name = 'mae1_val' theta_mu1_in_val = theta_mu1_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig1_in_val = theta_sig1_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff1_in_val = coeff1_temp_val.reshape((x_shape[0] * x_shape[1], -1)) ddoutMSEA_val = [] ddoutYpreds_val = [y_pred1_temp_val] totaMSE_val = mse1_val totaMAE_val = mae1_val indexSepDynamic_val = 5 
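    # ------------------------------------------------------------------
    # Illustrative sketch (assumption): relErr1_val and propAssigned1_val above
    # follow NeuralNILM-style disaggregation metrics, i.e. the relative error
    # in total energy and the proportion of total energy correctly assigned.
    # For a single appliance they reduce to the following NumPy expressions
    # (hypothetical helper names):
    def relative_error_in_total_energy_sketch(pred, target):
        tot_pred, tot_real = float(pred.sum()), float(target.sum())
        return (tot_pred - tot_real) / max(tot_pred, tot_real)

    def proportion_energy_assigned_sketch(pred, target, aggregate):
        # 1 minus the absolute disaggregation error summed over time,
        # normalised by twice the total aggregate consumption
        return 1.0 - float(np.abs(pred - target).sum()) / (2.0 * float(aggregate.sum()))
    # ------------------------------------------------------------------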
prediction_val = y_pred1_temp_val #Initializing values of mse and mae mse2_val = T.zeros((1, )) mae2_val = T.zeros((1, )) mse3_val = T.zeros((1, )) mae3_val = T.zeros((1, )) mse4_val = T.zeros((1, )) mae4_val = T.zeros((1, )) mse5_val = T.zeros((1, )) mae5_val = T.zeros((1, )) relErr2_val = T.zeros((1, )) relErr3_val = T.zeros((1, )) relErr4_val = T.zeros((1, )) relErr5_val = T.zeros((1, )) propAssigned2_val = T.zeros((1, )) propAssigned3_val = T.zeros((1, )) propAssigned4_val = T.zeros((1, )) propAssigned5_val = T.zeros((1, )) if (y_dim > 1): theta_mu2_temp_val, theta_sig2_temp_val, coeff2_temp_val, y_pred2_temp_val = restResults_val[: 4] restResults_val = restResults_val[4:] theta_mu2_temp_val.name = 'theta_mu2_val' theta_sig2_temp_val.name = 'theta_sig2_val' coeff2_temp_val.name = 'coeff2_val' y_pred2_temp_val.name = 'disaggregation2_val' mse2_val = T.mean((y_pred2_temp_val - y[:, :, 1].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae2_val = T.mean( T.abs_(y_pred2_temp_val - y[:, :, 1].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred2_temp_val) totReal = T.sum(y[:, :, 1]) relErr2_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned2_val = 1 - T.sum( T.abs_(y_pred2_temp_val - y[:, :, 1].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse2_val.name = 'mse2_val' mae2_val.name = 'mae2_val' theta_mu2_in_val = theta_mu2_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig2_in_val = theta_sig2_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff2_in_val = coeff2_temp_val.reshape((x_shape[0] * x_shape[1], -1)) argsGMM_val = theta_mu2_in_val, theta_sig2_in_val, coeff2_in_val ddoutMSEA_val = ddoutMSEA_val + [mse2_val, mae2_val] ddoutYpreds_val = ddoutYpreds_val + [y_pred2_temp_val] totaMSE_val += mse2_val totaMAE_val += mae2_val indexSepDynamic_val += 2 prediction_val = T.concatenate([prediction_val, y_pred2_temp_val], axis=2) if (y_dim > 2): theta_mu3_temp_val, theta_sig3_temp_val, coeff3_temp_val, y_pred3_temp_val = restResults_val[: 4] restResults_val = restResults_val[4:] theta_mu3_temp_val.name = 'theta_mu3_val' theta_sig3_temp_val.name = 'theta_sig3_val' coeff3_temp_val.name = 'coeff3_val' y_pred3_temp_val.name = 'disaggregation3_val' mse3_val = T.mean((y_pred3_temp_val - y[:, :, 2].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae3_val = T.mean( T.abs_(y_pred3_temp_val - y[:, :, 2].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred3_temp_val) totReal = T.sum(y[:, :, 2]) relErr3_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned3_val = 1 - T.sum( T.abs_(y_pred3_temp_val - y[:, :, 2].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse3_val.name = 'mse3_val' mae3_val.name = 'mae3_val' theta_mu3_in_val = theta_mu3_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig3_in_val = theta_sig3_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff3_in_val = coeff3_temp_val.reshape((x_shape[0] * x_shape[1], -1)) argsGMM_val = argsGMM_val + (theta_mu3_in_val, theta_sig3_in_val, coeff3_in_val) ddoutMSEA_val = ddoutMSEA_val + [mse3_val, mae3_val] ddoutYpreds_val = ddoutYpreds_val + [y_pred3_temp_val] totaMSE_val += mse3_val totaMAE_val += mae3_val indexSepDynamic_val += 2 prediction_val = T.concatenate([prediction_val, y_pred3_temp_val], axis=2) if (y_dim > 3): theta_mu4_temp_val, theta_sig4_temp_val, coeff4_temp_val, y_pred4_temp_val = restResults_val[: 4] restResults_val = restResults_val[4:] theta_mu4_temp_val.name = 'theta_mu4_val' 
theta_sig4_temp_val.name = 'theta_sig4_val' coeff4_temp_val.name = 'coeff4_val' y_pred4_temp_val.name = 'disaggregation4_val' mse4_val = T.mean((y_pred4_temp_val - y[:, :, 3].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae4_val = T.mean( T.abs_(y_pred4_temp_val - y[:, :, 3].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred4_temp_val) totReal = T.sum(y[:, :, 3]) relErr4_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned4_val = 1 - T.sum( T.abs_(y_pred4_temp_val - y[:, :, 3].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse4_val.name = 'mse4_val' mae4_val.name = 'mae4_val' theta_mu4_in_val = theta_mu4_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig4_in_val = theta_sig4_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff4_in_val = coeff4_temp_val.reshape((x_shape[0] * x_shape[1], -1)) argsGMM_val = argsGMM_val + (theta_mu4_in_val, theta_sig4_in_val, coeff4_in_val) ddoutMSEA_val = ddoutMSEA_val + [mse4_val, mae4_val] ddoutYpreds_val = ddoutYpreds_val + [y_pred4_temp_val] totaMSE_val += mse4_val totaMAE_val += mae4_val indexSepDynamic_val += 2 prediction_val = T.concatenate([prediction_val, y_pred4_temp_val], axis=2) if (y_dim > 4): theta_mu5_temp_val, theta_sig5_temp_val, coeff5_temp_val, y_pred5_temp_val = restResults_val[: 4] restResults_val = restResults_val[4:] theta_mu5_temp_val.name = 'theta_mu5_val' theta_sig5_temp_val.name = 'theta_sig5_val' coeff5_temp_val.name = 'coeff5_val' y_pred5_temp_val.name = 'disaggregation5_val' mse5_val = T.mean((y_pred5_temp_val - y[:, :, 4].reshape( (y.shape[0], y.shape[1], 1)))**2) # As axis = None is calculated for all mae5_val = T.mean( T.abs_(y_pred5_temp_val - y[:, :, 4].reshape((y.shape[0], y.shape[1], 1)))) totPred = T.sum(y_pred5_temp_val) totReal = T.sum(y[:, :, 4]) relErr5_val = (totPred - totReal) / T.maximum(totPred, totReal) propAssigned5_val = 1 - T.sum( T.abs_(y_pred5_temp_val - y[:, :, 4].reshape( (y.shape[0], y.shape[1], 1)))) / (2 * T.sum(x)) mse5_val.name = 'mse5_val' mae5_val.name = 'mae5_val' theta_mu5_in_val = theta_mu5_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) theta_sig5_in_val = theta_sig5_temp_val.reshape( (x_shape[0] * x_shape[1], -1)) coeff5_in_val = coeff5_temp_val.reshape((x_shape[0] * x_shape[1], -1)) argsGMM_val = argsGMM_val + (theta_mu5_in_val, theta_sig5_in_val, coeff5_in_val) ddoutMSEA_val = ddoutMSEA_val + [mse5_val, mae5_val] ddoutYpreds_val = ddoutYpreds_val + [y_pred5_temp_val] totaMSE_val += mse5_val totaMAE_val += mae5_val indexSepDynamic_val += 2 prediction_val = T.concatenate([prediction_val, y_pred5_temp_val], axis=2) recon_val = GMMdisagMulti( y_dim, y_in, theta_mu1_in_val, theta_sig1_in_val, coeff1_in_val, *argsGMM_val ) # BiGMM(x_in, theta_mu_in, theta_sig_in, coeff_in, corr_in, binary_in) recon_val = recon_val.reshape((x_shape[0], x_shape[1])) recon_val.name = 'gmm_out' totaMSE_val = totaMSE_val / y_dim totaMAE_val = totaMAE_val / y_dim ''' recon5 = GMM(y_in[:,4, None], theta_mu5_in, theta_sig5_in, coeff5_in) recon5 = recon.reshape((x_shape[0], x_shape[1])) ''' recon_term_val = recon_val.sum(axis=0).mean() recon_term_val = recon_val.sum(axis=0).mean() recon_term_val.name = 'recon_term' ###################### model.inputs = [x, mask, y, y_mask, scheduleSamplingMask] model.params = params model.nodes = nodes optimizer = Adam(lr=lr) header = "epoch,log,kl,nll_upper_bound,mse,mae\n" extension = [ GradientClipping(batch_size=batch_size), EpochCount(epoch, save_path, header), Monitoring( freq=monitoring_freq, 
#ddout=[nll_upper_bound, recon_term, totaMSE, kl_term, mse1, mae1]+ddoutMSEA+ddoutYpreds , #indexSep=indexSepDynamic, indexDDoutPlot=[13], # adding indexes of ddout for the plotting #, (6,y_pred_temp) instancesPlot=instancesPlot, #0-150 data=[Iterator(valid_data, batch_size)], savedFolder=save_path), Picklize(freq=monitoring_freq, path=save_path), EarlyStopping(freq=monitoring_freq, path=save_path, channel=channel_name), WeightNorm() ] lr_iterations = {0: lr} ''' mainloop = Training( name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], n_steps = n_steps, extension=extension, lr_iterations=lr_iterations ) ''' """mainloop.restore( name=pkl_name, data=Iterator(train_data, batch_size), model=model, optimizer=optimizer, cost=nll_upper_bound, outputs=[nll_upper_bound], n_steps = n_steps, extension=extension, lr_iterations=lr_iterations ) mainloop.run()""" data = Iterator(test_data, batch_size) test_fn = theano.function( inputs=[x, y], #[x, y], #givens={x:Xtest}, #on_unused_input='ignore', #z=( ,200,1) allow_input_downcast=True, outputs=[ prediction_val, recon_term_val, totaMSE_val, totaMAE_val, mse1_val, mse2_val, mse3_val, mse4_val, mse5_val, mae1_val, mae2_val, mae3_val, mae4_val, mae5_val, relErr1_val, relErr2_val, relErr3_val, relErr4_val, relErr5_val, propAssigned1_val, propAssigned2_val, propAssigned3_val, propAssigned4_val, propAssigned5_val ] #prediction_val, mse_val, mae_val , updates= updates_val #, allow_input_downcast=True, on_unused_input='ignore' ) testOutput = [] testMetrics2 = [] numBatchTest = 0 for batch in data: outputGeneration = test_fn(batch[0], batch[2]) testOutput.append(outputGeneration[1:14]) testMetrics2.append(outputGeneration[14:]) #{0:[4,20], 2:[5,10]} #if (numBatchTest==0): plt.figure(1) plt.plot(np.transpose(outputGeneration[0], [1, 0, 2])[4]) #ORIGINAL 1,0,2 plt.savefig(save_path + "/vrnn_dis_generated{}_Pred_0-4".format(numBatchTest)) plt.clf() plt.figure(2) plt.plot(np.transpose(batch[2], [1, 0, 2])[4]) plt.savefig(save_path + "/vrnn_dis_generated{}_RealDisag_0-4".format(numBatchTest)) plt.clf() plt.figure(3) plt.plot(np.transpose(batch[0], [1, 0, 2])[4]) #ORIGINAL 1,0,2 plt.savefig(save_path + "/vrnn_dis_generated{}_Realagg_0-4".format(numBatchTest)) plt.clf() numBatchTest += 1 testOutput = np.asarray(testOutput) testMetrics2 = np.asarray(testMetrics2) print(testOutput.shape) print(testMetrics2.shape) recon_test = testOutput[:, 0].mean() mse_test = testOutput[:, 1].mean() mae_test = testOutput[:, 2].mean() mse1_test = testOutput[:, 3].mean() mae1_test = testOutput[:, 8].mean() mse2_test = testOutput[:, 4].mean() mae2_test = testOutput[:, 9].mean() mse3_test = testOutput[:, 5].mean() mae3_test = testOutput[:, 10].mean() mse4_test = testOutput[:, 6].mean() mae4_test = testOutput[:, 11].mean() mse5_test = testOutput[:, 7].mean() mae5_test = testOutput[:, 12].mean() relErr1_test = testMetrics2[:, 0].mean() relErr2_test = testMetrics2[:, 1].mean() relErr3_test = testMetrics2[:, 2].mean() relErr4_test = testMetrics2[:, 3].mean() relErr5_test = testMetrics2[:, 4].mean() propAssigned1_test = testMetrics2[:, 5].mean() propAssigned2_test = testMetrics2[:, 6].mean() propAssigned3_test = testMetrics2[:, 7].mean() propAssigned4_test = testMetrics2[:, 8].mean() propAssigned5_test = testMetrics2[:, 9].mean() fLog = open(save_path + '/output.csv', 'w') fLog.write(str(lr_iterations) + "\n") fLog.write(str(appliances) + "\n") fLog.write(str(windows) + "\n") fLog.write( 
"logTest,mse1_test,mse2_test,mse3_test,mse4_test,mse5_test,mae1_test,mae2_test,mae3_test,mae4_test,mae5_test,mseTest,maeTest\n" ) fLog.write("{},{},{},{},{},{},{},{},{},{},{},{},{}\n\n".format( recon_test, mse1_test, mse2_test, mse3_test, mse4_test, mse5_test, mae1_test, mae2_test, mae3_test, mae4_test, mae5_test, mse_test, mae_test)) fLog.write( "relErr1,relErr2,relErr3,relErr4,relErr5,propAssigned1,propAssigned2,propAssigned3,propAssigned4,propAssigned5\n" ) fLog.write("{},{},{},{},{},{},{},{},{},{}\n".format( relErr1_test, relErr2_test, relErr3_test, relErr4_test, relErr5_test, propAssigned1_test, propAssigned2_test, propAssigned3_test, propAssigned4_test, propAssigned5_test)) fLog.write("q_z_dim,p_z_dim,p_x_dim,x2s_dim,y2s_dim,z2s_dim\n") fLog.write("{},{},{},{},{},{}\n".format(q_z_dim, p_z_dim, p_x_dim, x2s_dim, y2s_dim, z2s_dim)) fLog.write( "epoch,log,kl,mse1,mse2,mse3,mse4,mse5,mae1,mae2,mae3,mae4,mae5\n") for i, item in enumerate(mainloop.trainlog.monitor['nll_upper_bound']): d, e, f, g, j, k, l, m = 0, 0, 0, 0, 0, 0, 0, 0 ep = mainloop.trainlog.monitor['epoch'][i] a = mainloop.trainlog.monitor['recon_term'][i] b = mainloop.trainlog.monitor['kl_term'][i] c = mainloop.trainlog.monitor['mse1'][i] h = mainloop.trainlog.monitor['mae1'][i] if (y_dim > 1): d = mainloop.trainlog.monitor['mse2'][i] j = mainloop.trainlog.monitor['mae2'][i] if (y_dim > 2): e = mainloop.trainlog.monitor['mse3'][i] k = mainloop.trainlog.monitor['mae3'][i] if (y_dim > 3): f = mainloop.trainlog.monitor['mse4'][i] l = mainloop.trainlog.monitor['mae4'][i] if (y_dim > 4): g = mainloop.trainlog.monitor['mse5'][i] m = mainloop.trainlog.monitor['mae5'][i] fLog.write( "{:d},{:.2f},{:.2f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f},{:.3f}\n" .format(ep, a, b, c, d, e, f, g, h, j, k, l, m)) f = open(save_path + '/outputRealGeneration.pkl', 'wb') pickle.dump(outputGeneration, f, -1) f.close()