def unet_crossentropy_loss_sampled(y_true, y_pred):
    print 'unet_crossentropy_loss_sampled'
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling to get theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0]   # no idea why this is a tuple
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take an equal number of samples from each class, set by whichever class has fewer
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')
    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    # note: this prints the symbolic variable, not a numeric value
    print 'average_loss:', average_loss
    return average_loss
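# The "no idea why this is a tuple" comment above has a simple answer:
# T.nonzero mirrors numpy.nonzero and returns one index vector per axis of
# its argument, so even for a 1-D tensor the result is a one-element tuple
# and [0] extracts the index vector. A minimal self-contained sketch
# (plain Theano, no project-specific names):
import numpy as np
import theano
import theano.tensor as T

v = T.dvector('v')
f = theano.function([v], T.nonzero(v)[0])
print(f(np.array([0., 2., 0., 3.])))  # -> [1 3]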
def nll2(self, y):
    # for predicting whether a course is taken
    return -T.mean(T.log(self.output)[T.nonzero(y)]) \
           - T.mean(T.log(1 - self.output)[T.nonzero(1 - y)])
def prepare_loss(inputlayer, outlayer, pairs, types, loss_function,
                 entropy_penalty=0, V=None, lamb=-1, train_pass=False):
    # reshape to 2d before sending through the network,
    # after which the original shape is recovered
    output = outlayer.output(
        {inputlayer: pairs.reshape((-1, pairs.shape[-1]))},
        train_pass=train_pass).reshape((pairs.shape[0], 2, -1))
    x1, x2 = output[:, 0], output[:, 1]
    cost = loss_function(x1, x2, types)
    same_loss = cost[T.nonzero(types)].mean()
    diff_loss = cost[T.nonzero(1 - types)].mean()
    if lamb >= 0:
        cost = 1 / (lamb + 1) * same_loss + lamb / (lamb + 1) * diff_loss
    else:
        cost = cost.mean()
    ent = entropy_loss(x1, x2)
    total_cost = cost + entropy_penalty * ent
    if V is not None:
        return total_cost, cost, same_loss, diff_loss, ent, calculate_spread(V)
    else:
        return total_cost, cost, same_loss, diff_loss, ent
def unet_crossentropy_loss_sampled(y_true, y_pred):
    # weighted version of the pixel-wise crossentropy loss function
    alpha = 0.6
    epsilon = 1.0e-5
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling to get theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0]   # no idea why this is a tuple
    indNeg = T.nonzero(1 - y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take an equal number of samples from each class, set by whichever class has fewer
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]), dtype='int64')
    # indPos = indPos[:n_samples]
    # indNeg = indNeg[:n_samples]
    total = np.float64(patchSize_out * patchSize_out * patchZ_out)
    loss_vector = ifelse(
        T.gt(n_samples, 0),
        # if this patch has positive samples, calculate the two-term loss
        (-alpha * T.sum(T.log(y_pred_clipped[indPos]))
         - (1 - alpha) * T.sum(T.log(1 - y_pred_clipped[indNeg]))) / total,
        # otherwise only the negative term applies
        -(1 - alpha) * T.sum(T.log(1 - y_pred_clipped[indNeg])) / total)
    average_loss = T.mean(loss_vector) / (1 - alpha)
    return average_loss
def categorical_crossentropy_segm(prediction_proba, targets):
    '''
    MODIFICATIONS:
        - reshape from image-size to array and back
    '''
    shape = T.shape(prediction_proba)
    pred_mod1 = T.transpose(prediction_proba, (0, 2, 3, 1))
    pred_mod = T.reshape(pred_mod1, (-1, shape[1]))
    if prediction_proba.ndim == targets.ndim:
        targ_mod1 = T.transpose(targets, (0, 2, 3, 1))
        targ_mod = T.reshape(targ_mod1, (-1, shape[1]))
    else:
        targ_mod = T.reshape(targets, (-1,))
    results = categorical_crossentropy(pred_mod, targ_mod)
    results = T.reshape(results, (shape[0], shape[2], shape[3]))

    # QUICK IMPLEMENTATION FOR TWO SPECIFIC CLASSES. NEEDS GENERALIZATION
    # Weights depending on class occurrence:
    weights = (1.02275, 44.9647)
    cars_indx, not_cars_indx = T.nonzero(targets), T.nonzero(T.eq(targets, 0))
    # set_subtensor is not in-place: its result must be assigned back
    results = T.set_subtensor(results[cars_indx],
                              results[cars_indx] * float32(weights[1]))
    results = T.set_subtensor(results[not_cars_indx],
                              results[not_cars_indx] * float32(weights[0]))
    return T.sum(results, axis=(1, 2))
def __init__(self, rng, batchsize, epochs=100, alpha=0.001, beta1=0.9,
             beta2=0.999, eps=1e-08, l1_weight=0.0, l2_weight=0.1, cost='mse'):
    self.alpha = alpha
    self.beta1 = beta1
    self.beta2 = beta2
    self.eps = eps
    self.l1_weight = l1_weight
    self.l2_weight = l2_weight
    self.rng = rng
    self.theano_rng = RandomStreams(rng.randint(2 ** 30))
    self.epochs = epochs
    self.batchsize = batchsize

    # Where cost is always the cost which is minimised in supervised training,
    # the T.nonzero term ensures that the cost is only calculated for examples with a label.
    #
    # Convention: we mark unlabelled examples with a vector of zeros in lieu of a one-hot vector.
    if cost == 'mse':
        self.y_pred = lambda network, x: network(x)
        self.error = lambda network, y_pred, y: T.zeros((1,))
        # note the parenthesisation: square the difference, not just y
        self.cost = lambda network, x, y: T.mean(
            (network(x)[T.nonzero(y)] - y[T.nonzero(y)])**2)
    elif cost == 'binary_cross_entropy':
        self.y_pred = lambda network, x: network(x)
        self.cost = lambda network, y_pred, y: T.nnet.binary_crossentropy(
            y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
        # classification error
        self.error = lambda network, y_pred, y: T.mean(
            T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
    elif cost == 'cross_entropy':
        self.y_pred = lambda network, x: network(x)
        self.cost = lambda network, y_pred, y: T.nnet.categorical_crossentropy(
            y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
        # classification error
        self.error = lambda network, y_pred, y: T.mean(
            T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
    else:
        self.y_pred = lambda network, x: network(x)
        self.error = lambda network, y_pred, y: T.zeros((1,))
        self.cost = cost
def partial_errors(self, y):
    b = T.argmax(y, axis=1)
    zero_ind = T.nonzero(T.eq(b, 0))[0]
    one_ind = T.nonzero(T.eq(b, 1))[0]
    neg_error = T.mean(T.neq(self.y_pred[zero_ind], b[zero_ind]))
    pos_error = T.mean(T.neq(self.y_pred[one_ind], b[one_ind]))
    # print (b.eval())
    return (neg_error + pos_error) * 0.5
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-update.

    (This still runs FING SLOWLY!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x / (kp_x + kd_x)
    re = kd_e / (kp_e + kd_e)

    tx_last = create_shared_variable(np.zeros(n_in) + 1)
    te_last = create_shared_variable(np.zeros(n_out) + 1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None] * e_last
        * rx**(tx_last[x_spike_ixs, None] - t_last)
        * re**(te_last[None, :] - t_last)
        * geoseries_sum(re * rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs],
        x_last[x_spike_ixs] * rx**tx_last[x_spike_ixs] + xs[x_spike_ixs] / as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None] * e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None] - t_last)
        * re**(te_last[None, e_spike_ixs] - t_last)
        * geoseries_sum(re * rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs],
        e_last[e_spike_ixs] * re**te_last[e_spike_ixs] + es[e_spike_ixs] / as_floatx(kd_e)))
    add_update(tx_last, new_tx_last + 1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last + 1))
    return dws
def __init__(self, data, labels, n_features, l1, learning_rate=0.01, seed=0):
    self.data = data
    self.labels = labels
    self.l1 = l1
    self.seed = seed
    self.learning_rate = learning_rate
    self.rng = np.random.mtrand.RandomState(self.seed)
    self.w = []
    # TODO: generalise to multiple classes
    for i in range(2):
        self.init_weights(n_features, i)
    self.n_class = []
    # TODO: generalise to multiple classes
    for i in range(2):
        # self.n_class.append(np.shape(self.labels[0]))
        self.n_class.append(T.shape(T.nonzero(T.eq(self.labels, i))[0]))
def train_batch(self, batch_size):
    T = self.AE.T
    T = T.tocsr()
    nonzero_indices = T.nonzero()
    #pdb.set_trace()
    n_users = len(np.unique(nonzero_indices[0]))
    indices = np.unique(nonzero_indices[0])
    for epoch in xrange(self.epochs):
        l = []
        for ind, i in enumerate(xrange(0, n_users, batch_size)):
            # CHECK : SEEMS BUGGY.
            #------------------------
            ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
            #------------------------
            #print ratings
            #pdb.set_trace()
            loss = self.AE.ae_batch(ratings)
            #loss = self.AE.debug(ratings)
            #print loss
            #pdb.set_trace()
            l.append(loss)
        m = np.mean(np.array(l))
        print("mean Loss for epoch %d batch %d is %f" % (epoch, ind, m))
        rmse = self.RMSE_sparse()
        print("RMSE after one epoch is %f" % (rmse))
        f.write(str(rmse) + '\n')
def train_batch(self, batch_size):
    T = self.AE.T
    T = T.tocsr()
    nonzero_indices = T.nonzero()
    #pdb.set_trace()
    n_users = len(np.unique(nonzero_indices[0]))
    indices = np.unique(nonzero_indices[0])
    for epoch in xrange(self.epochs):
        l = []
        for ind, i in enumerate(xrange(0, n_users, batch_size)):
            # CHECK : SEEMS BUGGY.
            #------------------------
            ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
            #------------------------
            #print ratings
            #pdb.set_trace()
            loss = self.AE.ae_batch(ratings)
            #loss = self.AE.debug(ratings)
            #print loss
            #pdb.set_trace()
            l.append(loss)
        m = np.mean(np.array(l))
        print("mean Loss for epoch %d batch %d is %f" % (epoch, ind, m))
        #rmse = self.RMSE_sparse()
        rmse = self.RMSE()
        print("RMSE after one epoch is %f" % (rmse))
        f.write(str(rmse) + '\n')
def multi_sparse_graph_loss(y_true, y_pred):
    ids = tensor.nonzero(y_true[:, 0] + 1)[0]
    y_true = y_true[ids]
    y_pred = y_pred[ids]
    return tensor.mean(objectives.sparse_categorical_crossentropy(y_true, y_pred))
def linear_activation(self, time, updates):
    """Linear activation."""
    # Destroy impulse if in refractory period
    masked_imp = t.set_subtensor(
        self.impulse[t.nonzero(self.refrac_until > time)], 0.)
    # Add impulse
    new_mem = self.mem + masked_imp
    # Store spiking
    output_spikes = t.ge(new_mem, self.v_thresh)
    spike_idxs = output_spikes.nonzero()
    if settings['reset'] == 'Reset by subtraction':
        if settings['payloads'] and False:  # Experimental, turn off by default
            new_and_reset_mem = t.set_subtensor(new_mem[spike_idxs], 0.)
        else:
            new_and_reset_mem = t.inc_subtensor(new_mem[spike_idxs], -self.v_thresh)
    else:  # settings['reset'] == 'Reset to zero'
        new_and_reset_mem = t.set_subtensor(new_mem[spike_idxs], 0.)
    updates.append((self.mem, new_and_reset_mem))
    if settings['payloads']:
        residuals = t.inc_subtensor(new_mem[spike_idxs], -self.v_thresh)
        payloads, payloads_sum = update_payload(self, residuals, spike_idxs)
        updates.append((self.payloads, payloads))
        updates.append((self.payloads_sum, payloads_sum))
    return output_spikes
def loop(row):
    one_indices = T.nonzero(row)[0]
    zero_indices = T.eq(row, 0).nonzero()[0]
    random = shared_randomstreams.RandomStreams(5)
    ind1 = random.random_integers(size=(1,), low=0, high=one_indices.shape[0] - 1, ndim=None)
    ind2 = random.random_integers(size=(50,), low=0, high=zero_indices.shape[0] - 1, ndim=None)
    return one_indices[ind1], zero_indices[ind2]
def predict(prob, mask):
    valid_index = T.nonzero(mask > 0)[0]
    prob = prob[valid_index]
    word_index = T.zeros((batch_size,), dtype='int32')
    word_index = T.set_subtensor(word_index[valid_index], T.argmax(prob, axis=1))  # +1?
    return word_index
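# The scatter pattern above -- compute only on the valid rows, then write the
# results back with T.set_subtensor -- can be exercised in isolation. A hedged
# sketch (the length 4 and the constant 7 are made up for illustration):
import numpy as np
import theano
import theano.tensor as T

mask = T.dvector('mask')
valid = T.nonzero(mask > 0)[0]
out = T.set_subtensor(T.zeros((4,), dtype='int32')[valid], 7)
f = theano.function([mask], out)
print(f(np.array([1., 0., 1., 0.])))  # -> [7 0 7 0]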
def get_latent(self, score):
    padded = T.concatenate(
        [score, T.alloc(-np.inf, self.args['batch'] * self.args['max_inst_in_doc'], 1)],
        axis=1)
    # padded = T.concatenate([score, T.alloc(0., self.args['batch'] * self.args['max_inst_in_doc'], 1)], axis=1)
    row_indices = np.array(
        [[i] * self.args['max_inst_in_doc']
         for i in np.arange(self.args['batch'] * self.args['max_inst_in_doc'])],
        dtype='int32')
    ante_score = padded[row_indices, self.container['prev_inst_cluster_gold']]
    latent_score = T.max(ante_score, axis=1)
    latent_score = T.set_subtensor(
        latent_score[T.nonzero(T.eq(latent_score, -np.inf))], 0.)
    latent_inst = T.argmax(ante_score, axis=1)
    row_indices = T.arange(self.args['batch'] * self.args['max_inst_in_doc'], dtype='int32')
    alpha = T.set_subtensor(self.container['alpha'][row_indices, latent_inst], 0.)
    return T.reshape(
        latent_score,
        [self.args['batch'] * self.args['max_inst_in_doc'], 1]), alpha, latent_inst
def logp_theano_comorbidities(logLike, nObs, B0, B, X, S, T):
    logLike = 0.0

    # Unwrap t=0 points for B0
    zeroIndices = np.roll(T.cumsum(), 1)
    zeroIndices[0] = 0
    zeroIndices = zeroIndices.astype('int32')

    # Likelihood from B0 for X=1 and X=0 cases
    logLike += (X[zeroIndices] * TT.log(B0[:, S[zeroIndices]]).T).sum()
    logLike += ((1 - X[zeroIndices]) * TT.log(1. - B0[:, S[zeroIndices]]).T).sum()

    stateChange = S[1:] - S[:-1]
    # Don't consider t=0 points
    #setZero = TT.as_tensor_variable(zeroIndices[1:]-1)
    #TT.set_subtensor(stateChange[setZero], 0)
    stateChange = TT.set_subtensor(stateChange[zeroIndices[1:] - 1], 0)
    changed = TT.nonzero(stateChange)[0] + 1

    # A change can only happen from 0 to 1 given our assumptions
    logLike += ((X[changed] - X[changed - 1]) * TT.log(B[:, S[changed]]).T).sum()
    logLike += (((1 - X[changed]) * (1 - X[changed - 1])) * TT.log(1. - B[:, S[changed]]).T).sum()
    #logLike += (X[changed]*np.log(B[:,S[changed]]).T).sum()

    return logLike
def call(self, x):
    real = self.get_realpart(x)
    imag = self.get_imagpart(x)
    #mag = self.get_abs(x)
    ang = self.get_angle(x) + 0.0001
    indices1 = T.nonzero(T.ge(ang, pi / 2))
    indices2 = T.nonzero(T.le(ang, 0))

    real = T.set_subtensor(real[indices1], 0)
    imag = T.set_subtensor(imag[indices1], 0)

    real = T.set_subtensor(real[indices2], 0)
    imag = T.set_subtensor(imag[indices2], 0)

    act = K.concatenate([real, imag], axis=1)
    return act
def add_payloads(prev_layer, input_spikes):
    """Get payloads from previous layer."""
    # Get only payloads of those pre-synaptic neurons that spiked
    payloads = t.set_subtensor(
        prev_layer.payloads[t.nonzero(t.eq(input_spikes, 0.))], 0.)
    print("Using spikes with payloads from layer {}".format(prev_layer.name))
    return t.add(input_spikes, payloads)
def exp(self, X, U):
    norm_U = tensor.sqrt(tensor.sum((U ** 2), axis=0)).reshape((1, self._n))
    Y = X * tensor.cos(norm_U) + U * (tensor.sin(norm_U) / norm_U)
    # For those columns where the step is too small, use a retraction.
    exclude = tensor.nonzero(norm_U <= 4.5e-8)[-1]
    # Theano tensors do not support in-place slice assignment;
    # use set_subtensor and keep its result.
    Y = tensor.set_subtensor(
        Y[:, exclude], self._normalize_columns(X[:, exclude] + U[:, exclude]))
    return Y
def MASK_blanking(x_i):
    # Find indices of first and last non-zero value in x_i
    # (note: [[1, -1]] actually picks the second and last non-zero positions;
    # [[0, -1]] would give the true first and last)
    idxs = T.nonzero(x_i)[0][[1, -1]]
    # Diff = no of non-zero values
    no_values = idxs[1] - idxs[0]
    # Move index inside by proportion of no of values
    idxs0 = T.cast(T.floor(idxs[0] + no_values * blank_proportion), 'int32')
    idxs1 = T.cast(T.floor(idxs[1] - no_values * blank_proportion), 'int32')
    # Return a vector that has a tighter mask than x_i
    return T.set_subtensor(T.zeros_like(x_i)[idxs0:idxs1], T.alloc(1., idxs1 - idxs0))
def add_synap_post_inp(i, po, p, s, q):
    # i :: sequence
    # po :: post
    # p :: pre
    # s :: dA
    # q :: W
    index = T.nonzero(q[:self.Ne, i])
    npo = T.inc_subtensor(po[index, i], s)
    nw = T.inc_subtensor(q[:, i], p[:, i])
    nw = T.clip(nw, 0, self.wmax)
    return {po: npo, q: nw}
def conditional_feature_matching(f_sample, f_data, label, num_classes, norm='l2'):
    cfm_loss = 0.
    for c in xrange(num_classes):
        index = T.nonzero(
            T.switch(T.eq(label, c), T.ones_like(label), T.zeros_like(label)))
        cfm_loss += feature_matching(f_sample[index], f_data[index], norm)
    return cfm_loss
def conditional_maximum_mean_discripancy(sample, data, label, num_classes,
                                         sigma=[2, 5, 10, 20, 40, 80]):
    cmmd_loss = 0.
    for c in xrange(num_classes):
        index = T.nonzero(
            T.switch(T.eq(label, c), T.ones_like(label), T.zeros_like(label)))
        cmmd_loss += maximum_mean_discripancy(sample[index], data[index], sigma)
    return cmmd_loss
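# In both conditional losses above, T.switch(T.eq(label, c), ones, zeros) is
# equivalent to using T.eq(label, c) directly, since T.eq already yields a 0/1
# mask; T.nonzero on either form gives the row indices of class c. A small
# sketch of the row-selection idiom (the names x and label are illustrative):
import numpy as np
import theano
import theano.tensor as T

x = T.dmatrix('x')
label = T.ivector('label')
rows_of_class_1 = x[T.nonzero(T.eq(label, 1))]
f = theano.function([x, label], rows_of_class_1)
print(f(np.eye(3), np.array([0, 1, 1], dtype='int32')))  # rows 1 and 2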
def recurrence(curr_inst, current_hv, prev_inst_cluster, current_cluster):
    curr_indices = offsets + curr_inst
    prev_inst_cluster = T.set_subtensor(
        prev_inst_cluster[:, curr_inst], 0)  # prev_inst_cluster[:, curr_inst] = 0

    curr_rep_cnn = rep_cnn[curr_indices]
    rep_inter1 = T.concatenate(
        [T.sum(current_hv, axis=1, keepdims=True), current_hv], axis=1)
    rep_inter2 = T.stack([curr_rep_cnn] * (self.args['max_inst_in_doc'] + 1), axis=1)
    rep_inter = T.concatenate([rep_inter1, rep_inter2], axis=2)

    score_cluster = T.nnet.sigmoid(T.dot(rep_inter, W_global) + b_global)
    score_cluster = T.reshape(
        score_cluster,
        newshape=(self.args['batch'], self.args['max_inst_in_doc'] + 1))
    score_cluster = T.concatenate(
        [score_cluster, T.alloc(-np.inf, self.args['batch'], 1)], axis=1)

    row_indices = np.array([[i] * self.args['max_inst_in_doc']
                            for i in np.arange(self.args['batch'])],
                           dtype='int32')
    global_score = score_cluster[row_indices, prev_inst_cluster]
    score = local_score[curr_indices] + global_score

    indices_single = T.arange(self.args['batch'], dtype='int32')
    ante_cluster_raw = prev_inst_cluster[indices_single, T.argmax(score, axis=1)]
    indices_new_cluster = T.nonzero(T.eq(ante_cluster_raw, 0))
    ante_cluster = T.set_subtensor(
        ante_cluster_raw[indices_new_cluster], current_cluster[indices_new_cluster])
    current_cluster = T.set_subtensor(
        current_cluster[indices_new_cluster],
        current_cluster[indices_new_cluster] + 1)
    prev_inst_cluster = T.set_subtensor(
        prev_inst_cluster[:, curr_inst], ante_cluster)

    ante_hv = current_hv[indices_single, ante_cluster - 1]
    current_hv = T.set_subtensor(
        current_hv[indices_single, ante_cluster - 1], gru_step(curr_rep_cnn, ante_hv))
    return current_hv, prev_inst_cluster, current_cluster
def unet_crossentropy_loss_sampled(y_true, y_pred):
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling to get theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0]   # no idea why this is a tuple
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take the first 200 samples of each class
    # (assumes each class has at least that many present)
    indPos = indPos[:200]
    indNeg = indNeg[:200]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    return average_loss
def fprop_step(state_below, index, state_before, W, U, b):
    state_now = state_before.copy()
    index = self.num_modules - \
        tensor.nonzero(tensor.mod(index + 1, self.M))[0].shape[0]
    this_range = index * self.module_dim
    z = tensor.dot(state_below, W[:, :this_range]) + \
        tensor.dot(state_before, U[:, :this_range]) + \
        b[:this_range]
    z = tensor.tanh(z)
    state_now = tensor.set_subtensor(state_now[:, :this_range], z)
    return state_now
def gru_step(xt, *dropout_maskst):
    xt_oh = T.zeros((xt.shape[0], V), dtype=floatX)
    xt_oh = T.set_subtensor(xt_oh[T.arange(xt.shape[0]), T.cast(xt, "int32")], 1)
    sts = []
    layer_in = xt_oh
    for i, num_h in enumerate(args.num_hs):
        stm1 = ss[i][:xt.shape[0], :]
        xs = T.concatenate([layer_in, stm1], axis=1)
        z = T.nnet.hard_sigmoid(T.dot(xs, Wzs[i]) + bzs[i])
        r = T.nnet.hard_sigmoid(T.dot(xs, Wrs[i]) + brs[i])
        h = T.tanh(T.dot(T.concatenate([layer_in, stm1 * r], axis=1), Whs[i]) + bhs[i])
        st = (1 - z) * h + z * stm1
        # keep the previous state wherever the input token is the padding value -1
        st = T.set_subtensor(st[T.nonzero(T.eq(xt, -1)), :],
                             stm1[T.nonzero(T.eq(xt, -1)), :])
        sts.append(st)
        if dropout_maskst and args.dropouts[i] != 0:
            st = dropout_layer(st, args.dropouts[i], srng, train_premasks,
                               train_postmasks, dropout_maskst[i])
        if i == len(args.num_hs) - 1:
            ot = T.nnet.softmax(T.dot(st, Wo) + bo)
        else:
            layer_in = st
    xtp1 = T.cast(T.argmax(srng.multinomial(n=1, pvals=ot), axis=1), floatX)
    s_updates = OrderedDict()
    for s, st, num_h in zip(ss, sts, args.num_hs):
        # NB: T.lt returns a symbolic variable, so this Python-level `if`
        # is not evaluated per-batch at run time
        if T.lt(xt.shape[0], s.shape[0]):
            pad = T.zeros((s.shape[0] - xt.shape[0], num_h), dtype=floatX)
            st = T.concatenate([st, pad], axis=0)
        s_updates[s] = st
    return [ot, xtp1], s_updates
def add_synap_pre_inp(i, p, po, s, q):
    # i :: sequence
    # p :: pre | post
    # s :: dApre | dApost
    # q :: W
    index = T.nonzero(q[i, :self.Ne])
    np = T.inc_subtensor(p[i, index], s)
    ## tmp = p[i,:]
    ## tmp = T.inc_subtensor(tmp[index], s)
    ## np = T.set_subtensor(p[i,:], tmp)
    #np = T.inc_subtensor(p[i,:], s)
    nw = T.inc_subtensor(q[i, :], po[i, :])
    nw = T.clip(nw, 0, self.wmax)
    return {p: np, q: nw}
def binary_tanh_activation(self, time, updates):
    """Binary tanh activation."""
    # Destroy impulse if in refractory period
    masked_imp = t.set_subtensor(
        self.impulse[t.nonzero(self.refrac_until > time)], 0.)
    # Add impulse
    new_mem = self.mem + masked_imp
    # Store spiking
    signed_spikes = t.set_subtensor(
        new_mem[t.nonzero(t.gt(new_mem, 0))], self.v_thresh)
    signed_spikes = t.set_subtensor(
        signed_spikes[t.nonzero(t.lt(signed_spikes, 0))], -self.v_thresh)
    output_spikes = t.set_subtensor(new_mem[t.nonzero(new_mem)], self.v_thresh)
    # Reset neurons
    new_and_reset_mem = t.set_subtensor(new_mem[output_spikes.nonzero()], 0.)
    updates.append((self.mem, new_and_reset_mem))
    return signed_spikes
def split_unk_outputs(self, outputs, outputs_mask):
    # Compute separate mask for character level (UNK) words
    # (with symbol < 0).
    charlevel_mask = outputs_mask * T.lt(outputs, 0)
    # ensure that char-level is never empty
    dummy = 1 - T.sum(charlevel_mask).clip(0, 1)
    dummy_mask = T.inc_subtensor(charlevel_mask[0, 0], dummy)
    charlevel_indices = T.nonzero(dummy_mask.T)
    # shortlisted words directly in word level decoder,
    # but char level replaced with unk
    unked_outputs = (1 - charlevel_mask) * outputs
    unked_outputs += charlevel_mask * T.as_tensor(self.index['<UNK>'])
    return unked_outputs, charlevel_indices
def sample_loop(s, y_true, y_pred):
    true, pred = y_true[s], y_pred[s]
    pos = pred[true.nonzero()]
    neg = pred[(1 - true).nonzero()]
    pos_tile = T.extra_ops.repeat(pos, neg.shape[0]).reshape(
        (pos.shape[0], neg.shape[0]))
    neg_tile = T.extra_ops.repeat(neg, pos.shape[0]).reshape(
        (neg.shape[0], pos.shape[0])).transpose()
    p_loss = 1 + neg_tile - pos_tile
    loss = p_loss[T.nonzero(p_loss > 0)]
    return T.sum(loss)
def train_batch(self, batch_size):
    T = self.AE.T
    T = T.tocsr()
    nonzero_indices = T.nonzero()
    #pdb.set_trace()
    n_users = len(np.unique(nonzero_indices[0]))
    indices = np.unique(nonzero_indices[0])
    for epoch in xrange(self.epochs):
        for ind, i in enumerate(xrange(0, n_users, batch_size)):
            ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
            #print ratings
            #pdb.set_trace()
            loss = self.AE.ae_batch(ratings)
            #loss = self.AE.debug(ratings)
            print loss
def step(input_step, previous_activation, time_step, W_in, W_self, biases):
    new_activation = previous_activation.copy()
    modzero = T.nonzero(T.eq(T.mod(time_step, self.group_labels), 0))[0]
    W_in_now = T.flatten(W_in[:, modzero, :], outdim=2)
    W_self_now = T.flatten(W_self[:, modzero, :], outdim=2)
    biases_now = T.flatten(biases[modzero, :])
    activation = T.dot(input_step, W_in_now)
    activation += T.dot(previous_activation, W_self_now)
    activation += biases_now
    activation = self.activation_function(activation)
    modzero_activation_changes = (modzero * self.group_size) + (
        T.ones((modzero.shape[0], self.group_size), dtype='int32') *
        T.arange(self.group_size, dtype='int32')).T
    modzero_flatten = T.flatten(modzero_activation_changes).astype('int32')
    new_activation = T.set_subtensor(new_activation[:, modzero_flatten], activation)
    time_step += 1
    return new_activation, time_step
def trial_resp_pdf(t, ind, A, b, v, s):
    """Probability density function for response i at time t."""
    p_neg, updates = theano.reduce(
        fn=lambda v_i, tot, s: normcdf(-v_i / s) * tot,
        sequences=v, outputs_info=tt.ones(1, dtype='float64'),
        non_sequences=s)

    # PDF for i and no finish yet for others
    v_ind = tt.arange(v.shape[0])
    i = tt.cast(ind, 'int64')
    pdf = (tpdf(t, A, b, v[i], s) *
           ncdf(t, A, b, v[tt.nonzero(tt.neq(v_ind, i))], s)) / (1 - p_neg)

    # define probability of negative times to be zero
    pdf_cond = tt.switch(tt.gt(t, 0), pdf, 0)
    return pdf_cond
def _step2(diag_, state_, hs_, Cs_):
    hs, Cs = [], []
    token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")
    msk_ = tensor.fill((tensor.zeros_like(token_idxs, dtype="float32")), 1)
    msk_ = msk_.dimshuffle('x', 0)
    state_below0 = self.de_lookuptable[token_idxs].reshape(
        (1, encoderInputs.shape[1], self.de_hidden_size))
    for i, lstm in enumerate(self.decoder_lstm_layers):
        h, C = lstm.forward(state_below0, msk_, hs_[i], Cs_[i])  # mind msk
        hs += h[-1],
        Cs += C[-1],
        state_below0 = h
    hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(Cs)
    state_below0 = state_below0.reshape(
        (encoderInputs.shape[1], self.de_hidden_size))

    attn_index = tensor.nonzero(diag_, True)
    attn_value = tensor.nonzero_values(diag_)
    en_context = Encoder_shuffle[:, attn_index[0], :]
    attn_context = Encoder_shuffle_re[:, attn_index[0], :]
    attn_weight = tensor.batched_dot(attn_context, state_below0)
    attn_weight = tensor.nnet.softmax(attn_weight)
    #attn_weight *= (encoderMask.dimshuffle(1,0))
    attn_weight *= (attn_value.dimshuffle('x', 0))
    ##attn_weight = attn_weight/(tensor.sum(attn_weight, axis=1).dimshuffle(0,'x'))

    # ctx_ : (b, h)
    ctx_ = tensor.sum(en_context * attn_weight[:, :, None], axis=1)
    state_below0 = tensor.concatenate([ctx_, state_below0], axis=1)

    newpred = tensor.dot(state_below0, self.linear) + self.linear_bias[None, :]
    state_below = tensor.nnet.softmax(newpred)
    # the probability of the beginning symbol is 0
    extra_p = tensor.zeros_like(hs[:, :, 0])
    state_below = tensor.concatenate([state_below, extra_p.T], axis=1)
    return state_below, hs, Cs
def train(self, batch_size):
    T = self.AE.T
    T = T.tocsr()
    nonzero_indices = T.nonzero()
    #pdb.set_trace()
    n_users = len(np.unique(nonzero_indices[0]))
    indices = np.unique(nonzero_indices[0])
    for epoch in xrange(self.epochs):
        l = []
        for ind, i in enumerate(xrange(0, n_users, batch_size)):
            ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
            loss = self.AE.ae_batch(ratings)
            l.append(loss)
        m = np.mean(np.array(l))
        print("mean Loss for epoch %d batch %d is %f" % (epoch, ind, m))
        rmse = self.RMSE()
        print("RMSE after one epoch is %f" % (rmse))
def split_unk_outputs(self, outputs, outputs_mask):
    # Compute separate mask for character level (UNK) words
    # (with symbol < 0 or > self.low_thresh).
    charlevel_mask = outputs_mask * T.lt(outputs, 0)
    # lower threshold used for indexing tensor for charlevel
    lthr = T.as_tensor(self.low_thresh)
    low_charlevel_mask = charlevel_mask + (outputs_mask * T.gt(outputs, lthr))
    # ensure that char-level is never empty
    dummy = 1 - T.sum(low_charlevel_mask).clip(0, 1)
    low_charlevel_mask = T.inc_subtensor(low_charlevel_mask[0, 0], dummy)
    low_charlevel_indices = T.nonzero(low_charlevel_mask.T)
    # higher threshold used for
    # shortlisted words directly in word level decoder,
    # but char level replaced with unk
    unked_outputs = (1 - charlevel_mask) * outputs
    unked_outputs += charlevel_mask * T.as_tensor(self.index['<UNK>'])
    return unked_outputs, low_charlevel_indices
def recurrence(curr_inst, current_hv, prev_inst_cluster, current_cluster):
    curr_indices = offsets + curr_inst
    prev_inst_cluster = T.set_subtensor(prev_inst_cluster[:, curr_inst], 0)

    curr_rep_cnn = rep_cnn[curr_indices]
    score_by_cluster = T.batched_dot(curr_rep_cnn, current_hv.transpose((0, 2, 1)))
    score_nonana = T.batched_dot(curr_rep_cnn, T.sum(current_hv, axis=1))
    score_by_cluster = T.concatenate(
        [T.reshape(score_nonana, [self.args['batch'], 1]),
         score_by_cluster,
         T.alloc(-np.inf, self.args['batch'], 1)],
        axis=1)

    row_indices = np.array([[i] * self.args['max_inst_in_doc']
                            for i in np.arange(self.args['batch'])],
                           dtype='int32')
    global_score = score_by_cluster[row_indices, prev_inst_cluster]
    score = local_score[curr_indices] + global_score

    indices_single = T.arange(self.args['batch'], dtype='int32')
    ante_cluster_raw = prev_inst_cluster[indices_single, T.argmax(score, axis=1)]
    indices_new_cluster = T.nonzero(T.eq(ante_cluster_raw, 0))
    ante_cluster = T.set_subtensor(
        ante_cluster_raw[indices_new_cluster], current_cluster[indices_new_cluster])
    current_cluster = T.set_subtensor(
        current_cluster[indices_new_cluster],
        current_cluster[indices_new_cluster] + 1)
    prev_inst_cluster = T.set_subtensor(
        prev_inst_cluster[:, curr_inst], ante_cluster)

    ante_hv = current_hv[indices_single, ante_cluster - 1]
    current_hv = T.set_subtensor(
        current_hv[indices_single, ante_cluster - 1], gru_step(curr_rep_cnn, ante_hv))
    return current_hv, prev_inst_cluster, current_cluster
def step(i, in_mask, ACT, ACT_, in_se, WT):
    sub_tree_idx_ = T.nonzero(WT[:, i, :] > -1)
    a_ = T.dot(in_se[:, i], self.WSM)  # + self.b
    if self.b is not None:
        a_ += self.b.dimshuffle('x', 0)
    a_ = a_ + T.sum(ACT_[:, i], axis=1)
    a_ = T.tanh(a_)
    # if self.dropout:
    #     a_ = a_ / self.retain_prob * self._srng.binomial(a_.shape, p=self.retain_prob,
    #                                                      dtype=theano.config.floatX)
    a_ = T.switch(in_mask, a_, ACT[:, i - 1])
    a__ = T.batched_tensordot(a_[sub_tree_idx_[0], :],
                              self.WC[WT[sub_tree_idx_[0], i, sub_tree_idx_[1]]],
                              axes=1)
    # if self.dropout:
    #     a__ = a__ / self.retain_prob * self._srng.binomial(a__.shape, p=self.retain_prob,
    #                                                        dtype=theano.config.floatX)
    newACT_ = T.set_subtensor(ACT_[sub_tree_idx_[0], sub_tree_idx_[1], i], a__)
    newACT = T.set_subtensor(ACT[:, i], a_)
    return newACT, newACT_
def binary_sigmoid_activation(self, time, updates):
    """Binary sigmoid activation."""
    # Destroy impulse if in refractory period
    masked_imp = t.set_subtensor(
        self.impulse[t.nonzero(self.refrac_until > time)], 0.)
    # Add impulse
    new_mem = self.mem + masked_imp
    # Store spiking
    output_spikes = t.gt(new_mem, 0)
    spike_idxs = output_spikes.nonzero()
    # Reset neurons
    new_and_reset_mem = t.set_subtensor(new_mem[spike_idxs], 0.)
    updates.append((self.mem, new_and_reset_mem))
    return output_spikes
def train(self):
    T = self.AE.T
    # Converting to csr format for indexing
    T = T.tocsr()
    #pdb.set_trace()
    nonzero_indices = T.nonzero()
    for epoch in xrange(self.epochs):
        print("Running epoch %d" % (epoch))
        for i in np.unique(nonzero_indices[0]):
            # get indices of observed values from user i's vector
            indices = T[i, :].nonzero()[1]
            #print indices
            #indices = indices.reshape(indices.shape[0],)
            # Get corresponding ratings
            ratings = T[i, indices].toarray()
            #print ratings
            ratings = ratings.reshape(ratings.shape[1],)
            # Convert inputs to theano datatypes
            indices = indices.astype(np.int32)
            ratings = ratings.astype(np.int32)
            #pdb.set_trace()
            loss = self.AE.ae(indices, ratings)
        print("Loss at epoch %d is %f" % (epoch, loss))
def train_batch(self, batch_size):
    T = self.AE.T
    T = T.tocsr()
    nonzero_indices = T.nonzero()
    #pdb.set_trace()
    n_users = len(np.unique(nonzero_indices[0]))
    indices = np.unique(nonzero_indices[0])
    for epoch in xrange(self.epochs):
        for ind, i in enumerate(xrange(0, n_users, batch_size)):
            # CHECK : SEEMS BUGGY.
            #------------------------
            ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
            #------------------------
            #print ratings
            #pdb.set_trace()
            loss = self.AE.ae_batch(ratings)
            #loss = self.AE.debug(ratings)
            #print loss
            #pdb.set_trace()
            print("Loss for epoch %d batch %d is %f" % (epoch, ind, loss))
        print("RMSE after one epoch is %f" % (self.RMSE()))
    activation = T.tanh)

layer1 = ll.LSTMLayer(
    input = layer_feat.output,
    n_in = input_dim,
    n_out = 75)
layer2 = ll.LSTMLayer(
    input = layer_feat.output,
    n_in = input_dim,
    n_out = 75,
    backwards = True)

h = T.concatenate((layer1.output, layer2.output[::-1]), axis=1)
word_indices = T.nonzero(T.eq(T.argmax(input, axis=1), input_dim - 1))
word_h = h[word_indices]

layer3 = nl.NNLayer(
    input = word_h,
    n_in = layer1.n_out * 2,
    n_out = 100,
    activation = T.tanh)
layer4 = nl.NNLayer(
    input = layer3.output,
    n_in = layer3.n_out,
    n_out = len(ix_to_tag),
    activation = T.nnet.softmax)

layers = [layer_feat, layer1, layer2, layer3]
layer1 = ll.LSTMLayer(
    input = layer_embed.output,
    n_in = layer_embed.n_out,
    n_out = 150
)
layer2 = ll.LSTMLayer(
    input = layer_embed.output,
    n_in = layer_embed.n_out,
    n_out = 150,
    backwards=True
)

# gets only the output vectors whose indices are equal to the end of token
end_indices = T.nonzero(T.eq(T.argmax(x, axis=1), input_dim - 1))
# gets only the output vectors whose indices are equal to the start of token
start_indices = T.nonzero(T.eq(T.argmax(x, axis=1), input_dim - 2))

hl = T.concatenate((layer1.output, layer2.output[::-1]), axis=1)
hc = extra.cumsum(hl, axis=0)
hsub = hc[end_indices] - hc[start_indices]
diff_indices = T.as_tensor_variable(end_indices) - T.as_tensor_variable(start_indices)
diff_shuf = diff_indices.flatten().dimshuffle(0, 'x')
h = hsub / diff_shuf
h_size = (layer1.n_out + layer2.n_out)

# relationship between near words
layer_c = nl.NNLayer(
def mse_rl(y_true, y_pred):
    # We don't really need this here unless you compile your model instead of pickling it.
    # Keep only positions with real (non-NaN) targets:
    care = T.nonzero(T.eq(T.isnan(y_true), 0))
    return T.mean((y_pred[care] - y_true[care]) ** 2).sum()
def get_cv_error_one(self):
    """Stochastic approximation to the pseudo-likelihood"""
    validation_3d = (self.validation[:, self.n_labels:]).reshape(
        (1, T.shape(self.validation)[0],
         T.shape(self.validation)[1] - self.n_labels))
    labels, confidence = self.predict(validation_3d)
    accuracy = (1.0 * T.shape(self.validation)[0]
                - T.shape(T.nonzero(labels - T.argmax(
                    self.validation[:, :self.n_labels], axis=1)))[1]) \
        / T.shape(self.validation)[0]
    return accuracy
def nll(self, y):
    # return -T.mean(T.dot(T.log(self.output.T), y))
    return -T.mean(T.log(self.output)[T.nonzero(y)])
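# Indexing T.log(self.output) with T.nonzero(y) works because, for a one-hot
# (or multi-hot) target matrix y, the returned (rows, cols) tuple performs
# advanced indexing that picks out exactly the predicted probabilities of the
# active classes. A self-contained check of the idiom:
import numpy as np
import theano
import theano.tensor as T

p = T.dmatrix('p')  # predicted probabilities, (batch, classes)
y = T.dmatrix('y')  # one-hot targets, same shape
nll = -T.mean(T.log(p)[T.nonzero(y)])
f = theano.function([p, y], nll)
print(f(np.array([[0.9, 0.1], [0.2, 0.8]]),
        np.array([[1., 0.], [0., 1.]])))  # -(log 0.9 + log 0.8) / 2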
def ctc_objective(y_pred, y, y_pred_mask=None, y_mask=None, batch=True):
    '''CTC objective.

    Parameters
    ----------
    y_pred : [nb_samples, in_seq_len, nb_classes+1]
        softmax probabilities
    y : [nb_samples, out_seq_len]
        output sequences
    y_mask : [nb_samples, out_seq_len]
        mask deciding which labels in y are included (0 for ignore, 1 for keep)
    y_pred_mask : [nb_samples, in_seq_len]
        mask deciding which samples in the input sequence are used
    batch : True/False
        if batching is not used, nb_samples=1
        Note: the implementation without batch support is more reliable

    Returns
    -------
    grad_cost : the cost to calculate the gradient on
    monitor_cost : the cost for monitoring model performance
        (*NOTE: do not calculate the gradient on this cost)

    Note
    ----
    According to @Richard Kurle:
        Test error of 38% with 1 bidirectional LSTM layer or with a stack of 3,
        but I could not reproduce the results reported in Graves's paper.

        If you get blanks only, you probably have bad hyperparameters or you
        did not wait enough epochs. At the beginning of training, only the
        cost decreases; you don't yet see any characters popping up.

        You will need gradient clipping to prevent exploding gradients as well.
    '''
    y_pred_mask = y_pred_mask if y_pred_mask is not None else T.ones(
        (y_pred.shape[0], y_pred.shape[1]), dtype=floatX)
    y_mask = y_mask if y_mask is not None else T.ones(y.shape, dtype=floatX)

    if batch:
        # ====== reshape input ====== #
        y_pred = y_pred.dimshuffle(1, 0, 2)
        y_pred_mask = y_pred_mask.dimshuffle(1, 0)
        y = y.dimshuffle(1, 0)
        y_mask = y_mask.dimshuffle(1, 0)
        # ====== calculate cost ====== #
        grad_cost = _pseudo_cost(y, y_pred, y_mask, y_pred_mask, False)
        grad_cost = grad_cost.mean()
        monitor_cost = _cost(y, y_pred, y_mask, y_pred_mask, True)
        monitor_cost = monitor_cost.mean()
        return grad_cost, monitor_cost
    else:
        y = T.cast(y, dtype='int32')
        # batch_size=1 => just take [0] to drop one dimension
        y_pred = y_pred[0]
        y_pred_mask = y_pred_mask[0]
        y = y[0]
        y_mask = y_mask[0]
        # after take, ndim goes up from 2 to 3 and must be reduced back to 2
        y_pred = T.take(y_pred, T.nonzero(y_pred_mask, return_matrix=True), axis=0)[0]
        y = T.take(y, T.nonzero(y_mask, return_matrix=True), axis=0).ravel()
        return _cost_no_batch(y_pred, y)
def normal(self, size, avg=0.0, std=1.0, ndim=None, dtype=None,
           nstreams=None, truncate=False, **kwargs):
    """
    Sample a tensor of values from a normal distribution.

    Parameters
    ----------
    size : int_vector_like
        Array dimensions for the output tensor.
    avg : float_like, optional
        The mean value for the truncated normal to sample from (defaults to 0.0).
    std : float_like, optional
        The standard deviation for the truncated normal to sample from (defaults to 1.0).
    truncate : bool, optional
        Truncates the normal distribution at 2 standard deviations if True (defaults to False).
        When this flag is set, the standard deviation of the result will be less than the one specified.
    ndim : int, optional
        The number of dimensions for the output tensor (defaults to None).
        This argument is necessary if the size argument is ambiguous on the number of dimensions.
    dtype : str, optional
        The data-type for the output tensor. If not specified, the dtype is inferred
        from avg and std, but it is at least as precise as floatX.
    kwargs
        Other keyword arguments for random number generation (see uniform).

    Returns
    -------
    samples : TensorVariable
        A Theano tensor of samples randomly drawn from a normal distribution.
    """
    size = _check_size(size)
    avg = undefined_grad(as_tensor_variable(avg))
    std = undefined_grad(as_tensor_variable(std))

    if dtype is None:
        dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

    avg = tensor.cast(avg, dtype=dtype)
    std = tensor.cast(std, dtype=dtype)

    # generate even number of uniform samples
    # Do manual constant folding to lower optimizer work.
    if isinstance(size, theano.Constant):
        n_odd_samples = size.prod(dtype='int64')
    else:
        n_odd_samples = tensor.prod(size, dtype='int64')
    n_even_samples = n_odd_samples + n_odd_samples % 2
    uniform = self.uniform((n_even_samples, ), low=0., high=1., ndim=1,
                           dtype=dtype, nstreams=nstreams, **kwargs)

    # box-muller transform
    u1 = uniform[:n_even_samples // 2]
    u2 = uniform[n_even_samples // 2:]
    r = tensor.sqrt(-2.0 * tensor.log(u1))
    theta = np.array(2.0 * np.pi, dtype=dtype) * u2
    cos_theta, sin_theta = tensor.cos(theta), tensor.sin(theta)
    z0 = r * cos_theta
    z1 = r * sin_theta

    if truncate:
        # use valid samples
        to_fix0 = (z0 < -2.) | (z0 > 2.)
        to_fix1 = (z1 < -2.) | (z1 > 2.)
        z0_valid = z0[tensor.nonzero(~to_fix0)]
        z1_valid = z1[tensor.nonzero(~to_fix1)]

        # re-sample invalid samples
        to_fix0 = tensor.nonzero(to_fix0)[0]
        to_fix1 = tensor.nonzero(to_fix1)[0]
        n_fix_samples = to_fix0.size + to_fix1.size
        lower = tensor.constant(1. / np.e**2, dtype=dtype)
        u_fix = self.uniform((n_fix_samples, ), low=lower, high=1., ndim=1,
                             dtype=dtype, nstreams=nstreams, **kwargs)
        r_fix = tensor.sqrt(-2. * tensor.log(u_fix))
        z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
        z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

        # pack everything together to a useful result
        norm_samples = tensor.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
    else:
        norm_samples = tensor.join(0, z0, z1)
    if isinstance(n_odd_samples, theano.Variable):
        samples = norm_samples[:n_odd_samples]
    elif n_odd_samples % 2 == 1:
        samples = norm_samples[:-1]
    else:
        samples = norm_samples
    samples = tensor.reshape(samples, newshape=size, ndim=ndim)
    samples *= std
    samples += avg

    return samples
def nonzero(x, return_matrix=False):
    return T.nonzero(x, return_matrix=return_matrix)
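# For reference: T.nonzero defaults to the NumPy-style tuple of index vectors;
# with return_matrix=True it instead returns a single matrix stacking the
# per-dimension index vectors (the form used with T.take in the CTC objective
# above). A quick sketch:
import numpy as np
import theano
import theano.tensor as T

m = T.dmatrix('m')
f_tuple = theano.function([m], T.nonzero(m))                       # tuple of vectors
f_matrix = theano.function([m], T.nonzero(m, return_matrix=True))
a = np.eye(2)
print(f_tuple(a))   # [array([0, 1]), array([0, 1])]
print(f_matrix(a))  # one row of indices per dimension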
def loss_of_time(prob, y, mask):
    valid_index = T.nonzero(mask > 0)[0]
    # negative log-probability of the gold word at each valid position
    # (valid_index selects the rows, y[valid_index] the gold columns)
    loss = -T.sum(T.log(prob[valid_index, y[valid_index]]))
    return loss