def init_count_window_bigrams(self, train_stories, window_size, batch_size):
    window = T.matrix('window', dtype='int32')
    window.tag.test_value = rng.randint(self.lexicon_size, size=(window_size, 100)).astype('int32')
    window.tag.test_value[1, 10] = -1
    window.tag.test_value[:, 0] = -1
    window.tag.test_value[-1, 1] = -1
    words1 = window[0]
    words2 = window[1:].T
    word_index = T.scalar('word_index', dtype='int32')
    word_index.tag.test_value = 0
    batch_index = T.scalar('batch_index', dtype='int32')
    batch_index.tag.test_value = 0
    # select words in sequence and batch
    window_ = train_stories[word_index:word_index + window_size, batch_index:batch_index + batch_size]
    # filter stories with all empty words from this batch
    window_ = window_[:, T.argmin(window_[0] < 0):]
    self.count_window_bigrams = theano.function(inputs=[word_index, batch_index],
                                                outputs=[words1, words2],
                                                givens={window: window_},
                                                on_unused_input='ignore',
                                                allow_input_downcast=True)
def get_output(self, train=False):
    X = self.get_input(train)
    # mask = self.get_padded_shuffled_mask(train, X, pad=0)
    mask = self.get_input_mask(train=train)
    ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32').ravel()
    max_time = T.max(ind)
    X = X.dimshuffle((1, 0, 2))
    Y = T.dot(X, self.W) + self.b
    # h0 = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
    h0 = T.repeat(self.h_m1, X.shape[1], axis=0)
    c0 = T.repeat(self.c_m1, X.shape[1], axis=0)
    [outputs, _], updates = theano.scan(
        self._step,
        sequences=Y,
        outputs_info=[h0, c0],
        non_sequences=[self.R],
        n_steps=max_time,
        truncate_gradient=self.truncate_gradient,
        strict=True,
        allow_gc=theano.config.scan.allow_gc)
    res = T.concatenate([h0.dimshuffle('x', 0, 1), outputs], axis=0).dimshuffle((1, 0, 2))
    if self.return_sequences:
        return res
    # return outputs[-1]
    return res[T.arange(mask.shape[0], dtype='int32'), ind]
def _match(self, sample):
    diff = (T.sqr(self.codebook)).sum(axis=1, keepdims=True) \
        + (T.sqr(sample)).sum(axis=1, keepdims=True) \
        - 2 * T.dot(self.codebook, sample.T)
    bmu = T.argmin(diff)
    err = T.min(diff)
    return err, bmu
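# A minimal, self-contained sketch (not part of the class above) of the same
# ||c||^2 + ||x||^2 - 2*c.x expansion used in _match to find the best-matching
# codebook row for a single sample, checked against a NumPy brute force.
import numpy as np
import theano
import theano.tensor as T

codebook = T.matrix('codebook')   # (k, d)
sample = T.matrix('sample')       # (1, d)
diff = (T.sqr(codebook)).sum(axis=1, keepdims=True) \
    + (T.sqr(sample)).sum(axis=1, keepdims=True) \
    - 2 * T.dot(codebook, sample.T)                      # (k, 1) squared distances
match = theano.function([codebook, sample], [T.min(diff), T.argmin(diff)])

cb = np.random.rand(5, 3).astype(theano.config.floatX)
x = np.random.rand(1, 3).astype(theano.config.floatX)
err, bmu = match(cb, x)
assert bmu == np.argmin(((cb - x) ** 2).sum(axis=1))     # agrees with brute force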
def __init__(self, input, n_in, n_out):
    self.W = theano.shared(
        value=np.zeros(
            (n_in, n_out),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )
    self.b = theano.shared(
        value=np.zeros(
            (n_out,),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )
    self.cost_predicted_given_x = T.dot(input, self.W) + self.b
    self.y_pred = T.argmin(self.cost_predicted_given_x, axis=1)
    self.params = [self.W, self.b]
    self.input = input
    self.y = T.ivector('y')
    self.cost_vector = T.matrix('cost_vector')
    self.MSE = T.mean((self.cost_predicted_given_x - self.cost_vector) ** 2)
    self.error = T.mean(T.neq(self.y_pred, self.y))
    self.future_cost = T.sum(self.cost_vector[T.arange(self.y_pred.shape[0]), self.y_pred])
def __init__(self, cooccurrence, z_k):
    eps = 1e-9
    self.z_k = z_k
    n = cooccurrence.shape[0]
    self.n = n
    h = cooccurrence.astype(np.float32)
    p = h / np.sum(h, axis=None)
    pc = T.constant(p, name="p")
    z_init = np.random.random_integers(0, z_k - 1, (n,)).astype(np.int32)
    z = theano.shared(z_init, name="z")
    self.z = z
    c = T.zeros((z_k, n), dtype='float32')  # (z_k, n)
    c = T.set_subtensor(c[z, T.arange(c.shape[1])], 1)
    pyz = T.dot(c, pc)  # (z_k, x_k)
    marg = T.sum(pyz, axis=1, keepdims=True)
    cond = pyz / (marg + eps)
    nll = -T.sum(pyz * T.log(eps + cond), axis=None)  # scalar
    nllyzr = T.transpose(-T.log(eps + cond), (1, 0))  # (x_k, z_k)
    losses = T.dot(pc, nllyzr)  # (x_k, z_k)
    nz = T.cast(T.argmin(losses, axis=1), 'int32')  # (x_k,)
    updates = [(z, nz)]
    flag = T.gt(T.sum(T.neq(z, nz)), 0)
    self.train_fun = theano.function([], [nll, flag], updates=updates)
    self.val_fun = theano.function([], nll)
def _get_cluster_symbol(self):
    output = self._get_output_symbol()
    Y_hat = T.reshape(output, (self.batch, self.y_n, self.k))
    y = self._get_y_symbol()
    Y = T.tile(y[:, :, None], (1, 1, self.k))
    diff = T.mean((Y - Y_hat) ** 2, axis=1)
    cluster = T.argmin(diff, axis=1)
    return cluster
def get_output_for(self, inputs):
    A = inputs[0]
    X = inputs[1]
    max_degree_node = T.argmax(A.sum(0))
    min_degree_node = T.argmin(A.sum(0))
    return self.reduce(A, [max_degree_node, min_degree_node])
def batch_get_nearest_neighbours(samples, dataset):
    sample = Te.matrix(name="sample")
    data = Te.matrix(name="dataset")
    find_nearest_neighbour = theano.function(
        name="find_nearest_neighbour",
        inputs=[sample],
        outputs=data[Te.argmin(Te.sum((data[:, None, :] - sample) ** 2, axis=2), axis=0)],
        givens={data: dataset['train']['data']})
    return find_nearest_neighbour(samples)
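# A standalone sketch of the same broadcasted nearest-neighbour lookup, without
# the dataset dict assumed above: for every query row, return the closest row
# of a reference matrix by squared Euclidean distance.
import numpy as np
import theano
import theano.tensor as Te

queries = Te.matrix('queries')       # (q, d)
reference = Te.matrix('reference')   # (r, d)
dists = Te.sum((reference[:, None, :] - queries) ** 2, axis=2)   # (r, q)
nearest = reference[Te.argmin(dists, axis=0)]                    # (q, d)
lookup = theano.function([queries, reference], nearest)

ref = np.eye(3).astype(theano.config.floatX)
qry = np.array([[0.9, 0.1, 0.0]], dtype=theano.config.floatX)
print(lookup(qry, ref))   # -> [[1. 0. 0.]]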
def perform(self):
    mask = self.mask
    assert mask.ndim == 2, 'Only 2D masks are supported'
    ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32')
    for is_train in [True, False]:
        y = self.get_input(is_train)
        res = y[T.arange(mask.shape[0], dtype='int32'), ind]
        self.output_vars[is_train] = [res]
def dtw(i, q_p, b_p, Q, D, inf):
    i0 = T.eq(i, 0)
    # inf = T.cast(1e10,'float32') * T.cast(T.switch(T.eq(self.n,0), T.switch(T.eq(i,0), 0, 1), 1), 'float32')
    penalty = T.switch(T.and_(T.neg(n0), i0), big, T.constant(0.0, 'float32'))
    loop = T.constant(0.0, 'float32') + q_p
    forward = T.constant(0.0, 'float32') + T.switch(T.or_(n0, i0), 0, Q[i - 1])
    opt = T.stack([loop, forward])
    k_out = T.cast(T.argmin(opt, axis=0), 'int32')
    return opt[k_out, T.arange(opt.shape[1])] + D[i] + penalty, k_out
def get_output_for(self, inputs):
    A = inputs[0]
    eigenvals_eigenvecs = T.nlinalg.eig(A)
    smallest_eigenval_index = T.argmin(eigenvals_eigenvecs[0])
    smallest_eigenvec = eigenvals_eigenvecs[1][smallest_eigenval_index]
    return smallest_eigenvec
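# A standalone sketch of the same lookup. Note that, as in NumPy, eig returns
# eigenvectors as *columns*, so the vector belonging to the smallest eigenvalue
# is the column at the argmin index.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg

A = T.matrix('A')
vals, vecs = nlinalg.eig(A)
smallest_vec = vecs[:, T.argmin(vals)]
f = theano.function([A], smallest_vec)

a = np.diag([3.0, 1.0, 2.0]).astype(theano.config.floatX)
print(f(a))   # eigenvector for eigenvalue 1.0 -> [0. 1. 0.]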
def get_min_index(self, origin):
    """ This function computes the cost and the updates for one training step of the RAE """
    merge_input = self.get_input_values(origin)
    encode = self.get_hidden_values(merge_input)
    decode = self.get_reconstructed_input(encode)
    L = T.sum(0.5 * (decode - merge_input) ** 2, axis=1)
    min_index = T.argmin(L)
    return (min_index, merge_input[min_index], encode[min_index])
def indexing_bilinear(self):
    # Declare variables
    a = tt.dvector()
    b = tt.dscalar()

    # Build symbolic expression
    out = tt.argmin(tt.abs_(a - b))

    # Compile function
    self.tt_index_array = function([a, b], out)
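# Hypothetical usage of the lookup compiled above (assumes `obj` is an instance
# of the surrounding class and obj.indexing_bilinear() has already been called):
# it returns the index of the grid value closest to a scalar query.
import numpy as np

grid = np.linspace(0.0, 1.0, 5)        # [0.0, 0.25, 0.5, 0.75, 1.0]
idx = obj.tt_index_array(grid, 0.33)   # -> 1, since 0.25 is the nearest value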
def __init__(self, input, n_in, n_out):
    # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
    self.W = theano.shared(
        value=np.zeros(
            (n_in, n_out),
            dtype=theano.config.floatX
        ),
        name='W',
        borrow=True
    )
    # initialize the biases b as a vector of n_out 0s
    self.b = theano.shared(
        value=np.zeros(
            (n_out,),
            dtype=theano.config.floatX
        ),
        name='b',
        borrow=True
    )
    # keep track of model input
    self.input = input
    # symbolic variable of cost vector in terms of a single example;
    # for a batch of examples, it's a matrix, so don't get confused
    # with the variable name `cost_vector`
    self.cost_vector = T.matrix('cost_vector')
    # symbolic variable of Z_{n,k}
    self.Z_nk = T.matrix('Z_nk')
    self.cost_predicted_given_x = T.dot(self.input, self.W) + self.b
    # elementwise comparison with 0
    self.xi = T.maximum((self.Z_nk * (self.cost_predicted_given_x - self.cost_vector)), 0.)
    # define the linear one-sided regression loss
    self.one_sided_regression_loss = T.sum(self.xi)
    # symbolic description of how to compute prediction as class whose
    # cost is minimum
    self.y_pred = T.argmin(self.cost_predicted_given_x, axis=1)
    # parameters of the model
    self.params = [self.W, self.b]
    # symbolic variable of labels, will only be used for computing 0/1 errors
    self.y = T.ivector('y')
    # compute the 0/1 loss
    self.error = T.mean(T.neq(self.y_pred, self.y))
    # when a new example comes in, the model first computes (predicts) its
    # cost on classifying into each class (a vector) by
    # `self.cost_predicted_given_x`; then the model will predict this new
    # example as the label with the smallest cost
    # self.future_cost = T.sum(self.cost_vector[T.arange(self.y_pred.shape[0]), self.y_pred])
    self.future_cost = T.mean(self.cost_vector[T.arange(self.y_pred.shape[0]), self.y_pred])
def init_H(self):
    if not hasattr(self, "_clusters"):
        a = (self.W * tensor.dot(self.W, self._kernel_matrix)).sum(axis=1) \
            - 2.0 * tensor.dot(self._kernel_matrix, self.W.T)
        b = tensor.argmin(a, axis=1)
        self._clusters = function([], b)
    H = .2 * numpy.ones((self._data_size, self._num_latent_topics)).astype(self.W.dtype)
    clusters = self._clusters()
    for i, cluster in enumerate(clusters):
        H[i, cluster] += 1.0
    self.H.set_value(H)
def get_nearest_neighbours(samples, dataset):
    sample = Te.vector(name="sample")
    data = Te.matrix(name="dataset")
    find_nearest_neighbour = theano.function(
        name="find_nearest_neighbour",
        inputs=[sample],
        outputs=data[Te.argmin(Te.sum((data - sample) ** 2, axis=1))],
        givens={data: dataset['train']['data']})
    neighbours = []
    for s in samples:
        neighbours += [find_nearest_neighbour(s)]
    return neighbours
def greed_step(self, vec_len, node_index, seq_index, vectors, path):
    hs, _ = theano.scan(fn=self.compose_step,
                        sequences=T.arange(vec_len - 1),
                        non_sequences=vectors,
                        name="compose_phrase")
    comp_vec = hs[0]
    comp_rec = hs[1]
    min_index = T.argmin(comp_rec)
    # T.set_subtensor returns a new variable instead of updating in place,
    # so its results have to be kept
    vectors = T.set_subtensor(vectors[min_index:-1], vectors[min_index + 1:])
    vectors = T.set_subtensor(vectors[min_index], comp_vec[min_index])
    path = T.set_subtensor(path[seq_index], T.concatenate([min_index, min_index + 1, node_index]))
    return vectors, min_index
def asymMSE(y_true, y_pred):
    d = y_true - y_pred                            # Get prediction errors
    mins = T.argmin(y_true, axis=1)                # Get indices of optimal action
    mins_onehot = T.extra_ops.to_one_hot(mins, 5)  # Convert min index to one hot array
    others_onehot = mins_onehot - 1                # Get the indices of the non-optimal actions, which will be -1's
    d_opt = d * mins_onehot                        # Get the error of the optimal action
    d_sub = d * others_onehot                      # Get the errors of the non-optimal actions
    a = 160 * d_opt**2                             # 160 times error of optimal action squared
    b = d_opt**2                                   # 1 times error of optimal action squared
    c = 40 * d_sub**2                              # 40 times error of suboptimal actions squared
    d = d_sub**2                                   # 1 times error of suboptimal actions squared
    # This chooses which errors to use depending on the sign of the errors
    # If true, use the steeper penalty. If false, use the milder penalty
    l = T.switch(d_sub < 0, c, d) + T.switch(d_opt < 0, a, b)
    return l
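# A minimal sketch (assumed shapes, not from the original project) that compiles
# the asymMSE loss above for 5 actions and evaluates it on random targets and
# predictions; the result is elementwise, so it is usually averaged afterwards.
import numpy as np
import theano
import theano.tensor as T

y_true = T.matrix('y_true')   # (batch, 5) target action values
y_pred = T.matrix('y_pred')   # (batch, 5) predicted action values
loss_fn = theano.function([y_true, y_pred], asymMSE(y_true, y_pred))

yt = np.random.rand(8, 5).astype(theano.config.floatX)
yp = np.random.rand(8, 5).astype(theano.config.floatX)
print(loss_fn(yt, yp).shape)   # -> (8, 5)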
def __init__(self, data, dims, cluster_penalty=2.1):
    self.dims = dims
    self.cluster_penalty = cluster_penalty
    self.n_clusters = theano.shared(value=np.int(1), name='n_clusters')
    self.data = theano.shared(value=data, name="data")
    self.indicators = theano.shared(value=np.ones(self.dims[0], dtype='uint32'), name="indices")
    mu_init = np.zeros((self.dims[0], 3))
    mu_init[1, :] = data.mean(axis=0)
    self.mu = theano.shared(value=mu_init, name="mu")
    t_idx = T.iscalar('t_idx')
    t_vec = T.vector('t_vec')
    self.D_i_c = (self.euclidean_dist(self.data, self.mu)[:, :self.n_clusters]) ** 2
    self.min_dic = T.min(self.D_i_c, axis=1) > self.cluster_penalty
    self.getDics = theano.function(inputs=[], outputs=self.min_dic)
    self.updateClusters = theano.function(
        inputs=[t_idx],
        updates=[
            (self.indicators, T.set_subtensor(self.indicators[t_idx], self.n_clusters)),
            (self.mu, T.set_subtensor(self.mu[self.n_clusters], self.data[t_idx])),
            (self.n_clusters, self.n_clusters + 1)
        ]
    )
    self.updateIndicators = theano.function(
        inputs=[t_idx],
        updates=[
            (self.indicators, T.set_subtensor(self.indicators[t_idx], T.argmin(self.D_i_c[t_idx])))
        ]
    )
    self.getLkFromIdx = theano.function(inputs=[t_idx], outputs=self.getLk(t_idx))
    self.getMu = theano.function(inputs=[], outputs=self.mu)
    self.getNClusters = theano.function(inputs=[], outputs=self.n_clusters)
def constructMinimalDistanceIndicesVariable(x, y, n, m):
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    lamblinsTrick = False
    if lamblinsTrick:
        # https://github.com/Theano/Theano/issues/1399
        # https://gist.github.com/danielvarga/d0eeacea92e65b19188c
        # https://groups.google.com/forum/#!topic/theano-users/E7ProqnGUMk
        s = sDistances
        bestIndices = T.cast(
            (T.arange(n).dimshuffle(0, 'x')
             * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32')).sum(axis=0),
            'int32')
        # This is a heavy-handed workaround for the fact that in
        # lamblin's hack, ties lead to completely screwed results.
        bestIndices = T.clip(bestIndices, 0, n - 1)
    else:
        bestIndices = T.argmin(sDistances, axis=0)
    return bestIndices
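# A small standalone check (assumed setup, not part of the module above): on a
# distance matrix without ties, T.argmin and the "Lamblin trick" branch above
# pick the same row index for every column.
import numpy as np
import theano
import theano.tensor as T

s = T.matrix('s')
n = s.shape[0]
plain = T.argmin(s, axis=0)
trick = T.cast(
    (T.arange(n).dimshuffle(0, 'x')
     * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32')).sum(axis=0),
    'int32')
f = theano.function([s], [plain, trick])

d = np.random.rand(4, 6).astype(theano.config.floatX)
a, b = f(d)
assert (a == b).all()   # ties would break the trick, as the comment above warns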
def __init__(self, y, uv, params):
    self.layer0_W_y, self.layer0_b_y = params[0]
    self.layer0_W_uv, self.layer0_b_uv = params[1]
    self.layer1_W, self.layer1_b = params[2]
    self.layer2_W = params[3]
    self.layer3_W, self.layer3_b = params[4]
    self.layer4_W, self.layer4_b = params[5]
    poolsize = (2, 2)

    # layer0_y: conv-maxpooling-tanh
    layer0_y_conv = conv.conv2d(input=y, filters=self.layer0_W_y, border_mode='full')
    layer0_y_pool = downsample.max_pool_2d(input=layer0_y_conv, ds=poolsize, ignore_border=True)
    layer0_y_out = T.tanh(layer0_y_pool + self.layer0_b_y.reshape(1, -1, 1, 1))

    # layer0_uv: conv-maxpooling-tanh
    layer0_uv_conv = conv.conv2d(input=uv, filters=self.layer0_W_uv, border_mode='full')
    layer0_uv_pool = downsample.max_pool_2d(input=layer0_uv_conv, ds=poolsize, ignore_border=True)
    layer0_uv_out = T.tanh(layer0_uv_pool + self.layer0_b_uv.reshape(1, -1, 1, 1))

    layer1_input = T.concatenate((layer0_y_out, layer0_uv_out), axis=1)

    # layer1: conv-maxpooling-tanh
    layer1_conv = conv.conv2d(input=layer1_input, filters=self.layer1_W, border_mode='full')
    layer1_pool = downsample.max_pool_2d(input=layer1_conv, ds=poolsize, ignore_border=True)
    layer1_out = T.tanh(layer1_pool + self.layer1_b.reshape(1, -1, 1, 1))

    # layer2: conv
    layer2_out = conv.conv2d(input=layer1_out, filters=self.layer2_W, border_mode='valid')
    layer3_input = layer2_out.reshape((256, -1)).dimshuffle(1, 0)

    # layer3: hidden-layer
    layer3_lin = T.dot(layer3_input, self.layer3_W) + self.layer3_b
    layer3_out = T.tanh(layer3_lin)

    # layer4: logistic-regression
    layer4_out = T.nnet.softmax(T.dot(layer3_out, self.layer4_W) + self.layer4_b)
    self.pred = T.argmin(layer4_out, axis=1)
def straight_through(p, u):
    sts = StraightThroughSampler()
    cum = T.extra_ops.cumsum(p, axis=1) - T.addbroadcast(T.reshape(u, (u.shape[0], 1)), 1)
    cum = T.switch(T.lt(cum, 0.0), 10.0, cum)
    ideal_bucket = T.argmin(cum, axis=1)
    one_hot = T.extra_ops.to_one_hot(ideal_bucket, 4)
    y = sts(p, one_hot)
    return y
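# A small numeric sketch (standalone, assumed shapes) of the bucket selection
# used above: cumsum(p) - u is negative until the cumulative probability
# exceeds u, the negatives are pushed to a large value, and argmin then returns
# the first bucket whose cumulative mass exceeds u (inverse-CDF sampling).
import numpy as np
import theano
import theano.tensor as T

p = T.matrix('p')    # (batch, 4) categorical probabilities
u = T.vector('u')    # (batch,) uniform samples
cum = T.extra_ops.cumsum(p, axis=1) - u.dimshuffle(0, 'x')
bucket = T.argmin(T.switch(T.lt(cum, 0.0), 10.0, cum), axis=1)
pick = theano.function([p, u], bucket)

probs = np.array([[0.1, 0.2, 0.3, 0.4]], dtype=theano.config.floatX)
print(pick(probs, np.array([0.35], dtype=theano.config.floatX)))   # -> [2]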
def __init__(self, cooccurrence, z_k):
    eps = 1e-9
    self.z_k = z_k
    n = cooccurrence.shape[0]
    self.n = n
    h = cooccurrence.astype(np.float32)
    p = h / np.sum(h, axis=None)
    pc = T.constant(p, name="p")
    z_init = np.random.random_integers(0, z_k - 1, (n,))
    zt = theano.shared(z_init)
    self.z_shared = zt
    idx = T.iscalar(name='idx')
    c = T.zeros((z_k, n), dtype='float32')  # (z_k, n)
    c = T.set_subtensor(c[zt, T.arange(c.shape[1])], 1)
    c = T.set_subtensor(c[zt[idx], idx], 0)
    p_yz0 = T.dot(c, pc)  # (z_k, n) x (n, n) = (z_k, n)
    marg0 = T.sum(p_yz0, axis=1, keepdims=True)
    cond0 = p_yz0 / (eps + marg0)
    ent0 = -T.sum(p_yz0 * T.log(eps + cond0), axis=1)  # (z_k,)
    sum0 = T.sum(ent0, axis=0)
    p_yz1 = p_yz0 + (pc[idx, :].dimshuffle(('x', 0)))
    marg1 = T.sum(p_yz1, axis=1, keepdims=True)
    cond1 = p_yz1 / (eps + marg1)
    ent1 = -T.sum(p_yz1 * T.log(eps + cond1), axis=1)  # (z_k,)
    ed = ent1 - ent0  # (z_k,)
    sel = T.argmin(ed)  # (scalar,)
    ztn = T.set_subtensor(zt[idx], sel)
    entn = sum0 + (ed[sel])
    changed = T.neq(sel, zt[idx])
    updates = [(zt, ztn)]
    self.train_fun = theano.function([idx], [entn, changed], updates=updates)

    c = T.zeros((z_k, n), dtype='float32')  # (z_k, n)
    c = T.set_subtensor(c[zt, T.arange(c.shape[1])], 1)
    p_yz = T.dot(c, pc)  # (z_k, n) x (n, n) = (z_k, n)
    marg = T.sum(p_yz, axis=1, keepdims=True)
    cond = p_yz / (eps + marg)
    ent = -T.sum(p_yz * T.log(eps + cond), axis=None)
    self.val_fun = theano.function([], ent)
def apply(self, y_hat):
    # reshape 1d vector to 2d matrix
    y_hat_2d = y_hat.reshape((y_hat.shape[0] / self.examples_group_size, self.examples_group_size))
    # y_hat_2d = tt.printing.Print("Y hat 2d in correct rank: ")(y_hat_2d)

    # sort each group by relevance
    # we sort the responses in decreasing order, that is why we multiply y_hat by -1
    sorting_indices = tt.argsort(-1 * y_hat_2d, axis=1)
    # sorting_indices = tt.printing.Print("sorting indices in correct rank: ")(sorting_indices)

    # check where the ground truth is, whose index should be 0 in the original array
    correct_rank = tt.argmin(sorting_indices, axis=1) + 1
    # correct_rank = tt.printing.Print("correct rank: ")(correct_rank)
    correct_rank.name = "correct_rank"
    return correct_rank
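# A tiny numeric check (standalone, assumed shapes) of the ranking trick above:
# argsort gives, per group, the original indices in decreasing-score order, and
# argmin over those indices locates the position of index 0 (the ground truth),
# so adding 1 turns it into a 1-based rank.
import numpy as np
import theano
import theano.tensor as tt

scores = tt.matrix('scores')                  # (groups, group_size)
order = tt.argsort(-1 * scores, axis=1)
rank_of_first = tt.argmin(order, axis=1) + 1
rank_fn = theano.function([scores], rank_of_first)

s = np.array([[0.2, 0.9, 0.5]], dtype=theano.config.floatX)
print(rank_fn(s))   # ground truth scores lowest of the three -> [3]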
def recurrence(x, cur_time, prev_hidden):
    act_modules = modules - T.argmin((cur_time % p)[::-1])
    update_indices = act_modules * sizeof_mod
    w_subtensor = self.wh[:update_indices]
    b_subtensor = self.bh[:update_indices]
    inp_updates = T.dot(x, self.wi)[:update_indices]
    pre_new_h = T.dot(w_subtensor, prev_hidden) + inp_updates + b_subtensor
    new_h = T.set_subtensor(prev_hidden[:update_indices], h_nonlinearity(pre_new_h))
    out = nonlinearity(T.dot(new_h, self.wo) + self.bo)
    return (cur_time + 1.0), new_h, out
def find_perturb(perturbation):
    logits_os = model(inputs + (1 + over_shoot) * perturbation)
    y_pred = T.argmax(logits_os, axis=1)
    is_mistake = T.neq(y_pred, labels)
    current_ind = batch_indices[(1 - is_mistake).nonzero()]
    should_stop = T.all(is_mistake)
    # continue generating perturbation only for correctly classified
    inputs_subset = inputs[current_ind]
    perturbation_subset = perturbation[current_ind]
    labels_subset = labels[current_ind]
    batch_subset = T.arange(inputs_subset.shape[0])
    x_adv = inputs_subset + perturbation_subset
    logits = model(x_adv)
    corrects = logits[batch_subset, labels_subset]
    jac = jacobian(logits, x_adv, num_classes)
    # deepfool
    f = logits - T.shape_padright(corrects)
    w = jac - T.shape_padaxis(jac[batch_subset, labels_subset], axis=1)
    reduce_ind = range(2, inputs.ndim + 1)
    if norm == 'l2':
        dist = T.abs_(f) / w.norm(2, axis=reduce_ind)
    else:
        dist = T.abs_(f) / T.sum(T.abs_(w), axis=reduce_ind)
    # remove correct targets
    dist = T.set_subtensor(dist[batch_subset, labels_subset], T.constant(np.inf))
    l = T.argmin(dist, axis=1)
    dist_l = dist[batch_subset, l].dimshuffle(0, 'x', 'x', 'x')
    # avoid numerical instability and clip max value
    if clip_dist is not None:
        dist_l = T.clip(dist_l, 0, clip_dist)
    w_l = w[batch_subset, l]
    if norm == 'l2':
        reduce_ind = range(1, inputs.ndim)
        perturbation_upd = dist_l * w_l / w_l.norm(2, reduce_ind, keepdims=True)
    else:
        perturbation_upd = dist_l * T.sgn(w_l)
    perturbation = ifelse(should_stop, perturbation,
                          T.inc_subtensor(perturbation[current_ind], perturbation_upd))
    return perturbation, scan_module.until(should_stop)
def __init__(self, weights, neurons_topology,
             learning_rate=0.1, learning_rate_decay=0.985,
             collaboration_sigma=1.0, collaboration_sigma_decay=0.95,
             verbosity=2):
    self._verbosity = verbosity
    self._history = []
    self.neurons_number = weights.shape[0]
    self.W_shar_mat = theano.shared(weights)
    self.D_shar_mat = theano.shared(neurons_topology)
    self.collaboration_sigma = theano.shared(collaboration_sigma)
    self.collaboration_sigma_decay = collaboration_sigma_decay
    self.x_row = T.vector("exemplar")
    self.x_mat = T.matrix("batch")
    self.learning_rate = theano.shared(learning_rate)
    self.learning_rate_decay = learning_rate_decay
    self.distance_from_y_row = ((T.sub(self.W_shar_mat, self.x_row) ** 2).sum(axis=1))
    self.closest_neuron_idx = T.argmin(self.distance_from_y_row)
    self.distances_from_closest_neuron = self.D_shar_mat[self.closest_neuron_idx]
    self.affinities_to_closest_neuron = T.exp(
        -self.distances_from_closest_neuron / (self.collaboration_sigma) ** 2)
    self.smoothed_distances_from_closest_neuron = T.mul(
        self.distance_from_y_row,
        G.disconnected_grad(self.affinities_to_closest_neuron))
    self.cost_scal = self.smoothed_distances_from_closest_neuron.sum()
    self.updates = sgd(self.cost_scal, [self.W_shar_mat], learning_rate=self.learning_rate)
    self.update_neurons = theano.function([self.x_row], self.cost_scal, updates=self.updates)
def min_risk_choice(Posterior):
    # The Loss function is a function of the predictiveness profiles
    Preds = predictiveness_profiles(Models, K, num_M)
    Loss = ifelse(T.eq(Choice_type, 1), T.pow(1.0 - Preds, 2),
                  ifelse(T.eq(Choice_type, 2), T.abs_(1.0 - Preds), -Preds))
    # Kroneckering Loss up num_Obs times (tile Loss, making it num_M by num_M*num_Obs)
    Loss = kron(T.ones((1, num_Obs)), Loss)
    # Kroneckering up the Posterior, making it num_M by num_Obs*num_M
    Posterior = kron(Posterior, T.ones((1, num_M)))
    # Dotting and reshaping down to give num_M by num_Obs expected loss matrix
    Expected_Loss = T.dot(T.ones((1, num_M)), Posterior * Loss)
    Expected_Loss = T.reshape(Expected_Loss, (num_Obs, num_M)).T
    # Choice minimizes risk
    Choice = T.argmin(Expected_Loss, axis=0)
    return Choice
def getDeployFunction(self, cr):
    from algorithms.algorithm import beam_search, greed
    print "Compiling computing graph."
    get_question_hidden = theano.function([self.question, self.question_mask],
                                           self.last_hidden_state,
                                           name='get_question_hidden')
    _, pred_word_probability = self.softmax_layer.getOutput(self.last_decoder_hidden)
    self.last_decoder_hidden
    self.tparams['Wemb']
    recons_v = self.tparams['recons_v']
    recons_b = self.tparams['recons_b']
    recons_b = recons_b.dimshuffle(['x', 0])
    media_h = T.dot(self.tparams['Wemb'], recons_v) + recons_b
    recons_h_error_L = T.tanh(media_h) - T.addbroadcast(self.last_decoder_hidden, 0)
    recons_h_error_L = T.sqr(recons_h_error_L).sum(axis=1)
    recons_h_error_L = recons_h_error_L / self.options['hidden_dim']
    error = -T.log(pred_word_probability) + recons_h_error_L
    score = T.exp(-error)
    pred_word = T.argmin(error)
    deploy_model = theano.function(inputs=[self.answer, self.answer_mask, self.last_hidden_state],
                                   outputs=[pred_word, score],
                                   allow_input_downcast=True)
    print "Compiled."

    def dm(sentence):
        print "feed %s: " % sentence
        (x, x_mask) = cr.transformInputData(sentence)
        x = x[:-1]
        x_mask = x_mask[:-1]
        last_s = get_question_hidden(x, x_mask)

        def f(y, y_mask):
            return deploy_model(y, y_mask, last_s)

        return beam_search('', cr, f)

    return dm
def __SOM(self, X, W, n):
    learning_rate_op = T.exp(-1. * self.som_lr * n)
    _alpha_op = self.alpha * learning_rate_op
    _sigma_op = self.sigma * learning_rate_op
    locations = self.locs
    maps = T.sub(X, W)
    measure = T.sum(T.pow(T.sub(X, W), 2), axis=1)
    err = measure.min()
    self.bmu_index = T.argmin(measure)
    bmu_loc = locations[self.bmu_index]
    dist_square = T.sum(T.square(T.sub(locations, bmu_loc)), axis=1)
    H = T.cast(T.exp(-dist_square / (2 * T.square(_sigma_op))), dtype=theano.config.floatX)
    w_update = W + _alpha_op * T.tile(H, [self.latent_size, 1]).T * maps
    Qs = self.__soft_probs(X, W)
    P = Qs ** 2 / Qs.sum()
    P = (P.T / P.sum()).T
    cost = self.__kld(P, Qs)
    return [err, cost, bmu_loc], {W: w_update}
def generate_optimize_basis():
    # original solution
    tx0 = partial.x
    # optimized solution
    tx1 = T.dot(tl.matrix_inverse(T.dot(partial.A.T, partial.A)),
                T.dot(partial.A.T, y) - gamma / 2 * partial.theta)
    # investigate zero crossings between tx0 and tx1
    tbetas = tx0 / (tx0 - tx1)
    # investigate tx1
    tbetas = T.concatenate([tbetas, [1.0]])
    # only between tx0 and inclusively tx1
    tbetas = tbetas[(T.lt(0, tbetas) * T.le(tbetas, 1)).nonzero()]
    txbs, _ = theano.map(lambda b: (1 - b) * tx0 + b * tx1, [tbetas])
    tlosses, _ = theano.map(loss, [txbs])
    # select the optimum
    txb = txbs[T.argmin(tlosses)]
    return theano.function([tpart, full.x, full.theta],
                           [T.set_subtensor(partial.x, txb),
                            T.set_subtensor(partial.theta, T.sgn(txb))])
def __build_measure(self):
    # print(self.archetypes.shape.eval())
    # print(self.layers_bw[-1].shape.eval())

    ### FW
    E_fw = T.mean(self.__energy(self.layers_fw, self.weights_fw, self.weights_bw, self.biases_fw))
    C_fw = T.mean(self.__cost(self.layers_fw[-1], direction="fw"))
    y_prediction = T.argmax(self.layers_fw[-1], axis=1)
    # Error count for y-error
    error_fw = T.mean(T.neq(y_prediction, self.y_data))

    ### BW
    # print([layer.shape.eval() for layer in self.layers_fw])
    E_bw = T.mean(self.__energy(self.layers_bw, self.weights_bw, self.weights_fw, self.biases_bw))
    # IDENTIFY CLOSEST ARCHETYPE W/ MINIMAL SUM-SQUARED DISTANCE
    # CLOSEST ARCHETYPE IS THE SAME AS THE Y-LABEL
    arches_reshaped = T.reshape(self.archetypes, [1, D*D, D*D])
    output_reshaped = T.reshape(self.layers_bw[-1], [self.batch_size, 1, D*D])
    sum_squared = ((arches_reshaped - output_reshaped) ** 2).sum(axis=2)
    closest_arch = T.argmin(sum_squared, axis=1)
    # closest_arch_one_hot = T.extra_ops.to_one_hot(closest_arch, D*D)
    # C_bw = T.mean(self.__cost(closest_arch_one_hot))
    C_bw = T.mean(self.__cost(self.layers_bw[-1], direction="bw"))
    # Index of closest archetype for x-error
    error_bw = T.mean(T.neq(closest_arch, self.y_data))

    measure = theano.function(
        inputs=[],
        outputs=[E_fw, C_fw, error_fw, E_bw, C_bw, error_bw]  # , closest_arch, self.y_data]
    )
    return measure
def generate_functions(A, y, gamma):
    tA = T.matrix('A')
    ty = T.vector('y')
    tx = T.vector('x')
    ttheta = T.vector('theta')
    tx0 = T.vector('x0')
    tx1 = T.vector('x1')
    tbetas = T.vector('betas')

    error = lambda x: T.sum((T.dot(tA, x) - ty)**2)
    derror = lambda x: T.grad(error(x), x)
    penalty = lambda x: x.norm(1)
    loss = lambda x: error(x) + penalty(x)

    entering_index = T.argmax(abs(derror(tx)))
    txs, _ = theano.map(lambda b, x0, x1: (1 - b) * x0 + b * x1, [tbetas], [tx0, tx1])

    return {
        "select_entering": theano.function([tx],
                                           [entering_index, derror(tx)[entering_index]],
                                           givens={tA: A, ty: y}),
        "qp_optimum": theano.function([tA, ttheta],
                                      T.dot(T.inv(T.dot(tA.T, tA)), T.dot(tA.T, ty) - gamma / 2 * ttheta),
                                      givens={ty: y}),
        "txs": theano.function([tbetas, tx0, tx1], txs),
        "select_candidate": theano.function([tA, tbetas, tx0, tx1],
                                            txs[T.argmin(theano.map(loss, [txs])[0])],
                                            givens={ty: y}),
        "optimal_nz": theano.function([tA, tx],
                                      derror(tx) + gamma * T.sgn(tx),
                                      givens={ty: y}),
        "optimal_z": theano.function([tA, tx],
                                     abs(derror(tx)),
                                     givens={ty: y}),
    }
def argmin(x): return T.argmin(x)
def argmin(x, dim=None, keep=False):
    return T.argmin(x, axis=dim, keepdims=keep)
def __call__(self, X, termination_criterion, initial_H=None):
    """
    Compute for each sample its representation.

    Parameters
    ----------
    X : Sample matrix. numpy.ndarray
    termination_criterion : pylearn TerminationCriterion object
    initial_H : Numpy matrix.

    Returns
    -------
    H : H matrix with the representation.
    """
    dataset_size = X.shape[0]
    H = None
    if initial_H is not None:
        # use the provided initialization only if its shape matches
        if initial_H.shape[0] == dataset_size and initial_H.shape[1] == self._num_latent_topics:
            H = initial_H
    if H is None:
        if not hasattr(self, "predict_clusters"):
            h = tensor.matrix(name="h")
            x = tensor.matrix(name="x")
            kxb = self._kernel(x, self._budget)
            a = (self.W * tensor.dot(self.W, self._kernel_matrix)).sum(axis=1) \
                - 2.0 * tensor.dot(kxb, self.W.T)
            b = tensor.argmin(a, axis=1)
            self.predict_clusters = function([x], b)
        H = .2 * numpy.ones((self._data_size, self._num_latent_topics)).astype(self.W.dtype)
        clusters = self.predict_clusters(X)
        for i, cluster in enumerate(clusters):
            H[i, cluster] += 1.0
    if not hasattr(self, "predict_representation"):
        h = tensor.matrix(name="h")
        x = tensor.matrix(name="x")
        kxb = self._kernel(x, self._budget)
        kxbp = 0.5 * (numpy.abs(kxb) + kxb)
        kxbn = 0.5 * (numpy.abs(kxb) - kxb)
        a = tensor.dot(h, tensor.dot(self.W, self.kbn))
        b = tensor.dot(kxbp + a, self.W.T)
        c = tensor.dot(h, tensor.dot(self.W, self.kbp))
        d = tensor.dot(kxbn + c, self.W.T)
        e = h * tensor.sqrt(b / (d + self.lambda_vals))
        f = tensor.maximum(e, eps)
        self.predict_representation = function([x, h], f)
    keep_training = True
    if not isfinite(H):
        raise Exception("NaN or Inf in H")
    while keep_training:
        H = self.predict_representation(X, H)
        if not isfinite(H):
            raise Exception("NaN or Inf in H")
        keep_training = termination_criterion.continue_learning(self)
    return H
def argmin(x, axis=None, keepdims=False): return T.argmin(x, axis=axis, keepdims=keepdims)
def main(model='mlp', num_epochs=500):
    # Load the dataset
    print("Loading data...")
    num_per_class = 100
    print("Using %d per class" % num_per_class)
    X_train, y_train, X_test, y_test = load_data("/X_train.npy", "/Y_train.npy", "/X_test.npy", "/Y_test.npy")
    X_train_final = []
    y_train_final = []
    for i in range(10):
        X_train_class = X_train[y_train == i]
        # permutated_index = np.random.permutation(X_train_class.shape[0])
        permutated_index = np.arange(X_train_class.shape[0])
        X_train_final.append(X_train_class[permutated_index[:100]])
        y_train_final += [i] * num_per_class
    X_train = np.vstack(X_train_final)
    y_train = np.array(y_train_final, dtype=np.int32)
    X_train = extend_image(X_train, 40)
    X_test = extend_image(X_test, 40)
    # X_train, y_train, X_test, y_test = load_data("/cluttered_train_x.npy", "/cluttered_train_y.npy",
    #                                              "/cluttered_test_x.npy", "/cluttered_test_y.npy",
    #                                              dataset="MNIST_CLUTTER")

    # Prepare Theano variables for inputs and targets
    nRotation = 8
    # The dimension would be (nRotation * n, w, h)
    input_var = T.tensor4('inputs')
    # The dimension would be (n, )
    vanilla_target_var = T.ivector('vanilla_targets')
    # The dimension would be (nRotation * n, )
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    network = build_cnn(input_var)
    saved_weights = np.load("../data/mnist_Chi_dec_100.npy")
    lasagne.layers.set_all_param_values(network, saved_weights)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    # The dimension would be (nRotation * n, 10)
    predictions = lasagne.layers.get_output(network)
    # The dimension would be (nRotation * n, 10)
    one_hot_targets = T.extra_ops.to_one_hot(target_var, 10)
    # rests = [T.reshape(predictions[i][(1 - one_hot_targets).nonzero()], (-1, 9)) for i in range(nRotation)]
    rests = T.reshape(predictions[(1 - one_hot_targets).nonzero()], (nRotation, -1, 9))
    # a list of $nRotation tensors, each tensor is of shape (n, 1)
    rests = [T.max(rests[i], axis=1) for i in range(nRotation)]
    latent_vector = T.argmin(T.as_tensor_variable(rests), axis=0)
    # selected_index is a (n, nRotation) matrix of binary values.
    selected_index = T.extra_ops.to_one_hot(latent_vector, nRotation)
    predictions = T.reshape(predictions, (nRotation, -1, 10))
    predictions = T.as_tensor_variable(predictions).swapaxes(0, 1)
    predictions = predictions[selected_index.nonzero()]
    # loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = lasagne.objectives.multiclass_hinge_loss(predictions, vanilla_target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    # updates = lasagne.updates.nesterov_momentum(
    #     loss, params, learning_rate=0.01, momentum=0.9)
    updates = lasagne.updates.adagrad(loss, params, learning_rate=0.01)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction = T.reshape(test_prediction, (nRotation, -1, 10))
    test_prediction_max = test_prediction.max(axis=2)
    rotation_index = T.extra_ops.to_one_hot(T.argmax(test_prediction_max, axis=0), nRotation)
    test_prediction = test_prediction.swapaxes(0, 1)[rotation_index.nonzero()]
    test_loss = lasagne.objectives.multiclass_hinge_loss(test_prediction, vanilla_target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), vanilla_target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var, vanilla_target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, vanilla_target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 100, shuffle=True):
            inputs, targets = batch
            inputs = inputs.reshape(100, 40, 40)
            inputs = rotateImage_batch(inputs, nRotation).reshape(100 * nRotation, 1, 40, 40)
            duplicated_targets = np.array([targets for i in range(nRotation)]).reshape(100 * nRotation,)
            train_err += train_fn(inputs, duplicated_targets, targets)
            train_batches += 1
        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        if epoch % 5 == 0:
            # After training, we compute and print the test error:
            test_err = 0
            test_acc = 0
            test_batches = 0
            for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
                inputs, targets = batch
                inputs = inputs.reshape(500, 40, 40)
                inputs = rotateImage_batch(inputs, nRotation).reshape(500 * nRotation, 1, 40, 40)
                err, acc = val_fn(inputs, targets)
                test_err += err
                test_acc += acc
                test_batches += 1
            print("Final results:")
            print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
            print("  test accuracy:\t\t{:.2f} %".format(test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
    weightsOfParams = lasagne.layers.get_all_param_values(network)
    # np.save("../data/mnist_clutter_CNN_params_sigmoid.npy", weightsOfParams)
    # np.save("../data/mnist_CNN_params_sigmoid.npy", weightsOfParams)
    # np.save("../data/mnist_CNN_params.npy", weightsOfParams)
    # np.save("../data/mnist_CNN_params_drop_out_semi_Chi_Dec7.npy", weightsOfParams)
    np.save("../data/mnist_CNN_params_drop_out_Chi_2017.npy", weightsOfParams)
n = 5000  # number of candidates
m = 1000  # number of targets
f = 500   # number of features

x = T.matrix('x')  # candidates
y = T.matrix('y')  # targets

xL2S = T.sum(x*x, axis=-1)      # [n]
yL2S = T.sum(y*y, axis=-1)      # [m]
xL2SM = T.zeros((m, n)) + xL2S  # broadcasting, [m, n]
yL2SM = T.zeros((n, m)) + yL2S  # broadcasting, [n, m]
squaredPairwiseDistances = xL2SM.T + yL2SM - 2.0 * T.dot(x, y.T)  # [n, m]

np.random.seed(1)
N = randomMatrix(n, f)
M = randomMatrix(m, f)

lamblinsTrick = True
if lamblinsTrick:
    # from https://github.com/Theano/Theano/issues/1399
    s = squaredPairwiseDistances
    bestIndices = T.cast(
        (T.arange(n).dimshuffle(0, 'x')
         * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32')).sum(axis=0),
        'int32')
else:
    bestIndices = T.argmin(squaredPairwiseDistances, axis=0)

nearests_fn = theano.function([x, y], bestIndices, profile=True)

print nearests_fn(N, M).sum()
def min_dist(self, data): return T.argmin(T.sqrt(T.sum((data - self.W)**2, 1)))
def buildAttenBased(d_in, d_out, LR, alpha):
    # fw_lstm_doc: forward LSTM of document encoding
    # bw_lstm_doc: backward LSTM of document encoding
    # fw_lstm_que: forward LSTM of question encoding
    # bw_lstm_que: backward LSTM of question encoding
    # y: words encoding, total t words, so t vectors
    # u: question encoding, only one vector
    x_seq = T.matrix('x_seq')
    q_seq = T.matrix('q_seq')
    fw_lstm_doc = LSTM(d_in, d_out, LR, alpha, 0, x_seq)
    bw_lstm_doc = LSTM(d_in, d_out, LR, alpha, 0, x_seq[::-1])
    fw_lstm_que = LSTM(d_in, d_out, LR, alpha, 0, q_seq)
    bw_lstm_que = LSTM(d_in, d_out, LR, alpha, 0, q_seq[::-1])
    y = T.concatenate([fw_lstm_doc.output_encoding(),
                       bw_lstm_doc.output_encoding()[::-1]], axis=1)
    u = T.concatenate([[fw_lstm_que.output_encoding()[-1]],
                       [bw_lstm_que.output_encoding()[-1]]], axis=1)

    Wym = shared_uniform("Wym", 2*d_out, 2*d_out)
    Wum = shared_uniform("Wum", 2*d_out, 2*d_out)
    wms = shared_uniform("wms", 2*d_out)
    Wrg = shared_uniform("Wrg", 2*d_out, 2*d_out)
    Wug = shared_uniform("Wug", 2*d_out, 2*d_out)

    yT = T.transpose(y)
    uT = T.transpose(u)
    um = T.dot(Wum, uT)
    m = T.tanh(T.dot(Wym, yT) + um)

    # s is a vector (t,)
    mT = T.transpose(m)
    s = T.exp(T.sum(wms * mT, axis=1))
    s = s / T.sqrt(T.sum(s**2))

    # r is a vector (2*d_out,)
    # ug is (1, 2*d_out)
    # g is a vector (2*d_out,)
    r = T.dot(T.transpose(y), s)
    ug = T.transpose(T.dot(Wug, uT))
    g = T.sum(T.tanh(T.dot(Wrg, r) + ug), axis=0)

    g_hat = T.vector('g_hat')

    ### Cost Function ###
    cost = T.sum((g - g_hat)**2)

    params = [Wym, Wum, wms, Wrg, Wug]
    params.extend(fw_lstm_doc.params)
    params.extend(bw_lstm_doc.params)
    params.extend(fw_lstm_que.params)
    params.extend(bw_lstm_que.params)

    ### Calculate Gradients ###
    gradients = T.grad(cost, params)

    ### Model Functions ###
    train_model = theano.function(
        inputs=[x_seq, q_seq, g_hat],
        updates=fw_lstm_doc.rmsprop(params, gradients, LR),
        outputs=[cost],
        allow_input_downcast=True
    )
    test_model = theano.function(
        inputs=[x_seq, q_seq],
        outputs=g,
        allow_input_downcast=True
    )

    A = T.vector('A_Opt')
    B = T.vector('B_Opt')
    C = T.vector('C_Opt')
    D = T.vector('D_Opt')
    ser = [T.sum((g - A)**2), T.sum((g - B)**2),
           T.sum((g - C)**2), T.sum((g - D)**2)]
    opt = T.argmin(ser)
    testAns_model = theano.function(
        inputs=[x_seq, q_seq, A, B, C, D],
        outputs=opt,
        allow_input_downcast=True
    )

    return train_model, test_model, testAns_model
w_2 = init_weights((h_size, y_size))

# Forward propagation
yhat = forwardprop(X, w_1, w_2)

# Backward propagation
cost = T.mean(T.nnet.categorical_crossentropy(yhat, Y))
params = [w_1, w_2]
updates = backprop(cost, params)

# Train and predict
train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
pred_y = T.argmin(yhat, axis=1)
predict = theano.function(inputs=[X], outputs=pred_y, allow_input_downcast=True)

# Run SGD
"""for iter in range(500):
    train(train_X, train_y)
    train_accuracy = np.mean(np.argmax(train_y, axis=1) == predict(train_X))
    test_accuracy = np.mean(np.argmax(test_y, axis=1) == predict(test_X))
    print predict(test_X)
    print("Iteration = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
          % (iter + 1, 100 * train_accuracy, 100 * test_accuracy))
    break"""
train(train_X, train_y)
def argmin(x, axis=-1): return T.argmin(x, axis=axis, keepdims=False)
def constructMinimalDistancesVariable(x, y, initials, n, m):
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    bestIndices = T.argmin(sDistances, axis=0)
    bestXes = x[bestIndices]
    bestInitials = initials[bestIndices]
    return bestXes, bestInitials
def get_time_range(self, train):
    mask = self.get_input_mask(train=train)
    ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32')
    self.time_range = ind
    return ind
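# A small standalone check (assumed 0/1 masks) of the length trick used above:
# T.argmin(mask, axis=-1) returns the position of the first 0, i.e. the
# sequence length, and the T.switch handles rows that contain no padding.
import numpy as np
import theano
import theano.tensor as T

mask = T.matrix('mask')
ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32')
lengths = theano.function([mask], ind)

m = np.array([[1, 1, 1, 0, 0],
              [1, 1, 1, 1, 1]], dtype=theano.config.floatX)
print(lengths(m))   # -> [3 5]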
def pred(self, output):
    W1 = 0.5
    W2 = 0.5
    pred = T.argmin(W1 * T.arccos(T.dot(output, mappings_vec.T)
                                  / (absolute(output) * absolute(mappings_vec, axis=1)))
                    + W2 * (absolute(mappings_vec, axis=1) - absolute(output)))
    return pred