Example #1
	def init_count_window_bigrams(self, train_stories, window_size, batch_size):

		window = T.matrix('window', dtype='int32')
		window.tag.test_value = rng.randint(self.lexicon_size, size=(window_size, 100)).astype('int32')
		window.tag.test_value[1, 10] = -1
		window.tag.test_value[:, 0] = -1
		window.tag.test_value[-1, 1] = -1

		words1 = window[0]
		words2 = window[1:].T

		word_index = T.scalar('word_index', dtype='int32')
		word_index.tag.test_value = 0
		batch_index = T.scalar('batch_index', dtype='int32')
		batch_index.tag.test_value = 0

		#select words in sequence and batch
		window_ = train_stories[word_index:word_index + window_size, batch_index:batch_index + batch_size]
		#filter stories with all empty words from this batch
		window_ = window_[:, T.argmin(window_[0] < 0):]

		self.count_window_bigrams = theano.function(inputs=[word_index, batch_index],\
													outputs=[words1, words2],\
													givens={window: window_},\
													on_unused_input='ignore',\
													allow_input_downcast=True)
Example #2
    def get_output(self, train=False):
        X = self.get_input(train)
        # mask = self.get_padded_shuffled_mask(train, X, pad=0)
        mask = self.get_input_mask(train=train)
        ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32').ravel()
        max_time = T.max(ind)
        X = X.dimshuffle((1, 0, 2))
        Y = T.dot(X, self.W) + self.b
        # h0 = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        h0 = T.repeat(self.h_m1, X.shape[1], axis=0)
        c0 = T.repeat(self.c_m1, X.shape[1], axis=0)

        [outputs, _], updates = theano.scan(
            self._step,
            sequences=Y,
            outputs_info=[h0, c0],
            non_sequences=[self.R], n_steps=max_time,
            truncate_gradient=self.truncate_gradient, strict=True,
            allow_gc=theano.config.scan.allow_gc)

        res = T.concatenate([h0.dimshuffle('x', 0, 1), outputs], axis=0).dimshuffle((1, 0, 2))
        if self.return_sequences:
            return res
        #return outputs[-1]
        return res[T.arange(mask.shape[0], dtype='int32'), ind]
Example #3
 def _match(self, sample):
     diff = (T.sqr(self.codebook)).sum(
         axis=1, keepdims=True) + (T.sqr(sample)).sum(
             axis=1, keepdims=True) - 2 * T.dot(self.codebook, sample.T)
     bmu = T.argmin(diff)
     err = T.min(diff)
     return err, bmu
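The squared-distance expansion used in _match above (||w||^2 + ||x||^2 - 2*w.dot(x.T)) can also be compiled on its own; a minimal self-contained sketch with a made-up 16x3 codebook, not taken from the class above:

import numpy as np
import theano
import theano.tensor as T

# Hypothetical shapes: 16 code vectors of dimension 3, one query row.
codebook = theano.shared(
    np.random.randn(16, 3).astype(theano.config.floatX), name='codebook')
sample = T.matrix('sample')  # shape (1, 3)

diff = (T.sqr(codebook)).sum(axis=1, keepdims=True) \
    + (T.sqr(sample)).sum(axis=1, keepdims=True) \
    - 2 * T.dot(codebook, sample.T)
match = theano.function([sample], [T.argmin(diff), T.min(diff)],
                        allow_input_downcast=True)

bmu, err = match(np.random.randn(1, 3))  # index of the closest code vector and its squared distance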
Example #4
    def get_output(self, train=False):
        X = self.get_input(train)
        # mask = self.get_padded_shuffled_mask(train, X, pad=0)
        mask = self.get_input_mask(train=train)
        ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1],
                       T.argmin(mask, axis=-1)).astype('int32').ravel()
        max_time = T.max(ind)
        X = X.dimshuffle((1, 0, 2))
        Y = T.dot(X, self.W) + self.b
        # h0 = T.unbroadcast(alloc_zeros_matrix(X.shape[1], self.output_dim), 1)
        h0 = T.repeat(self.h_m1, X.shape[1], axis=0)
        c0 = T.repeat(self.c_m1, X.shape[1], axis=0)

        [outputs,
         _], updates = theano.scan(self._step,
                                   sequences=Y,
                                   outputs_info=[h0, c0],
                                   non_sequences=[self.R],
                                   n_steps=max_time,
                                   truncate_gradient=self.truncate_gradient,
                                   strict=True,
                                   allow_gc=theano.config.scan.allow_gc)

        res = T.concatenate([h0.dimshuffle('x', 0, 1), outputs],
                            axis=0).dimshuffle((1, 0, 2))
        if self.return_sequences:
            return res
        # return outputs[-1]
        return res[T.arange(mask.shape[0], dtype='int32'), ind]
Example #5
    def __init__(self, input, n_in, n_out):

        self.W = theano.shared(
            value=np.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )

        self.b = theano.shared(
            value=np.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        self.cost_predicted_given_x = T.dot(input, self.W) + self.b

        self.y_pred = T.argmin(self.cost_predicted_given_x, axis=1)

        self.params = [self.W, self.b]

        self.input = input
        self.y = T.ivector('y')
        self.cost_vector = T.matrix('cost_vector')

        self.MSE = T.mean((self.cost_predicted_given_x - self.cost_vector) ** 2)

        self.error = T.mean(T.neq(self.y_pred, self.y))

        self.future_cost = T.sum(self.cost_vector[T.arange(self.y_pred.shape[0]), self.y_pred])
Example #6
    def __init__(self, cooccurrence, z_k):
        eps = 1e-9
        self.z_k = z_k
        n = cooccurrence.shape[0]
        self.n = n
        h = cooccurrence.astype(np.float32)
        p = h / np.sum(h, axis=None)
        pc = T.constant(p, name="p")

        z_init = np.random.random_integers(0, z_k - 1, (n,)).astype(np.int32)
        z = theano.shared(z_init, name="z")
        self.z = z

        c = T.zeros((z_k, n), dtype='float32')  # (z_k, n)
        c = T.set_subtensor(c[z, T.arange(c.shape[1])], 1)

        pyz = T.dot(c, pc)  # (z_k, x_k)
        marg = T.sum(pyz, axis=1, keepdims=True)
        cond = pyz / (marg + eps)
        nll = -T.sum(pyz * T.log(eps + cond), axis=None)  # scalar

        nllyzr = T.transpose(-T.log(eps + cond), (1, 0))  # (x_k, z_k)
        losses = T.dot(pc, nllyzr)  # (x_k, z_k)
        nz = T.cast(T.argmin(losses, axis=1), 'int32')  # (x_k,)
        updates = [(z, nz)]

        flag = T.gt(T.sum(T.neq(z, nz)), 0)

        self.train_fun = theano.function([], [nll, flag], updates=updates)
        self.val_fun = theano.function([], nll)
Example #7
 def _get_cluster_symbol(self):
     output = self._get_output_symbol()
     Y_hat = T.reshape(output, (self.batch, self.y_n, self.k))
     y = self._get_y_symbol()
     Y = T.tile(y[:, :, None], (1, 1, self.k))
     diff = T.mean((Y - Y_hat)**2, axis=1)
     cluster = T.argmin(diff, axis=1)
     return cluster
Example #8
    def get_output_for(self, inputs):
        A = inputs[0]
        X = inputs[1]

        max_degree_node = T.argmax(A.sum(0))
        min_degree_node = T.argmin(A.sum(0))

        return self.reduce(A, [max_degree_node, min_degree_node])
Example #9
def batch_get_nearest_neighbours(samples, dataset):
    sample = Te.matrix(name="sample")
    data = Te.matrix(name="dataset")
    find_nearest_neighbour = theano.function(name="find_nearest_neighbour",
                                             inputs=[sample],
                                             outputs=data[Te.argmin(Te.sum((data[:, None, :] - sample) ** 2, axis=2), axis=0)],
                                             givens={data: dataset['train']['data']})
    return find_nearest_neighbour(samples)
Example #10
 def _get_cluster_symbol(self):
     output = self._get_output_symbol()
     Y_hat = T.reshape(output, (self.batch, self.y_n, self.k))
     y = self._get_y_symbol()
     Y = T.tile(y[:, :, None], (1, 1, self.k))
     diff = T.mean((Y - Y_hat)**2, axis=1)
     cluster = T.argmin(diff, axis=1)
     return cluster
Example #11
 def perform(self):
     mask = self.mask
     assert mask.ndim == 2, 'Only 2D mask are supported'
     ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32')
     for is_train in [True, False]:
         y = self.get_input(is_train)
         res = y[T.arange(mask.shape[0], dtype='int32'), ind]
         self.output_vars[is_train] = [res]
Example #12
 def dtw(i, q_p, b_p, Q, D, inf):
   i0 = T.eq(i, 0)
   # inf = T.cast(1e10,'float32') * T.cast(T.switch(T.eq(self.n,0), T.switch(T.eq(i,0), 0, 1), 1), 'float32')
   penalty = T.switch(T.and_(T.neg(n0), i0), big, T.constant(0.0, 'float32'))
   loop = T.constant(0.0, 'float32') + q_p
   forward = T.constant(0.0, 'float32') + T.switch(T.or_(n0, i0), 0, Q[i - 1])
   opt = T.stack([loop, forward])
   k_out = T.cast(T.argmin(opt, axis=0), 'int32')
   return opt[k_out, T.arange(opt.shape[1])] + D[i] + penalty, k_out
Example #13
    def get_output_for(self, inputs):
        A = inputs[0]

        eigenvals_eigenvecs = T.nlinalg.eig(A)

        smallest_eigenval_index = T.argmin(eigenvals_eigenvecs[0])
        smallest_eigenvec = eigenvals_eigenvecs[1][smallest_eigenval_index]

        return smallest_eigenvec
Example #14
 def dtw(i, q_p, b_p, Q, D, inf):
   i0 = T.eq(i, 0)
   # inf = T.cast(1e10,'float32') * T.cast(T.switch(T.eq(self.n,0), T.switch(T.eq(i,0), 0, 1), 1), 'float32')
   penalty = T.switch(T.and_(T.neg(n0), i0), big, T.constant(0.0, 'float32'))
   loop = T.constant(0.0, 'float32') + q_p
   forward = T.constant(0.0, 'float32') + T.switch(T.or_(n0, i0), 0, Q[i - 1])
   opt = T.stack([loop, forward])
   k_out = T.cast(T.argmin(opt, axis=0), 'int32')
   return opt[k_out, T.arange(opt.shape[1])] + D[i] + penalty, k_out
Example #15
def batch_get_nearest_neighbours(samples, dataset):
    sample = Te.matrix(name="sample")
    data = Te.matrix(name="dataset")
    find_nearest_neighbour = theano.function(
        name="find_nearest_neighbour",
        inputs=[sample],
        outputs=data[Te.argmin(Te.sum((data[:, None, :] - sample)**2, axis=2),
                               axis=0)],
        givens={data: dataset['train']['data']})
    return find_nearest_neighbour(samples)
Example #16
    def get_min_index(self,origin):
        """ This function computes the cost and the updates for one trainng
        step of the RAE """
        merge_input = self.get_input_values(origin)
        encode = self.get_hidden_values(merge_input)
        decode = self.get_reconstructed_input(encode)

        L = T.sum((0.5*numpy.array(decode-merge_input)**2), axis=1)
        min_index=T.argmin(L)
        return (min_index,merge_input[min_index],encode[min_index])
Example #17
    def indexing_bilinear(self):
        # Declare variables
        a = tt.dvector()
        b = tt.dscalar()

        # Build symbolic expression
        out = tt.argmin(tt.abs_(a - b))

        # Compile function
        self.tt_index_array = function([a, b], out)
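A hedged usage sketch of the same compiled lookup outside the class (the grid and query value below are made up):

import numpy as np
from theano import function
import theano.tensor as tt

a = tt.dvector()
b = tt.dscalar()
nearest_index = function([a, b], tt.argmin(tt.abs_(a - b)))

grid = np.linspace(400.0, 700.0, 301)  # made-up 1-unit grid
idx = nearest_index(grid, 532.25)      # index of the grid point closest to 532.25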
Example #18
    def __init__(self, input, n_in, n_out):
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        self.W = theano.shared(
            value=np.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(
            value=np.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # keep track of model input
        self.input = input
        # symbolic variable of cost vector in terms of a single example;
        # for a batch of examples, it's a matrix, so don't get confused
        # with the variable name `cost_vector`
        self.cost_vector = T.matrix('cost_vector')
        # symbolic variable of Z_{n,k}
        self.Z_nk = T.matrix('Z_nk')

        self.cost_predicted_given_x = T.dot(self.input, self.W) + self.b

        # elementwise comparison with 0
        self.xi = T.maximum((self.Z_nk * (self.cost_predicted_given_x - self.cost_vector)), 0.)

        # define the linear one-sided regression loss
        self.one_sided_regression_loss = T.sum(self.xi)

        # symbolic description of how to compute prediction as class whose
        # cost is minimum
        self.y_pred = T.argmin(self.cost_predicted_given_x, axis=1)

        # parameters of the model
        self.params = [self.W, self.b]

        # symbolic variable of labels, will only be used for computing 0/1 errors
        self.y = T.ivector('y')
        # compute the 0/1 loss
        self.error = T.mean(T.neq(self.y_pred, self.y))

        # when a new example comes in, the model first computes (predicts) its
        # cost on classifying into each class (a vector) by
        # `self.cost_predicted_given_x`; then the model will predict this new
        # example as label with the smallest cost;
        # self.future_cost = T.sum(self.cost_vector[T.arange(self.y_pred.shape[0]), self.y_pred])
        self.future_cost = T.mean(self.cost_vector[T.arange(self.y_pred.shape[0]), self.y_pred])
Example #19
 def init_H(self):
     if not hasattr(self, "_clusters"):
         a = (self.W * tensor.dot(self.W, self._kernel_matrix)).sum(axis=1) \
             - 2.0 * tensor.dot(self._kernel_matrix, self.W.T)
         b = tensor.argmin(a, axis=1)
         self._clusters = function([], b)
     H = .2 * numpy.ones((self._data_size, self._num_latent_topics)).astype(self.W.dtype)
     clusters = self._clusters()
     for i, cluster in enumerate(clusters):
         H[i, cluster] += 1.0
     self.H.set_value(H)
Example #20
def get_nearest_neighbours(samples, dataset):
    sample = Te.vector(name="sample")
    data = Te.matrix(name="dataset")
    find_nearest_neighbour = theano.function(name="find_nearest_neighbour",
                                             inputs=[sample],
                                             outputs=data[Te.argmin(Te.sum((data - sample) ** 2, axis=1))],
                                             givens={data: dataset['train']['data']})
    neighbours = []
    for s in samples:
        neighbours += [find_nearest_neighbour(s)]
    return neighbours
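A minimal driver for the function above, assuming the dataset['train']['data'] layout implied by the givens (shapes and values made up here):

import numpy as np
import theano

# Toy stand-ins for the project's dataset dict: 100 training rows, 8 features,
# stored as a shared variable so it can be substituted via givens.
dataset = {'train': {'data': theano.shared(
    np.random.randn(100, 8).astype(theano.config.floatX))}}
samples = np.random.randn(5, 8).astype(theano.config.floatX)

neighbours = get_nearest_neighbours(samples, dataset)  # list of 5 rows, each the closest training example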
Example #21
def get_nearest_neighbours(samples, dataset):
    sample = Te.vector(name="sample")
    data = Te.matrix(name="dataset")
    find_nearest_neighbour = theano.function(
        name="find_nearest_neighbour",
        inputs=[sample],
        outputs=data[Te.argmin(Te.sum((data - sample)**2, axis=1))],
        givens={data: dataset['train']['data']})
    neighbours = []
    for s in samples:
        neighbours += [find_nearest_neighbour(s)]
    return neighbours
Example #22
 def greed_step(self, vec_len, node_index, seq_index, vectors, path):
     hs, _ = theano.scan(fn=self.compose_step,
                         sequences=T.arange(vec_len - 1),
                         non_sequences=vectors,
                         name="compose_phrase")
     comp_vec = hs[0]
     comp_rec = hs[1]
     min_index = T.argmin(comp_rec)
     T.set_subtensor(vectors[min_index:-1], vectors[min_index + 1:])
     T.set_subtensor(vectors[min_index], comp_vec[min_index])
     T.set_subtensor(path[seq_index],
                     T.concatenate([min_index, min_index + 1, node_index]))
     return vectors, min_index
Example #23
def asymMSE(y_true, y_pred):
    d = y_true-y_pred                              # Get prediction errors
    mins = T.argmin(y_true,axis=1)                 # Get indices of optimal action
    mins_onehot = T.extra_ops.to_one_hot(mins,5)   # Convert min index to one hot array 
    others_onehot = mins_onehot-1                  # Get the indices of the non-optimal actions, which will be -1's
    d_opt = d*mins_onehot                          # Get the error of the optimal action
    d_sub = d*others_onehot                        # Get the errors of the non-optimal actions
    a = 160*d_opt**2                               # 160 times error of optimal action squared
    b = d_opt**2                                   # 1   times error of optimal action squared
    c = 40*d_sub**2                                # 40  times error of suboptimal actions squared
    d = d_sub**2                                   # 1   times error of suboptimal actions squared
    l = T.switch(d_sub<0,c,d) + T.switch(d_opt<0,a,b) #This chooses which errors to use depending on the sign of the errors
                                                      #If true, use the steeper penalty. If false, use the milder penalty
    return l
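Compiling the loss above makes the asymmetry easy to inspect; a small sketch assuming asymMSE is in scope and there are five actions, matching the to_one_hot(mins, 5) call:

import numpy as np
import theano
import theano.tensor as T

y_true = T.matrix('y_true')
y_pred = T.matrix('y_pred')
loss_fn = theano.function([y_true, y_pred], asymMSE(y_true, y_pred))

yt = np.random.randn(4, 5).astype(theano.config.floatX)  # made-up target action values
yp = np.random.randn(4, 5).astype(theano.config.floatX)  # made-up predictions
loss_fn(yt, yp)  # per-element penalties, shape (4, 5)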
Example #24
    def __init__(self, data, dims, cluster_penalty=2.1):

        self.dims = dims
        self.cluster_penalty = cluster_penalty

        self.n_clusters = theano.shared(value=np.int(1), name='n_clusters')

        self.data = theano.shared(value=data, name="data")

        self.indicators = theano.shared(value=np.ones(self.dims[0],
                                                      dtype='uint32'),
                                        name="indices")

        mu_init = np.zeros((self.dims[0], 3))
        mu_init[1, :] = data.mean(axis=0)

        self.mu = theano.shared(value=mu_init, name="mu")

        t_idx = T.iscalar('t_idx')
        t_vec = T.vector('t_vec')

        self.D_i_c = (self.euclidean_dist(self.data,
                                          self.mu)[:, :self.n_clusters])**2
        self.min_dic = T.min(self.D_i_c, axis=1) > self.cluster_penalty

        self.getDics = theano.function(inputs=[], outputs=self.min_dic)

        self.updateClusters = theano.function(
            inputs=[t_idx],
            updates=[ \
                (self.indicators, T.set_subtensor(self.indicators[t_idx], self.n_clusters)), \
                (self.mu, T.set_subtensor(self.mu[self.n_clusters], self.data[t_idx])), \
                (self.n_clusters, self.n_clusters+1)
            ]
        )

        self.updateIndicators = theano.function(
            inputs=[t_idx],
            updates=[ \
                (self.indicators, T.set_subtensor(self.indicators[t_idx], T.argmin(self.D_i_c[t_idx])))
            ]
        )

        self.getLkFromIdx = theano.function(inputs=[t_idx],
                                            outputs=self.getLk(t_idx))

        self.getMu = theano.function(inputs=[], outputs=self.mu)

        self.getNClusters = theano.function(inputs=[], outputs=self.n_clusters)
Example #25
def constructMinimalDistanceIndicesVariable(x, y, n, m):
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    lamblinsTrick = False
    if lamblinsTrick:
        # https://github.com/Theano/Theano/issues/1399
        # https://gist.github.com/danielvarga/d0eeacea92e65b19188c
        # https://groups.google.com/forum/#!topic/theano-users/E7ProqnGUMk
        s = sDistances
        bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
        # This is a heavy-handed workaround for the fact that in
        # lamblin's hack, ties lead to completely screwed results.
        bestIndices = T.clip(bestIndices, 0, n-1)
    else:
        bestIndices = T.argmin(sDistances, axis=0)
    return bestIndices
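constructSquaredDistanceMatrixVariable is not shown in this example; a plausible sketch, assuming the same ||x||^2 + ||y||^2 - 2*x.dot(y.T) expansion used in the standalone script further down:

import theano.tensor as T

def constructSquaredDistanceMatrixVariable(x, y, n, m):
    # Hypothetical helper: x is (n, f) candidates, y is (m, f) targets;
    # returns the (n, m) matrix of squared Euclidean distances.
    xL2S = T.sum(x * x, axis=-1)    # (n,)
    yL2S = T.sum(y * y, axis=-1)    # (m,)
    xL2SM = T.zeros((m, n)) + xL2S  # broadcast -> (m, n)
    yL2SM = T.zeros((n, m)) + yL2S  # broadcast -> (n, m)
    return xL2SM.T + yL2SM - 2.0 * T.dot(x, y.T)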
Example #26
    def __init__(self, y, uv, params):
        self.layer0_W_y, self.layer0_b_y = params[0]
        self.layer0_W_uv, self.layer0_b_uv = params[1]
        self.layer1_W, self.layer1_b = params[2]
        self.layer2_W = params[3]
        self.layer3_W, self.layer3_b = params[4]
        self.layer4_W, self.layer4_b = params[5]

        poolsize = (2, 2)
        # layer0_y: conv-maxpooling-tanh
        layer0_y_conv = conv.conv2d(input=y, filters=self.layer0_W_y,
                border_mode='full')
        layer0_y_pool = downsample.max_pool_2d(input=layer0_y_conv,
                ds=poolsize, ignore_border=True)
        layer0_y_out = T.tanh(layer0_y_pool + \
                self.layer0_b_y.reshape(1, -1, 1, 1))

        # layer0_uv: conv-maxpooling-tanh
        layer0_uv_conv = conv.conv2d(input=uv, filters=self.layer0_W_uv,
                border_mode='full')
        layer0_uv_pool = downsample.max_pool_2d(input=layer0_uv_conv,
                ds=poolsize, ignore_border=True)
        layer0_uv_out = T.tanh(layer0_uv_pool + \
                self.layer0_b_uv.reshape(1, -1, 1, 1))

        layer1_input = T.concatenate((layer0_y_out, layer0_uv_out), axis=1)

        # layer1: conv-maxpooling-tanh
        layer1_conv = conv.conv2d(input=layer1_input, filters=self.layer1_W,
                border_mode='full')
        layer1_pool = downsample.max_pool_2d(input=layer1_conv,
                ds=poolsize, ignore_border=True)
        layer1_out = T.tanh(layer1_pool + self.layer1_b.reshape(1, -1, 1, 1))

        # layer2: conv
        layer2_out = conv.conv2d(input=layer1_out, filters=self.layer2_W,
                border_mode='valid')

        layer3_input = layer2_out.reshape((256, -1)).dimshuffle(1, 0)

        # layer3: hidden-layer
        layer3_lin = T.dot(layer3_input, self.layer3_W) + self.layer3_b
        layer3_out = T.tanh(layer3_lin)

        # layer4: logistic-regression
        layer4_out = T.nnet.softmax(T.dot(layer3_out, self.layer4_W) + \
                self.layer4_b)
        self.pred = T.argmin(layer4_out, axis=1)
Example #27
def straight_through(p, u):


    sts = StraightThroughSampler()

    cum = T.extra_ops.cumsum(p, axis = 1) - T.addbroadcast(T.reshape(u, (u.shape[0], 1)),1)

    cum = T.switch(T.lt(cum, 0.0), 10.0, cum)

    ideal_bucket = T.argmin(cum, axis = 1)

    one_hot = T.extra_ops.to_one_hot(ideal_bucket, 4)

    y = sts(p, one_hot)

    return y
Example #28
    def __init__(self, cooccurrence, z_k):
        eps = 1e-9
        self.z_k = z_k
        n = cooccurrence.shape[0]
        self.n = n
        h = cooccurrence.astype(np.float32)
        p = h / np.sum(h, axis=None)
        pc = T.constant(p, name="p")

        z_init = np.random.random_integers(0, z_k - 1, (n, ))
        zt = theano.shared(z_init)
        self.z_shared = zt

        idx = T.iscalar(name='idx')
        c = T.zeros((z_k, n), dtype='float32')  # (z_k, n)
        c = T.set_subtensor(c[zt, T.arange(c.shape[1])], 1)
        c = T.set_subtensor(c[zt[idx], idx], 0)

        p_yz0 = T.dot(c, pc)  # (z_k, n) x (n, n) = (z_k, n)
        marg0 = T.sum(p_yz0, axis=1, keepdims=True)
        cond0 = p_yz0 / (eps + marg0)
        ent0 = -T.sum(p_yz0 * T.log(eps + cond0), axis=1)  # (z_k,)
        sum0 = T.sum(ent0, axis=0)

        p_yz1 = p_yz0 + (pc[idx, :].dimshuffle(('x', 0)))
        marg1 = T.sum(p_yz1, axis=1, keepdims=True)
        cond1 = p_yz1 / (eps + marg1)
        ent1 = -T.sum(p_yz1 * T.log(eps + cond1), axis=1)  # (z_k,)

        ed = ent1 - ent0  # (z_k)
        sel = T.argmin(ed)  # (scalar,)
        ztn = T.set_subtensor(zt[idx], sel)
        entn = sum0 + (ed[sel])
        changed = T.neq(sel, zt[idx])
        updates = [(zt, ztn)]

        self.train_fun = theano.function([idx], [entn, changed],
                                         updates=updates)

        c = T.zeros((z_k, n), dtype='float32')  # (z_k, n)
        c = T.set_subtensor(c[zt, T.arange(c.shape[1])], 1)
        p_yz = T.dot(c, pc)  # (z_k, n) x (n, n) = (z_k, n)
        marg = T.sum(p_yz, axis=1, keepdims=True)
        cond = p_yz / (eps + marg)
        ent = -T.sum(p_yz * T.log(eps + cond), axis=None)

        self.val_fun = theano.function([], ent)
Example #29
    def apply(self, y_hat):
        # reshape 1d vector to 2d matrix
        y_hat_2d = y_hat.reshape((y_hat.shape[0] / self.examples_group_size,
                                  self.examples_group_size))
        #y_hat_2d = tt.printing.Print("Y hat 2d in correct rank: ")(y_hat_2d)

        # sort each group by relevance
        # we sort the responses in decreasing order, that is why we multiply y_hat by -1
        sorting_indices = tt.argsort(-1 * y_hat_2d, axis=1)
        #sorting_indices = tt.printing.Print("sorting indices in correct rank: ")(sorting_indices)

        # check where is the ground truth whose index should be 0 in the original array
        correct_rank = tt.argmin(sorting_indices, axis=1) + 1
        #correct_rank = tt.printing.Print("correct rank: ")(correct_rank)
        correct_rank.name = "correct_rank"

        return correct_rank
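The argmin over sorting_indices works because the ground truth sits at column 0 of each group, so its position after the descending sort is its rank minus one; a toy check of that expression:

import numpy as np
import theano
import theano.tensor as tt

y_hat_2d = tt.matrix('y_hat_2d')
sorting_indices = tt.argsort(-1 * y_hat_2d, axis=1)
rank_fn = theano.function([y_hat_2d], tt.argmin(sorting_indices, axis=1) + 1)

# Column 0 holds the ground-truth score of each group.
rank_fn(np.array([[0.9, 0.1, 0.5],
                  [0.2, 0.8, 0.4]], dtype=theano.config.floatX))
# -> array([1, 3]): the ground truth ranks 1st in the first group, 3rd in the second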
Example #30
        def recurrence(x, cur_time, prev_hidden):
            act_modules = modules - T.argmin((cur_time % p)[::-1])

            update_indices = act_modules * sizeof_mod

            w_subtensor = self.wh[:update_indices]
            b_subtensor = self.bh[:update_indices]

            inp_updates = T.dot(x, self.wi)[:update_indices]
            pre_new_h = T.dot(w_subtensor,
                              prev_hidden) + inp_updates + b_subtensor
            new_h = T.set_subtensor(prev_hidden[:update_indices],
                                    h_nonlinearity(pre_new_h))

            out = nonlinearity(T.dot(new_h, self.wo) + self.bo)

            return (cur_time + 1.0), new_h, out
Example #31
    def find_perturb(perturbation):
        logits_os = model(inputs + (1 + over_shoot) * perturbation)
        y_pred = T.argmax(logits_os, axis=1)
        is_mistake = T.neq(y_pred, labels)
        current_ind = batch_indices[(1 - is_mistake).nonzero()]
        should_stop = T.all(is_mistake)

        # continue generating perturbation only for correctly classified
        inputs_subset = inputs[current_ind]
        perturbation_subset = perturbation[current_ind]
        labels_subset = labels[current_ind]
        batch_subset = T.arange(inputs_subset.shape[0])

        x_adv = inputs_subset + perturbation_subset
        logits = model(x_adv)
        corrects = logits[batch_subset, labels_subset]
        jac = jacobian(logits, x_adv, num_classes)

        # deepfool
        f = logits - T.shape_padright(corrects)
        w = jac - T.shape_padaxis(jac[batch_subset, labels_subset], axis=1)
        reduce_ind = range(2, inputs.ndim + 1)
        if norm == 'l2':
            dist = T.abs_(f) / w.norm(2, axis=reduce_ind)
        else:
            dist = T.abs_(f) / T.sum(T.abs_(w), axis=reduce_ind)
        # remove correct targets
        dist = T.set_subtensor(dist[batch_subset, labels_subset],
                               T.constant(np.inf))
        l = T.argmin(dist, axis=1)
        dist_l = dist[batch_subset, l].dimshuffle(0, 'x', 'x', 'x')
        # avoid numerical instability and clip max value
        if clip_dist is not None:
            dist_l = T.clip(dist_l, 0, clip_dist)
        w_l = w[batch_subset, l]
        if norm == 'l2':
            reduce_ind = range(1, inputs.ndim)
            perturbation_upd = dist_l * w_l / w_l.norm(
                2, reduce_ind, keepdims=True)
        else:
            perturbation_upd = dist_l * T.sgn(w_l)
        perturbation = ifelse(
            should_stop, perturbation,
            T.inc_subtensor(perturbation[current_ind], perturbation_upd))
        return perturbation, scan_module.until(should_stop)
Example #32
    def __init__(self,
                 weights,
                 neurons_topology,
                 learning_rate=0.1,
                 learning_rate_decay=0.985,
                 collaboration_sigma=1.0,
                 collaboration_sigma_decay=0.95,
                 verbosity=2):

        self._verbosity = verbosity
        self._history = []
        self.neurons_number = weights.shape[0]
        self.W_shar_mat = theano.shared(weights)
        self.D_shar_mat = theano.shared(neurons_topology)

        self.collaboration_sigma = theano.shared(collaboration_sigma)
        self.collaboration_sigma_decay = collaboration_sigma_decay

        self.x_row = T.vector("exemplar")
        self.x_mat = T.matrix("batch")

        self.learning_rate = theano.shared(learning_rate)
        self.learning_rate_decay = learning_rate_decay

        self.distance_from_y_row = ((T.sub(self.W_shar_mat,
                                           self.x_row)**2).sum(axis=1))
        self.closest_neuron_idx = T.argmin(self.distance_from_y_row)
        self.distances_from_closest_neuron = self.D_shar_mat[
            self.closest_neuron_idx]
        self.affinities_to_closest_neuron = T.exp(
            -self.distances_from_closest_neuron /
            (self.collaboration_sigma)**2)

        self.smoothed_distances_from_closest_neuron = T.mul(
            self.distance_from_y_row,
            G.disconnected_grad(self.affinities_to_closest_neuron))
        self.cost_scal = self.smoothed_distances_from_closest_neuron.sum()

        self.updates = sgd(self.cost_scal, [self.W_shar_mat],
                           learning_rate=self.learning_rate)
        self.update_neurons = theano.function([self.x_row],
                                              self.cost_scal,
                                              updates=self.updates)
Example #33
    def min_risk_choice(Posterior):

        #The Loss function is a function of the predictiveness profiles
        Preds = predictiveness_profiles(Models, K, num_M)
        
        Loss = ifelse(T.eq(Choice_type, 1), T.pow(1.0 - Preds,2), ifelse(T.eq(Choice_type, 2), T.abs_(1.0 - Preds), - Preds))             
        
        #Kroneckering Loss up num_Obs times (tile Loss, making it num_M by num_M*num_Obs)
        Loss = kron(T.ones((1,num_Obs)), Loss)        
        #Kroneckering up the Posterior, making it num_M by num_Obs*numM
        Posterior = kron(Posterior, T.ones((1,num_M)))

        #Dotting and reshaping down to give num_M by num_Obs expected loss matrix
        Expected_Loss = T.dot(T.ones((1,num_M)),Posterior*Loss)            
        Expected_Loss = T.reshape(Expected_Loss, (num_Obs,num_M)).T
        
        #Choice minimizes risk
        Choice = T.argmin(Expected_Loss, axis = 0) 
        return Choice 
Example #34
    def min_risk_choice(Posterior):

        #The Loss function is a function of the predictiveness profiles
        Preds = predictiveness_profiles(Models, K, num_M)

        Loss = ifelse(
            T.eq(Choice_type, 1), T.pow(1.0 - Preds, 2),
            ifelse(T.eq(Choice_type, 2), T.abs_(1.0 - Preds), -Preds))

        #Kroneckering Loss up num_Obs times (tile Loss, making it num_M by num_M*num_Obs)
        Loss = kron(T.ones((1, num_Obs)), Loss)
        #Kroneckering up the Posterior, making it num_M by num_Obs*numM
        Posterior = kron(Posterior, T.ones((1, num_M)))

        #Dotting and reshaping down to give num_M by num_Obs expected loss matrix
        Expected_Loss = T.dot(T.ones((1, num_M)), Posterior * Loss)
        Expected_Loss = T.reshape(Expected_Loss, (num_Obs, num_M)).T

        #Choice minimizes risk
        Choice = T.argmin(Expected_Loss, axis=0)
        return Choice
Example #35
 def getDeployFunction(self, cr):
     from algorithms.algorithm import beam_search, greed
     print "Compiling computing graph."
     get_question_hidden = theano.function([self.question, self.question_mask],
                                      self.last_hidden_state,
                                      name='get_question_hidden')
     _, pred_word_probability = self.softmax_layer.getOutput(self.last_decoder_hidden)
     
     self.last_decoder_hidden
     self.tparams['Wemb']
     
     recons_v = self.tparams['recons_v']
     recons_b = self.tparams['recons_b']
     
     recons_b = recons_b.dimshuffle(['x', 0])
     
     media_h = T.dot(self.tparams['Wemb'], recons_v) + recons_b
     
     recons_h_error_L = T.tanh(media_h) - T.addbroadcast(self.last_decoder_hidden, 0)
     recons_h_error_L = T.sqr(recons_h_error_L).sum(axis=1) 
     recons_h_error_L = recons_h_error_L / self.options['hidden_dim']
     error = -T.log(pred_word_probability) + recons_h_error_L
     score = T.exp(-error)
     pred_word = T.argmin(error)
     
     deploy_model = theano.function(inputs=[self.answer, self.answer_mask, self.last_hidden_state],
                                    outputs=[pred_word, score],
                                    allow_input_downcast=True)
     print "Compiled."
     def dm(sentence):
         print "feed %s: " % sentence
         (x, x_mask) = cr.transformInputData(sentence)
         x = x[:-1]
         x_mask = x_mask[:-1]
         last_s = get_question_hidden(x, x_mask)
         def f(y, y_mask):
             return deploy_model(y, y_mask, last_s)
         return beam_search('', cr, f)
     return dm
Example #36
    def __SOM(self, X, W, n):

        learning_rate_op = T.exp(-1. * self.som_lr * n)
        _alpha_op = self.alpha * learning_rate_op
        _sigma_op = self.sigma * learning_rate_op

        locations = self.locs
        maps = T.sub(X, W)
        measure = T.sum(T.pow(T.sub(X, W), 2), axis=1)
        err = measure.min()
        self.bmu_index = T.argmin(measure)
        bmu_loc = locations[self.bmu_index]
        dist_square = T.sum(T.square(T.sub(locations, bmu_loc)), axis=1)
        H = T.cast(T.exp(-dist_square / (2 * T.square(_sigma_op))),
                   dtype=theano.config.floatX)
        w_update = W + _alpha_op * \
            T.tile(H, [self.latent_size, 1]).T * maps
        Qs = self.__soft_probs(X, W)
        P = Qs**2 / Qs.sum()
        P = (P.T / P.sum()).T
        cost = self.__kld(P, Qs)
        return [err, cost, bmu_loc], {W: w_update}
Example #37
    def generate_optimize_basis():
        # original solution
        tx0 = partial.x
        # optimized solution
        tx1 = T.dot(tl.matrix_inverse(T.dot(partial.A.T, partial.A)),
                    T.dot(partial.A.T, y) - gamma/2*partial.theta)

        # investigate zero crossings between tx0 and tx1
        tbetas = tx0 / (tx0 - tx1)
        # investigate tx1
        tbetas = T.concatenate([tbetas, [1.0]])
        # only between tx0 and inclusively tx1
        tbetas = tbetas[(T.lt(0, tbetas) * T.le(tbetas, 1)).nonzero()]

        txbs, _ = theano.map(lambda b: (1-b)*tx0 + b*tx1, [tbetas])
        tlosses, _ = theano.map(loss, [txbs])
        # select the optimum
        txb = txbs[T.argmin(tlosses)]

        return theano.function([tpart, full.x, full.theta],
                               [T.set_subtensor(partial.x,     txb),
                                T.set_subtensor(partial.theta, T.sgn(txb))])
Example #38
    def __build_measure(self): 
       
        #print(self.archetypes.shape.eval())
        #print(self.layers_bw[-1].shape.eval())

        ### FW
        E_fw = T.mean(self.__energy(self.layers_fw, self.weights_fw, self.weights_bw, self.biases_fw))
        C_fw = T.mean(self.__cost(self.layers_fw[-1], direction="fw"))
        y_prediction = T.argmax(self.layers_fw[-1], axis=1)
        # Error count for y-error
        error_fw        = T.mean(T.neq(y_prediction, self.y_data))

        ### BW
        #print([layer.shape.eval() for layer in self.layers_fw])
        E_bw = T.mean(self.__energy(self.layers_bw, self.weights_bw, self.weights_fw, self.biases_bw))
        # IDENTIFY CLOSEST ARCHETYPE W/ MINIMAL SUM-SQUARED DISTANCE        
        # CLOSEST ARCHETYPE IS THE SAME AS THE Y-LABEL
        arches_reshaped = T.reshape(self.archetypes, [1, D*D, D*D])
        output_reshaped = T.reshape(self.layers_bw[-1], [self.batch_size, 1, D*D])

        sum_squared = ((arches_reshaped - output_reshaped) ** 2).sum(axis=2)

        closest_arch = T.argmin(sum_squared, axis=1)  
        #closest_arch_one_hot = T.extra_ops.to_one_hot(closest_arch, D*D)
        #C_bw = T.mean(self.__cost(closest_arch_one_hot)) 
        
        C_bw = T.mean(self.__cost(self.layers_bw[-1], direction="bw"))
        
        # Index of closest archetype for x-error
        error_bw        = T.mean(T.neq(closest_arch, self.y_data))

        measure = theano.function(
            inputs=[],
            outputs=[E_fw, C_fw, error_fw, E_bw, C_bw, error_bw]#, closest_arch, self.y_data]
        )

        return measure
Example #39
def generate_functions(A, y, gamma):
    tA = T.matrix('A')
    ty = T.vector('y')
    tx = T.vector('x')
    ttheta = T.vector('theta')
    
    tx0 = T.vector('x0')
    tx1 = T.vector('x1')
    tbetas = T.vector('betas')
    
    error = lambda x: T.sum((T.dot(tA, x) - ty)**2)
    derror = lambda x: T.grad(error(x), x)
    penalty = lambda x: x.norm(1)
    loss = lambda x: error(x) + penalty(x)

    entering_index = T.argmax(abs(derror(tx)))
    txs, _ = theano.map(lambda b, x0, x1: (1-b)*x0 + b*x1,
                        [tbetas], [tx0, tx1])

    return {
        "select_entering": theano.function([tx],
                                           [entering_index, derror(tx)[entering_index]],
                                           givens = {tA: A, ty: y}),
        "qp_optimum": theano.function([tA, ttheta],
                                      T.dot(T.inv(T.dot(tA.T, tA)), T.dot(tA.T, ty) - gamma/2*ttheta),
                                      givens = {ty: y}),
        "txs": theano.function([tbetas, tx0, tx1], txs),
        "select_candidate": theano.function([tA, tbetas, tx0, tx1],
                                            txs[T.argmin(theano.map(loss, [txs])[0])],
                                            givens = {ty: y}),
        "optimal_nz": theano.function([tA, tx],
                                      derror(tx) + gamma*T.sgn(tx),
                                      givens = {ty: y}),
        "optimal_z": theano.function([tA, tx],
                                     abs(derror(tx)),
                                     givens = {ty: y}),
        }
Example #40
def argmin(x):
    return T.argmin(x)
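These thin backend wrappers just forward to T.argmin; a quick compiled check with toy values:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
row_argmin = theano.function([x], T.argmin(x, axis=1))

row_argmin(np.array([[3.0, 1.0, 2.0],
                     [0.5, 4.0, 0.1]], dtype=theano.config.floatX))
# -> array([1, 2])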
Example #41
def argmin(x, dim = None, keep = False):
  return T.argmin(x, axis = dim, keepdims = keep)
Example #42
    def __call__(self, X, termination_criterion, initial_H=None):
        """
            Compute for each sample its representation.

            Parameters
            ----------
            X : Sample matrix. numpy.ndarray
            termination_criterion: pylearn TerminationCriterion object
            initial_H: Numpy matrix.

            Returns
            -------
            H: H matrix with the representation.
        """

        dataset_size = X.shape[0]

        H = None
        if initial_H is not None:
            if initial_H.shape[0] == dataset_size and initial_H.shape[1] == self._num_latent_topics:
                H = initial_H

        if H is None:
            if not hasattr(self, "predict_clusters"):
                h = tensor.matrix(name="h")
                x = tensor.matrix(name="x")
                kxb = self._kernel(x, self._budget)
                a = (self.W * tensor.dot(self.W, self._kernel_matrix)).sum(axis=1) \
                    - 2.0 * tensor.dot(kxb, self.W.T)
                b = tensor.argmin(a, axis=1)
                self.predict_clusters = function([x], b)

            H = .2 * numpy.ones((self._data_size, self._num_latent_topics)).astype(self.W.dtype)
            clusters = self.predict_clusters(X)
            for i, cluster in enumerate(clusters):
                H[i, cluster] += 1.0

        if not hasattr(self, "predict_representation"):
            h = tensor.matrix(name="h")
            x = tensor.matrix(name="x")
            kxb = self._kernel(x, self._budget)
            kxbp = 0.5 * (numpy.abs(kxb) + kxb)
            kxbn = 0.5 * (numpy.abs(kxb) - kxb)
            a = tensor.dot(h, tensor.dot(self.W, self.kbn))
            b = tensor.dot(kxbp + a, self.W.T)
            c = tensor.dot(h, tensor.dot(self.W, self.kbp))
            d = tensor.dot(kxbn + c, self.W.T)
            e = h * tensor.sqrt(b / (d + self.lambda_vals))
            f = tensor.maximum(e, eps)
            self.predict_representation = function([x, h], f)

        keep_training = True
        if not isfinite(H):
            raise Exception("NaN or Inf in H")

        while keep_training:
            H = self.predict_representation(X, H)
            if not isfinite(H):
                raise Exception("NaN or Inf in H")
            keep_training = termination_criterion.continue_learning(self)

        return H
Example #43
def argmin(x, axis=None, keepdims=False):
    return T.argmin(x, axis=axis, keepdims=keepdims)
Example #44
def main(model='mlp', num_epochs=500):
    # Load the dataset
    print("Loading data...")
    num_per_class = 100
    print("Using %d per class" % num_per_class) 
    
    X_train, y_train, X_test, y_test = load_data("/X_train.npy", "/Y_train.npy", "/X_test.npy", "/Y_test.npy")
    X_train_final = []
    y_train_final = []
    for i in range(10):
        X_train_class = X_train[y_train == i]
        # permutated_index = np.random.permutation(X_train_class.shape[0])
        permutated_index = np.arange(X_train_class.shape[0])
        X_train_final.append(X_train_class[permutated_index[:100]])
        y_train_final += [i] * num_per_class
    X_train = np.vstack(X_train_final)
    y_train = np.array(y_train_final, dtype = np.int32) 
    
    X_train = extend_image(X_train, 40)
    X_test = extend_image(X_test, 40)
    #X_train, y_train, X_test, y_test = load_data("/cluttered_train_x.npy", "/cluttered_train_y.npy", "/cluttered_test_x.npy", "/cluttered_test_y.npy", dataset = "MNIST_CLUTTER")

    # Prepare Theano variables for inputs and targets
    nRotation = 8
    
    # The dimension would be (nRotation * n, w, h)
    input_var = T.tensor4('inputs')

    # The dimension would be (n, )
    vanilla_target_var = T.ivector('vanilla_targets')
    # The dimension would be (nRotation * n , )
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    
    network = build_cnn(input_var)
    
    saved_weights = np.load("../data/mnist_Chi_dec_100.npy")
    lasagne.layers.set_all_param_values(network, saved_weights)

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    # The dimension would be (nRotation * n, 10)
    predictions = lasagne.layers.get_output(network)

    # The dimension would be (nRotation * n, 10)
    one_hot_targets = T.extra_ops.to_one_hot(target_var, 10)

    # rests = [T.reshape(predictions[i][(1 - one_hot_targets).nonzero()], (-1, 9)) for i in range(nRotation)]
    
    rests = T.reshape(predictions[(1 - one_hot_targets).nonzero()], (nRotation, -1, 9))
    # a list of $nRotation tensor, each tensor is of shape (n, 1)
    rests = [T.max(rests[i], axis = 1) for i in range(nRotation)]

    latent_vector = T.argmin(T.as_tensor_variable(rests), axis = 0)

    # selected_index is a (n, nRotation) matrix of binary values.
    selected_index = T.extra_ops.to_one_hot(latent_vector, nRotation)

    predictions = T.reshape(predictions, (nRotation, -1, 10))

    predictions = T.as_tensor_variable(predictions).swapaxes(0, 1)

    predictions = predictions[selected_index.nonzero()]

    # loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = lasagne.objectives.multiclass_hinge_loss(predictions, vanilla_target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    # updates = lasagne.updates.nesterov_momentum(
    #         loss, params, learning_rate=0.01, momentum=0.9)
    updates = lasagne.updates.adagrad(loss, params, learning_rate = 0.01)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_prediction = T.reshape(test_prediction,(nRotation, -1, 10))
    test_prediction_max = test_prediction.max(axis = 2)
    rotation_index = T.extra_ops.to_one_hot(T.argmax(test_prediction_max, axis = 0), nRotation)
    test_prediction = test_prediction.swapaxes(0, 1)[rotation_index.nonzero()]
    
    test_loss = lasagne.objectives.multiclass_hinge_loss(test_prediction, vanilla_target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), vanilla_target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var, vanilla_target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, vanilla_target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 100, shuffle=True):
            inputs, targets = batch
            inputs = inputs.reshape(100, 40, 40)
            inputs = rotateImage_batch(inputs, nRotation).reshape(100 * nRotation, 1, 40, 40)
            duplicated_targets = np.array([targets for i in range(nRotation)]).reshape(100 * nRotation,)
            train_err += train_fn(inputs, duplicated_targets, targets)
            train_batches += 1


        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))

        if epoch % 5 == 0: 
           # After training, we compute and print the test error:
            test_err = 0
            test_acc = 0
            test_batches = 0
            for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
                inputs, targets = batch
                inputs = inputs.reshape(500, 40, 40)
                inputs = rotateImage_batch(inputs, nRotation).reshape(500 * nRotation, 1, 40, 40)
                err, acc = val_fn(inputs, targets)
                test_err += err
                test_acc += acc
                test_batches += 1
            print("Final results:")
            print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
            print("  test accuracy:\t\t{:.2f} %".format(
                test_acc / test_batches * 100))

            # Optionally, you could now dump the network weights to a file like this:
            # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
            #
            # And load them again later on like this:
            # with np.load('model.npz') as f:
            #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            # lasagne.layers.set_all_param_values(network, param_values)
    weightsOfParams = lasagne.layers.get_all_param_values(network)
    #np.save("../data/mnist_clutter_CNN_params_sigmoid.npy", weightsOfParams)
    #np.save("../data/mnist_CNN_params_sigmoid.npy", weightsOfParams)
    #np.save("../data/mnist_CNN_params.npy", weightsOfParams)
    #np.save("../data/mnist_CNN_params_drop_out_semi_Chi_Dec7.npy", weightsOfParams)
    np.save("../data/mnist_CNN_params_drop_out_Chi_2017.npy", weightsOfParams)
Example #45
n = 5000 # number of candidates
m = 1000 # number of targets
f = 500  # number of features

x = T.matrix('x') # candidates
y = T.matrix('y') # targets

xL2S = T.sum(x*x, axis=-1) # [n]
yL2S = T.sum(y*y, axis=-1) # [m]
xL2SM = T.zeros((m, n)) + xL2S # broadcasting, [m, n]
yL2SM = T.zeros((n, m)) + yL2S # broadcasting, [n, m]
squaredPairwiseDistances = xL2SM.T + yL2SM - 2.0*T.dot(x, y.T) # [n, m]

np.random.seed(1)

N = randomMatrix(n, f)
M = randomMatrix(m, f)

lamblinsTrick = True

if lamblinsTrick:
    # from https://github.com/Theano/Theano/issues/1399
    s = squaredPairwiseDistances
    bestIndices = T.cast( ( T.arange(n).dimshuffle(0, 'x') * T.cast(T.eq(s, s.min(axis=0, keepdims=True)), 'float32') ).sum(axis=0), 'int32')
else:
    bestIndices = T.argmin(squaredPairwiseDistances, axis=0)

nearests_fn = theano.function([x, y], bestIndices, profile=True)

print nearests_fn(N, M).sum()
Example #46
 def min_dist(self, data):
     return T.argmin(T.sqrt(T.sum((data - self.W)**2, 1)))
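This reads like a SOM best-matching-unit lookup; a self-contained sketch with made-up shapes (10 units, 4 features), not the original class:

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.random.randn(10, 4).astype(theano.config.floatX), name='W')
x = T.vector('x')  # a single input of length 4
bmu = theano.function([x], T.argmin(T.sqrt(T.sum((x - W) ** 2, 1))),
                      allow_input_downcast=True)

bmu(np.random.randn(4))  # index of the weight row closest to x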
Example #47
def buildAttenBased(d_in, d_out, LR, alpha):
    # fw_lstm_doc:  forward LSTM of document encoding
    # bw_lstm_doc: backward LSTM of document encoding
    # fw_lstm_que:  forward LSTM of question encoding
    # bw_lstm_que: backward LSTM of question encoding
    # y: words encoding, total t words, so t vectors
    # u: question encoding, only one vector

    x_seq = T.matrix('x_seq')
    q_seq = T.matrix('q_seq')

    fw_lstm_doc = LSTM(d_in, d_out, LR, alpha, 0, x_seq)
    bw_lstm_doc = LSTM(d_in, d_out, LR, alpha, 0, x_seq[::-1])
    fw_lstm_que = LSTM(d_in, d_out, LR, alpha, 0, q_seq)
    bw_lstm_que = LSTM(d_in, d_out, LR, alpha, 0, q_seq[::-1])

    y = T.concatenate([
            fw_lstm_doc.output_encoding(),
            bw_lstm_doc.output_encoding()[::-1] 
            ],
            axis=1)
    u = T.concatenate([ 
            [fw_lstm_que.output_encoding()[-1]],
            [bw_lstm_que.output_encoding()[-1]] 
            ],
            axis=1)

    Wym = shared_uniform("Wym", 2*d_out, 2*d_out)
    Wum = shared_uniform("Wum", 2*d_out, 2*d_out)
    wms = shared_uniform("wms", 2*d_out)
    Wrg = shared_uniform("Wrg", 2*d_out, 2*d_out)
    Wug = shared_uniform("Wug", 2*d_out, 2*d_out)
    
    yT = T.transpose(y)
    uT = T.transpose(u)
    um = T.dot(Wum,uT)
    m = T.tanh( T.dot(Wym, yT) + um ) 
    
    # s is a vector (t,)
    mT = T.transpose(m)
    s = T.exp( T.sum(wms*mT, axis = 1) )
    s = s/T.sqrt(T.sum(s**2))
    
    # r is a vector (2*d_out,)
    # ug is (1, 2*d_out)
    # g is a vector (2*d_out,)
    r = T.dot(T.transpose(y), s)
    ug = T.transpose(T.dot(Wug,uT))
    g = T.sum( T.tanh( T.dot(Wrg,r) + ug ), axis = 0)

    g_hat = T.vector('g_hat')

    ### Cost Function ###
    cost = T.sum((g-g_hat)**2)

    params = [Wym, Wum, wms, Wrg, Wug]
    params.extend(fw_lstm_doc.params)
    params.extend(bw_lstm_doc.params)
    params.extend(fw_lstm_que.params)
    params.extend(bw_lstm_que.params)
    
    ### Calclate Gradients ###
    gradients = T.grad(cost, params)

    ### Model Functions ###
    train_model = theano.function(
        inputs = [x_seq, q_seq, g_hat],
        updates = fw_lstm_doc.rmsprop(params, gradients, LR),
        outputs = [cost],
        allow_input_downcast = True
        )

    test_model = theano.function(
        inputs = [x_seq, q_seq],
        outputs = g,
        allow_input_downcast = True
        )

    A = T.vector('A_Opt')
    B = T.vector('B_Opt')
    C = T.vector('C_Opt')
    D = T.vector('D_Opt')

    ser = [ T.sum((g-A)**2),T.sum((g-B)**2),
            T.sum((g-C)**2),T.sum((g-D)**2) ]

    opt = T.argmin(ser)
    testAns_model = theano.function(
        inputs = [x_seq, q_seq, A, B, C, D],
        outputs = opt,
        allow_input_downcast = True
        )
        
    return train_model, test_model, testAns_model
Example #48
w_2 = init_weights((h_size, y_size))


# Forward propagation
yhat   = forwardprop(X, w_1, w_2)

# Backward propagation
cost    = T.mean(T.nnet.categorical_crossentropy(yhat, Y))
params  = [w_1, w_2]
updates = backprop(cost, params)



# Train and predict
train   = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
pred_y  = T.argmin(yhat, axis=1)
predict = theano.function(inputs=[X], outputs=pred_y, allow_input_downcast=True)



# Run SGD
"""for iter in range(500):
    train(train_X, train_y)
    train_accuracy = np.mean(np.argmax(train_y, axis=1) == predict(train_X))
    test_accuracy  = np.mean(np.argmax(test_y, axis=1) == predict(test_X))
    print predict(test_X)
    print("Iteration = %d, train accuracy = %.2f%%, test accuracy = %.2f%%"
            % (iter + 1, 100 * train_accuracy, 100 * test_accuracy))
    break"""
          
train(train_X, train_y)
Example #49
def argmin(x, axis=-1):
    return T.argmin(x, axis=axis, keepdims=False)
Example #50
	def _match(self, sample):
		diff = (T.sqr(self.codebook)).sum(axis = 1, keepdims = True) + (T.sqr(sample)).sum(axis = 1, keepdims = True) - 2 * T.dot(self.codebook, sample.T)
		bmu = T.argmin(diff)
		err = T.min(diff)
		return err, bmu
Example #51
def constructMinimalDistancesVariable(x, y, initials, n, m):
    sDistances = constructSquaredDistanceMatrixVariable(x, y, n, m)
    bestIndices = T.argmin(sDistances, axis=0)
    bestXes = x[bestIndices]
    bestInitials = initials[bestIndices]
    return bestXes, bestInitials
Example #52
 def get_time_range(self, train):
     mask = self.get_input_mask(train=train)
     ind = T.switch(T.eq(mask[:, -1], 1.), mask.shape[-1], T.argmin(mask, axis=-1)).astype('int32')
     self.time_range = ind
     return ind
Example #53
def argmin(x, axis=-1):
    return T.argmin(x, axis=axis, keepdims=False)
Example #54
 def pred(self, output):
     W1 = 0.5
     W2 = 0.5
     pred = T.argmin(W1 * T.arccos(T.dot(output, mappings_vec.T)/(absolute(output)*absolute(mappings_vec, axis=1))) +
                     W2 * (absolute(mappings_vec, axis=1) - absolute(output)))
     return pred