Example #1
def unet_crossentropy_loss_sampled(y_true, y_pred):
    print 'unet_crossentropy_loss_sampled'
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling with theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0] # no idea why this is a tuple
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1-y_true)]), dtype='int64')

    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    print 'average_loss:', average_loss
    return average_loss
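Both this example and the near-identical Example #3 below assume a module-level srng (a Theano random stream whose permutation method does the shuffling); the [0] after T.nonzero is needed because, as in NumPy, nonzero returns a tuple of index vectors, one per dimension. A minimal sketch of that assumed setup, with an arbitrary seed:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# the original module defines its own srng; the seed here is arbitrary
srng = RandomStreams(seed=12345)

# T.nonzero returns a tuple of index vectors, so [0] gives the flat indices
y = T.vector('y')
pos_idx = T.nonzero(y)[0]
f = theano.function([y], pos_idx)
print(f(np.array([0., 1., 0., 1.], dtype=theano.config.floatX)))  # -> [1 3]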
Example #2
File: nnet2.py Project: mufan-li/sg
	def nll2(self, y):
		# for predicting whether a course is taken
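		# average the log-likelihood separately over the positive (taken) and negative entries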
		return -T.mean(
				T.log(self.output)[T.nonzero(y)]
			) - T.mean(
				T.log(1 - self.output)[T.nonzero(1 - y)]
			) 
Example #3
def unet_crossentropy_loss_sampled(y_true, y_pred):
    print 'unet_crossentropy_loss_sampled'
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling with theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0]  # no idea why this is a tuple
    indNeg = T.nonzero(1 - y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]),
                       dtype='int64')

    indPos = indPos[:n_samples]
    indNeg = indNeg[:n_samples]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(
        T.log(1 - y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    print 'average_loss:', average_loss
    return average_loss
Example #4
def prepare_loss(inputlayer, outlayer, pairs, types, loss_function,
                 entropy_penalty=0, V=None, lamb=-1, train_pass=False):
    # reshape to 2d before sending through the network,
    # after which the original shape is recovered
    output = outlayer.output(
        {inputlayer: pairs.reshape((-1, pairs.shape[-1]))},
        train_pass=train_pass).reshape((pairs.shape[0], 2, -1))

    x1, x2 = output[:,0], output[:,1]
    cost = loss_function(x1, x2, types)
    same_loss = cost[T.nonzero(types)].mean()
    diff_loss = cost[T.nonzero(1 - types)].mean()

    if lamb >= 0:
        cost = 1 / (lamb + 1) * same_loss + lamb / (lamb + 1) * diff_loss
    else:
        cost = cost.mean()

    ent = entropy_loss(x1, x2)
    total_cost = cost + entropy_penalty * ent

    if V is not None:
        return total_cost, cost, same_loss, diff_loss, ent, calculate_spread(V)
    else:
        return total_cost, cost, same_loss, diff_loss, ent
Example #5
def unet_crossentropy_loss_sampled(y_true, y_pred):
    # weighted version of the pixel-wise cross-entropy loss function
    alpha = 0.6
    epsilon = 1.0e-5
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0 - epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling with theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0]  # no idea why this is a tuple
    indNeg = T.nonzero(1 - y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]

    # take equal number of samples depending on which class has less
    n_samples = T.cast(T.min([T.sum(y_true), T.sum(1 - y_true)]),
                       dtype='int64')
    # indPos = indPos[:n_samples]
    # indNeg = indNeg[:n_samples]

    total = np.float64(patchSize_out * patchSize_out * patchZ_out)
    loss_vector = ifelse(
        T.gt(n_samples, 0),
        # if this patch has positive samples, then calculate the first formula
        (-alpha * T.sum(T.log(y_pred_clipped[indPos])) -
         (1 - alpha) * T.sum(T.log(1 - y_pred_clipped[indNeg]))) / total,
        -(1 - alpha) * T.sum(T.log(1 - y_pred_clipped[indNeg])) / total)

    average_loss = T.mean(loss_vector) / (1 - alpha)
    return average_loss
Example #6
def categorical_crossentropy_segm(prediction_proba, targets):
    '''
    MODIFICATIONS:
        - reshape from image-size to array and back
    '''
    shape = T.shape(prediction_proba)
    pred_mod1 = T.transpose(prediction_proba, (0,2,3,1))
    pred_mod = T.reshape(pred_mod1, (-1,shape[1]))
    if prediction_proba.ndim == targets.ndim:
        targ_mod1 = T.transpose(targets,(0,2,3,1))
        targ_mod = T.reshape(targ_mod1,(-1,shape[1]))
    else:
        targ_mod = T.reshape(targets, (-1,))
    results = categorical_crossentropy(pred_mod, targ_mod)


    results = T.reshape(results, (shape[0],shape[2],shape[3]))



    # QUICK IMPLEMENTATION FOR TWO SPECIFIC CLASSES. NEEDS GENERALIZATION
    # Weights depending on class occurrence:
    weights = (1.02275, 44.9647)
    cars_indx, not_cars_indx = T.nonzero(targets), T.nonzero(T.eq(targets,0))
    # set_subtensor returns a new variable, so the result must be assigned back
    results = T.set_subtensor(results[cars_indx], results[cars_indx]*float32(weights[1]))
    results = T.set_subtensor(results[not_cars_indx], results[not_cars_indx]*float32(weights[0]))


    return T.sum(results, axis=(1,2))
Example #7
    def __init__(self, rng, batchsize, epochs=100, alpha=0.001, beta1=0.9, beta2=0.999, eps=1e-08, l1_weight=0.0, l2_weight=0.1, cost='mse'):
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.l1_weight = l1_weight
        self.l2_weight = l2_weight
        self.rng = rng
        self.theano_rng = RandomStreams(rng.randint(2 ** 30))
        self.epochs = epochs
        self.batchsize = batchsize

        # `cost` is the objective minimised during supervised training;
        # the T.nonzero term ensures that the cost is only calculated for examples with a label
        #
        # Convention: we mark unlabelled examples with a vector of zeros in lieu of a one-hot vector
        if   cost == 'mse':
            self.y_pred = lambda network, x: network(x)
            self.error = lambda network, y_pred, y: T.zeros((1,))
            self.cost = lambda network, x, y: T.mean((network(x)[T.nonzero(y)] - y[T.nonzero(y)])**2)
        elif cost == 'binary_cross_entropy':
            self.y_pred = lambda network, x: network(x)
            self.cost   = lambda network, y_pred, y: T.nnet.binary_crossentropy(y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
            # classification error
            self.error  = lambda network, y_pred, y: T.mean(T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
        elif cost == 'cross_entropy':
            self.y_pred = lambda network, x: network(x)
            self.cost   = lambda network, y_pred, y: T.nnet.categorical_crossentropy(y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
            # classification error
            self.error  = lambda network, y_pred, y: T.mean(T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
        else:
            self.y_pred = lambda network, x: network(x)
            self.error = lambda network, y_pred, y: T.zeros((1,))
            self.cost = cost
Example #8
    def partial_errors(self, y):
        b = T.argmax(y, axis=1)
        zero_ind = T.nonzero(T.eq(b, 0))[0]
        one_ind = T.nonzero(T.eq(b, 1))[0]

        neg_error = T.mean(T.neq(self.y_pred[zero_ind], b[zero_ind]))
        pos_error = T.mean(T.neq(self.y_pred[one_ind], b[one_ind]))
        # print (b.eval())
        return (neg_error + pos_error) * 0.5
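For reference, an eager NumPy analogue of partial_errors above (an illustration, not part of the original project): the error rate is computed per class and then averaged, so both classes contribute equally regardless of class imbalance.

import numpy as np

def balanced_error(y_pred, y_onehot):
    # true class index for each example
    b = np.argmax(y_onehot, axis=1)
    zero_ind = np.nonzero(b == 0)[0]
    one_ind = np.nonzero(b == 1)[0]
    neg_error = np.mean(y_pred[zero_ind] != b[zero_ind])
    pos_error = np.mean(y_pred[one_ind] != b[one_ind])
    return (neg_error + pos_error) * 0.5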
Example #9
def past_weight_grad_step(xs, es, kp_x, kd_x, kp_e, kd_e, shape, dws=None):
    """
    Do an efficient update of the weights given the two spike-update.

    (This still runs FING SLOWLY!)

    :param xs: An (n_in) vector
    :param es: An (n_out) vector
    :param kp_x:
    :param kd_x:
    :param kp_e:
    :param kd_e:
    :param shape: (n_in, n_out)
    :return:
    """
    kp_x, kd_x, kp_e, kd_e = [as_floatx(k) for k in (kp_x, kd_x, kp_e, kd_e)]
    n_in, n_out = shape
    rx = kd_x/(kp_x+kd_x)
    re = kd_e/(kp_e+kd_e)

    tx_last = create_shared_variable(np.zeros(n_in)+1)
    te_last = create_shared_variable(np.zeros(n_out)+1)
    x_last = create_shared_variable(np.zeros(n_in))
    e_last = create_shared_variable(np.zeros(n_out))
    x_spikes = tt.neq(xs, 0)
    e_spikes = tt.neq(es, 0)
    x_spike_ixs, = tt.nonzero(x_spikes)
    e_spike_ixs, = tt.nonzero(e_spikes)

    if dws is None:
        dws = tt.zeros(shape)

    t_last = tt.minimum(tx_last[x_spike_ixs, None], te_last)  # (n_x_spikes, n_out)
    dws = tt.inc_subtensor(dws[x_spike_ixs, :], x_last[x_spike_ixs, None]*e_last
        * rx**(tx_last[x_spike_ixs, None]-t_last)
        * re**(te_last[None, :]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    new_x_last = tt.set_subtensor(x_last[x_spike_ixs], x_last[x_spike_ixs]*rx**tx_last[x_spike_ixs]+ xs[x_spike_ixs]/as_floatx(kd_x))
    new_tx_last = tt.switch(x_spikes, 0, tx_last)

    t_last = tt.minimum(new_tx_last[:, None], te_last[e_spike_ixs])  # (n_in, n_e_spikes)
    dws = tt.inc_subtensor(dws[:, e_spike_ixs], new_x_last[:, None]*e_last[e_spike_ixs]
        * rx**(new_tx_last[:, None]-t_last)
        * re**(te_last[None, e_spike_ixs]-t_last)
        * geoseries_sum(re*rx, t_end=t_last, t_start=1)
        )

    add_update(x_last, new_x_last)
    add_update(e_last, tt.set_subtensor(e_last[e_spike_ixs], e_last[e_spike_ixs]*re**te_last[e_spike_ixs]+ es[e_spike_ixs]/as_floatx(kd_e)))
    add_update(tx_last, new_tx_last+1)
    add_update(te_last, tt.switch(e_spikes, 1, te_last+1))
    return dws
Example #10
    def __init__(self,
                 data,
                 labels,
                 n_features,
                 l1,
                 learning_rate=0.01,
                 seed=0):

        self.data = data
        self.labels = labels
        self.l1 = l1
        self.seed = seed
        self.learning_rate = learning_rate
        self.rng = np.random.mtrand.RandomState(self.seed)

        self.w = []

        # TODO: generalise to multiple classes
        for i in range(2):
            self.init_weights(n_features, i)

        self.n_class = []
        # TODO: generalise to multiple classes
        for i in range(2):
            #            self.n_class.append(np.shape(self.labels[0]))
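            # count of examples whose label equals i (shape of the nonzero index vector)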
            self.n_class.append(T.shape(T.nonzero(T.eq(self.labels, i))[0]))
Example #11
    def train_batch(self, batch_size):
        T = self.AE.T
        T = T.tocsr()
        nonzero_indices = T.nonzero()
        #pdb.set_trace()
        n_users = len(np.unique(nonzero_indices[0]))
        indices = np.unique(nonzero_indices[0])
        for epoch in xrange(self.epochs):
            l = []
            for ind, i in enumerate(xrange(0, n_users, batch_size)):
                # CHECK : SEEMS BUGGY. 
                #------------------------
                #ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)

                #------------------------
                #print ratings
                #pdb.set_trace()
                loss = self.AE.ae_batch(ratings)
                #loss = self.AE.debug(ratings)
                #print loss
                #pdb.set_trace()
                l.append(loss)
            m = np.mean(np.array(l))
            print("mean Loss for epoch %d  batch %d is %f"%(epoch, ind, m))
            rmse = self.RMSE_sparse()
            print("RMSE after one epoch is %f"%(rmse))
            f.write(str(rmse) + '\n')
Example #12
    def train_batch(self, batch_size):
        T = self.AE.T
        T = T.tocsr()
        nonzero_indices = T.nonzero()
        #pdb.set_trace()
        n_users = len(np.unique(nonzero_indices[0]))
        indices = np.unique(nonzero_indices[0])
        for epoch in xrange(self.epochs):
            l = []
            for ind, i in enumerate(xrange(0, n_users, batch_size)):
                # CHECK : SEEMS BUGGY.
                #------------------------
                #ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(
                    np.float32)

                #------------------------
                #print ratings
                #pdb.set_trace()
                loss = self.AE.ae_batch(ratings)
                #loss = self.AE.debug(ratings)
                #print loss
                #pdb.set_trace()
                l.append(loss)
            m = np.mean(np.array(l))
            print("mean Loss for epoch %d  batch %d is %f" % (epoch, ind, m))
            #rmse = self.RMSE_sparse()
            rmse = self.RMSE()
            print("RMSE after one epoch is %f" % (rmse))
            f.write(str(rmse) + '\n')
Example #13
def multi_sparse_graph_loss(y_true, y_pred):
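    # keep only the rows whose first label is not -1 (the +1 shift turns those
    # entries into zeros, which nonzero drops; -1 presumably marks padded rows)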
    ids = tensor.nonzero(y_true[:, 0] + 1)[0]
    y_true = y_true[ids]
    y_pred = y_pred[ids]

    return tensor.mean(
        objectives.sparse_categorical_crossentropy(y_true, y_pred))
Example #14
def linear_activation(self, time, updates):
    """Linear activation."""

    # Destroy impulse if in refractory period
    masked_imp = t.set_subtensor(
        self.impulse[t.nonzero(self.refrac_until > time)], 0.)

    # Add impulse
    new_mem = self.mem + masked_imp

    # Store spiking
    output_spikes = t.ge(new_mem, self.v_thresh)

    spike_idxs = output_spikes.nonzero()

    if settings['reset'] == 'Reset by subtraction':
        if settings['payloads'] and False:  # Experimental, turn off by default
            new_and_reset_mem = t.set_subtensor(new_mem[spike_idxs], 0.)
        else:
            new_and_reset_mem = t.inc_subtensor(new_mem[spike_idxs],
                                                -self.v_thresh)
    else:  # settings['reset'] == 'Reset to zero':
        new_and_reset_mem = t.set_subtensor(new_mem[spike_idxs], 0.)

    updates.append((self.mem, new_and_reset_mem))

    if settings['payloads']:
        residuals = t.inc_subtensor(new_mem[spike_idxs], -self.v_thresh)
        payloads, payloads_sum = update_payload(self, residuals, spike_idxs)
        updates.append((self.payloads, payloads))
        updates.append((self.payloads_sum, payloads_sum))

    return output_spikes
Example #15
 def loop (row):
     one_indices = T.nonzero(row)[0]
     zero_indices = T.eq(row, 0).nonzero()[0]
     random = shared_randomstreams.RandomStreams(5)
     ind1=random.random_integers(size=(1,), low=0, high=one_indices.shape[0]-1, ndim=None)
     ind2=random.random_integers(size=(50,), low=0, high=zero_indices.shape[0]-1, ndim=None)
     return one_indices[ind1], zero_indices[ind2]
Example #16
 def predict(prob, mask):
     valid_index = T.nonzero(mask > 0)[0]
     prob = prob[valid_index]
     word_index = T.zeros((batch_size, ), dtype='int32')
     word_index = T.set_subtensor(word_index[valid_index],
                                  T.argmax(prob, axis=1))  # +1?
     return word_index
Example #17
    def get_latent(self, score):
        padded = T.concatenate([
            score,
            T.alloc(-np.inf, self.args['batch'] * self.args['max_inst_in_doc'],
                    1)
        ],
                               axis=1)
        # padded = T.concatenate([score, T.alloc(0., self.args['batch'] * self.args['max_inst_in_doc'], 1)], axis=1)
        row_indices = np.array(
            [[i] * self.args['max_inst_in_doc']
             for i in np.arange(self.args['batch'] *
                                self.args['max_inst_in_doc'])],
            dtype='int32')
        ante_score = padded[row_indices,
                            self.container['prev_inst_cluster_gold']]
        latent_score = T.max(ante_score, axis=1)
        latent_score = T.set_subtensor(
            latent_score[T.nonzero(T.eq(latent_score, -np.inf))], 0.)

        latent_inst = T.argmax(ante_score, axis=1)
        row_indices = T.arange(self.args['batch'] *
                               self.args['max_inst_in_doc'],
                               dtype='int32')
        alpha = T.set_subtensor(
            self.container['alpha'][row_indices, latent_inst], 0.)

        return T.reshape(
            latent_score,
            [self.args['batch'] * self.args['max_inst_in_doc'], 1
             ]), alpha, latent_inst
Example #18
def logp_theano_comorbidities(logLike,nObs,B0,B,X,S,T):
        logLike = 0.0

        #Unwrap t=0 points for B0
        zeroIndices = np.roll(T.cumsum(),1)
        #zeroIndices = np.roll(T.cumsum(),1)
        zeroIndices[0] = 0
        zeroIndices = zeroIndices.astype('int32')

        #import pdb; pdb.set_trace()

        #Likelihood from B0 for X=1 and X=0 cases
        logLike += (X[zeroIndices]*TT.log(B0[:,S[zeroIndices]]).T).sum()
        logLike += ((1-X[zeroIndices])*TT.log(1.-B0[:,S[zeroIndices]]).T).sum()

        stateChange = S[1:]-S[:-1]
        # Don't consider t=0 points
        #import pdb; pdb.set_trace()
        #setZero = TT.as_tensor_variable(zeroIndices[1:]-1)
        #TT.set_subtensor(stateChange[setZero],0)
        stateChange = TT.set_subtensor(stateChange[zeroIndices[1:]-1],0)
        #stateChange[setZero] = 0
        #stateChange[zeroIndices[1:]-1] = 0
        changed = TT.nonzero(stateChange)[0]+1

        #import pdb; pdb.set_trace()

        # A change can only happen from 0 to 1 given our assumptions
        logLike += ((X[changed]-X[changed-1])*TT.log(B[:,S[changed]]).T).sum()
        logLike += (((1-X[changed])*(1-X[changed-1]))*TT.log(1.-B[:,S[changed]]).T).sum()
        #logLike += (X[changed]*np.log(B[:,S[changed]]).T).sum()
        
        return logLike
Example #19
 def loop (row):
     one_indices = T.nonzero(row)[0]
     zero_indices = T.eq(row, 0).nonzero()[0]
     random = shared_randomstreams.RandomStreams(5)
     ind1=random.random_integers(size=(1,), low=0, high=one_indices.shape[0]-1, ndim=None)
     ind2=random.random_integers(size=(50,), low=0, high=zero_indices.shape[0]-1, ndim=None)
     return one_indices[ind1], zero_indices[ind2]
Example #20
    def call(self, x):
        real = self.get_realpart(x)
        imag = self.get_imagpart(x)
        #mag = self.get_abs(x)
        ang = self.get_angle(x) + 0.0001
        indices1 = T.nonzero(T.ge(ang, pi / 2))
        indices2 = T.nonzero(T.le(ang, 0))

        real = T.set_subtensor(real[indices1], 0)
        imag = T.set_subtensor(imag[indices1], 0)

        real = T.set_subtensor(real[indices2], 0)
        imag = T.set_subtensor(imag[indices2], 0)

        act = K.concatenate([real, imag], axis=1)

        return act
Example #21
def add_payloads(prev_layer, input_spikes):
    """Get payloads from previous layer."""

    # Get only payloads of those pre-synaptic neurons that spiked
    payloads = t.set_subtensor(
        prev_layer.payloads[t.nonzero(t.eq(input_spikes, 0.))], 0.)
    print("Using spikes with payloads from layer {}".format(prev_layer.name))
    return t.add(input_spikes, payloads)
Example #22
    def exp(self, X, U):
        norm_U = tensor.sqrt(tensor.sum((U ** 2), axis=0)).reshape((1, self._n))

        Y = X * tensor.cos(norm_U) + U * (tensor.sin(norm_U) / norm_U)

        # For those columns where the step is too small, use a retraction.
        exclude = tensor.nonzero(norm_U <= 4.5e-8)[-1]
        # Theano tensors do not support in-place item assignment, so use set_subtensor
        Y = tensor.set_subtensor(
            Y[:, exclude], self._normalize_columns(X[:, exclude] + U[:, exclude]))

        return Y
Example #23
def MASK_blanking(x_i):
    # Find indices of first and last non-zero value in x_i
    idxs = T.nonzero(x_i)[0][[1, -1]]
    # Diff = no of non zero values
    no_values = idxs[1] - idxs[0]
    # Move index inside by proportion of no of values
    idxs0 = T.cast(T.floor(idxs[0] + no_values * blank_proportion), 'int32')
    idxs1 = T.cast(T.floor(idxs[1] - no_values * blank_proportion), 'int32')
    # Return a vector that has a tighter mask than x_i
    return T.set_subtensor(T.zeros_like(x_i)[idxs0:idxs1], T.alloc(1., idxs1-idxs0))
Example #24
 def add_synap_post_inp(i, po, p, s, q):
     # i:: sequence
     # po:: post
     # p:: pre
     # s:: dA
     # q:: W
     index = T.nonzero(q[:self.Ne, i])
     npo = T.inc_subtensor(po[index, i], s)
     nw = T.inc_subtensor(q[:, i], p[:, i])
     nw = T.clip(nw, 0, self.wmax)
     return {po: npo, q: nw}
Example #25
def conditional_feature_matching(f_sample,
                                 f_data,
                                 label,
                                 num_classes,
                                 norm='l2'):
    cfm_loss = 0.
    for c in xrange(num_classes):
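        # indices of the examples in this batch whose label equals c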
        index = T.nonzero(
            T.switch(T.eq(label, c), T.ones_like(label), T.zeros_like(label)))
        cfm_loss += feature_matching(f_sample[index], f_data[index], norm)
    return cfm_loss
Example #26
 def add_synap_post_inp(i,po,p,s,q):
     # i:: sequence
     # po:: post
     # p:: pre
     # s:: dA
     # q:: W
     index = T.nonzero(q[:self.Ne,i])
     npo = T.inc_subtensor(po[index,i],s)
     nw = T.inc_subtensor(q[:,i],p[:,i])
     nw = T.clip(nw,0,self.wmax)
     return {po:npo,q:nw}
Example #27
def conditional_maximum_mean_discripancy(sample,
                                         data,
                                         label,
                                         num_classes,
                                         sigma=[2, 5, 10, 20, 40, 80]):
    cmmd_loss = 0.
    for c in xrange(num_classes):
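        # indices of the examples in this batch whose label equals c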
        index = T.nonzero(
            T.switch(T.eq(label, c), T.ones_like(label), T.zeros_like(label)))
        cmmd_loss += maximum_mean_discripancy(sample[index], data[index],
                                              sigma)
    return cmmd_loss
Example #28
        def recurrence(curr_inst, current_hv, prev_inst_cluster,
                       current_cluster):
            curr_indices = offsets + curr_inst
            prev_inst_cluster = T.set_subtensor(
                prev_inst_cluster[:, curr_inst], 0)
            # prev_inst_cluster[:, curr_inst] = 0

            curr_rep_cnn = rep_cnn[curr_indices]

            rep_inter1 = T.concatenate(
                [T.sum(current_hv, axis=1, keepdims=True), current_hv], axis=1)
            rep_inter2 = T.stack([curr_rep_cnn] *
                                 (self.args['max_inst_in_doc'] + 1),
                                 axis=1)
            rep_inter = T.concatenate([rep_inter1, rep_inter2], axis=2)

            score_cluster = T.nnet.sigmoid(
                T.dot(rep_inter, W_global) + b_global)
            score_cluster = T.reshape(
                score_cluster,
                newshape=(self.args['batch'],
                          self.args['max_inst_in_doc'] + 1))
            score_cluster = T.concatenate(
                [score_cluster,
                 T.alloc(-np.inf, self.args['batch'], 1)],
                axis=1)

            row_indices = np.array([[i] * self.args['max_inst_in_doc']
                                    for i in np.arange(self.args['batch'])],
                                   dtype='int32')
            global_score = score_cluster[row_indices, prev_inst_cluster]
            score = local_score[curr_indices] + global_score

            indices_single = T.arange(self.args['batch'], dtype='int32')
            ante_cluster_raw = prev_inst_cluster[indices_single,
                                                 T.argmax(score, axis=1)]
            indices_new_cluster = T.nonzero(T.eq(ante_cluster_raw, 0))
            ante_cluster = T.set_subtensor(
                ante_cluster_raw[indices_new_cluster],
                current_cluster[indices_new_cluster])

            current_cluster = T.set_subtensor(
                current_cluster[indices_new_cluster],
                current_cluster[indices_new_cluster] + 1)
            prev_inst_cluster = T.set_subtensor(
                prev_inst_cluster[:, curr_inst], ante_cluster)

            ante_hv = current_hv[indices_single, ante_cluster - 1]
            current_hv = T.set_subtensor(
                current_hv[indices_single, ante_cluster - 1],
                gru_step(curr_rep_cnn, ante_hv))

            return current_hv, prev_inst_cluster, current_cluster
Example #29
def unet_crossentropy_loss_sampled(y_true, y_pred):
    epsilon = 1.0e-4
    y_pred_clipped = T.flatten(T.clip(y_pred, epsilon, 1.0-epsilon))
    y_true = T.flatten(y_true)
    # this seems to work
    # it is super ugly though and I am sure there is a better way to do it
    # but I am struggling with theano to cooperate
    # filter the right indices
    indPos = T.nonzero(y_true)[0] # no idea why this is a tuple
    indNeg = T.nonzero(1-y_true)[0]
    # shuffle
    n = indPos.shape[0]
    indPos = indPos[srng.permutation(n=n)]
    n = indNeg.shape[0]
    indNeg = indNeg[srng.permutation(n=n)]
    # subset assuming each class has at least 200 samples present
    indPos = indPos[:200]
    indNeg = indNeg[:200]
    loss_vector = -T.mean(T.log(y_pred_clipped[indPos])) - T.mean(T.log(1-y_pred_clipped[indNeg]))
    average_loss = T.mean(loss_vector)
    return average_loss
Example #30
        def fprop_step(state_below, index, state_before, W, U, b):

            state_now = state_before.copy()
            index = self.num_modules -\
                tensor.nonzero(tensor.mod(index+1, self.M))[0].shape[0]
            this_range = index * self.module_dim
            z = tensor.dot(state_below, W[:, :this_range]) +\
                tensor.dot(state_before, U[:, :this_range]) +\
                b[:this_range]
            z = tensor.tanh(z)
            state_now = tensor.set_subtensor(state_now[:, :this_range], z)

            return state_now
Example #31
def gru_step(xt, *dropout_maskst):
    xt_oh = T.zeros((xt.shape[0], V), dtype=floatX)
    xt_oh = T.set_subtensor(xt_oh[T.arange(xt.shape[0]),
                                  T.cast(xt, "int32")], 1)

    sts = []
    layer_in = xt_oh
    for i, num_h in enumerate(args.num_hs):
        stm1 = ss[i][:xt.shape[0], :]
        xs = T.concatenate([layer_in, stm1], axis=1)
        z = T.nnet.hard_sigmoid(T.dot(xs, Wzs[i]) + bzs[i])
        r = T.nnet.hard_sigmoid(T.dot(xs, Wrs[i]) + brs[i])
        h = T.tanh(
            T.dot(T.concatenate([layer_in, stm1 * r], axis=1), Whs[i]) +
            bhs[i])

        st = (1 - z) * h + z * stm1
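        # where the input token is -1 (padding), keep the previous hidden state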
        st = T.set_subtensor(st[T.nonzero(T.eq(xt, -1)), :],
                             stm1[T.nonzero(T.eq(xt, -1)), :])
        sts.append(st)

        if dropout_maskst and args.dropouts[i] != 0:
            st = dropout_layer(st, args.dropouts[i], srng, train_premasks,
                               train_postmasks, dropout_maskst[i])

        if i == len(args.num_hs) - 1:
            ot = T.nnet.softmax(T.dot(st, Wo) + bo)
        else:
            layer_in = st

    xtp1 = T.cast(T.argmax(srng.multinomial(n=1, pvals=ot), axis=1), floatX)

    s_updates = OrderedDict()
    for s, st, num_h in zip(ss, sts, args.num_hs):
        if T.lt(xt.shape[0], s.shape[0]):
            pad = T.zeros((s.shape[0] - xt.shape[0], num_h), dtype=floatX)
            st = T.concatenate([st, pad], axis=0)
        s_updates[s] = st
    return [ot, xtp1], s_updates
Example #32
        def add_synap_pre_inp(i,p,po,s,q):
            # i :: sequence
            # p :: pre | post
            # s :: dApre | dApost
            # q :: W
            index = T.nonzero(q[i,:self.Ne])
            np = T.inc_subtensor(p[i,index],s)
##            tmp = p[i,:]
##            tmp=T.inc_subtensor(tmp[index],s)
##            np=T.set_subtensor(p[i,:],tmp)
            #np = T.inc_subtensor(p[i,:],s)
            nw = T.inc_subtensor(q[i,:],po[i,:])
            nw=T.clip(nw,0,self.wmax)
            return {p:np,q:nw}
Example #33
 def add_synap_pre_inp(i, p, po, s, q):
     # i :: sequence
     # p :: pre | post
     # s :: dApre | dApost
     # q :: W
     index = T.nonzero(q[i, :self.Ne])
     np = T.inc_subtensor(p[i, index], s)
     ##            tmp = p[i,:]
     ##            tmp=T.inc_subtensor(tmp[index],s)
     ##            np=T.set_subtensor(p[i,:],tmp)
     #np = T.inc_subtensor(p[i,:],s)
     nw = T.inc_subtensor(q[i, :], po[i, :])
     nw = T.clip(nw, 0, self.wmax)
     return {p: np, q: nw}
Example #34
def binary_tanh_activation(self, time, updates):
    """Binary tanh activation."""

    # Destroy impulse if in refractory period
    masked_imp = t.set_subtensor(
        self.impulse[t.nonzero(self.refrac_until > time)], 0.)

    # Add impulse
    new_mem = self.mem + masked_imp

    # Store spiking
    signed_spikes = t.set_subtensor(new_mem[t.nonzero(t.gt(new_mem, 0))],
                                    self.v_thresh)
    signed_spikes = t.set_subtensor(
        signed_spikes[t.nonzero(t.lt(signed_spikes, 0))], -self.v_thresh)
    output_spikes = t.set_subtensor(new_mem[t.nonzero(new_mem)], self.v_thresh)

    # Reset neurons
    new_and_reset_mem = t.set_subtensor(new_mem[output_spikes.nonzero()], 0.)

    updates.append((self.mem, new_and_reset_mem))

    return signed_spikes
Example #35
File: text.py Project: alphadl/hnmt
 def split_unk_outputs(self, outputs, outputs_mask):
     # Compute separate mask for character level (UNK) words
     # (with symbol < 0).
     charlevel_mask = outputs_mask * T.lt(outputs, 0)
     # ensure that char-level is never empty
     dummy = 1 - T.sum(charlevel_mask).clip(0, 1)
     dummy_mask = T.inc_subtensor(charlevel_mask[0,0], dummy)
     charlevel_indices = T.nonzero(dummy_mask.T)
     # shortlisted words directly in word level decoder,
     # but char level replaced with unk
     unked_outputs = (1 - charlevel_mask) * outputs
     unked_outputs += charlevel_mask * T.as_tensor(
         self.index['<UNK>'])
     return unked_outputs, charlevel_indices
Example #36
        def sample_loop(s, y_true, y_pred):

            true, pred = y_true[s], y_pred[s]

            pos = pred[true.nonzero()]
            neg = pred[(1 - true).nonzero()]

            pos_tile = T.extra_ops.repeat(pos, neg.shape[0]).reshape((pos.shape[0], neg.shape[0]))
            neg_tile = T.extra_ops.repeat(neg, pos.shape[0]).reshape((neg.shape[0], pos.shape[0])).transpose()

            p_loss = 1 + neg_tile - pos_tile

            loss = p_loss[T.nonzero(p_loss > 0)]

            return T.sum(loss)
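For reference, an eager NumPy analogue of sample_loop above (an illustration only): it sums the hinge terms 1 + neg - pos over every (positive, negative) prediction pair of one sample, keeping only the pairs that violate the margin.

import numpy as np

def pairwise_hinge(true, pred):
    pos = pred[np.nonzero(true)]
    neg = pred[np.nonzero(1 - true)]
    # margins[i, j] = 1 + neg[j] - pos[i], one entry per (positive, negative) pair
    margins = 1 + neg[None, :] - pos[:, None]
    return margins[margins > 0].sum()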
Example #37
 def train_batch(self, batch_size):
     T = self.AE.T
     T = T.tocsr()
     nonzero_indices = T.nonzero()
     #pdb.set_trace()
     n_users = len(np.unique(nonzero_indices[0]))
     indices = np.unique(nonzero_indices[0])
     for epoch in xrange(self.epochs):
         for ind, i in enumerate(xrange(0, n_users, batch_size)):
             ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
             #print ratings
             #pdb.set_trace()
             loss = self.AE.ae_batch(ratings)
             #loss = self.AE.debug(ratings)
             print loss
Example #38
 def step(input_step, previous_activation, time_step, W_in, W_self, biases):
     new_activation = previous_activation.copy()
     modzero = T.nonzero(T.eq(T.mod(time_step, self.group_labels), 0))[0]
     W_in_now = T.flatten(W_in[:, modzero, :], outdim=2)
     W_self_now = T.flatten(W_self[:, modzero, :], outdim=2)
     biases_now = T.flatten(biases[modzero, :])
     activation = T.dot(input_step, W_in_now)
     activation += T.dot(previous_activation, W_self_now)
     activation += biases_now
     activation = self.activation_function(activation)
     modzero_activation_changes = (modzero * self.group_size) + (
         T.ones((modzero.shape[0], self.group_size), dtype='int32') * T.arange(self.group_size, dtype='int32')).T
     modzero_flatten = T.flatten(modzero_activation_changes).astype('int32')
     new_activation = T.set_subtensor(new_activation[:, modzero_flatten], activation)
     time_step += 1
     return new_activation, time_step
Example #39
def trial_resp_pdf(t, ind, A, b, v, s):
    """Probability density function for response i at time t."""
    p_neg, updates = theano.reduce(
        fn=lambda v_i, tot, s: normcdf(-v_i / s) * tot,
        sequences=v,
        outputs_info=tt.ones(1, dtype='float64'),
        non_sequences=s)

    # PDF for i and no finish yet for others
    v_ind = tt.arange(v.shape[0])
    i = tt.cast(ind, 'int64')
    pdf = (tpdf(t, A, b, v[i], s) *
           ncdf(t, A, b, v[tt.nonzero(tt.neq(v_ind, i))], s)) / (1 - p_neg)

    # define probability of negative times to zero
    pdf_cond = tt.switch(tt.gt(t, 0), pdf, 0)
    return pdf_cond
Example #40
        def _step2(diag_, state_, hs_, Cs_):

            hs, Cs = [], []
            token_idxs = tensor.cast(state_.argmax(axis=-1), "int32")
            msk_ = tensor.fill(
                (tensor.zeros_like(token_idxs, dtype="float32")), 1)
            msk_ = msk_.dimshuffle('x', 0)
            state_below0 = self.de_lookuptable[token_idxs].reshape(
                (1, encoderInputs.shape[1], self.de_hidden_size))
            for i, lstm in enumerate(self.decoder_lstm_layers):
                h, C = lstm.forward(state_below0, msk_, hs_[i],
                                    Cs_[i])  #mind msk
                hs += h[-1],
                Cs += C[-1],
                state_below0 = h

            hs, Cs = tensor.as_tensor_variable(hs), tensor.as_tensor_variable(
                Cs)
            state_below0 = state_below0.reshape(
                (encoderInputs.shape[1], self.de_hidden_size))

            attn_index = tensor.nonzero(diag_, True)
            attn_value = tensor.nonzero_values(diag_)

            en_context = Encoder_shuffle[:, attn_index[0], :]
            attn_context = Encoder_shuffle_re[:, attn_index[0], :]

            attn_weight = tensor.batched_dot(attn_context, state_below0)
            attn_weight = tensor.nnet.softmax(attn_weight)
            #attn_weight *= (encoderMask.dimshuffle(1,0))

            attn_weight *= (attn_value.dimshuffle('x', 0))
            ##attn_weight = attn_weight/(tensor.sum(attn_weight, axis=1).dimshuffle(0,'x'))
            ####### ctx_ : (b, h)
            ctx_ = tensor.sum(en_context * attn_weight[:, :, None], axis=1)

            state_below0 = tensor.concatenate([ctx_, state_below0], axis=1)
            newpred = tensor.dot(state_below0,
                                 self.linear) + self.linear_bias[None, :]
            state_below = tensor.nnet.softmax(newpred)
            ##### the probability of the beginning symbol is 0
            extra_p = tensor.zeros_like(hs[:, :, 0])
            state_below = tensor.concatenate([state_below, extra_p.T], axis=1)

            return state_below, hs, Cs
Example #41
 def train(self, batch_size):
     T = self.AE.T
     T = T.tocsr()
     nonzero_indices = T.nonzero()
     #pdb.set_trace()
     n_users = len(np.unique(nonzero_indices[0]))
     indices = np.unique(nonzero_indices[0])
     for epoch in xrange(self.epochs):
         l = []
         for ind, i in enumerate(xrange(0, n_users, batch_size)):
             ratings = T[indices[i:(i + batch_size)], :].toarray().astype(
                 np.float32)
             loss = self.AE.ae_batch(ratings)
             l.append(loss)
         m = np.mean(np.array(l))
         print("mean Loss for epoch %d  batch %d is %f" % (epoch, ind, m))
         rmse = self.RMSE()
         print("RMSE after one epoch is %f" % (rmse))
Example #42
File: text.py Project: alphadl/hnmt
 def split_unk_outputs(self, outputs, outputs_mask):
     # Compute separate mask for character level (UNK) words
     # (with symbol < 0 or > self.low_thresh).
     charlevel_mask = outputs_mask * T.lt(outputs, 0)
     # lower threshold used for indexing tensor for charlevel
     lthr = T.as_tensor(self.low_thresh)
     low_charlevel_mask = charlevel_mask + (outputs_mask * T.gt(outputs, lthr))
     # ensure that char-level is never empty
     dummy = 1 - T.sum(low_charlevel_mask).clip(0, 1)
     low_charlevel_mask = T.inc_subtensor(low_charlevel_mask[0,0], dummy)
     low_charlevel_indices = T.nonzero(low_charlevel_mask.T)
     # higher threshold used for
     # shortlisted words directly in word level decoder,
     # but char level replaced with unk
     unked_outputs = (1 - charlevel_mask) * outputs
     unked_outputs += charlevel_mask * T.as_tensor(
         self.index['<UNK>'])
     return unked_outputs, low_charlevel_indices
Example #43
        def recurrence(curr_inst, current_hv, prev_inst_cluster,
                       current_cluster):
            curr_indices = offsets + curr_inst
            prev_inst_cluster = T.set_subtensor(
                prev_inst_cluster[:, curr_inst], 0)

            curr_rep_cnn = rep_cnn[curr_indices]
            score_by_cluster = T.batched_dot(curr_rep_cnn,
                                             current_hv.transpose((0, 2, 1)))
            score_nonana = T.batched_dot(curr_rep_cnn, T.sum(current_hv,
                                                             axis=1))
            score_by_cluster = T.concatenate([
                T.reshape(score_nonana, [self.args['batch'], 1]),
                score_by_cluster,
                T.alloc(-np.inf, self.args['batch'], 1)
            ],
                                             axis=1)

            row_indices = np.array([[i] * self.args['max_inst_in_doc']
                                    for i in np.arange(self.args['batch'])],
                                   dtype='int32')
            global_score = score_by_cluster[row_indices, prev_inst_cluster]
            score = local_score[curr_indices] + global_score

            indices_single = T.arange(self.args['batch'], dtype='int32')
            ante_cluster_raw = prev_inst_cluster[indices_single,
                                                 T.argmax(score, axis=1)]
            indices_new_cluster = T.nonzero(T.eq(ante_cluster_raw, 0))
            ante_cluster = T.set_subtensor(
                ante_cluster_raw[indices_new_cluster],
                current_cluster[indices_new_cluster])

            current_cluster = T.set_subtensor(
                current_cluster[indices_new_cluster],
                current_cluster[indices_new_cluster] + 1)
            prev_inst_cluster = T.set_subtensor(
                prev_inst_cluster[:, curr_inst], ante_cluster)

            ante_hv = current_hv[indices_single, ante_cluster - 1]
            current_hv = T.set_subtensor(
                current_hv[indices_single, ante_cluster - 1],
                gru_step(curr_rep_cnn, ante_hv))

            return current_hv, prev_inst_cluster, current_cluster
Example #44
    def __init__(self,
                 rng,
                 batchsize,
                 epochs=100,
                 alpha=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 eps=1e-08,
                 l1_weight=0.0,
                 l2_weight=0.1,
                 cost='mse'):
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.eps = eps
        self.l1_weight = l1_weight
        self.l2_weight = l2_weight
        self.rng = rng
        self.theano_rng = RandomStreams(rng.randint(2**30))
        self.epochs = epochs
        self.batchsize = batchsize

        # `cost` is the objective minimised during supervised training;
        # the T.nonzero term ensures that the cost is only calculated for examples with a label
        #
        # Convention: we mark unlabelled examples with a vector of zeros in lieu of a one-hot vector
        if cost == 'mse':
            self.y_pred = lambda network, x: network(x)
            self.error = lambda network, y_pred, y: T.zeros((1, ))
            self.cost = lambda network, x, y: T.mean(
                (network(x)[T.nonzero(y)] - y[T.nonzero(y)])**2)
        elif cost == 'binary_cross_entropy':
            self.y_pred = lambda network, x: network(x)
            self.cost = lambda network, y_pred, y: T.nnet.binary_crossentropy(
                y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
            # classification error
            self.error = lambda network, y_pred, y: T.mean(
                T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
        elif cost == 'cross_entropy':
            self.y_pred = lambda network, x: network(x)
            self.cost = lambda network, y_pred, y: T.nnet.categorical_crossentropy(
                y_pred[T.nonzero(y)], y[T.nonzero(y)]).mean()
            # classification error
            self.error = lambda network, y_pred, y: T.mean(
                T.neq(T.argmax(y_pred, axis=1), T.argmax(y, axis=1)))
        else:
            self.y_pred = lambda network, x: network(x)
            self.error = lambda network, y_pred, y: T.zeros((1, ))
            self.cost = cost
Example #45
        def step(i, in_mask, ACT, ACT_, in_se, WT):
            sub_tree_idx_ = T.nonzero(WT[:, i, :] > -1)
            a_ = T.dot(in_se[:, i], self.WSM)  # + self.b
            if self.b is not None:
                a_ += self.b.dimshuffle('x', 0)
            a_ = a_ + T.sum(ACT_[:, i], axis=1)
            a_ = T.tanh(a_)
#            if self.dropout:
#                a_ = a_ / self.retain_prob * self._srng.binomial(a_.shape, p=self.retain_prob,
#                                                                 dtype=theano.config.floatX)
            a_ = T.switch(in_mask, a_, ACT[:, i-1])
            a__ = T.batched_tensordot(a_[sub_tree_idx_[0], :],
                                      self.WC[WT[sub_tree_idx_[0], 
                                                 i, sub_tree_idx_[1]]], axes=1)
#            if self.dropout:
#                a__ = a__ / self.retain_prob * self._srng.binomial(a__.shape, p=self.retain_prob,
#                                                                   dtype=theano.config.floatX)
            newACT_ = T.set_subtensor(ACT_[sub_tree_idx_[0], sub_tree_idx_[1], i],
                                      a__)
            newACT = T.set_subtensor(ACT[:, i], a_)
            return newACT, newACT_
Example #46
def binary_sigmoid_activation(self, time, updates):
    """Binary sigmoid activation."""

    # Destroy impulse if in refractory period
    masked_imp = t.set_subtensor(
        self.impulse[t.nonzero(self.refrac_until > time)], 0.)

    # Add impulse
    new_mem = self.mem + masked_imp

    # Store spiking
    output_spikes = t.gt(new_mem, 0)

    spike_idxs = output_spikes.nonzero()

    # Reset neurons
    new_and_reset_mem = t.set_subtensor(new_mem[spike_idxs], 0.)

    updates.append((self.mem, new_and_reset_mem))

    return output_spikes
Example #47
 def train(self):
     T = self.AE.T
     # Converting to CSR format for indexing
     T = T.tocsr()
     #pdb.set_trace()
     nonzero_indices = T.nonzero()
     for epoch in xrange(self.epochs):
         print("Running epoch %d"%(epoch))
         for i in np.unique(nonzero_indices[0]):
             # get indices of observed values from the user 'i' 's vector
             indices = T[i, :].nonzero()[1]
             #print indices
             #indices = indices.reshape(indices.shape[0],)
             # Get corresponding ratings
             ratings = T[i, indices].toarray()
             #print ratings
             ratings = ratings.reshape(ratings.shape[1],)
             # Convert inputs to theano datatype
             indices = indices.astype(np.int32)
             ratings = ratings.astype(np.int32)
             #pdb.set_trace()
             loss = self.AE.ae(indices, ratings)
             print("Loss at epoch %d is %f"%(epoch, loss))
Example #48
    def train_batch(self, batch_size):
        T = self.AE.T
        T = T.tocsr()
        nonzero_indices = T.nonzero()
        #pdb.set_trace()
        n_users = len(np.unique(nonzero_indices[0]))
        indices = np.unique(nonzero_indices[0])
        for epoch in xrange(self.epochs):
            for ind, i in enumerate(xrange(0, n_users, batch_size)):
                # CHECK : SEEMS BUGGY. 
                #------------------------
                #ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)
                ratings = T[indices[i:(i + batch_size)], :].toarray().astype(np.float32)

                #------------------------
                #print ratings
                #pdb.set_trace()
                loss = self.AE.ae_batch(ratings)
                #loss = self.AE.debug(ratings)
                #print loss
                #pdb.set_trace()
                print("Loss for epoch %d  batch %d is %f"%(epoch, ind, loss))
            print("RMSE after one epoch is %f"%(self.RMSE()))
Example #49
    activation = T.tanh)

layer1 = ll.LSTMLayer(
    input = layer_feat.output,
    n_in = input_dim,
    n_out = 75)

layer2 = ll.LSTMLayer(
    input = layer_feat.output,
    n_in = input_dim,
    n_out = 75,
    backwards = True)

h = T.concatenate((layer1.output,layer2.output[::-1]),axis=1)

word_indices = T.nonzero(T.eq(T.argmax(input,axis=1),input_dim-1))
word_h = h[word_indices]

layer3 = nl.NNLayer(
    input = word_h,
    n_in = layer1.n_out * 2,
    n_out = 100,
    activation = T.tanh)

layer4 = nl.NNLayer(
    input = layer3.output,
    n_in = layer3.n_out,
    n_out = len(ix_to_tag),
    activation = T.nnet.softmax)

layers = [layer_feat, layer1, layer2, layer3]
Example #50
layer1 = ll.LSTMLayer(
    input = layer_embed.output,
    n_in  = layer_embed.n_out,
    n_out = 150
)

layer2 = ll.LSTMLayer(
    input = layer_embed.output,
    n_in  = layer_embed.n_out,
    n_out = 150,
    backwards=True
)

#gets only the output vectors whose indices are equal to the end of token
end_indices = T.nonzero(T.eq(T.argmax(x, axis=1),input_dim-1))
#gets only the output vectors whose indices are equal to the start of token
start_indices = T.nonzero(T.eq(T.argmax(x,axis=1),input_dim-2))

hl = T.concatenate((layer1.output, layer2.output[::-1]),axis=1)
hc = extra.cumsum(hl, axis=0)
hsub = hc[end_indices] - hc[start_indices]
diff_indices = T.as_tensor_variable(end_indices) - T.as_tensor_variable(start_indices)
diff_shuf = diff_indices.flatten().dimshuffle(0, 'x')

h = hsub / diff_shuf

h_size = (layer1.n_out + layer2.n_out)

#relationship between near words
layer_c = nl.NNLayer(
Example #51
 def mse_rl(y_true, y_pred):
     # We don't really need this here unless you compile your model instead of pickling it.
     care = T.nonzero(T.eq(T.isnan(y_true),0))
     return T.mean((y_pred[care] - y_true[care]) ** 2).sum()
Example #52
 def get_cv_error_one(self):
     """Stochastic approximation to the pseudo-likelihood"""
     validation_3d = (self.validation[:,self.n_labels:]).reshape((1,T.shape(self.validation)[0],T.shape(self.validation)[1]-self.n_labels))
     labels,confidence = self.predict(validation_3d)
     accuracy = (1.0*T.shape(self.validation)[0] - T.shape(T.nonzero(labels - T.argmax(self.validation[:,:self.n_labels], axis=1)))[1]) / T.shape(self.validation)[0]
     return accuracy
Example #53
File: nnet2.py Project: mufan-li/sg
	def nll(self, y):
		# return - T.mean(T.dot(T.log(self.output.T), y))
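		# negative mean log-likelihood over the entries selected by the nonzero targets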
		return -T.mean(
			T.log(self.output)[T.nonzero(y)]
			)
Example #54
def ctc_objective(y_pred, y, y_pred_mask=None, y_mask=None, batch=True):
	''' CTC objective.

	Parameters
	----------
	y_pred : [nb_samples, in_seq_len, nb_classes+1]
		softmax probabilities
	y : [nb_samples, out_seq_len]
		output sequences
	y_mask : [nb_samples, out_seq_len]
		mask decides which labels in y are included (0 for ignore, 1 for keep)
	y_pred_mask : [nb_samples, in_seq_len]
		mask decides which samples in the input sequence are used
	batch : True/False
		if batching is not used, nb_samples=1
		Note: the implementation without batch support is more reliable

	Returns
	-------
	grad_cost : the cost you calculate gradient on
	actual_cost : the cost for monitoring model performance (*NOTE: do not calculate
		gradient on this cost)

	Note
	----
	According to @Richard Kurle:
		test error of 38% with 1 bidirectional LSTM layer or with a stack of 3,
		but I could not reproduce the results reported in Graves' paper.

		If you get blanks only, you probably just have bad hyperparameters or you
		did not wait enough epochs. At the beginning of training,
		only the cost decreases and you don't yet see any characters popping up.

		You will need gradient clipping to prevent exploding gradients as well.
	'''
	y_pred_mask = y_pred_mask if y_pred_mask is not None else T.ones((y_pred.shape[0], y_pred.shape[1]), dtype=floatX)
	y_mask = y_mask if y_mask is not None else T.ones(y.shape, dtype=floatX)
	if batch:
		# ====== reshape input ====== #
		y_pred = y_pred.dimshuffle(1, 0, 2)
		y_pred_mask = y_pred_mask.dimshuffle(1, 0)
		y = y.dimshuffle(1, 0)
		y_mask = y_mask.dimshuffle(1, 0)

		# ====== calculate cost ====== #
		grad_cost = _pseudo_cost(y, y_pred, y_mask, y_pred_mask, False)
		grad_cost = grad_cost.mean()
		monitor_cost = _cost(y, y_pred, y_mask, y_pred_mask, True)
		monitor_cost = monitor_cost.mean()

		return grad_cost, monitor_cost
	else:
		y = T.cast(y, dtype='int32')

		# batch_size=1 => just take [0] to reduce 1 dimension
		y_pred = y_pred[0]
		y_pred_mask = y_pred_mask[0]
		y = y[0]
		y_mask = y_mask[0]

		# after take, ndim=2 go up to 3, need to be reduced back to 2
		y_pred = T.take(y_pred, T.nonzero(y_pred_mask, return_matrix=True), axis=0)[0]
		y = T.take(y, T.nonzero(y_mask, return_matrix=True), axis=0).ravel()

		return _cost_no_batch(y_pred, y)
Example #55
    def normal(self, size, avg=0.0, std=1.0, ndim=None, dtype=None,
               nstreams=None, truncate=False, **kwargs):
        """
        Sample a tensor of values from a normal distribution.

        Parameters
        ----------
        size : int_vector_like
            Array dimensions for the output tensor.
        avg : float_like, optional
            The mean value for the truncated normal to sample from (defaults to 0.0).
        std : float_like, optional
            The standard deviation for the truncated normal to sample from (defaults to 1.0).
        truncate : bool, optional
            Truncates the normal distribution at 2 standard deviations if True (defaults to False).
            When this flag is set, the standard deviation of the result will be less than the one specified.
        ndim : int, optional
            The number of dimensions for the output tensor (defaults to None).
            This argument is necessary if the size argument is ambiguous on the number of dimensions.
        dtype : str, optional
            The data-type for the output tensor. If not specified,
            the dtype is inferred from avg and std, but it is at least as precise as floatX.
        kwargs
            Other keyword arguments for random number generation (see uniform).

        Returns
        -------
        samples : TensorVariable
            A Theano tensor of samples randomly drawn from a normal distribution.

        """
        size = _check_size(size)
        avg = undefined_grad(as_tensor_variable(avg))
        std = undefined_grad(as_tensor_variable(std))

        if dtype is None:
            dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

        avg = tensor.cast(avg, dtype=dtype)
        std = tensor.cast(std, dtype=dtype)

        # generate even number of uniform samples
        # Do manual constant folding to reduce the optimizer's work.
        if isinstance(size, theano.Constant):
            n_odd_samples = size.prod(dtype='int64')
        else:
            n_odd_samples = tensor.prod(size, dtype='int64')
        n_even_samples = n_odd_samples + n_odd_samples % 2
        uniform = self.uniform((n_even_samples, ), low=0., high=1.,
                               ndim=1, dtype=dtype, nstreams=nstreams, **kwargs)

        # box-muller transform
        u1 = uniform[:n_even_samples // 2]
        u2 = uniform[n_even_samples // 2:]
        r = tensor.sqrt(-2.0 * tensor.log(u1))
        theta = np.array(2.0 * np.pi, dtype=dtype) * u2
        cos_theta, sin_theta = tensor.cos(theta), tensor.sin(theta)
        z0 = r * cos_theta
        z1 = r * sin_theta

        if truncate:
            # use valid samples
            to_fix0 = (z0 < -2.) | (z0 > 2.)
            to_fix1 = (z1 < -2.) | (z1 > 2.)
            z0_valid = z0[tensor.nonzero(~to_fix0)]
            z1_valid = z1[tensor.nonzero(~to_fix1)]

            # re-sample invalid samples
            to_fix0 = tensor.nonzero(to_fix0)[0]
            to_fix1 = tensor.nonzero(to_fix1)[0]
            n_fix_samples = to_fix0.size + to_fix1.size
            lower = tensor.constant(1. / np.e**2, dtype=dtype)
            u_fix = self.uniform((n_fix_samples, ), low=lower, high=1.,
                                 ndim=1, dtype=dtype, nstreams=nstreams, **kwargs)
            r_fix = tensor.sqrt(-2. * tensor.log(u_fix))
            z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
            z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

            # pack everything together to a useful result
            norm_samples = tensor.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
        else:
            norm_samples = tensor.join(0, z0, z1)
        if isinstance(n_odd_samples, theano.Variable):
            samples = norm_samples[:n_odd_samples]
        elif n_odd_samples % 2 == 1:
            samples = norm_samples[:-1]
        else:
            samples = norm_samples
        samples = tensor.reshape(samples, newshape=size, ndim=ndim)
        samples *= std
        samples += avg

        return samples
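A usage sketch for a sampler exposing the normal method above (the interface matches Theano's MRG random streams; the truncate flag is only present in Theano versions that include this implementation):

import theano
from theano.sandbox.rng_mrg import MRG_RandomStreams

rng = MRG_RandomStreams(seed=42)
# symbolic (3, 4) tensor of normal samples, truncated at two standard deviations
z = rng.normal(size=(3, 4), avg=0.0, std=2.0, truncate=True)
f = theano.function([], z)
print(f().shape)  # (3, 4)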
Example #56
def nonzero(x, return_matrix=False):
    return T.nonzero(x, return_matrix)
Example #57
 def predict(prob, mask):
     valid_index = T.nonzero(mask > 0)[0]
     prob = prob[valid_index]
     word_index = T.zeros((batch_size,), dtype='int32')
     word_index = T.set_subtensor(word_index[valid_index], T.argmax(prob, axis=1))  # +1?
     return word_index
Example #58
 def loss_of_time(prob, y, mask):
     valid_index = T.nonzero(mask > 0)[0]
     # FIXME: why y log twice ?
     loss = -T.sum(T.log(prob[valid_index, y[valid_index]]))
     return loss