Example #1
def model(y1, s0, s1):
    if is_theano(y1, s0, s1):
        math = tt
    else:
        math = np

    # Compute the background component
    # TODO: This step can be sped up...
    A = math.dot(
        math.reshape(s0, (-1, 1)),
        math.reshape(
            math.concatenate(([1.0], math.zeros_like(y1)), axis=0), (1, -1)
        ),
    )
    a = math.reshape(math.transpose(A), (-1,))
    if math == tt:
        M0 = math.reshape(ts.dot(D, a), (M, -1))
    else:
        M0 = math.reshape(D.dot(a), (M, -1))

    # Compute the spot component
    A = math.dot(
        math.reshape(s1, (-1, 1)),
        math.reshape(math.concatenate(([1.0], y1), axis=0), (1, -1)),
    )
    a = math.reshape(math.transpose(A), (-1,))
    if math == tt:
        M1 = math.reshape(ts.dot(D, a), (M, -1))
    else:
        M1 = math.reshape(D.dot(a), (M, -1))

    # Remove the baseline
    b = math.reshape(2.0 + math.dot(B1, y1), (M, -1))

    return (M0 + M1) / b
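A small NumPy-only sketch (illustrative sizes, not part of the model) of the pattern used for the background and spot components above: an outer product of the stand-in vector s0 with the padded coefficient vector, transposed and flattened into the vector a that the example then pushes through the design matrix D.

import numpy as np

s0 = np.random.randn(5)                                     # stand-in for s0
y1 = np.random.randn(3)                                     # stand-in for y1
A = np.outer(s0, np.concatenate(([1.0], np.zeros_like(y1))))
a = A.T.ravel()                                             # transpose, then flatten
print(a.shape)  # (len(s0) * (len(y1) + 1),) == (20,)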
Example #2
 def __init__(self, rng, P_input, L2_input, **kwargs):
     #symbol declaration, initialization and definition
     x_1_tm1, x_t = (\
             sparse.csr_matrix("x_1_tm1", dtype=theano.config.floatX),\
             sparse.csr_matrix("x_t",dtype=theano.config.floatX)\
         )\
         if P_input is None else P_input[:2]
     
     #elements of history
     shape = kwargs.get("shape")
     if shape is not None:
         dict_size = shape[0]
         if len(shape) <= 1:
             del shape["shape"]
         else:
             shape["shape"] = shape["shape"][1:]
     else:
         dict_size = (16,1,32,32)
     D_1_tm1 = theano.shared(rng.normal(size=dict_size).astype(theano.config.floatX))        
     Dx_1_tm1 = sparse.dot(x_1_tm1, D_1_tm1)#array access=dot operation      
     super(SequenceCNN, self).__init__(rng=rng, inputsymbol=Dx_1_tm1, **kwargs)#attaches new elements into the fgraph
     self.L2_output_1_tm1 = self.L2_output
     
     #elements of current time
     D_t = theano.shared(rng.normal(size=dict_size).astype(theano.config.floatX))        
     Dx_t = sparse.dot(x_t, D_t)#array access=dot operation
     self.L2_output_t = theano.clone(self.L2_output_1_tm1, replace={Dx_1_tm1:Dx_t})
     
     #element preparation for model building
     self.P_input = (x_1_tm1,x_t)
     self.params += [D_1_tm1, D_t]
     self.L2_output = self.L2_output_1_tm1*self.L2_output_t
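A SciPy-only sketch (standalone, toy sizes) of the "array access = dot operation" comment above: multiplying a sparse one-hot matrix by a dense dictionary D selects the corresponding rows of D, which is how the sparse input indexes the shared dictionary.

import numpy as np
import scipy.sparse as sp

D = np.random.randn(6, 4)                      # dense dictionary, 6 rows
rows = np.array([2, 5])                        # rows we want to "access"
x = sp.csr_matrix((np.ones(len(rows)), (np.arange(len(rows)), rows)), shape=(2, 6))
assert np.allclose(x.dot(D), D[rows])          # sparse dot == row lookup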
Example #3
    def u(self, value):
        value = np.atleast_1d(value)
        assert (len(value.shape) == 1
                ), "Wavelength-dependent limb darkening not yet supported."
        self._u = value

        # Did the degree of limb darkening change?
        if len(self._u) != self._udeg:
            self._udeg = len(self._u)
            # Force the re-instantiation of the internal map
            self.ydeg = self._ydeg

        if self._udeg > 0:

            # Set the coeffs
            self._map[1:] = self._u

            # Compute the limb darkening operator
            F = self._map.ops.F(
                tt.as_tensor_variable(np.append([-1.0], self._u)),
                tt.as_tensor_variable([np.pi]),
            )
            self._L = ts.dot(ts.dot(self._map.ops.A1Inv, F),
                             self._map.ops.A1).eval()
            self._L = csr_matrix(self._L)
Example #4
def create_TrainFunc_tranPES(simfn, embeddings,  marge=0.5, alpha=1., beta=1.):

    # parse the embedding data
    embedding = embeddings[0] # D x N matrix
    lembedding = embeddings[1]

    # declare the symbolic variables for training triples
    hp = S.csr_matrix('head positive') # N x batchsize matrix
    rp = S.csr_matrix('relation')
    tp = S.csr_matrix('tail positive')

    hn = S.csr_matrix('head negative')
    tn = S.csr_matrix('tail negative')

    lemb = T.scalar('embedding learning rate')
    lremb = T.scalar('relation learning rate')

    subtensorE = T.ivector('batch entities set')
    subtensorR = T.ivector('batch link set')

    # Generate the training positive and negative triples
    hpmat = S.dot(embedding.E, hp).T #  batchsize x D dense matrix
    rpmat = S.dot(lembedding.E, rp).T
    tpmat = S.dot(embedding.E, tp).T

    hnmat = S.dot(embedding.E, hn).T
    tnmat = S.dot(embedding.E, tn).T

    # calculate the score
    pos = tranPES3(simfn, T.concatenate([hpmat, tpmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tpmat)


    negh = tranPES3(simfn, T.concatenate([hnmat, tpmat], axis=1).reshape((hnmat.shape[0], 2, hnmat.shape[1])).dimshuffle(0, 2, 1), hnmat, rpmat, tpmat)
    negt = tranPES3(simfn, T.concatenate([hpmat, tnmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tnmat)

    costh, outh = margeCost(pos, negh, marge)
    costt, outt = margeCost(pos, negt, marge)

    embreg = regEmb(embedding, subtensorE, alpha)
    lembreg = regLink(lembedding, subtensorR, beta)
    

    cost = costh + costt + embreg[0] + lembreg
    out = T.concatenate([outh, outt])
    outc = embreg[1]

    # list of inputs to the function
    list_in = [lemb, lremb, hp, rp, tp, hn, tn, subtensorE, subtensorR]

    # updating the embeddings using gradient descend
    emb_grad = T.grad(cost, embedding.E)
    New_embedding = embedding.E - lemb*emb_grad

    remb_grad = T.grad(cost, lembedding.E)
    New_rembedding = lembedding.E - lremb * remb_grad

    updates = OrderedDict({embedding.E: New_embedding, lembedding.E: New_rembedding})

    return theano.function(list_in, [cost, T.mean(out), T.mean(outc), embreg[0], lembreg],
                          updates=updates, on_unused_input='ignore')
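A self-contained Theano sketch (toy shared variable, not the embeddings above) of the update pattern used at the end of this example: take the gradient of the cost with respect to a shared variable and hand the new value to theano.function via updates.

import numpy as np
import theano
import theano.tensor as T

E = theano.shared(np.random.randn(3, 5).astype(theano.config.floatX), name='E')
lr = T.scalar('lr', dtype=theano.config.floatX)
cost = T.sum(E ** 2)
step = theano.function([lr], cost, updates=[(E, E - lr * T.grad(cost, E))],
                       allow_input_downcast=True)
step(0.1)  # one gradient-descent step on E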
Example #5
def create_TrainFunc_tranPES(simfn, embeddings,  marge=0.5, alpha=1., beta=1.):

    # parse the embedding data
    embedding = embeddings[0] # D x N matrix
    lembedding = embeddings[1]

    # declare the symbolic variables for training triples
    hp = S.csr_matrix('head positive') # N x batchsize matrix
    rp = S.csr_matrix('relation')
    tp = S.csr_matrix('tail positive')

    hn = S.csr_matrix('head negative')
    tn = S.csr_matrix('tail negative')

    lemb = T.scalar('embedding learning rate')
    lremb = T.scalar('relation learning rate')

    subtensorE = T.ivector('batch entities set')
    subtensorR = T.ivector('batch link set')

    # Generate the training positive and negative triples
    hpmat = S.dot(embedding.E, hp).T #  batchsize x D dense matrix
    rpmat = S.dot(lembedding.E, rp).T
    tpmat = S.dot(embedding.E, tp).T

    hnmat = S.dot(embedding.E, hn).T
    tnmat = S.dot(embedding.E, tn).T

    # calculate the score
    pos = tranPES3(simfn, T.concatenate([hpmat, tpmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tpmat)


    negh = tranPES3(simfn, T.concatenate([hnmat, tpmat], axis=1).reshape((hnmat.shape[0], 2, hnmat.shape[1])).dimshuffle(0, 2, 1), hnmat, rpmat, tpmat)
    negt = tranPES3(simfn, T.concatenate([hpmat, tnmat], axis=1).reshape((hpmat.shape[0], 2, hpmat.shape[1])).dimshuffle(0, 2, 1), hpmat, rpmat, tnmat)

    costh, outh = margeCost(pos, negh, marge)
    costt, outt = margeCost(pos, negt, marge)

    embreg = regEmb(embedding, subtensorE, alpha)
    lembreg = regLink(lembedding, subtensorR, beta)
    

    cost = costh + costt + embreg[0] + lembreg
    out = T.concatenate([outh, outt])
    outc = embreg[1]

    # list of inputs to the function
    list_in = [lemb, lremb, hp, rp, tp, hn, tn, subtensorE, subtensorR]

    # updating the embeddings using gradient descend
    emb_grad = T.grad(cost, embedding.E)
    New_embedding = embedding.E - lemb*emb_grad

    remb_grad = T.grad(cost, lembedding.E)
    New_rembedding = lembedding.E - lremb * remb_grad

    updates = OrderedDict({embedding.E: New_embedding, lembedding.E: New_rembedding})

    return theano.function(list_in, [cost, T.mean(out), T.mean(outc), embreg[0], lembreg],
                          updates=updates, on_unused_input='ignore')
Example #6
    def fprop(self, state_below, add_noise=True):
        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)
        
        self.x = state_below
        
        # linear part
        if isinstance(self.x, S.SparseVariable):
            z = S.dot(self.x,self.W[0]) + self.b[0]
        else:
            z = T.dot(self.x,self.W[0]) + self.b[0]
        
        self.z = self.activate(z, self.expert_activation)
        
        # first layer non-linear part
        if isinstance(self.x, S.SparseVariable):
            h = S.dot(self.x,self.W[1]) + self.b[1]
        else:
            h = T.dot(self.x,self.W[1]) + self.b[1]
        
        # activate hidden units of non-linear part
        self.h = self.activate(h, self.hidden_activation)
            
        noise = 0.
        if add_noise:
            rng = MRG_RandomStreams(self.mlp.rng.randint(2**15))
            noise = rng.normal(size = self.z.shape, 
                                    std=self.noise_stdev ,
                                    dtype=self.z.type.dtype) 
        
        # second layer non-linear part
        self.a = T.dot(self.h,self.W[2]) + self.b[2] + noise
        
        # activate non-linear part
        self.m_mean = self.activate(self.a, self.gater_activation)
        
        # how many are over 0:
        self.effective_sparsity = T.cast(T.gt(self.m_mean, 0), 
                                         theano.config.floatX).mean()
           
        # mix output of linear part with output of non-linear part
        self.p = self.m_mean * self.z
        
        if self.layer_name is not None:
            self.z.name = self.layer_name + '_z'
            self.h.name = self.layer_name + '_h'
            self.a.name = self.layer_name + '_a'
            self.m_mean.name = self.layer_name + '_m_mean'
            self.p.name = self.layer_name + '_p'
        
        return self.p
Example #7
    def get_output_for(self, input, **kwargs):
        if not isinstance(input, (S.SparseVariable, S.SparseConstant,
                                  S.sharedvar.SparseTensorSharedVariable)):
            raise ValueError("Input for this layer must be sparse")
        
        activation = S.dot(input, self.W)
        #do the convolution
        activation = S.dot(self.H, activation)

        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)
        return self.nonlinearity(activation)
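An illustrative SciPy sketch (H here is a made-up operator, not the layer's) of what the "do the convolution" step amounts to: left-multiplying the activations by a sparse matrix H so that each output row is a weighted mix of input rows.

import numpy as np
import scipy.sparse as sp

H = sp.csr_matrix(np.array([[0.5, 0.5, 0.0],
                            [0.5, 0.0, 0.5],
                            [0.0, 0.5, 0.5]]))   # e.g. a normalized adjacency
activation = np.random.randn(3, 4)               # one feature row per node
mixed = H.dot(activation)                        # neighbourhood-mixed rows, (3, 4)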
Example #8
def ForwardFn(fnsim, embeddings, leftop, rightop, marge=1.0):
    """
    This function returns a theano function to perform a forward step,
    contrasting couples of positive and negative triplets. Members are given
    as sparse matrices. For one positive triplet there is one negative
    triplet.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: marge for the cost function.

    :note: this is useful for W_SABIE [Weston et al., IJCAI 2011]
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()
    inpon = S.csr_matrix()

    # graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    relln = S.dot(relationl.E, inpon).T
    relrn = S.dot(relationr.E, inpon).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    simin = fnsim(leftop(lhsn, relln), rightop(rhsn, relrn))
    cost, out = margincost(simi, simin, marge)
    """
    Theano function inputs.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples,N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpon: sparse csr matrix representing the indexes of the negative
                  triplet relation member, shape=(#examples,N [Embeddings]).

    Theano function output.
    :output out: binary vector representing when the margin is violated, i.e.
                 when an update occurs.
    """
    return theano.function([inpl, inpr, inpo,
                           inpln, inprn, inpon], [out],
                           on_unused_input='ignore')
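A minimal sketch (assumed helper; the project's margincost is defined elsewhere) of the contrast described in the docstring: the cost sums the rectified margin violations and out is the binary vector flagging the triplets for which an update would occur.

import theano.tensor as T

def margincost_sketch(pos, neg, marge=1.0):
    out = neg - pos + marge > 0              # 1 where the margin is violated
    cost = T.sum((neg - pos + marge) * out)  # sum of rectified violations
    return cost, out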
Example #9
def SimFn(fnsim, embeddings, leftop, rightop):
    """
    This function returns a Theano function to measure the similarity score
    for sparse matrices inputs.

    :param fnsim: similarity function (on Theano variables).
    :param embeddings: an Embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix('inpr')
    inpl = S.csr_matrix('inpl')
    inpo = S.csr_matrix('inpo')
    # Graph
    #what is T? Are they tensor? lhs, rhs,rell,relr 
    # we just created inpl and inplr inpo . what does it mean to calculate dot product?
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    
    # what is this?
    #ref:
    #leftop = LayerMat('lin', state.ndim, state.nhid)
    #rightop = LayerMat('lin', state.ndim, state.nhid)
    # on call 
    #ry = y.reshape((y.shape[0], self.n_inp, self.n_out))
    #rx = x.reshape((x.shape[0], x.shape[1], 1))
    #return self.act((rx * ry).sum(1))
    
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    """
    Theano function inputs.
    :input inpl: sparse csr matrix (representing the indexes of the 'left'
                    entities), shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix (representing the indexes of the 'right'
                    entities), shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix (representing the indexes of the
                    relation member), shape=(#examples, N [Embeddings]).

    Theano function output
    :output simi: matrix of score values.
    """
    return theano.function([inpl, inpr, inpo], [simi],
            on_unused_input='ignore')
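A NumPy-only check (toy shapes) of the LayerMat snippet quoted in the comments above: reshaping y to (batch, n_inp, n_out) and x to (batch, n_inp, 1), multiplying, and summing over axis 1 is just a per-example matrix-vector product.

import numpy as np

batch, n_inp, n_out = 2, 3, 4
x = np.random.randn(batch, n_inp)
y = np.random.randn(batch, n_inp * n_out)
ry = y.reshape(batch, n_inp, n_out)
rx = x.reshape(batch, n_inp, 1)
out = (rx * ry).sum(1)                                  # (batch, n_out)
assert np.allclose(out, np.einsum('bi,bio->bo', x, ry))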
Example #10
def log_first_stage_indep_normal_priors(theta_tilde,
                                        num_variances,
                                        num_betas,
                                        filtered_rows_to_alts):
    """
    Calculates the log of the first stage joint density of error terms
    conditional on the alternative specific variances. Note that the
    error terms are assumed to be INDEPENDENTLY normally distributed
    with mean zero, conditional on the alternative specific variances.
    The returned value is correct up to an additive constant (which is
    comprised of arbitrary constants as well as the log-marginal evidence).

    Parameters
    ----------
    theta_tilde : 1D ndarray of floats.
        Concatenation of the natural log of the alternative specific
        variances, the error terms, and the betas (in that order).
    num_variances : int.
        Number of alternative specific variances stored at the start of
        theta_tilde.
    num_betas : int.
        Number of betas stored at the end of theta_tilde.
    filtered_rows_to_alts : 2D sparse array of zeros and ones.
        Each element (i, j) should denote whether row i corresponds to
        alternative j or not, using ones and zeros respectively.

    Returns
    -------
    log_first_stage : scalar.
        The log of the joint density of the error terms, up to an additive
        constant that contains the log of the normalization constant and the
        log of the other arbitrary constants from the multivariate normal
        distribution.

    References
    ----------
    Gelman, Andrew, et al. (2014). Bayesian Data Analysis, 3rd Ed. Taylor
        & Francis Group. pp. 576-578.
    """
    # Get the position in theta_tilde, at which the betas start
    beta_neg_idx = -1 * num_betas
    # Split theta_tilde into its various components
    alt_variances = tt.exp(theta_tilde[:num_variances])
    error_terms = theta_tilde[num_variances:beta_neg_idx]

    # If the error terms are conditionally independent given the
    # alternative specific variances, then the covariance matrix
    # for the joint distribution of errors is diagonal and the inverse
    # of a diagonal matrix is a diagonal matrix with the inverses on
    # the diagonal. The inverses are calculated below
    inverse_variances = 1.0 / alt_variances

    # Map the inverse variances to their corresponding rows of error terms
    long_inverse_variances =\
        sparse.dot(filtered_rows_to_alts, inverse_variances)

    squared_errors = error_terms**2

    # Below, we implement -0.5 * (theta - mu)^T Sigma^{-1} (theta - mu) for
    # the specific case of a diagonal Sigma, Mu = 0, and theta = error_terms
    log_first_stage =\
        -0.5 * tt.sum(tt.mul(long_inverse_variances, squared_errors))

    return log_first_stage
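A NumPy-only sketch (toy numbers, independent of the module above) of the identity the function relies on: with mean zero and a diagonal covariance, -0.5 * (theta - mu)^T Sigma^{-1} (theta - mu) reduces to -0.5 * sum(errors**2 / variances), and the normalization term only adds a constant.

import numpy as np

rng = np.random.default_rng(0)
variances = np.array([0.5, 1.0, 2.0])
errors = rng.normal(size=3)

kernel = -0.5 * np.sum(errors**2 / variances)           # what the code computes
log_norm = -0.5 * (len(errors) * np.log(2 * np.pi) + np.sum(np.log(variances)))
print(kernel, kernel + log_norm)                        # differ only by a constant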
Example #11
	def get_train_function(self):
		# specify the computational graph
		target = T.matrix('target')
		weight = theano.shared(np.random.randn(len(self.feature_map), len(self.label_map)), name='weight')
		feat_mat = sparse.csr_matrix(name='feat_mat')
		mask_mat = sparse.csr_matrix(name='mask_mat')
		sum_pred = sparse.dot( mask_mat, T.nnet.softmax( sparse.dot(feat_mat, weight) ) )
		pred = sum_pred / sum_pred.sum(axis=1).reshape((sum_pred.shape[0], 1))
		objective = T.nnet.categorical_crossentropy(pred, target).sum() + self.param.l2_regularization * (weight ** 2).sum()
		grad_weight = T.grad(objective, weight)

		# print 'Compiling function ...'
		# compile the function
		train = theano.function(inputs = [feat_mat, mask_mat, target], outputs = [objective, weight], updates=[(weight, weight - 0.1*grad_weight)] )

		return train
Example #12
def optimize_func(transform_matrix):
    t0 = time.time()
    M = S.csr_matrix(dtype=theano.config.floatX)
    N = S.csr_matrix(dtype=theano.config.floatX)
    ON = S.csr_matrix(dtype=theano.config.floatX)
    lr = T.scalar('learning rate', dtype=theano.config.floatX)
    # print M, N, ON, lr

    TN = S.dot(transform_matrix, N)
    D = T.sqr(M - TN)
    # PD = S.sqr(N-ON)
    # PD = T.sqrt(S.sp_sum(PD, 1))
    # TPD = T.sqr(TN - ON)
    # TPD = T.sqrt(TPD.sum(1))

    # D2 = T.sqr(PD-TPD)
    cost = T.sum(D)  #+ T.sum(D2)

    list_in = [lr, M, N, ON]
    gradient = T.grad(cost, transform_matrix)
    new_transform_matrix = transform_matrix - lr * gradient
    t1 = time.time()
    print('opt func cost is ' + str(t1 - t0))
    return theano.function(list_in,
                           cost,
                           updates=[(transform_matrix, new_transform_matrix)],
                           on_unused_input='ignore')
Example #13
 def matmul(self, a, b, transpose_a=False, transpose_b=False, a_is_sparse=False, b_is_sparse=False, name=None):
     if transpose_a:
         a = a.T
     if transpose_b:
         b = b.T
     if a_is_sparse or b_is_sparse:
         return sparse.dot(a, b)
     return T.dot(a, b)
Example #14
    def get_output_for(self, input, **kwargs):
        target_indices = kwargs.get('target_indices') 
        activation = T.dot(input, self.W)
        #do the convolution
        activation = S.dot(self.H, activation)

        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)
        activation = activation[target_indices, :]
        return self.nonlinearity(activation)
Example #15
    def _get_diagonal_term(self, X_left, X_right, diag_init):
        diag = tn.shared(value=diag_init, name='diag')

        if _tn_is_sparse(X_left) or _tn_is_sparse(X_right):
            XlXr = tsp.mul(X_left, X_right)
            y_pred = tsp.dot(XlXr, diag)
        else:
            XlXr = T.mul(X_left, X_right)
            y_pred = T.dot(XlXr, diag)

        return y_pred, [diag]
Example #16
    def _get_diagonal_term(self, X_left, X_right, diag_init):
        diag = tn.shared(value=diag_init, name='diag')

        if _tn_is_sparse(X_left) or _tn_is_sparse(X_right):
            XlXr = tsp.mul(X_left, X_right)
            y_pred = tsp.dot(XlXr, diag)
        else:
            XlXr = T.mul(X_left, X_right)
            y_pred = T.dot(XlXr, diag)

        return y_pred, [diag]
Example #17
 def labelFunct(self, batchSize, xFeats):
     #  xFeats [l, h]
     # l = batchSize
     # self.W = theano.printing.Print("W ") (self.W)
     # self.Wb = theano.printing.Print("Wb ") (self.Wb)
     scores = sparse.dot(xFeats, self.W) + self.Wb  # [l, h] x [h, r] => [l, r]
     relationProbs = T.nnet.softmax(scores)
     # scores = theano.printing.Print("scores ") (scores)
     labels = T.argmax(scores, axis=1)  #  [l, r] => [l]
     # labels = theano.printing.Print("labels ") (labels)
     return (labels, relationProbs)
Example #18
 def labelFunct(self, batchSize, xFeats):
     #  xFeats [l, h]
     # l = batchSize
     # self.W = theano.printing.Print("W ") (self.W)
     # self.Wb = theano.printing.Print("Wb ") (self.Wb)
     scores = sparse.dot(xFeats,
                         self.W) + self.Wb  # [l, h] x [h, r] => [l, r]
     relationProbs = T.nnet.softmax(scores)
     # scores = theano.printing.Print("scores ") (scores)
     labels = T.argmax(scores, axis=1)  #  [l, r] => [l]
     # labels = theano.printing.Print("labels ") (labels)
     return (labels, relationProbs)
Example #19
def SimFn(fnsim, embeddings, leftop, rightop, op=''):
    """
    This function returns a Theano function to measure the similarity score for sparse matrices inputs.

    :param fnsim: similarity function (on Theano variables).
    :param embeddings: an Embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    """

    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs

    inpr, inpl, inpo = S.csr_matrix('inpr'), S.csr_matrix(
        'inpl'), S.csr_matrix('inpo')

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T

    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T

    lop, rop = leftop(lhs, rell), rightop(rhs, relr)

    simi = fnsim(lop, rop)
    """
    Theano function inputs.
    :input inpl: sparse csr matrix (representing the indexes of the 'left' entities), shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix (representing the indexes of the 'right' entities), shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix (representing the indexes of the relation member), shape=(#examples, N [Embeddings]).

    Theano function output
    :output simi: matrix of score values.
    """
    return theano.function([inpl, inpr, inpo], [simi],
                           on_unused_input='ignore')
Example #20
	def get_train_function(self):
		# specify the computational graph
		weight = theano.shared(np.random.randn(len(self.feature_map), len(self.label_map)), name='weight')
		# weight = theano.shared(np.zeros((len(self.feature_map), len(self.label_map))), name='weight')
		feat_mat = sparse.csr_matrix(name='feat_mat')

		f_target = T.matrix('f_target')
		f_mask_mat = sparse.csr_matrix(name='f_mask_mat')
		f_sum_pred = sparse.dot( f_mask_mat, T.nnet.softmax( sparse.dot(feat_mat, weight) ) )
		f_pred = f_sum_pred / f_sum_pred.sum(axis=1).reshape((f_sum_pred.shape[0], 1))

		i_target = T.matrix('i_target')
		i_mask_mat = sparse.csr_matrix(name='l_mask_mat')
		i_pred = sparse.dot( i_mask_mat, T.nnet.softmax( sparse.dot(feat_mat, weight) ) )

		objective = self.param.feature_lambda * T.nnet.categorical_crossentropy(f_pred, f_target).sum() + T.nnet.categorical_crossentropy(i_pred, i_target).sum() + self.param.l2_lambda * (weight ** 2).sum() / 2
		grad_weight = T.grad(objective, weight)

		# print 'Compiling function ...'
		# compile the function
		train = theano.function(inputs = [feat_mat, f_mask_mat, f_target, i_mask_mat, i_target], outputs = [objective, weight], updates=[(weight, weight - 0.1*grad_weight)] )

		return train
Example #21
 def __init__(self, rng, P_input, L2_input=None, **kwargs):
     #1.symbol declaration, initialization and definition
     I = sparse.csr_matrix("I") if P_input is None else P_input
     shape = kwargs.get("shape") or [(16,1,32,32), (4,16,16,2,2), (4,4,4,2,2)]
     dict_size, kwargs["shape"] = shape[0], shape[1:]
     D = theano.shared(\
         rng.uniform(low=-1,high=1,size=dict_size).astype(theano.config.floatX)\
     )
     DI = sparse.dot(I, D)#array access=dot operation
     
     #2.attaches I and D into the fgraph
     super(SparseCNN, self).__init__(rng=rng, P_input=DI, **kwargs)
     self.params += [D,]
     self.P_input = I#take I as input for the sparseCNN
Example #22
def SimFn(fnsim, embeddings, leftop, rightop, op=''):
    """
    This function returns a Theano function to measure the similarity score for sparse matrices inputs.

    :param fnsim: similarity function (on Theano variables).
    :param embeddings: an Embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    """

    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs

    inpr, inpl, inpo = S.csr_matrix('inpr'), S.csr_matrix('inpl'), S.csr_matrix('inpo')

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T

    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T

    lop, rop = leftop(lhs, rell), rightop(rhs, relr)

    simi = fnsim(lop, rop)

    """
    Theano function inputs.
    :input inpl: sparse csr matrix (representing the indexes of the 'left' entities), shape=(#examples, N [Embeddings]).
    :input inpr: sparse csr matrix (representing the indexes of the 'right' entities), shape=(#examples, N [Embeddings]).
    :input inpo: sparse csr matrix (representing the indexes of the relation member), shape=(#examples, N [Embeddings]).

    Theano function output
    :output simi: matrix of score values.
    """
    return theano.function([inpl, inpr, inpo], [simi], on_unused_input='ignore')
Example #23
File: smf.py Project: rn5l/rsc18
 def _generate_train_model_batch_function(self):
     
     #s = T.matrix('s', dtype=self.floatX)
     s = S.csr_matrix('s', dtype=self.floatX)
     
     #u = T.vector('u', dtype=self.intX)
     i = T.vector('i', dtype=self.intX)
     y = T.vector('y', dtype=self.intX)
     #items = T.vector('items', dtype=self.intX)
     
     Sit = self.S
     sit = s.T
     
     #Uu = self.U[u]
     
     Iy = self.I[y]
     
     BSy = self.BS[y]
     #BUy = self.BU[y]
     BIy = self.BI[y]
     
     I1i = self.I1[i]
     I2y = self.I2[y]
     
     #predU =  T.dot( Iy, Uu.T ).T + BUy.flatten()
     
     se = S.dot( Sit.T, sit )
     #se = T.dot( Sit.T, sit )
     predS =  T.dot( Iy, se ).T + BSy.flatten()
     
     predI = T.dot( I1i, I2y.T ) + BIy.flatten()
     
     pred = predS + predI #+ predU
     pred = getattr(self, self.activation )( pred )
     
     cost = getattr(self, self.objective )( pred, y )
     
     
     param_list = [self.S]
     fullparam_list = [self.I,self.I1,self.I2,self.BI,self.BS] #+ [self.U]
     subparam_list = [Iy,I1i,I2y,BIy,BSy] #+ [Uu]
     subparam_idx = [y,i,y,y,y] #+ [u]
     updates = self.descent( cost, param_list, fullparam_list, subparam_list, subparam_idx, self.learning_rate, momentum=self.momentum )
     #updates = getattr(self, self.learn)(cost, [self.U,self.S,self.I,self.IC,self.BI,self.BS], self.learning_rate)
     #updates = getattr(self, self.learn)(cost, , ,, self.learning_rate, momentum=self.momentum)
     
     #self.train_model_batch = theano.function(inputs=[s, i, u, y, items], outputs=cost, updates=updates  )
     inp = [s, i, y] #+ [u]
     self.train_model_batch = theano.function(inputs=inp, outputs=cost, updates=updates  )
Example #24
    def compRelationProbsFunc(self, xFeats):
        #  xFeats [l, h] matrix
        # xFeats = theano.printing.Print("xFeats")(xFeats)
        # self.Wb = theano.printing.Print("Wb ") (self.Wb)
        # self.W = theano.printing.Print("W ") (self.W)
        # scores of each role by a classifier
        relationScores = sparse.dot(xFeats, self.W) + self.Wb   # [l, h] x [h, r] => [l, r]
        #relationScores = theano.printing.Print("relationScores=")(relationScores)

        # convert it to probabilities
        relationProbs = T.nnet.softmax(relationScores)
        #relationProbs = theano.printing.Print("relationProbs = ")(relationProbs)


        return relationProbs  # [l, r]
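A standalone usage sketch (toy shapes; W and Wb here are stand-ins for the model's parameters) of the two commented steps: sparse features times dense weights give per-example scores, and softmax turns each row of scores into probabilities.

import numpy as np
import scipy.sparse as sp
import theano
import theano.tensor as T
from theano import sparse

xFeats = sparse.csr_matrix('xFeats', dtype=theano.config.floatX)
W = theano.shared(np.random.randn(6, 3).astype(theano.config.floatX))
Wb = theano.shared(np.zeros(3, dtype=theano.config.floatX))
probs = T.nnet.softmax(sparse.dot(xFeats, W) + Wb)      # scores -> probabilities
f = theano.function([xFeats], probs)

x_val = sp.random(4, 6, density=0.5, format='csr', dtype=theano.config.floatX)
print(f(x_val).sum(axis=1))                             # each row sums to 1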
Example #25
    def compRelationProbsFunc(self, xFeats):
        #  xFeats [l, h] matrix
        # xFeats = theano.printing.Print("xFeats")(xFeats)
        # self.Wb = theano.printing.Print("Wb ") (self.Wb)
        # self.W = theano.printing.Print("W ") (self.W)
        # scores of each role by a classifier
        relationScores = sparse.dot(
            xFeats, self.W) + self.Wb  # [l, h] x [h, r] => [l, r]
        #relationScores = theano.printing.Print("relationScores=")(relationScores)

        # convert it to probabilities
        relationProbs = T.nnet.softmax(relationScores)
        #relationProbs = theano.printing.Print("relationProbs = ")(relationProbs)

        return relationProbs  # [l, r]
Example #26
    def get_output_for(self, input, **kwargs):
        if input.ndim > 2:
            # if the input has more than two dimensions, flatten it into a
            # batch of feature vectors.
            input = input.flatten(2)

        # According to pull-request 595 from eduardo4jesus
        # Though it might be the case, the activation layer will remain
        # dense since Weights represent dense matrix ( Kinda makes sense)

        if (type(input) == S.SparseVariable) or (type(input) == S.SparseConstant):
            activation = S.dot(input, self.W)
        else:
            activation = T.dot(input, self.W)
        if self.b is not None:
            activation = activation + self.b.dimshuffle('x', 0)
        return self.nonlinearity(activation)
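A small standalone sketch (hypothetical helper, outside any layer class) of the dispatch used in this example: the sparse/dense branch is chosen when the graph is built, and in both cases the resulting activation is a dense tensor, which is the point made in the comment about pull-request 595.

import theano.tensor as T
import theano.sparse as S

def dense_or_sparse_dot(inp, W):
    # Sparse symbolic inputs go through S.dot, dense ones through T.dot;
    # either way the product with a dense W is a dense tensor.
    if isinstance(inp, (S.SparseVariable, S.SparseConstant)):
        return S.dot(inp, W)
    return T.dot(inp, W)

W = T.matrix('W')
print(dense_or_sparse_dot(S.csr_matrix('x'), W).type)
print(dense_or_sparse_dot(T.matrix('x'), W).type)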
Example #27
 def __init__(self, rng, x, topic_num=100):
     
     #input
     L2_input = sparse.csr_matrix("x",dtype=theano.config.floatX)
     #params
     vocab_size = x.shape[1]
     mu, sigma = x.data.mean(), x.data.var()**0.5
     
     rng = numpy.random.RandomState(numpy.random.randint(2**32-1)) if rng is None else rng
     self.L2_w = theano.shared(\
         numpy.asarray(\
             rng.normal(loc=mu,scale=sigma,size=(vocab_size, topic_num)),\
             dtype=theano.config.floatX\
         ),\
         borrow=True\
     )
     self.L2_b = theano.shared(numpy.zeros(topic_num,dtype=theano.config.floatX), borrow=True)
     self.params = [self.L2_w, self.L2_b]
     
     #stick-breaking: sticks -> orthogonal sticks
     L2_stick = sparse.dot(L2_input,self.L2_w)+self.L2_b-\
         0.5*(L2_input.size/vocab_size*tensor.sum(self.L2_w**2,0)+self.L2_b**2)  
     zero_space = tensor.zeros((L2_input.shape[0],1),dtype=theano.config.floatX)
     L2_orth_stick = tensor.join(1, L2_stick, zero_space)\
         - tensor.join(1, zero_space, tensor.cumsum(L2_stick,1))
     Pasterik_orth_stick = tensor.log(1 + tensor.exp(L2_orth_stick))      
     #training model definition
     Likelihood = tensor.mean(Pasterik_orth_stick)
     grads = theano.grad(Likelihood, self.params)#gradient w.r.t params
     eta = tensor.scalar("eta")
     updates = [(param, param+eta*grad) for param, grad in zip(self.params, grads)]
     self._fit = theano.function(\
         inputs=[L2_input, eta],\
         outputs=Likelihood,\
         updates=updates\
     )
     #predict model definition
     self._predict = theano.function(\
         inputs=[L2_input],\
         outputs=tensor.argmax(L2_stick,axis=-1)\
     )
     self._codec = theano.function(\
         inputs=[L2_input],\
         outputs=L2_stick>0\
     )
Example #28
    def __init__(self, x_in, n_in, n_out, activation=None, rng=None, seed=0):
        """
        Initialize the layer.

        Inputs:
            - x_in: a symbolic theano variable describing the input
            - n_in: dimensions the input will have
            - n_out: dimensions the output should have
            - activation: non-linear activation function applied to the output (if any)
            - seed: used to initialize the random number generator
        """
        if rng is None:
            rng = np.random.RandomState(seed)
        # initialize the weights - optimal values depend on the activation function
        if activation is None:
            W_values = rng.randn(n_in, n_out) * 0.01
        else:
            W_values = np.asarray(rng.uniform(
                low=-np.sqrt(6. / (n_in + n_out)),
                high=np.sqrt(6. / (n_in + n_out)),
                size=(n_in, n_out)),
                                  dtype=theano.config.floatX)
            if activation == T.nnet.sigmoid:
                W_values *= 4

        self.W = theano.shared(value=W_values, name='W', borrow=True)

        # initialize the biases b as a vector of n_out 0s
        self.b = theano.shared(value=np.zeros((n_out, ),
                                              dtype=theano.config.floatX),
                               name='b',
                               borrow=True)
        # compute the output
        if isinstance(x_in.type, sparse.type.SparseType):
            lin_output = sparse.dot(x_in, self.W) + self.b
        else:
            lin_output = T.dot(x_in, self.W) + self.b
        # apply the activation function (if any)
        self.output = (lin_output
                       if not activation else activation(lin_output))
        # parameters of the model
        self.params = [self.W, self.b]
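A NumPy-only sketch (hypothetical helper) of the initialization rule described in the constructor above: tanh-style layers draw weights uniformly from +/- sqrt(6 / (n_in + n_out)), and the interval is scaled by 4 for sigmoid units.

import numpy as np

def init_weights(rng, n_in, n_out, sigmoid=False):
    bound = np.sqrt(6.0 / (n_in + n_out))
    W = rng.uniform(low=-bound, high=bound, size=(n_in, n_out))
    return 4.0 * W if sigmoid else W

W = init_weights(np.random.RandomState(0), 100, 50)
print(W.shape, W.min(), W.max())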
Example #29
 def __init__(self, rng, x, topic_num=100):
     #input
     L2_input = sparse.csr_matrix("x",dtype=theano.config.floatX)
     #params
     vocab_size = x.shape[1]
     mu, sigma = x.data.mean(), 2.56*x.data.var()**0.5
     
     rng = numpy.random.RandomState(numpy.random.randint(2**32-1)) if rng is None else rng
     self.L2_w = theano.shared(\
         numpy.asarray(\
             mu + (mu if mu < sigma else sigma)*rng.uniform(low=-1,high=1,size=(vocab_size, topic_num)),\
             dtype=theano.config.floatX\
         ),\
         borrow=True\
     )
     self.L2_b = theano.shared(numpy.zeros(topic_num, dtype=theano.config.floatX), borrow=True)
     
     self.params = [self.L2_w, self.L2_b]
     #output
     L2_topic = sparse.dot(L2_input,self.L2_w)+self.L2_b
             
     #difference based objective function
     Pasterik_topic = tensor.log(tensor.sum(tensor.exp(L2_topic-L2_topic.max(-1, keepdims=True)),-1))#avoiding overflow
     d_xw_w2 = tensor.mean(Pasterik_topic) -\
         0.5*(L2_input.size*tensor.mean(self.L2_w*self.L2_w)+tensor.dot(self.L2_b,self.L2_b))
     grads = theano.grad(d_xw_w2, self.params)#gradient w.r.t params
     eta = tensor.scalar("eta")
     updates = [(param, param+eta*grad) for param, grad in zip(self.params, grads)]
     #training model definition
     self._fit = theano.function(\
         inputs=[L2_input, eta],\
         outputs=d_xw_w2, \
         updates=updates\
     )
     #predict model definition
     self._predict = theano.function(\
         inputs=[L2_input],\
         outputs=tensor.argmax(L2_topic,axis=-1)\
     )       
Example #30
    def y(self, y):
        self._y = np.array(y)
        self.ydeg = int(np.sqrt(len(y)) - 1)
        self.map_ref = starry.Map(ydeg=self.ydeg, reflected=True)
        self.map_emi = starry.Map(ydeg=self.ydeg)
        if self.ydeg > 0:
            self.map_ref[1:, :] = y[1:]
            self.map_emi[1:, :] = y[1:]
        self._A1y = ts.dot(self.map_ref.ops.A1, tt.as_tensor_variable(y))

        # Reset
        self.intensity(0.0,
                       0.0, [0.0], [0.0], [0.0], [0.0], [0.0],
                       0.0,
                       reset=True)
        self.flux([0.0], [0.0], [0.0], [0.0], [0.0], 0.0, reset=True)
        self.dfluxdro([0.0], [0.0], [0.0], [0.0], [0.0], 0.0, reset=True)
        if self.ydeg > 0:
            self.flux_emitted([0.0], [0.0], [0.0], [0.0], [0.0],
                              0.0,
                              reset=True)
            self.dfluxdro_emitted([0.0], [0.0], [0.0], [0.0], [0.0],
                                  0.0,
                                  reset=True)
Example #31
def theano_safe_sparse_dot(X, Y):
    if _tn_is_sparse(X) or _tn_is_sparse(Y):
        return tsp.dot(X, Y)
    else:
        return T.dot(X, Y)
Example #32
 def __init__(self, nodenet):
     if nodenet.sparse:
         self.propagate = theano.function([], [nodenet.w, nodenet.a], updates={nodenet.a: ST.dot(nodenet.w, nodenet.a)})
     else:
         self.propagate = theano.function([], [nodenet.w, nodenet.a], updates={nodenet.a: T.dot(nodenet.w, nodenet.a)})
Example #33
def TrainFn1Member(fnsim, embeddings, leftop, rightop, rel=True,
                    loss=loss.hinge, loss_margin=1.0, op=None, method='SGD',
                    decay=0.999, epsilon=1e-6, max_learning_rate=None,

                    weight_L1_param_regularizer=None, weight_L2_param_regularizer=None,
                    weight_contractive_regularizer_left=None, weight_contractive_regularizer_right=None):

    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr, inpl, inpo = S.csr_matrix('inpr'), S.csr_matrix('inpl'), S.csr_matrix('inpo')
    inpln, inprn = S.csr_matrix('inpln'), S.csr_matrix('inprn')

    # Learning rates for parameters and embeddings
    rate_params = T.scalar('rate_params')
    rate_embeddings = T.scalar('rate_embeddings')

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T

    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T

    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T

    lop, rop = leftop(lhs, rell), rightop(rhs, relr)
    simi = fnsim(lop, rop)
    # Negative 'left' member
    similn = fnsim(leftop(lhsn, rell), rightop(rhs, relr))
    # Negative 'right' member
    simirn = fnsim(leftop(lhs, rell), rightop(rhsn, relr))

    costl, outl = loss(simi, similn, margin=loss_margin)
    costr, outr = loss(simi, simirn, margin=loss_margin)

    cost, out = costl + costr, T.concatenate([outl, outr])

    # List of inputs of the function
    list_in = [rate_embeddings, rate_params, inpl, inpr, inpo, inpln, inprn]

    if rel:
        # If rel is True, we also consider a negative relation member
        inpon = S.csr_matrix()

        relln = S.dot(relationl.E, inpon).T
        relrn = S.dot(relationr.E, inpon).T

        simion = fnsim(leftop(lhs, relln), rightop(rhs, relrn))

        costo, outo = loss(simi, simion, margin=loss_margin)
        cost += costo
        out = T.concatenate([out, outo])
        list_in += [inpon]

    # <EXPERIMENTAL_CODE>
    # Should I also plug examples from corrupted triples ?
    if weight_contractive_regularizer_left is not None:
        cost = cost + (weight_contractive_regularizer_left * R.contractive_regularizer(lop, lhs))

    if weight_contractive_regularizer_right is not None:
        cost = cost + (weight_contractive_regularizer_right * R.contractive_regularizer(rop, rhs))

    for rel_param in set([relationl.E, relationr.E]):
        if weight_L1_param_regularizer is not None:
            cost = cost + (weight_L1_param_regularizer * R.L1_regularizer(rel_param))
        if weight_L2_param_regularizer is not None:
            cost = cost + (weight_L2_param_regularizer * R.L2_regularizer(rel_param))
    # </EXPERIMENTAL_CODE>

    params = leftop.params + rightop.params + (fnsim.params if hasattr(fnsim, 'params') else [])

    embeds = [embedding.E] + ([relationr.E, relationl.E] if (type(embeddings) == list) else [])

    # The function updates the implicit function arguments according to the updates.
    updates = collections.OrderedDict()

    if (method == 'SGD'):
        pass # do nothing

    elif (method == 'MOMENTUM'):
        param_previous_update_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the previous updates
            previous_update_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_previous_update = theano.shared(value=previous_update_value, name='su_' + param.name)

            param_previous_update_map[param] = param_previous_update

    elif (method == 'ADAGRAD'):
        param_squared_gradients_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the sums of squared gradients
            squared_gradients_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_gradients = theano.shared(value=squared_gradients_value, name='sg_' + param.name)

            param_squared_gradients_map[param] = param_squared_gradients

    elif (method == 'ADADELTA'):
        param_squared_gradients_map = collections.OrderedDict()
        param_squared_updates_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the sums of squared gradients
            squared_gradients_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_gradients = theano.shared(value=squared_gradients_value, name='sg_' + param.name)
            param_squared_gradients_map[param] = param_squared_gradients

            # Allocate the sums of squared updates
            squared_updates_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_updates = theano.shared(value=squared_updates_value, name='su_' + param.name)

            param_squared_updates_map[param] = param_squared_updates

    elif (method == 'RMSPROP'):
        param_squared_gradients_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the sums of squared gradients
            squared_gradients_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_gradients = theano.shared(value=squared_gradients_value, name='sg_' + param.name)

            param_squared_gradients_map[param] = param_squared_gradients

    else:
        raise ValueError('Unknown method: %s' % (method))

    # Parameter Gradients
    gradientsparams = T.grad(cost, params)

    # Embeddings gradients
    gradientsembeds = T.grad(cost, embeds)


    # Learning Rates
    rates_params = [rate_params for i in range(len(params))]

    # In TransE etc. the rate for predicates' embeddings (that do not get normalized) is rate_params, not rate_embeddings
    rates_embeddings = [rate_embeddings, rate_params, rate_params] if len(embeds) > 1 else [rate_embeddings] # [rate_embeddings for i in range(len(embeds))]

    for param, gradient, rate in zip(params + embeds, gradientsparams + gradientsembeds, rates_params + rates_embeddings):

        if (method == 'SGD'): # SGD
            U.sgd(param, rate, gradient, updates)

        elif (method == 'MOMENTUM'): # SGD+MOMENTUM
            param_previous_update = param_previous_update_map[param]
            U.momentum(param, rate, decay, gradient, updates, param_previous_update)

        elif (method == 'ADAGRAD'): # ADAGRAD
            param_squared_gradients = param_squared_gradients_map[param]
            U.adagrad(param, rate, epsilon, gradient, updates, param_squared_gradients)

        elif (method == 'ADADELTA'): # ADADELTA
            param_squared_gradients = param_squared_gradients_map[param]
            param_squared_updates = param_squared_updates_map[param]
            U.adadelta(param, rate, decay, epsilon, gradient, updates, param_squared_gradients, param_squared_updates)

        elif (method == 'RMSPROP'): # RMSPROP
            param_squared_gradients = param_squared_gradients_map[param]
            U.rmsprop(param, rate, decay, max_learning_rate, epsilon, gradient, updates, param_squared_gradients)

        else:
            raise ValueError('Unknown method: %s' % (method))

    return theano.function(list_in, [T.mean(cost), T.mean(out)], updates=updates, on_unused_input='ignore')
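A minimal sketch (assumed signature; the real U.adagrad lives in the project's utilities) of the ADAGRAD branch above: the shared accumulator allocated per parameter keeps a running sum of squared gradients, and each step is scaled by the inverse square root of that sum.

import theano.tensor as T

def adagrad_sketch(param, rate, epsilon, gradient, updates, squared_gradients):
    new_acc = squared_gradients + gradient ** 2          # running sum of squares
    updates[squared_gradients] = new_acc
    updates[param] = param - rate * gradient / T.sqrt(new_acc + epsilon)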
Example #34
def TrainFn1Member(fnsim, embeddings, leftop, rightop, marge=1.0, rel=True):
    """
    This function returns a theano function to perform a training iteration,
    contrasting positive and negative triplets. Members are given as sparse
    matrices. For one positive triplet there are two or three (if rel == True)
    negative triplets. To create a negative triplet we replace only one member
    at a time.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: marge for the cost function.
    :param rel: boolean, if true we also contrast w.r.t. a negative relation
                member.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()
    lrparams = T.scalar('lrparams')
    lrembeddings = T.scalar('lrembeddings')

    # Graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    # Negative 'left' member
    similn = fnsim(leftop(lhsn, rell), rightop(rhs, relr))
    # Negative 'right' member
    simirn = fnsim(leftop(lhs, rell), rightop(rhsn, relr))
    costl, outl = margincost(simi, similn, marge)
    costr, outr = margincost(simi, simirn, marge)
    cost = costl + costr
    out = T.concatenate([outl, outr])
    # List of inputs of the function
    list_in = [lrembeddings, lrparams,
            inpl, inpr, inpo, inpln, inprn]
    if rel:
        # If rel is True, we also consider a negative relation member
        inpon = S.csr_matrix()
        relln = S.dot(relationl.E, inpon).T
        relrn = S.dot(relationr.E, inpon).T
        simion = fnsim(leftop(lhs, relln), rightop(rhs, relrn))
        costo, outo = margincost(simi, simion, marge)
        cost += costo
        out = T.concatenate([out, outo])
        list_in += [inpon]

    if hasattr(fnsim, 'params'):
        # If the similarity function has some parameters, we update them too.
        gradientsparams = T.grad(cost,
            leftop.params + rightop.params + fnsim.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params + fnsim.params, gradientsparams))
    else:
        gradientsparams = T.grad(cost, leftop.params + rightop.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params, gradientsparams))
    gradients_embedding = T.grad(cost, embedding.E)
    newE = embedding.E - lrembeddings * gradients_embedding
    updates.update({embedding.E: newE})
    if type(embeddings) == list:
        # If there are different embeddings for the relation member.
        gradients_embedding = T.grad(cost, relationl.E)
        newE = relationl.E - lrparams * gradients_embedding
        updates.update({relationl.E: newE})
        gradients_embedding = T.grad(cost, relationr.E)
        newE = relationr.E - lrparams * gradients_embedding
        updates.update({relationr.E: newE})
    """
    Theano function inputs.
    :input lrembeddings: learning rate for the embeddings.
    :input lrparams: learning rate for the parameters.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples,N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples,N [Embeddings]).
    :opt input inpon: sparse csr matrix representing the indexes of the
                      negative triplet relation member, shape=(#examples,N
                      [Embeddings]).

    Theano function output.
    :output mean(cost): average cost.
    :output mean(out): ratio of examples for which the margin is violated,
                       i.e. for which an update occurs.
    """
    return theano.function(list_in, [T.mean(cost), T.mean(out)],
            updates=updates, on_unused_input='ignore')
Example #35
def TrainFn(fnsim, embeddings, leftop, rightop, marge=1.0):
    """
    This function returns a theano function to perform a training iteration,
    contrasting couples of positive and negative triplets. Members are given
    as sparse matrices. For one positive triplet there is one negative
    triplet.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: marge for the cost function.
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)
    # Inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()
    inpon = S.csr_matrix()
    lrparams = T.scalar('lrparams')
    lrembeddings = T.scalar('lrembeddings')

    # Graph
    ## Positive triplet
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    ## Negative triplet
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    relln = S.dot(relationl.E, inpon).T
    relrn = S.dot(relationr.E, inpon).T
    simin = fnsim(leftop(lhsn, relln), rightop(rhsn, relrn))

    cost, out = margincost(simi, simin, marge)
    # Parameters gradients
    if hasattr(fnsim, 'params'):
        # If the similarity function has some parameters, we update them too.
        gradientsparams = T.grad(cost,
            leftop.params + rightop.params + fnsim.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params + fnsim.params, gradientsparams))
    else:
        gradientsparams = T.grad(cost, leftop.params + rightop.params)
        updates = OrderedDict((i, i - lrparams * j) for i, j in zip(
            leftop.params + rightop.params, gradientsparams))
    # Embeddings gradients
    gradients_embedding = T.grad(cost, embedding.E)
    newE = embedding.E - lrembeddings * gradients_embedding
    updates.update({embedding.E: newE})
    if type(embeddings) == list:
        # If there are different embeddings for the relation member.
        gradients_embedding = T.grad(cost, relationl.E)
        newE = relationl.E - lrparams * gradients_embedding
        updates.update({relationl.E: newE})
        gradients_embedding = T.grad(cost, relationr.E)
        newE = relationr.E - lrparams * gradients_embedding
        updates.update({relationr.E: newE})
    """
    Theano function inputs.
    :input lrembeddings: learning rate for the embeddings.
    :input lrparams: learning rate for the parameters.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples,N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpon: sparse csr matrix representing the indexes of the negative
                  triplet relation member, shape=(#examples,N [Embeddings]).

    Theano function output.
    :output mean(cost): average cost.
    :output mean(out): ratio of examples for which the margin is violated,
                       i.e. for which an update occurs.
    """
    return theano.function([lrembeddings, lrparams, inpl, inpr, inpo,
                           inpln, inprn, inpon],
                           [T.mean(cost), T.mean(out)], updates=updates,
                           on_unused_input='ignore')
Example #36
def run(jobman,debug = False):
    expstart = time.time()
    hp = jobman.state

    if not os.path.exists('files/'): os.mkdir('files/')

    # Symbolic variables
    s_posit = T.matrix()
    s_negat = T.matrix()
    idx_start = T.lscalar()
    idx_stop = T.lscalar()
    s_valid = theano.sparse.csr_matrix()



    w2i = cPickle.load(open('/mnt/scratch/bengio/bengio_group/data/gutenberg/merged_word2idx.pkl'))
    i2w = dict( (v,k) for k,v in w2i.iteritems() )
    i2w[0] = 'UNK'
    senna = [ i2w[i] for i in range(len(i2w.keys())) ]


    nsenna = len(senna)
    
    embedding = cae(i_size=nsenna, h_size=hp['embedsize'], e_act = identity)
    H = ae(i_size = hp['embedsize']*hp['wsize'], h_size=hp['hsize'], e_act = T.tanh)
    L = logistic(i_size = hp['hsize'], h_size = 1, act = identity)

    del H.params['d_bias']
    del embedding.params['d_bias']
    del embedding.params['e_bias']
    minsize = hp['minsize']
    maxsize = hp['maxsize']

    dsize = maxsize - minsize +1

    H.params['e_bias'] = theano.shared( numpy.array(numpy.zeros((dsize,hp['hsize'])),dtype=theano.config.floatX),name='e_bias')

    path = hp['loadpath']
 
    if path:
        load(embedding,path+'/embedding.pkl')
        #load(H,path+'/hidden.pkl')
        #load(L,path+'/logistic.pkl')
        hp['embedsize'] = embedding.params['e_weights'].get_value(borrow=True).shape[1]
        #hp['hsize'] = H.params['e_weights'].get_value(borrow=True).shape[1]
        jobman.save()

    H.params['e_bias'] = theano.shared( numpy.array(numpy.zeros((dsize,hp['hsize'])),dtype=theano.config.floatX),name='e_bias')
    valid_embedding = sparse.supervised.logistic(i_size=nsenna, h_size=hp['embedsize'], act = identity)
    valid_embedding.params['weights'] = sp.shared(value = scipy.sparse.csr_matrix(embedding.params['e_weights'].get_value(borrow=True)))


    lr = hp['lr']
    h_size = hp['hsize']
    bs = hp['bs']

    posit_embed = T.dot(s_posit, embedding.params['e_weights']).reshape((1,hp['embedsize']*hp['wsize']))
    negat_embed = T.dot(s_negat, embedding.params['e_weights']).reshape((hp['nneg'],hp['embedsize']*hp['wsize']))
    valid_embed = sp.dot(s_valid,valid_embedding.params['weights']).reshape((nsenna,hp['embedsize']*hp['wsize']))

    posit_embed_left = T.concatenate([posit_embed[:,idx_start*hp['embedsize']:idx_stop*hp['embedsize']],
                                  T.zeros_like(posit_embed[:,idx_stop*hp['embedsize']:]) ],axis=1)

    negat_embed_left = T.concatenate([negat_embed[:,idx_start*hp['embedsize']:idx_stop*hp['embedsize']],
                                   T.zeros_like(negat_embed[:,idx_stop*hp['embedsize']:]) ],axis=1)

    posit_embed_right = T.concatenate([ T.zeros_like(posit_embed[:,:idx_start*hp['embedsize']]),
                                  posit_embed[:,idx_start*hp['embedsize']:idx_stop*hp['embedsize']]],axis=1)

    negat_embed_right = T.concatenate([ T.zeros_like(negat_embed[:,:idx_start*hp['embedsize']]),
                                   negat_embed[:,idx_start*hp['embedsize']:idx_stop*hp['embedsize']]],axis=1)



    posit_embed = T.concatenate([ T.zeros_like(posit_embed[:,:idx_start*hp['embedsize']]),
                                  posit_embed[:,idx_start*hp['embedsize']:idx_stop*hp['embedsize']],
                                  T.zeros_like(posit_embed[:,idx_stop*hp['embedsize']:]) ],axis=1)

    negat_embed = T.concatenate([ T.zeros_like(negat_embed[:,:idx_start*hp['embedsize']]),
                                   negat_embed[:,idx_start*hp['embedsize']:idx_stop*hp['embedsize']],
                                   T.zeros_like(negat_embed[:,idx_stop*hp['embedsize']:]) ],axis=1)

    
    #posit_embed = ifelse(T.eq(idx_start, 0), posit_embed_left, posit_embed)
    #posit_embed = ifelse(T.eq(idx_stop, hp['maxsize']), posit_embed_right, posit_embed)

    #negat_embed = ifelse(T.eq(idx_start, 0), negat_embed_left, negat_embed)
    #negat_embed = ifelse(T.eq(idx_stop, hp['maxsize']), negat_embed_right, negat_embed)

    Hposit = T.tanh(T.dot(posit_embed,H.params['e_weights']) + H.params['e_bias'][idx_stop-idx_start-minsize,:])
    Hnegat = T.tanh(T.dot(negat_embed,H.params['e_weights']) + H.params['e_bias'][idx_stop-idx_start-minsize,:])
    posit_score = L.encode(Hposit)
    negat_score = L.encode(Hnegat)
    valid_score = L.encode(H.encode(valid_embed))

    C = (negat_score - posit_score.flatten() + hp['margin'])

    CC = (rect(C)).mean()

    opt = theano.function([s_posit, s_negat, idx_start, idx_stop],
                          (rect(C)).mean(),
                          updates = dict( L.update(CC,lr) + H.update(CC,lr) + embedding.update_norm(CC,lr)) )

    validfct = theano.function([s_valid],valid_score)

    def saveexp():
        save(embedding,fname+'embedding.pkl')
        save(H,fname+'hidden.pkl')
        save(L,fname+'logistic.pkl')

    delta = hp['wsize']/2
    rest = hp['wsize']%2

    freq_idx = cPickle.load(open('/mnt/scratch/bengio/bengio_group/data/gutenberg/sorted_vocab.pkl'))[:2000]
    fname = ''
    validsentence = []# cPickle.load(open('/scratch/rifaisal/data/wiki_april_2010/valid_debug.pkl'))
    tseenwords = not debug
    for e in range(hp['epoch']):
        hp['split'] = numpy.random.randint(45)
        sentences = cPickle.load(open('/mnt/scratch/bengio/bengio_group/data/gutenberg/ints_50000/split'+str(hp['split'])+'.pkl'))
        nsent = len(sentences)
        bigc = []
        bigr = []

        seen_words = 0
        for i,s in enumerate(sentences):
            nword = len(s)
            seen_words += nword
            tseenwords += nword

            if nword < hp['maxsize'] + 2:
                continue
            rndsize = numpy.random.randint(low=hp['minsize']+1,high=hp['maxsize']-1)
            idxsta = numpy.random.randint(low=1, high=hp['maxsize']-rndsize)
            idxsto = idxsta+rndsize

            print 'r',rndsize,'b',idxsta,'e',idxsto,'shape',H.params['e_bias'].get_value().shape

            c =[]
            r =[]
            if debug:
                print ' *** Processing document',i,'with',nword,
                sys.stdout.flush()
            for j in range(delta,nword-delta):
                nd = rndsize/2
                rd = rndsize%2
                pchunk = s[j-delta:j+delta+rest]
                nchunk = []
                
                rndidx = numpy.random.randint(nsenna, size = (hp['nneg'],))
                nchunk = []
                for kk in range(hp['nneg']):
                    tmpchunk = copy.copy(pchunk)
                    tmpchunk[idxsta+nd] = rndidx[kk]
                    nchunk += tmpchunk
                assert len(nchunk) == len(pchunk)*hp['nneg']
                p, n  = (idx2mat(pchunk,nsenna), idx2mat(nchunk,nsenna))
                l = opt(p,n, idxsta, idxsto)
                c.append(l)

                if debug:
                    print '.',
                    break


            if debug:
                print ''

            bigc += [numpy.array(c).sum()]

            if 0:#(time.time() - expstart) > ( 3600 * 24 * 6 + 3600*20) or (tseenwords)>(10*hp['freq']):
                tseenwords = 0
                valid_embedding.params['weights'] = sp.shared(value = scipy.sparse.csr_matrix(embedding.params['e_weights'].get_value(borrow=True)))
                mrk = evaluation.error(validsentence, validfct, nsenna, hp['wsize'])
                hp['mrk'] = mrk
                jobman.save()
                saveexp()
                print 'Random Valid Mean rank',mrk


            if seen_words > hp['freq'] or debug:
                seen_words = 0
                hp['score'] = numpy.array(bigc).mean() 
                hp['e'] = e
                hp['i'] = i
                print ''
                print e,i,'NN Score:', hp['score']

                if not debug:
                    ne = knn(freq_idx,embedding.params['e_weights'].get_value(borrow=True))
                    open('files/'+fname+'nearest.txt','w').write(display(ne,senna))
                    saveexp()
                sys.stdout.flush()
                jobman.save()
                
    saveexp()
    def fprop(self, state_below, add_noise=True):
        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)
        
        self.x = state_below
        
        # linear part
        if isinstance(self.x, S.SparseVariable):
            self.z = S.dot(self.x,self.W[0]) + self.b[0]
        else:
            self.z = T.dot(self.x,self.W[0]) + self.b[0]
        
        # first layer non-linear part
        if isinstance(self.x, S.SparseVariable):
            h = S.dot(self.x,self.W[1]) + self.b[1]
        else:
            h = T.dot(self.x,self.W[1]) + self.b[1]
        
        # activate hidden units of non-linear part
        if self.hidden_activation is None:
            pass
        elif self.hidden_activation == 'tanh':
            self.h = T.tanh(h)
        elif self.hidden_activation == 'sigmoid':
            self.h = T.nnet.sigmoid(h)
        elif self.hidden_activation == 'softmax':
            self.h = T.nnet.softmax(h)
        elif self.hidden_activation == 'rectifiedlinear':
            self.h = T.maximum(0, h)
        else:
            raise NotImplementedError()
        
        noise = 0
        if self.noise_beta is not None:
            noise = (1.-self.noise_normality) * self.beta_mean
            #print self.noise_normality
            print (1.-self.noise_normality) * self.noise_scale * (self.sparsity_target - 0.5)
        print noise
        
        if add_noise:
            rng = MRG_RandomStreams(self.mlp.rng.randint(2**15))
            if self.noise_beta is not None:
                noise = (1.-self.noise_normality) * self.beta_dist[ \
                        self.beta_idx:self.beta_idx+self.x.shape[0],:] \
                        + (self.noise_normality * self.noise_scale \
                                * rng.normal(size = self.z.shape, 
                                        std=self.noise_stdev ,
                                        dtype=self.z.type.dtype) \
                            )
            else:
                noise = self.noise_scale \
                    * rng.normal(size = self.z.shape, 
                                        std=self.noise_stdev ,
                                        dtype=self.z.type.dtype)
                
        #print self.beta_dist.get_value().shape
            
        # second layer non-linear part
        self.a = T.dot(self.h,self.W[2]) + self.b[2] + noise
        
        # activate non-linear part to get bernouilli probabilities
        self.m_mean = T.nnet.sigmoid(self.a)
           
        # mix output of linear part with output of non-linear part
        self.p = self.m_mean * self.z
        
        if self.layer_name is not None:
            self.z.name = self.layer_name + '_z'
            self.h.name = self.layer_name + '_h'
            self.a.name = self.layer_name + '_a'
            self.m_mean.name = self.layer_name + '_m_mean'
            self.p.name = self.layer_name + '_p'
        
        return self.p
Beispiel #38
0
def theano_safe_sparse_dot(X, Y):
    if _tn_is_sparse(X) or _tn_is_sparse(Y):
        return tsp.dot(X, Y)
    else:
        return T.dot(X, Y)
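
A minimal usage sketch for the helper above (hypothetical variable names; it assumes theano_safe_sparse_dot and its _tn_is_sparse check are importable from the snippet's module, and that theano and scipy are installed):

import numpy as np
import scipy.sparse
import theano
import theano.tensor as T
import theano.sparse as tsp

# One sparse and one dense operand: the helper dispatches to tsp.dot.
X_sp = tsp.csr_matrix('X_sp', dtype='float64')
W = T.matrix('W', dtype='float64')
out = theano_safe_sparse_dot(X_sp, W)
f = theano.function([X_sp, W], out)

x_val = scipy.sparse.csr_matrix(np.eye(3))
w_val = np.arange(9, dtype='float64').reshape(3, 3)
print(f(x_val, w_val))  # identity times W, i.e. W itself

With two dense operands the same call falls back to T.dot, so mixed dense/sparse graphs compile without special-casing every dot site.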
    def __init__(self,
                 feature_count,
                 classifier=False,
                 k=8,
                 stdev=0.1,
                 sparse=False):
        self.classifier = classifier
        d = feature_count

        # *** Symbolic variables ***
        if sparse:
            X = S.csr_matrix(name='inputs', dtype='float32')
        else:
            X = T.matrix()
        y = T.vector()
        beta_w1 = T.scalar()
        beta_v = T.scalar()

        # *** Model parameters ***
        # bias term (intercept)
        w0_init = np.zeros(1)
        self.w0 = theano.shared(w0_init, allow_downcast=True)
        # first order coefficients
        w1_init = np.zeros(d)
        self.w1 = theano.shared(w1_init, allow_downcast=True)
        # interaction factors
        v_init = stdev * np.random.randn(k, d)
        self.v = theano.shared(v_init, allow_downcast=True)

        # *** The Model ***
        # The formula for pairwise interactions is from the bottom left
        # of page 997 of Rendle 2010, "Factorization Machines."
        # This version scales linearly in k and d, as opposed to O(d^2).
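        # Sketch of the identity behind this trick, for a single factor row v_f:
        #   sum_{i<j} v_{f,i} v_{f,j} x_i x_j
        #       = 0.5 * ((sum_i v_{f,i} x_i)^2 - sum_i v_{f,i}^2 x_i^2)
        # Summing the right-hand side over the k rows of v gives the
        # T.sum(..., axis=1) expressions below; the sparse branch uses
        # S.mul(X, X) to square the sparse inputs elementwise.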
        if sparse:
            interactions = 0.5 * T.sum((S.dot(X, T.transpose(self.v)) ** 2) - \
                                       S.dot(S.mul(X,X), T.transpose(self.v ** 2)), axis=1)
            y_hat = T.addbroadcast(self.w0, 0) + S.dot(X,
                                                       self.w1) + interactions
        else:
            interactions = 0.5 * T.sum((T.dot(X, T.transpose(self.v)) ** 2) - \
                                       T.dot(X ** 2, T.transpose(self.v ** 2)), axis=1)
            y_hat = T.addbroadcast(self.w0, 0) + T.dot(X,
                                                       self.w1) + interactions
        if self.classifier:
            y_hat = T.nnet.sigmoid(y_hat)

        # *** Loss Function ***
        if self.classifier:
            error = T.mean(T.nnet.binary_crossentropy(y_hat, y))
        else:
            error = T.mean((y - y_hat)**2)
        # regularization
        L2 = beta_w1 * T.mean(self.w1**2) + beta_v * T.mean(self.v**2)
        loss = error + L2

        # *** Learning ***
        updates = []
        params = [self.w0, self.w1, self.v]
        grads = T.grad(cost=loss, wrt=params)
        # RMSProp
        lr, rho, epsilon = 0.001, 0.9, 1e-6
        for p, g in zip(params, grads):
            acc = theano.shared(p.get_value() * 0.)
            acc_new = rho * acc + (1 - rho) * g**2
            gradient_scaling = T.sqrt(acc_new + epsilon)
            g = g / gradient_scaling
            updates.append((acc, acc_new))
            updates.append((p, p - lr * g))

        self.theano_train = theano.function(inputs=[X, y, beta_w1, beta_v],
                                            outputs=loss,
                                            updates=updates,
                                            allow_input_downcast=True)

        self.theano_cost = theano.function(inputs=[X, y, beta_w1, beta_v],
                                           outputs=loss,
                                           allow_input_downcast=True)

        # *** Prediction ***
        self.theano_predict = theano.function(inputs=[X],
                                              outputs=y_hat,
                                              allow_input_downcast=True)
Beispiel #40
0
    def get_hidden_values(self, inp):
        """ Computes the values of the hidden layer """
        return T.nnet.sigmoid(sparse.dot(inp, self.W) + self.b)
    def fprop(self, state_below, add_noise=True, threshold=None, stochastic=True):
        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)
        
        self.x = state_below
        
        # linear part
        if isinstance(self.x, S.SparseVariable):
            self.z = S.dot(self.x,self.W[0]) + self.b[0]
        else:
            self.z = T.dot(self.x,self.W[0]) + self.b[0]


        self.stopper = self.x * T.ones_like(self.x)
        # first layer non-linear part
        if isinstance(self.stopper, S.SparseVariable):
            h = S.dot(self.stopper,self.W[1]) + self.b[1]
        else:
            h = T.dot(self.stopper,self.W[1]) + self.b[1]
        
        # activate hidden units of non-linear part
        if self.hidden_activation is None:
            pass
        elif self.hidden_activation == 'tanh':
            self.h = T.tanh(h)
        elif self.hidden_activation == 'sigmoid':
            self.h = T.nnet.sigmoid(h)
        elif self.hidden_activation == 'softmax':
            self.h = T.nnet.softmax(h)
        elif self.hidden_activation == 'rectifiedlinear':
            self.h = T.maximum(0, h)
        else:
            raise NotImplementedError()
        
        
        rng = MRG_RandomStreams(self.mlp.rng.randint(2**15))
        noise = 0
        if self.noise_beta is not None:
            noise = (1.-self.noise_normality) * self.beta_mean
        print noise
        
        if add_noise:
            if self.noise_beta is not None:
                noise = (1.-self.noise_normality) * self.beta_dist[ \
                        self.beta_idx:self.beta_idx+self.x.shape[0],:] \
                        + (self.noise_normality * self.noise_scale \
                                * rng.normal(size = self.z.shape, 
                                        std=self.noise_stdev ,
                                        dtype=self.z.type.dtype) \
                            )
            else:
                noise = self.noise_scale \
                    * rng.normal(size = self.z.shape, 
                                        std=self.noise_stdev ,
                                        dtype=self.z.type.dtype)
        
        
        # second layer non-linear part
        self.a = T.dot(self.h,self.W[2]) + self.b[2] + noise
        
        # activate non-linear part to get bernouilli probabilities
        self.m_mean = T.nnet.sigmoid(self.a)
        
        # Separate stochastic from deterministic part:
        self.stoch_m_mean = self.m_mean**self.stochastic_ratio
        self.deter_m_mean = self.m_mean**(1.-self.stochastic_ratio)
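        # Note: the two exponents sum to one, so stoch_m_mean * deter_m_mean
        # reproduces m_mean exactly. Only deter_m_mean enters the output
        # product below, and the sampled mask m is listed in
        # consider_constant, so gradients reach the gater through the
        # deterministic factor alone.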
        
        if threshold is None:
            if stochastic:
                # sample from bernouili probs to generate a mask
                self.m = rng.binomial(size = self.stoch_m_mean.shape, n = 1 , \
                    p = self.m_mean, dtype=self.stoch_m_mean.type.dtype)
            else:
                self.m = self.m_mean
        else:
            # deterministic mask:
            self.m = T.cast(T.gt(self.stoch_m_mean, threshold), \
                                           theano.config.floatX)
                                        
        self.consider_constant = [self.m, self.stopper]
           
        # mix output of linear part with output of non-linear part
        self.p = self.m * self.deter_m_mean * self.z
        
        if self.layer_name is not None:
            self.z.name = self.layer_name + '_z'
            self.h.name = self.layer_name + '_h'
            self.a.name = self.layer_name + '_a'
            self.m_mean.name = self.layer_name + '_m_mean'
            self.stoch_m_mean.name = self.layer_name + '_stoch_m_mean'
            self.deter_m_mean.name = self.layer_name + '_deter_m_mean'
            self.p.name = self.layer_name + '_p'
        
        return self.p
    def fprop(self, state_below, threshold=None, stochastic=True):
        self.input_space.validate(state_below)

        if self.requires_reformat:
            if not isinstance(state_below, tuple):
                for sb in get_debug_values(state_below):
                    if sb.shape[0] != self.dbm.batch_size:
                        raise ValueError("self.dbm.batch_size is %d but got shape of %d" % (self.dbm.batch_size, sb.shape[0]))
                    assert reduce(lambda x,y: x * y, sb.shape[1:]) == self.input_dim

            state_below = self.input_space.format_as(state_below, self.desired_space)
        
        self.x = state_below
        
        # experts part
        if isinstance(self.x, S.SparseVariable):
            z = S.dot(self.x,self.W[0]) + self.b[0]
        else:
            z = T.dot(self.x,self.W[0]) + self.b[0]
 
        # activate hidden units of the expert part
        if self.expert_activation is None:
            self.z = z
        elif self.expert_activation == 'tanh':
            self.z = T.tanh(z)
        elif self.expert_activation == 'sigmoid':
            self.z = T.nnet.sigmoid(z)
        elif self.expert_activation == 'softmax':
            self.z = T.nnet.softmax(z)
        elif self.expert_activation == 'rectifiedlinear':
            self.z = T.maximum(0, z)
        else:
            raise NotImplementedError()
        
        # first layer of gater
        if isinstance(self.x, S.SparseVariable):
            h = S.dot(self.x,self.W[1]) + self.b[1]
        else:
            h = T.dot(self.x,self.W[1]) + self.b[1]
        
        # activate hidden units of gater
        if self.hidden_activation is None:
            self.h = h
        elif self.hidden_activation == 'tanh':
            self.h = T.tanh(h)
        elif self.hidden_activation == 'sigmoid':
            self.h = T.nnet.sigmoid(h)
        elif self.hidden_activation == 'softmax':
            self.h = T.nnet.softmax(h)
        elif self.hidden_activation == 'rectifiedlinear':
            self.h = T.maximum(0, h)
        else:
            raise NotImplementedError()
        
        # second layer gater
        self.a = T.dot(self.h,self.W[2]) + self.b[2]
        
        # activate gater output to get bernouilli probabilities
        self.m_mean = T.nnet.sigmoid(self.a)
        
        if threshold is None:
            if stochastic:
                # sample from bernouili probs to generate a mask
                rng = MRG_RandomStreams(self.mlp.rng.randint(2**15))
                self.m = rng.binomial(size = self.m_mean.shape, n = 1, 
                        p = self.m_mean, dtype=self.m_mean.type.dtype)
            else:
                self.m = self.m_mean
        else:
            # deterministic mask:
            self.m = T.cast(T.gt(self.m_mean, threshold), \
                                        theano.config.floatX)
           
        self.m2 = T.dot(self.m, self.groups) 
        # mask expert output with samples from gater
        self.p = self.m2 * self.z
        
        if self.layer_name is not None:
            self.z.name = self.layer_name + '_z'
            self.h.name = self.layer_name + '_h'
            self.a.name = self.layer_name + '_a'
            self.m_mean.name = self.layer_name + '_m_mean'
            self.m.name = self.layer_name + '_m'
            self.p.name = self.layer_name + '_p'
        
        return self.p
Beispiel #43
0
    mu_u = np.zeros(N)
    mu_u[0] = 1.0
    cov_u = 1e-2 * np.eye(N)
    cov_u[0, 0] = 1e-10
    u = pm.MvNormal("u", mu_u, cov_u, shape=(N, ))
    u = tt.reshape(u, (N, 1))

    # The spectral basis
    mu_vT = np.ones(K)
    cov_vT = 1e-2 * np.eye(K)
    vT = pm.MvNormal("vT", mu_vT, cov_vT, shape=(K, ))
    vT = tt.reshape(vT, (1, K))

    # Compute the model
    uvT = tt.reshape(tt.dot(u, vT), (N * K, 1))
    f_model = tt.reshape(ts.dot(D, uvT), (M * Kobs, ))

    # Track some values for plotting later
    pm.Deterministic("f_model", f_model)

    # Save our initial guess
    f_model_guess = xo.eval_in_model(f_model)

    # The likelihood function assuming known Gaussian uncertainty
    pm.Normal("obs", mu=f_model, sd=ferr, observed=f)

# Maximum likelihood solution
with model:
    map_soln = xo.optimize()

# Plot some stuff
Beispiel #44
0
def sparse_slice_rows(H, idx):
    '''Returns a dense slice H[idx, :]'''

    vecs = to_one_hot(idx, H.shape[0], dtype=H.dtype)

    return ts.dot(vecs, H)
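
A small, hypothetical check of sparse_slice_rows (it assumes the to_one_hot helper used above is in scope, e.g. theano.tensor.extra_ops.to_one_hot, along with theano.sparse imported as ts):

import numpy as np
import scipy.sparse
import theano
import theano.tensor as T
import theano.sparse as ts

H_sym = ts.csr_matrix('H', dtype='float32')   # sparse matrix whose rows we want
idx_sym = T.ivector('idx')                    # integer row indices

rows = sparse_slice_rows(H_sym, idx_sym)      # dense (len(idx), H.shape[1]) slice
f = theano.function([H_sym, idx_sym], rows)

H_val = scipy.sparse.csr_matrix(np.arange(12, dtype='float32').reshape(4, 3))
print(f(H_val, np.array([0, 2], dtype='int32')))  # rows 0 and 2 of H_val, densified

Building one-hot row selectors and multiplying keeps the slicing inside the Theano graph, which is why the model code below can feed symbolic index vectors straight into it.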
Beispiel #45
0
    def init_functions(self):
        '''Construct functions for the model'''

        # Construct the objective function

        #   Input variables
        u_i, y_s, y_t = T.ivectors(['u_i', 'y_s', 'y_t'])

        dropout = T.fscalar(name='p')

        #   Intermediate variables: n_examples * n_songs
        item_scores = T.dot(self._U[u_i], self._V.T) + self._b

        # subtract off the row-wise max for numerical stability
        item_scores = item_scores - item_scores.max(axis=1, keepdims=True)

        e_scores = T.exp(item_scores)

        if T.gt(dropout, 0.0):
            # Construct a random dropout mask
            retain_prob = 1.0 - dropout
            M = self._rng.binomial(e_scores.shape,
                                   p=retain_prob,
                                   dtype=theano.config.floatX)

            # Importance weight so that E[M[i,j]] = 1
            M /= retain_prob

            # The positive examples should always be sampled
            M = theano.tensor.set_subtensor(M[T.arange(y_t.shape[0]), y_t],
                                            1.0)

            e_scores = e_scores * M

        #   Edge feasibilities: n_examples * n_edges
        prev_feas = sparse_slice_rows(self.H, y_s)
        #   Detect and reset initial-state transitions
        prev_feas = theano.tensor.set_subtensor(prev_feas[y_s < 0, :], 1)

        #   Raw edge probabilities: n_examples * n_edges
        edge_given_prev = T.nnet.softmax(prev_feas * self._w)

        #   Compute edge normalization factors: n_examples * n_edges
        #     sum of score mass in each edge for each user
        edge_norms = ts.dot(e_scores, self.H)

        #   Slice the edge weights according to incoming feasibilities: n_examples
        next_weight = e_scores[T.arange(y_t.shape[0]), y_t]

        #   Marginalize: n_examples * n_edges
        next_feas = sparse_slice_rows(self.H, y_t)

        probs = next_weight * T.sum(next_feas * (edge_given_prev / (_EPS + edge_norms)),
                                    axis=1,
                                    keepdims=True)

        # Data likelihood term
        ll = T.log(probs)
        avg_ll = ll.mean()

        # Priors
        w_prior = -0.5 * self.edge_reg * (self._w**2).sum()
        b_prior = -0.5 * self.bias_reg * (self._b**2).sum()
        u_prior = -0.5 * self.user_reg * (self._U**2).sum()
        v_prior = -0.5 * self.song_reg * (self._V**2).sum()

        # negative log-MAP objective
        cost = -1.0 * (avg_ll + u_prior + v_prior + b_prior + w_prior)

        # Construct the updates
        variables = []
        if 'e' in self.params:
            variables.append(self._w)
        if 'b' in self.params:
            variables.append(self._b)
        if 'u' in self.params:
            variables.append(self._U)
        if 's' in self.params:
            variables.append(self._V)

        updates = lasagne.updates.adagrad(cost, variables)

        self._train = theano.function(inputs=[u_i, y_s, y_t, dropout],
                                      outputs=[avg_ll, cost],
                                      updates=updates)

        self._loglikelihood = theano.function(inputs=[u_i, y_s, y_t,
                                                      theano.Param(dropout,
                                                                   default=0.0,
                                                                   name='p')],
                                              outputs=[ll])
Beispiel #46
0
    def solve(self, u=None, vT=None, b=None, u_guess=None, 
              vT_guess=None, b_guess=None, u_mu=0.0, u_sig=0.01, 
              vT_mu=1.0, vT_sig=0.3, vT_rho=3.e-5, b_mu=1.0, 
              b_sig=0.1, niter_adam=100, niter_linear=100, 
              temp=1.e3, **kwargs):
        """
        
        """
        if not self._loaded:
            raise RuntimeError("Please load or generate a dataset first.")

        # Data covariance
        self.F_CInv = np.ones_like(self.F) / self.ferr ** 2
        self.F_lndet = np.sum(np.log(2 * np.pi * self.F_CInv.reshape(-1)))

        # Prior on `u`
        self.u_cinv = np.ones(self.N - 1) / u_sig ** 2
        self.u_mu = np.ones(self.N - 1) * u_mu
        self.u_lndet = np.sum(np.log(2 * np.pi * self.u_cinv))

        # Gaussian process prior on `vT`
        self.vT_mu = (np.ones(self.Kp) * vT_mu).reshape(1, -1)
        if vT_rho > 0.0:
            kernel = celerite.terms.Matern32Term(np.log(vT_sig), np.log(vT_rho))
            gp = celerite.GP(kernel)
            vT_C = gp.get_matrix(self.lam_padded)
            cho_C = cho_factor(vT_C)
            self.vT_CInv = cho_solve(cho_C, np.eye(self.Kp))
            self.vT_CInvmu = cho_solve(cho_C, self.vT_mu.reshape(-1))
            self.vT_lndet = -2 * np.sum(np.log(2 * np.pi * np.diag(cho_C[0])))
        else:
            self.vT_CInv = np.ones(self.Kp) / vT_sig ** 2
            self.vT_CInvmu = (self.vT_CInv * self.vT_mu)
            self.vT_lndet = np.sum(np.log(2 * np.pi * self.vT_CInv))
            self.vT_CInv = np.diag(self.vT_CInv)

        # Prior on `b`
        self.b_cinv = np.ones(self.M) / b_sig ** 2
        self.b_mu = np.ones(self.M) * b_mu
        self.b_lndet = self.M * np.log(2 * np.pi / b_sig ** 2)

        # Simple linear solves
        if (u is not None) and (vT is not None):
            self.u = u
            self.vT = vT
            self._compute_b()
        elif (u is not None) and (b is not None):
            self.u = u
            self.b = b
            self._compute_vT()
        elif (vT is not None) and (b is not None):
            self.b = b
            self.vT = vT
            self._compute_u()
        
        # Non-linear
        else:

            # Get our guesses going
            if u is not None:
                self.u = u
                var_names = ["vT", "b"]
                if vT_guess is None and b_guess is None:
                    self.b = np.ones(self.M)
                    self._compute_vT(T=temp)
                elif vT_guess is not None:
                    self.vT = vT_guess
                    self._compute_b(T=temp)
                elif b_guess is not None:
                    self.b = b_guess
                    self._compute_vT(T=temp)
                else: raise ValueError("Unexpected branch!")
            elif vT is not None:
                self.vT = vT
                var_names = ["u", "b"]
                if u_guess is None and b_guess is None:
                    self.b = np.ones(self.M)
                    self._compute_u(T=temp)
                elif u_guess is not None:
                    self.u = u_guess
                    self._compute_b(T=temp)
                elif b_guess is not None:
                    self.b = b_guess
                    self._compute_u(T=temp)
                else: raise ValueError("Unexpected branch!")
            elif b is not None:
                self.b = b
                var_names = ["u", "vT"]
                if u_guess is None and vT_guess is None:
                    self.u = self.u_mu + u_sig * np.random.randn(self.N - 1)
                    self._compute_vT(T=temp)
                elif u_guess is not None:
                    self.u = u_guess
                    self._compute_vT(T=temp)
                elif vT_guess is not None:
                    self.vT = vT_guess
                    self._compute_u(T=temp)
                else: raise ValueError("Unexpected branch!")
            else:
                var_names = ["u", "vT", "b"]
                if vT_guess is None and b_guess is None and u_guess is None:
                    self.b = np.ones(self.M)
                    self.u = self.u_mu + u_sig * np.random.randn(self.N - 1)
                    self._compute_vT(T=temp)
                elif u_guess is not None:
                    self.u = u_guess
                    if vT_guess is None and b_guess is None:
                        self.b = np.ones(self.M)
                        self._compute_vT(T=temp)
                    elif vT_guess is not None:
                        self.vT = vT_guess
                        self._compute_b(T=temp)
                    elif b_guess is not None:
                        self.b = b_guess
                        self._compute_vT(T=temp)
                    else: raise ValueError("Unexpected branch!")
                elif vT_guess is not None:
                    self.vT = vT_guess
                    if b_guess is None:
                        self.b = np.ones(self.M)
                        self._compute_u(T=temp)
                    else:
                        self.b = b_guess
                        self._compute_u(T=temp)
                elif b_guess is not None:
                    self.b = b_guess
                    self.u = self.u_mu + u_sig * np.random.randn(self.N - 1)
                    self._compute_vT(T=temp)
                else: raise ValueError("Unexpected branch!")

            # Initialize the variables to the guesses
            vars = []
            if "u" in var_names:
                u = theano.shared(self.u)
                vars += [u]
            else:
                u = tt.as_tensor_variable(self.u)
            if "vT" in var_names:
                vT = theano.shared(self.vT)
                vars += [vT]
            else:
                vT = tt.as_tensor_variable(self.vT)

            # Compute the model
            D = ts.as_sparse_variable(self.D)
            a = tt.reshape(tt.dot(tt.reshape(
                                  tt.concatenate([[1.0], u]), (-1, 1)), 
                                  tt.reshape(vT, (1, -1))), (-1,))
            self.map[1:, :] = u
            b = self.map.flux(theta=self.theta)
            B = tt.reshape(b, (-1, 1))
            M = tt.reshape(ts.dot(D, a), (self.M, -1)) / B

            # Compute the likelihood
            r = tt.reshape(self.F - M, (-1,))
            cov = tt.reshape(self.F_CInv, (-1,))
            lnlike = -0.5 * (tt.sum(r ** 2 * cov) + self.F_lndet)

            # Compute the prior
            lnprior = -0.5 * (tt.sum((u - self.u_mu) ** 2 * self.u_cinv) + self.u_lndet)
            lnprior += -0.5 * (tt.dot(tt.dot(tt.reshape((vT - self.vT_mu), (1, -1)), self.vT_CInv), tt.reshape((vT - self.vT_mu), (-1, 1)))[0, 0] + self.vT_lndet)

            # The full loss
            loss = -(lnlike + lnprior)
            best_loss = loss.eval()
            best_u = u.eval()
            best_vT = vT.eval()
            best_b = b.eval()
            lnlike_val = np.zeros(niter_adam + 1)
            lnprior_val = np.zeros(niter_adam + 1)
            lnlike_val[0] = lnlike.eval()
            lnprior_val[0] = lnprior.eval()

            # Optimize
            upd = Adam(loss, vars, **kwargs)
            train = theano.function([], 
                [u, vT, b, loss, lnlike, lnprior], updates=upd)
            for n in tqdm(1 + np.arange(niter_adam)):
                u_val, vT_val, b_val, loss_val, lnlike_val[n], lnprior_val[n] = train()
                if (loss_val < best_loss):
                    best_loss = loss_val
                    best_u = u_val
                    best_vT = vT_val
                    best_b = b_val

            # We're done!
            self.u = best_u
            self.vT = best_vT
            self.b = best_b
            self.lnlike = lnlike_val
            self.lnprior = lnprior_val

        self._solved = True
Beispiel #47
0
def RankRelFn(fnsim, embeddings, leftop, rightop,
              subtensorspec=None, adding=False):
    """
    This function returns a Theano function to measure the similarity score of
    all relation entities given couples of 'right' and 'left' entities (as
    sparse matrices).

    :param fnsim: similarity function (on Theano variables).
    :param embeddings: an Embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param subtensorspec: only measure the similarity score for the entities
                          corresponding to the first subtensorspec (int)
                          entities of the embedding matrix (default None: all
                          entities)
    :param adding: if the right member is composed of several entities, the
                   function needs two more inputs: we have to add the embedding
                   values of the other entities (with the appropriate scaling
                   factor to perform the mean pooling).
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr = S.csr_matrix('inpr')
    inpl = S.csr_matrix('inpl')
    if adding:
        inpoadd = S.csr_matrix('inpoadd')
        scal = T.scalar('scal')
    # Graph
    if subtensorspec is None:
        rell = relationl.E
        relr = relationr.E
    else:
        # We compute the score only for a subset of entities
        rell = relationl.E[:, :subtensorspec].T
        relr = relationr.E[:, :subtensorspec].T
    if adding:
        # Add the embeddings of the other entities (mean pooling)
        rell = rell * scal + (S.dot(relationl.E, inpoadd).T).reshape(
                (1, embedding.D))
        relr = relr * scal + (S.dot(relationr.E, inpoadd).T).reshape(
                (1, embedding.D))
    lhs = (S.dot(embedding.E, inpl).T).reshape((1, embedding.D))
    rhs = (S.dot(embedding.E, inpr).T).reshape((1, embedding.D))
    # hack to prevent a broadcast problem with the Bilinear layer
    if hasattr(leftop, 'forwardrankrel'):
        tmpleft = leftop.forwardrankrel(lhs, rell)
    else:
        tmpleft = leftop(lhs, rell)
    if hasattr(rightop, 'forwardrankrel'):
        tmpright = rightop.forwardrankrel(rhs, relr)
    else:
        tmpright = rightop(rhs, relr)
    simi = fnsim(tmpleft, tmpright)
    """
    Theano function inputs.
    :input inpl: sparse csr matrix representing the indexes of the 'left'
                 entities, shape=(#examples,N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the 'right'
                 entities, shape=(#examples,N [Embeddings]).
    :opt input inpoadd: sparse csr matrix representing the indexes of the
                        other entities of the relation member with the
                        appropriate scaling factor, shape = (#examples, N
                        [Embeddings]).
    :opt input scal: scaling factor to perform the mean: 1 / [#entities in the
                     member].

    Theano function output.
    :output simi: matrix of score values.
    """
    if not adding:
        return theano.function([inpl, inpr], [simi], on_unused_input='ignore')
    else:
        return theano.function([inpl, inpr, inpoadd, scal], [simi],
                on_unused_input='ignore')
Beispiel #48
0
def run(jobman, debug=False):
    expstart = time.time()
    hp = jobman.state

    if not os.path.exists('files/'): os.mkdir('files/')

    # Symbolic variables
    s_bow = T.matrix()
    s_idx = T.iscalar()
    s_tf = T.scalar()
    s_posit = T.matrix()  #theano.sparse.csr_matrix()
    s_negat = T.matrix()  #theano.sparse.csr_matrix()

    sentences = cPickle.load(
        open('/scratch/rifaisal/data/guten/guten_subset_idx.pkl'))

    senna = cPickle.load(open('/scratch/rifaisal/data/guten/senna.pkl'))
    gsubset = cPickle.load(
        open('/scratch/rifaisal/data/guten/guten_vocab_subset.pkl')).flatten(
        ).tolist()
    hashtab = dict(zip(gsubset, range(len(gsubset))))

    tfidf_data = numpy.load('/scratch/rifaisal/data/guten/guten_tfidf.npy'
                            ).item().tocsr().astype('float32')

    #tfidf = cPickle.load(open('/scratch/rifaisal/repos/senna/gutentokenizer.pkl'))

    senna = numpy.array(senna)[gsubset].tolist()
    s_valid = theano.sparse.csr_matrix()

    validsentence = sentences[10000:10010]

    nsent = len(sentences)
    nsenna = len(senna)

    # Layers

    embedding = cae(i_size=nsenna, h_size=hp['embedsize'], e_act=identity)

    H = ae(i_size=hp['embedsize'] * hp['wsize'],
           h_size=hp['hsize'],
           e_act=T.tanh)
    L = logistic(i_size=hp['hsize'], h_size=1, act=identity)
    S = logistic(i_size=hp['embedsize'], h_size=nsenna, act=T.nnet.softmax)

    valid_embedding = sparse.supervised.logistic(i_size=nsenna,
                                                 h_size=hp['embedsize'],
                                                 act=identity)
    valid_embedding.params['weights'] = sp.shared(
        value=scipy.sparse.csr_matrix(embedding.params['e_weights'].get_value(
            borrow=True)))
    valid_embedding.params['bias'] = embedding.params['e_bias']

    lr = hp['lr']
    h_size = hp['hsize']
    bs = hp['bs']

    posit_embed = T.dot(s_posit, embedding.params['e_weights']).reshape(
        (1, hp['embedsize'] * hp['wsize']))
    negat_embed = T.dot(s_negat, embedding.params['e_weights']).reshape(
        (hp['nneg'], hp['embedsize'] * hp['wsize']))
    valid_embed = sp.dot(s_valid, valid_embedding.params['weights']).reshape(
        (nsenna, hp['embedsize'] * hp['wsize']))

    posit_score = L.encode(H.encode(posit_embed))
    negat_score = L.encode(H.encode(negat_embed))
    valid_score = L.encode(H.encode(valid_embed))

    C = (negat_score - posit_score.flatten() + hp['margin'])

    s_bow_pred = S.encode(embedding.encode(s_bow))

    pred = s_tf * nllsoft(s_bow_pred, s_idx)

    CC = (rect(C)).mean() + hp['lambda'] * pred

    opt = theano.function(
        [s_posit, s_negat, s_bow, s_idx, s_tf], [(rect(C)).mean(), pred],
        updates=dict(
            S.update(CC, lr) + L.update(CC, lr) + H.update(CC, lr) +
            embedding.update_norm(CC, lr)))

    #validfct = theano.function([s_valid],valid_score)

    def saveexp():
        save(embedding, fname + 'embedding.pkl')
        save(H, fname + 'hidden.pkl')
        save(L, fname + 'logistic.pkl')

    delta = hp['wsize'] / 2
    rest = hp['wsize'] % 2

    freq_idx = cPickle.load(
        open('/scratch/rifaisal/data/guten/gutten_sorted_vocab.pkl'))[:1000]
    freq_idx = [hashtab[idx] for idx in freq_idx]

    fname = ''

    for e in range(hp['epoch']):
        c = []
        r = []
        count = 1
        for i in range(nsent):
            rsent = numpy.random.randint(nsent - 1)
            nword = len(sentences[rsent])
            if nword < hp['wsize'] + 2:
                continue

            pidx = numpy.random.randint(low=delta, high=nword - delta)
            pchunk = sentences[rsent][pidx - delta:pidx + delta + rest]
            nchunk = []
            st = sentences[rsent][pidx - delta:pidx]
            en = sentences[rsent][pidx + 1:pidx + delta + rest]
            rndidx = numpy.random.randint(nsenna, size=(hp['nneg'], ))
            nchunk = []
            for j in range(hp['nneg']):
                nchunk += en + [rndidx[j]] + st

            assert len(nchunk) == len(pchunk) * hp['nneg']
            tfidf_chunk = tfidf_data[rsent:rsent + 1].toarray()
            #pdb.set_trace()
            tfidf_value = tfidf_chunk[0, sentences[rsent][pidx]]
            tfidf_chunk[0, sentences[rsent][pidx]] = 0.
            tfidx = sentences[rsent][
                pidx]  # numpy.zeros(tfidf_chunk.shape).astype('float32')
            #tfidx[0,sentences[rsent][pidx]] = 1.
            p, n, b, iidx, tfval = (idx2mat(pchunk,
                                            nsenna), idx2mat(nchunk, nsenna),
                                    tfidf_chunk, tfidx, tfidf_value)
            count += tfval != 0
            l, g = opt(p, n, b, iidx, tfval)
            c.append(l)
            r.append(g)
            """
            if (time.time() - expstart) > ( 3600 * 24 * 6 + 3600*20) or (i+1)%(20*hp['freq']) == 0 and debug==False:
                valid_embedding.params['weights'] = sp.shared(value = scipy.sparse.csr_matrix(embedding.params['e_weights'].get_value(borrow=True)))
                mrk = evaluation.error(validsentence, validfct, nsenna, hp['wsize'])
                hp['mrk'] = mrk
                jobman.save()
                saveexp()
                print 'Random Valid Mean rank',mrk
            """

            if (i + 1) % hp['freq'] == 0 or debug:
                hp['score'] = numpy.array(c).sum() / (numpy.array(c) > 0).sum()
                hp['pred'] = numpy.array(r).sum() / float(count)
                hp['e'] = e
                hp['i'] = i
                print ''
                print e, i, 'NN Score:', hp['score'], 'Reconstruction:', hp[
                    'pred']

                if debug != True:
                    ne = knn(
                        freq_idx,
                        embedding.params['e_weights'].get_value(borrow=True))
                    open('files/' + fname + 'nearest.txt',
                         'w').write(display(ne, senna))
                    saveexp()
                sys.stdout.flush()
                jobman.save()

    saveexp()
Beispiel #49
0
def plot_results(
    doppler,
    loss=[],
    cho_y1=None,
    cho_s=None,
    name="vogtstar",
    nframes=None,
    render_movies=False,
    open_plots=False,
    overlap=2.0,
    res=300,
):
    """
    Plot the results of the Doppler imaging problem for the SPOT star.

    """
    # Get the values we'll need for plotting
    ydeg = doppler.ydeg
    udeg = doppler._udeg
    u = doppler.u
    theta = doppler.theta
    y1_true = doppler.y1_true
    s_true = doppler.s_true
    s_deconv = doppler.s_deconv
    baseline_true = doppler.baseline_true
    y1 = np.array(doppler.y1)
    s = np.array(doppler.s)
    baseline = doppler.baseline().reshape(-1)
    model = doppler.model()
    F = doppler.F
    lnlam = doppler.lnlam
    lnlam_padded = doppler.lnlam_padded
    M = doppler.M
    inc = doppler.inc

    # List of figure files we're generating
    files = []

    # Plot the baseline
    # HACK: Append the first measurement to the last to get
    # a plot going from -180 to 180 (endpoints included)
    theta_ = np.append(theta, [180.0])
    baseline_true_ = np.append(baseline_true, [baseline_true[0]])
    baseline_ = np.append(baseline, [baseline[0]])
    fig, ax = plt.subplots(1, figsize=(8, 5))
    ax.plot(theta_, baseline_true_, label="true")
    ax.plot(theta_, baseline_, label="inferred")
    if cho_y1 is not None:
        U = np.triu(cho_y1[0])
        B = doppler._map.design_matrix(theta=doppler.theta).eval()[:, 1:]
        A = np.linalg.solve(U.T, B.T)
        baseline_sig = np.sqrt(np.sum(A**2, axis=0))
        baseline_sig_ = np.append(baseline_sig, [baseline_sig[0]])
        ax.fill_between(
            theta_,
            baseline_ - baseline_sig_,
            baseline_ + baseline_sig_,
            color="C1",
            alpha=0.25,
            lw=0,
        )
    ax.legend(loc="lower left", fontsize=14)
    ax.set_xlabel(r"$\theta$ (degrees)")
    ax.margins(0, None)
    ax.set_xticks([-180, -135, -90, -45, 0, 45, 90, 135, 180])
    ax.set_ylabel("baseline")
    fig.savefig("%s_baseline.pdf" % name, bbox_inches="tight")
    files.append("baseline.pdf")
    plt.close()

    # Plot the loss
    if len(np.atleast_1d(loss).flatten()) > 1:

        # Compute the loss @ true value
        doppler.y1 = y1_true
        doppler.s = s_true
        loss_true = doppler.loss()

        # Print for the record
        print("True loss: %.2f" % loss_true)
        print("Best loss: %.2f" % np.min(loss))

        # Plot
        fig, ax = plt.subplots(1, figsize=(12, 5))
        ax.plot(loss, label="loss", color="C0")
        ax.axhline(loss_true, color="C1", ls="--", label="loss @ true values")
        ax.set_yscale("log")
        ax.set_ylabel("negative log probability")
        ax.set_xlabel("iteration")
        ax.legend(loc="upper right")
        fig.savefig("%s_prob.pdf" % name, bbox_inches="tight")
        files.append("prob.pdf")
        plt.close()

    # Plot the Ylm coeffs
    fig, ax = plt.subplots(1, figsize=(12, 5))
    n = np.arange(1, doppler.N)
    ax.plot(n, y1_true, "C0-", label="true")
    lo = (doppler.y1_mu - doppler.y1_sig) * np.ones_like(y1)
    hi = (doppler.y1_mu + doppler.y1_sig) * np.ones_like(y1)
    ax.fill_between(n, lo, hi, color="C1", lw=0, alpha=0.25, label="prior")
    ax.plot(n, y1, "C1-", label="inferred")
    if cho_y1 is not None:
        cov_y1 = cho_solve(cho_y1, np.eye(doppler.N - 1))
        sig_y1 = np.sqrt(np.diag(cov_y1))
        ax.fill_between(n, y1 - sig_y1, y1 + sig_y1, color="C1", alpha=0.5)
    ax.set_ylabel("spherical harmonic coefficient")
    ax.set_xlabel("coefficient number")
    ax.legend(loc="lower right", fontsize=14)
    ax.margins(0.01, None)
    fig.savefig("%s_coeffs.pdf" % name, bbox_inches="tight")
    files.append("coeffs.pdf")
    plt.close()

    # Render the true map
    map = starry.Map(ydeg=ydeg, udeg=udeg)
    map.inc = inc
    map[1:, :] = y1_true
    if udeg > 0:
        map[1:] = u
    if nframes is None:
        nframes = len(theta)
        theta_img = np.array(theta)
    else:
        theta_img = np.linspace(-180, 180, nframes + 1)[:-1]
    if render_movies:
        map.show(theta=np.linspace(-180, 180, 50), mp4="%s_true.mp4" % name)
        files.append("true.mp4")
    img_true_rect = (map.render(projection="rect",
                                res=res).eval().reshape(res, res))

    # Render the inferred map
    map[1:, :] = y1
    img = map.render(theta=theta_img).eval()
    if render_movies:
        map.show(theta=np.linspace(-180, 180, 50),
                 mp4="%s_inferred.mp4" % name)
        files.append("inferred.mp4")
    img_rect = map.render(projection="rect", res=res).eval().reshape(res, res)

    # Render the pixelwise uncertainties
    if cho_y1 is not None:

        # Compute the polynomial transform matrix
        xyz = map.ops.compute_rect_grid(tt.as_tensor_variable(res))
        P = map.ops.pT(xyz[0], xyz[1], xyz[2])[:, :doppler.N]

        # Transform it to Ylm & evaluate it
        P = ts.dot(P, map.ops.A1).eval()

        # Rotate it so north points up
        """
        R = map.ops.R([1, 0, 0], -(90.0 - inc) * np.pi / 180.0)
        for l in range(map.ydeg + 1):
            idx = slice(l ** 2, (l + 1) ** 2)
            P[:, idx] = P[:, idx].dot(R[l])
        """

        # Discard Y_{0, 0}, whose variance is zero
        P = P[:, 1:]

        # NOTE: This is the slow way of computing sigma
        # CPT = cho_solve(cho_y1, P.T)
        # cov = np.dot(P, CPT)
        # sig = np.sqrt(np.diag(cov))

        # This is the streamlined version
        U = np.triu(cho_y1[0])
        A = np.linalg.solve(U.T, P.T)
        img_sig_rect = np.sqrt(np.sum(A**2, axis=0)).reshape(res, res)
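        # Why the two versions agree: cho_y1 factors the inverse covariance of
        # y1 (that is what cho_solve inverts above), so with the upper factor
        # U we have P cov P^T = (U^{-T} P^T)^T (U^{-T} P^T) = A^T A, and the
        # pixel variances are the column-wise sums of squares of A.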

        # This is how I'd compute the *prior* uncertainty on the pixels
        nsamp = 1000
        prior_std = np.std([
            np.dot(
                P[0],
                doppler.y1_sig * np.random.randn(doppler.N - 1) +
                doppler.y1_mu,
            ) for i in range(nsamp)
        ])

    # Normalize to the maximum for plotting
    vmax = np.nanmax(img_true_rect)
    img_rect /= vmax
    img_true_rect /= vmax
    if cho_y1 is not None:
        img_sig_rect /= vmax
        prior_std /= vmax

    # Plot the maps side by side
    if cho_y1 is not None:
        fig, ax = plt.subplots(3, figsize=(10, 13))
        fig.subplots_adjust(hspace=0.3)
    else:
        fig, ax = plt.subplots(2, figsize=(10, 8))
    im = ax[0].imshow(
        img_true_rect,
        origin="lower",
        extent=(-180, 180, -90, 90),
        cmap="plasma",
        vmin=0,
        vmax=1,
    )
    divider = make_axes_locatable(ax[0])
    cax = divider.append_axes("right", size="4%", pad=0.25)
    plt.colorbar(im, cax=cax, format="%.2f")
    im = ax[1].imshow(
        img_rect,
        origin="lower",
        extent=(-180, 180, -90, 90),
        cmap="plasma",
        vmin=0,
        vmax=1,
    )
    divider = make_axes_locatable(ax[1])
    cax = divider.append_axes("right", size="4%", pad=0.25)
    plt.colorbar(im, cax=cax, format="%.2f")
    ax[0].annotate(
        "true",
        xy=(0, 0),
        xytext=(7, 7),
        xycoords="axes fraction",
        textcoords="offset points",
        ha="left",
        va="bottom",
        fontsize=22,
        color="k",
        zorder=101,
    )
    ax[1].annotate(
        "inferred",
        xy=(0, 0),
        xytext=(7, 7),
        xycoords="axes fraction",
        textcoords="offset points",
        ha="left",
        va="bottom",
        fontsize=22,
        color="k",
        zorder=101,
    )
    if cho_y1 is not None:
        im = ax[2].imshow(
            img_sig_rect,
            origin="lower",
            extent=(-180, 180, -90, 90),
            cmap="plasma",
            vmin=0,
            vmax=prior_std,
        )
        ticks = np.linspace(0, prior_std, 5)
        ticklabels = ["%.2f" % t for t in ticks]
        ticklabels[-1] = r"$\sigma_\mathrm{prior}$"
        divider = make_axes_locatable(ax[2])
        cax = divider.append_axes("right", size="4%", pad=0.25)
        cb = plt.colorbar(im, cax=cax, format="%.2f", ticks=ticks)
        cb.ax.set_yticklabels(ticklabels)
        ax[2].annotate(
            "uncertainty",
            xy=(0, 0),
            xytext=(7, 7),
            xycoords="axes fraction",
            textcoords="offset points",
            ha="left",
            va="bottom",
            fontsize=22,
            color="k",
            zorder=101,
        )
    for axis in ax:
        latlines = np.linspace(-90, 90, 7)[1:-1]
        lonlines = np.linspace(-180, 180, 13)
        for lat in latlines:
            axis.axhline(lat, color="k", lw=0.5, alpha=0.5, zorder=100)
        for lon in lonlines:
            axis.axvline(lon, color="k", lw=0.5, alpha=0.5, zorder=100)
        axis.set_xticks(lonlines)
        axis.set_yticks(latlines)
        # axis.set_xlabel("Longitude [deg]", fontsize=16)
        # axis.set_ylabel("Latitude [deg]", fontsize=16)
        for tick in (axis.xaxis.get_major_ticks() +
                     axis.yaxis.get_major_ticks()):
            tick.label.set_fontsize(10)
    fig.savefig("%s_rect.pdf" % name, bbox_inches="tight")
    files.append("rect.pdf")
    plt.close()

    # Plot the "Joy Division" graph
    fig = plt.figure(figsize=(8, 11.5))
    ax_img = [
        plt.subplot2grid((nframes, 8), (n, 0), rowspan=1, colspan=1)
        for n in range(nframes)
    ]
    ax_f = [plt.subplot2grid((nframes, 8), (0, 1), rowspan=1, colspan=7)]
    ax_f += [
        plt.subplot2grid(
            (nframes, 8),
            (n, 1),
            rowspan=1,
            colspan=7,
            sharex=ax_f[0],
            sharey=ax_f[0],
        ) for n in range(1, nframes)
    ]
    for n in range(nframes):
        ax_img[n].imshow(img[n],
                         extent=(-1, 1, -1, 1),
                         origin="lower",
                         cmap="plasma")
        ax_img[n].axis("off")
        m = int(np.round(np.linspace(0, M - 1, nframes)[n]))
        ax_f[n].plot(lnlam, F[m], "k.", ms=2, alpha=0.75, clip_on=False)
        ax_f[n].plot(lnlam, model[m], "C1-", lw=1, clip_on=False)
        ax_f[n].axis("off")
    ymed = np.median(F)
    ydel = 0.5 * (np.max(F) - np.min(F)) / overlap
    ax_f[0].set_ylim(ymed - ydel, ymed + ydel)
    fig.savefig("%s_timeseries.pdf" % name, bbox_inches="tight", dpi=400)
    files.append("timeseries.pdf")
    plt.close()

    # Plot the rest frame spectrum
    fig, ax = plt.subplots(1)
    ax.plot(lnlam_padded, s_true.reshape(-1), "C0-", label="true")
    if s_deconv is not None:
        ax.plot(
            lnlam_padded,
            s_deconv.reshape(-1),
            "C1--",
            lw=1,
            alpha=0.5,
            label="guess",
        )
    ax.plot(lnlam_padded, s.reshape(-1), "C1-", label="inferred")
    if cho_s is not None:
        cov_s = cho_solve(cho_s, np.eye(doppler.Kp))
        sig_s = np.sqrt(np.diag(cov_s))
        ax.fill_between(lnlam_padded,
                        s - sig_s,
                        s + sig_s,
                        color="C1",
                        alpha=0.5)
    ax.axvspan(lnlam_padded[0], lnlam[0], color="k", alpha=0.3)
    ax.axvspan(lnlam[-1], lnlam_padded[-1], color="k", alpha=0.3)
    ax.set_xlim(lnlam_padded[0], lnlam_padded[-1])
    ax.set_xlabel(r"$\ln\left(\lambda/\lambda_\mathrm{r}\right)$")
    ax.set_ylabel(r"Normalized intensity")
    ax.legend(loc="lower left", fontsize=12)
    fig.savefig("%s_spectrum.pdf" % name, bbox_inches="tight")
    files.append("spectrum.pdf")
    plt.close()

    # Open
    if open_plots:
        for file in files:
            subprocess.run(["open", "%s_%s" % (name, file)])
Beispiel #50
0
def TrainFn(fnsim, embeddings, leftop, rightop,
                loss=loss.hinge, loss_margin=1.0, op='', method='SGD',
                decay=0.999, epsilon=1e-6, max_learning_rate=None,
                weight_L1_param_regularizer=None, weight_L2_param_regularizer=None,
                weight_contractive_regularizer_left=None, weight_contractive_regularizer_right=None):
    """
    This function returns a Theano function to perform a training iteration,
    contrasting couples of positive and negative triplets. Members are given
    as sparse matrices. For one positive triplet there is one negative triplet.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    """

    embedding, relationl, relationr = parse_embeddings(embeddings)

    # Inputs
    inpr, inpl, inpo = S.csr_matrix('inpr'), S.csr_matrix('inpl'), S.csr_matrix('inpo')
    inpln, inprn, inpon = S.csr_matrix('inpln'), S.csr_matrix('inprn'), S.csr_matrix('inpon')

    # Learning rates for parameters and embeddings
    rate_params = T.scalar('rate_params')
    rate_embeddings = T.scalar('rate_embeddings')

    # E: D x N, inp: N x B -> <E, inp>: D x B -> <E, inp>.T: B x D

    # Positive triplet functions
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T

    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T

    # Negative triplet functions
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T

    relln = S.dot(relationl.E, inpon).T
    relrn = S.dot(relationr.E, inpon).T

    # Similarity Function, applied to g_lhs and g_rhs
    lop, rop = leftop(lhs, rell), rightop(rhs, relr)
    lopn, ropn = leftop(lhsn, relln), rightop(rhsn, relrn)

    simi = fnsim(lop, rop)
    simin = fnsim(lopn, ropn)

    supported_loss_args = inspect.getargspec(loss)[0]
    loss_args = {} if 'margin' not in supported_loss_args else { 'margin':loss_margin }

    cost, out = loss(simi, simin, **loss_args)
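    # out is the per-example indicator returned by the loss (presumably 1
    # where the margin is violated), so T.mean(out) gives the fraction of
    # examples that would trigger an update.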

    # <EXPERIMENTAL_CODE>
    # Should I also plug examples from corrupted triples ?
    if weight_contractive_regularizer_left is not None:
        cost = cost + (weight_contractive_regularizer_left * R.contractive_regularizer(lop, lhs))

    if weight_contractive_regularizer_right is not None:
        cost = cost + (weight_contractive_regularizer_right * R.contractive_regularizer(rop, rhs))

    for rel_param in set([relationl.E, relationr.E]):
        if weight_L1_param_regularizer is not None:
            cost = cost + (weight_L1_param_regularizer * R.L1_regularizer(rel_param))
        if weight_L2_param_regularizer is not None:
            cost = cost + (weight_L2_param_regularizer * R.L2_regularizer(rel_param))
    # </EXPERIMENTAL_CODE>

    params = leftop.params + rightop.params + (fnsim.params if hasattr(fnsim, 'params') else [])
    params = list(set(params))

    embeds = [embedding.E] + ([relationr.E, relationl.E] if (type(embeddings) == list) else [])
    embeds = list(set(embeds))

    # Updates applied to the shared variables each time the compiled function is called.
    updates = collections.OrderedDict()


    if (method == 'SGD'):
        pass # do nothing

    elif (method == 'MOMENTUM'):
        param_previous_update_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the previous updates
            previous_update_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_previous_update = theano.shared(value=previous_update_value, name='su_' + param.name)

            param_previous_update_map[param] = param_previous_update

    elif (method == 'ADAGRAD'):
        param_squared_gradients_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the sums of squared gradients
            squared_gradients_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_gradients = theano.shared(value=squared_gradients_value, name='sg_' + param.name)

            param_squared_gradients_map[param] = param_squared_gradients

    elif (method == 'ADADELTA'):
        param_squared_gradients_map = collections.OrderedDict()
        param_squared_updates_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the sums of squared gradients
            squared_gradients_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_gradients = theano.shared(value=squared_gradients_value, name='sg_' + param.name)

            param_squared_gradients_map[param] = param_squared_gradients

            # Allocate the sums of squared updates
            squared_updates_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_updates = theano.shared(value=squared_updates_value, name='su_' + param.name)

            param_squared_updates_map[param] = param_squared_updates

    elif (method == 'RMSPROP'):
        param_squared_gradients_map = collections.OrderedDict()

        for param in params + embeds:
            # Allocate the sums of squared gradients
            squared_gradients_value = numpy.zeros(param.get_value().shape, dtype=theano.config.floatX)
            param_squared_gradients = theano.shared(value=squared_gradients_value, name='sg_' + param.name)

            param_squared_gradients_map[param] = param_squared_gradients

    else:
        raise ValueError('Unknown method: %s' % (method))
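
    # For reference, a hedged sketch of the update rules that the per-parameter
    # state allocated above supports (assuming U.adagrad / U.rmsprop / U.adadelta
    # implement the standard rules):
    #   ADAGRAD:  G <- G + g**2;                 p <- p - rate * g / sqrt(G + epsilon)
    #   RMSPROP:  G <- decay*G + (1-decay)*g**2; p <- p - rate * g / sqrt(G + epsilon)
    #   ADADELTA: G <- decay*G + (1-decay)*g**2
    #             dp <- -sqrt(U + epsilon) / sqrt(G + epsilon) * g
    #             U <- decay*U + (1-decay)*dp**2; p <- p + dp
    # where g is the gradient, G the accumulated squared gradients ('sg_*'),
    # and U the accumulated squared updates ('su_*').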


    # Parameter Gradients
    gradientsparams = T.grad(cost, params)

    # Embeddings gradients
    gradientsembeds = T.grad(cost, embeds)


    # Learning Rates
    rates_params = [rate_params for i in range(len(params))]

    # In TransE and similar models the predicate embeddings are not normalized,
    # so they are updated with rate_params rather than rate_embeddings.
    # Note: this pairing assumes the entity embedding matrix comes first in `embeds`.
    rates_embeddings = [rate_embeddings, rate_params, rate_params] if len(embeds) > 1 else [rate_embeddings]

    for param, gradient, rate in zip(params + embeds, gradientsparams + gradientsembeds, rates_params + rates_embeddings):

        if (method == 'SGD'): # SGD
            U.sgd(param, rate, gradient, updates)

        elif (method == 'MOMENTUM'): # SGD+MOMENTUM
            param_previous_update = param_previous_update_map[param]
            U.momentum(param, rate, decay, gradient, updates, param_previous_update)

        elif (method == 'ADAGRAD'): # ADAGRAD
            param_squared_gradients = param_squared_gradients_map[param]
            U.adagrad(param, rate, epsilon, gradient, updates, param_squared_gradients)

        elif (method == 'ADADELTA'): # ADADELTA
            param_squared_gradients = param_squared_gradients_map[param]
            param_squared_updates = param_squared_updates_map[param]
            U.adadelta(param, rate, decay, epsilon, gradient, updates, param_squared_gradients, param_squared_updates)

        elif (method == 'RMSPROP'): # RMSPROP
            param_squared_gradients = param_squared_gradients_map[param]
            U.rmsprop(param, rate, decay, max_learning_rate, epsilon, gradient, updates, param_squared_gradients)

        else:
            raise ValueError('Unknown method: %s' % (method))

    """
    Theano function inputs.
    :input rate_embeddings: learning/decay rate for the embeddings.
    :input rate_params: learning/decay rate for the parameters.

    :input inpl: sparse csr matrix representing the indexes of the positive triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive triplet relation member, shape=(#examples,N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpon: sparse csr matrix representing the indexes of the negative triplet relation member, shape=(#examples,N [Embeddings]).

    Theano function output.
    :output mean(cost): average cost.
    :output mean(out): ratio of examples for which the margin is violated,
                       i.e. for which an update occurs.
    """
    return theano.function([rate_embeddings, rate_params, inpl, inpr, inpo, inpln, inprn, inpon],
                           [T.mean(cost), T.mean(out)], updates=updates, on_unused_input='ignore')
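
Throughout TrainFn the pattern S.dot(embedding.E, inpl).T turns a batch of one-hot sparse columns into a batch of embedding rows (one row per example). A self-contained sketch of the same trick with SciPy/NumPy, purely for illustration (the real code operates on Theano sparse variables):

import numpy as np
from scipy.sparse import csr_matrix

D, N, B = 4, 6, 3                      # embedding dim, vocabulary size, batch size
rng = np.random.default_rng(0)
E = rng.normal(size=(D, N))            # embedding matrix, one entity per column

indices = np.array([2, 0, 5])          # entity index selected by each example
inp = csr_matrix((np.ones(B), (indices, np.arange(B))), shape=(N, B))

# (E @ inp).T == inp.T @ E.T, i.e. a row of embeddings per example
lhs = np.asarray(inp.T @ E.T)          # shape (B, D)
assert np.allclose(lhs, E[:, indices].T)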
Beispiel #51
0
def ForwardFn1Member(fnsim, embeddings, leftop, rightop, marge=1.0, rel=True):
    """
    This function returns a theano function to perform a forward step,
    contrasting positive and negative triplets. members are given as sparse
    matrices. For one positive triplet there are two or three (if rel == True)
    negative triplets. To create a negative triplet we replace only one member
    at a time.

    :param fnsim: similarity function (on theano variables).
    :param embeddings: an embeddings instance.
    :param leftop: class for the 'left' operator.
    :param rightop: class for the 'right' operator.
    :param marge: marge for the cost function.
    :param rel: boolean, if true we also contrast w.r.t. a negative relation
                member.

    :note: this is useful for W_SABIE [Weston et al., IJCAI 2011]
    """
    embedding, relationl, relationr = parse_embeddings(embeddings)

    # inputs
    inpr = S.csr_matrix()
    inpl = S.csr_matrix()
    inpo = S.csr_matrix()
    inpln = S.csr_matrix()
    inprn = S.csr_matrix()

    # graph
    lhs = S.dot(embedding.E, inpl).T
    rhs = S.dot(embedding.E, inpr).T
    rell = S.dot(relationl.E, inpo).T
    relr = S.dot(relationr.E, inpo).T
    lhsn = S.dot(embedding.E, inpln).T
    rhsn = S.dot(embedding.E, inprn).T
    simi = fnsim(leftop(lhs, rell), rightop(rhs, relr))
    similn = fnsim(leftop(lhsn, rell), rightop(rhs, relr))
    simirn = fnsim(leftop(lhs, rell), rightop(rhsn, relr))
    costl, outl = margincost(simi, similn, marge)
    costr, outr = margincost(simi, simirn, marge)
    list_in = [inpl, inpr, inpo, inpln]
    list_out = [outl, outr]
    if rel:
        inpon = S.csr_matrix()
        relln = S.dot(relationl.E, inpon).T
        relrn = S.dot(relationr.E, inpon).T
        simion = fnsim(leftop(lhs, relln), rightop(rhs, relrn))
        costo, outo = margincost(simi, simion, marge)
        out = T.concatenate([outl, outr, outo])
        list_in += [inpon]
        list_out += [outo]
    """
    Theano function inputs.
    :input inpl: sparse csr matrix representing the indexes of the positive
                 triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inpr: sparse csr matrix representing the indexes of the positive
                 triplet 'right' member, shape=(#examples,N [Embeddings]).
    :input inpo: sparse csr matrix representing the indexes of the positive
                 triplet relation member, shape=(#examples,N [Embeddings]).
    :input inpln: sparse csr matrix representing the indexes of the negative
                  triplet 'left' member, shape=(#examples,N [Embeddings]).
    :input inprn: sparse csr matrix representing the indexes of the negative
                  triplet 'right' member, shape=(#examples,N [Embeddings]).
    :opt input inpon: sparse csr matrix representing the indexes of the
                      negative triplet relation member, shape=(#examples,N
                      [Embeddings]).

    Theano function output.
    :output outl: binary vector representing when the margin is violated, i.e.
                  when an update occurs, for the 'left' member.
    :output outr: binary vector representing when the margin is violated, i.e.
                  when an update occurs, for the 'right' member.
    :opt output outo: binary vector representing when the margin is violated,
                  i.e. when an update occurs, for the relation member.
    """
    return theano.function(list_in, list_out, on_unused_input='ignore')
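
margincost above is imported from elsewhere in this codebase. A plausible NumPy sketch of a pairwise margin (hinge) cost with the same call signature, for illustration only:

import numpy as np

def margincost_sketch(pos, neg, marge=1.0):
    # Penalize negative triplets that score within `marge` of the positive one.
    # Returns (cost, out): the summed hinge cost and a boolean vector marking
    # the examples that violate the margin (i.e. would trigger an update).
    out = neg - pos + marge > 0
    cost = np.sum((neg - pos + marge) * out)
    return cost, out

pos = np.array([2.0, 0.3])
neg = np.array([0.5, 0.1])
cost, out = margincost_sketch(pos, neg)   # cost == 0.8, out == [False, True]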
Beispiel #52
0
    def solve(self,
              y1=None,
              s=None,
              baseline=None,
              y1_guess=None,
              s_guess=None,
              baseline_guess=None,
              niter=100,
              T=1.0,
              dlogT=-0.25,
              optimizer="NAdam",
              dcf=10.0,
              quiet=False,
              **kwargs):
        """Solve the Doppler imaging problem.
        
        Returns:
            ``(loss, cho_y1, cho_s)``, a tuple containing the array of
            loss values during the optimization and the Cholesky factorization
            of the covariance matrices of ``y1`` and ``s``, if available 
            (otherwise the latter two are set to ``None``.)
        """
        # Check the optimizer is valid
        if optimizer.lower() == "nadam":
            optimizer = NAdam
        elif optimizer.lower() == "adam":
            optimizer = Adam
        else:
            raise ValueError("Invalid optimizer.")

        # Figure out what to solve for
        known = []
        if s is not None:
            known += ["s"]
        if y1 is not None:
            known += ["y1"]

        if ("y1" in known) and ("s" in known):

            # Nothing to do here but ingest the values!
            self.y1 = y1
            self.s = s
            return self.loss(), None, None

        elif "y1" in known:

            # Easy: it's a linear problem
            self.y1 = y1
            cho_s = self.compute_s()
            return self.loss(), None, cho_s

        else:

            if ("s" in known) and (baseline is not None):

                # Still a linear problem!
                self.s = s
                cho_y1 = self.compute_y1(baseline=baseline)
                return self.loss(), cho_y1, None

            else:

                # Non-linear. Let's use (N)Adam.

                if "s" in known:

                    # We know `s` and need to solve for
                    # `y1` w/o any baseline knowledge.
                    s_guess = s

                else:

                    # We know *nothing*!

                    # Estimate `v^T` from the deconvolved mean spectrum
                    if s_guess is None:

                        fmean = np.mean(self.F, axis=0)
                        fmean -= np.mean(fmean)
                        diagonals = np.tile(self.kT()[0].reshape(-1, 1),
                                            self.K)
                        offsets = np.arange(self.W)
                        A = diags(diagonals,
                                  offsets, (self.K, self.Kp),
                                  format="csr")
                        LInv = (dcf**2 * self.ferr**2 / self.s_sig**2 *
                                np.eye(A.shape[1]))
                        s_guess = 1.0 + np.linalg.solve(
                            A.T.dot(A).toarray() + LInv, A.T.dot(fmean))

                        # Save this for later
                        self.s_deconv = s_guess

                # Estimate `y1` w/o baseline knowledge
                # If `baseline_guess` is `None`, this is done via
                # a Taylor expansion; see ``compute_y1()``.
                if y1_guess is None:

                    self.s = s_guess
                    self.compute_y1(T=T, baseline=baseline_guess)
                    y1_guess = self.y1

                # Initialize the variables
                self.y1 = y1_guess
                self.s = s_guess

                # Tempering params
                if T > 1.0:
                    T_arr = 10**np.arange(np.log10(T), 0, dlogT)
                    T_arr = np.append(T_arr, [1.0])
                    niter_bilin = len(T_arr)
                else:
                    T_arr = [1.0]
                    niter_bilin = 1
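                # For example, T=100 and dlogT=-0.25 give
                # T_arr ~ [100, 56.2, 31.6, 17.8, 10, 5.6, 3.2, 1.8, 1.0]:
                # the temperature is lowered geometrically until the
                # untempered problem (T = 1) is reached.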

                # Loss array
                loss_val = np.zeros(niter_bilin + niter + 1)
                loss_val[0] = self.loss()

                # Iterative bi-linear solve
                if niter_bilin > 0:

                    if not quiet:
                        print("Running bi-linear solver...")

                    best_loss = loss_val[0]
                    best_y1 = self.y1
                    best_s = self.s

                    for n in tqdm(range(niter_bilin), disable=quiet):

                        # Compute `y1` using the previous baseline
                        self.compute_y1(T=T_arr[n], baseline=self.baseline())

                        # Compute `s` using the current `y1`
                        if "s" not in known:
                            self.compute_s(T=T_arr[n])

                        loss_val[n + 1] = self.loss()

                        if loss_val[n + 1] < best_loss:
                            best_loss = loss_val[n + 1]
                            best_y1 = self.y1
                            best_s = self.s

                    self.y1 = best_y1
                    self.s = best_s

                # Non-linear solve
                if niter > 0:

                    # Theano non-linear solve. Variables:
                    y1 = theano.shared(self.y1)
                    s = theano.shared(self.s)
                    if "s" in known:
                        theano_vars = [y1]
                    else:
                        theano_vars = [y1, s]

                    # Compute the model
                    D = ts.as_sparse_variable(self.D())
                    a = tt.reshape(
                        tt.dot(
                            tt.reshape(tt.concatenate([[1.0], y1]), (-1, 1)),
                            tt.reshape(s, (1, -1)),
                        ),
                        (-1, ),
                    )
                    b = tt.dot(
                        self._map.design_matrix(theta=self.theta),
                        tt.reshape(tt.concatenate([[1.0], y1]), (-1, 1)),
                    )
                    B = tt.reshape(b, (-1, 1))
                    M = tt.reshape(ts.dot(D, a), (self.M, -1)) / B

                    # Compute the loss
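                    # Note: `self._F_CInv` is used here as a per-point inverse
                    # variance, so `lnlike` is a Gaussian log-likelihood up to a
                    # constant: -0.5 * sum(r**2 / sigma**2). `lnprior` combines
                    # independent Gaussian priors on `y1` and the baseline `b`
                    # with a correlated Gaussian prior on `s` (through the
                    # precision matrix `self._s_CInv`).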
                    r = tt.reshape(self.F - M, (-1, ))
                    cov = tt.reshape(self._F_CInv, (-1, ))
                    lnlike = -0.5 * tt.sum(r**2 * cov)
                    lnprior = (-0.5 * tt.sum(
                        (y1 - self.y1_mu)**2 / self.y1_sig**2) + -0.5 * tt.sum(
                            (b - self.baseline_mu)**2 / self.baseline_sig**2) +
                               -0.5 * tt.dot(
                                   tt.dot(
                                       tt.reshape((s - self.s_mu), (1, -1)),
                                       self._s_CInv,
                                   ),
                                   tt.reshape((s - self.s_mu), (-1, 1)),
                               )[0, 0])
                    loss = -(lnlike + lnprior)
                    best_loss = loss.eval()
                    best_y1 = y1.eval()
                    best_s = s.eval()

                    if not quiet:
                        print("Running non-linear solver...")

                    upd = optimizer(loss, theano_vars, **kwargs)
                    train = theano.function([], [y1, s, loss], updates=upd)
                    for n in tqdm(1 + niter_bilin + np.arange(niter),
                                  disable=quiet):
                        y1_val, s_val, loss_val[n] = train()
                        if loss_val[n] < best_loss:
                            best_loss = loss_val[n]
                            best_y1 = y1_val
                            best_s = s_val

                    # We are done!
                    self.y1 = best_y1
                    self.s = best_s

                # Estimate the covariance of `y1` conditioned on `s`
                # and the covariance of `s` conditioned on `y1`.
                # Note that the covariance of `y1` is computed from
                # the linearization that allows us to simultaneously
                # solve for the baseline.
                y1_curr = np.array(self.y1)
                cho_y1 = self.compute_y1()
                self.y1 = y1_curr

                if "s" not in known:
                    s_curr = np.array(self.s)
                    cho_s = self.compute_s()
                    self.s = s_curr
                else:
                    cho_s = None

                return loss_val, cho_y1, cho_s
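
The initial spectrum guess in solve() is a ridge-regularized least-squares deconvolution: with a banded convolution matrix A built from the line kernel, the mean-subtracted flux fmean, and a diagonal regularizer LInv, it solves (A^T A + LInv) s = A^T fmean. A self-contained sketch of that step with made-up shapes and noise levels (toy values stand in for the class attributes used above):

import numpy as np
from scipy.sparse import diags

rng = np.random.default_rng(42)
K, W = 200, 11                        # spectrum length and kernel width
Kp = K + W - 1                        # padded length of the unknown spectrum
kernel = np.exp(-0.5 * ((np.arange(W) - W // 2) / 2.0) ** 2)
kernel /= kernel.sum()

# Banded convolution matrix, mirroring the diags(...) call in solve()
A = diags(np.tile(kernel.reshape(-1, 1), K), np.arange(W), (K, Kp), format="csr")

# Toy "true" spectrum (unit continuum with one absorption line) and noisy data
s_true = 1.0 - 0.5 * np.exp(-0.5 * ((np.arange(Kp) - Kp / 2) / 3.0) ** 2)
fmean = A.dot(s_true - 1.0) + 1e-3 * rng.normal(size=K)

# Ridge-regularized normal equations: (A^T A + LInv) s = A^T fmean
lam = 1e-2                            # plays the role of dcf**2 * ferr**2 / s_sig**2
LInv = lam * np.eye(A.shape[1])
s_guess = 1.0 + np.linalg.solve(A.T.dot(A).toarray() + LInv, A.T.dot(fmean))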