Example #1
import numpy as np

import ff  # project module with the feature functions (sent_precheck, metaff)
import sr  # project module with the CRF subroutines (g, alpha_mat, beta_mat, Z)

def expectation_check(training_sentences, training_labels, w):
    """
    Compares the empirical feature counts (lhs_array) against the
    model-expected feature counts (rhs_array); at a maximum of the log
    conditional likelihood the two should agree.
    """
    lhs_array = np.zeros(np.size(w))
    rhs_array = np.zeros(np.size(w))
    for i in range(len(training_labels)):
        y = training_labels[i]
        x = training_sentences[i]
        N = len(y)
        x_info = ff.sent_precheck(x)
        for k in range(1,N):
            trueFF = ff.metaff(y[k-1],y[k],x_info,k)
            for j in trueFF:
                lhs_array[j] += 1
        
        g = sr.g(w,x)
        e_g = np.exp(g)
        alpha = sr.alpha_mat(g)
        beta = sr.beta_mat(g)
        z = sr.Z(alpha,beta)
        for k in range(np.shape(g)[0]):
            for m1 in range(8):
                for m2 in range(8):
                    factor = alpha[k,m1]*beta[k+1,m2]*e_g[k,m1,m2]/z
                    # get list of nonzero (and thus = 1) f_j for (k, m1, m2);
                    # positions passed to metaff are 1-indexed, matching the
                    # empirical-count loop above
                    trueFF = ff.metaff(m1, m2, x_info, k+1)
                    #add the weighting factor to them
                    for j in trueFF:
                        rhs_array[j] += factor
    return lhs_array,rhs_array
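If the optimizer has converged, the two count vectors returned above should be numerically equal. A minimal sketch of that check, assuming trained weights w_trained and the training lists from above (the variable names are illustrative, not part of the original code):

import numpy as np

lhs, rhs = expectation_check(training_sentences, training_labels, w_trained)
print("largest count mismatch:", np.max(np.abs(lhs - rhs)))
print("counts agree:", np.allclose(lhs, rhs, atol=1e-3))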
Example #2
import sr   # project module with the CRF subroutines (g, U, bestlabel)
import ffs  # project module with the feature functions (sent_precheck, metaff)

def collins_epoch(train_labels, train_sentences, w0):
    """
    This function is a single epoch of the Collins perceptron.  An epoch ends 
    after every example in the (shuffled) training data has been visited once.
    
    train_labels - A list containing ALL of the labels in the training data.
    train_sentences - A list of ALL sentences in the training data.
    w0 - The initial parameter values, at the start of the epoch.
    """
    
    Ntrain = len(train_sentences)  # number of training examples
    assert Ntrain == len(train_labels)
    J = len(w0)  # number of parameters, equal to number of feature functions
    #w = np.zeros(shape=(Ntrain+1,J))  # to store the parameter trajectory
    #w[0] = w0
    
    # pick out a random subset of training examples
    #sentences_self = train_sentences[:100]
    #labels_self = train_labels[:100]
    
    # track average number of true feature functions
    av_true = 0.0
    nevals = 0.0
    
    for nex,(sentence,label) in enumerate(zip(train_sentences,train_labels)):
        if (nex+1) % 1000 == 0:
            print(nex + 1)  # progress report every 1000 examples
        
        # first, calculate g
        g_ex = sr.g(w0,sentence)
        
        # now U
        U_ex = sr.U(g_ex)
        
        # find the best label
        y_best = sr.bestlabel(U_ex,g_ex)
        
        # update the weight
        #w[nex+1] = w[nex]
        x_info = ffs.sent_precheck(sentence)
        
        for i,(m1,m2,b1,b2) in enumerate(zip(label[:-1],label[1:],y_best[:-1],y_best[1:])):
            trueFF = ffs.metaff(m1,m2,x_info,i+1)
            bestFF = ffs.metaff(b1,b2,x_info,i+1)
            
            av_true += float(len(trueFF))
            nevals += 1.0
            
            for j in trueFF:
                #w[nex+1,j] += 1
                w0[j] += 1
            for j in bestFF:
                #w[nex+1,j] -= 1
                w0[j] -= 1
    
    print("Average number of true FF's:", av_true / nevals)
    
    return w0
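The docstring above refers to shuffled training data, but the shuffling itself must happen outside collins_epoch. A minimal multi-epoch driver, sketched under the assumption that the epoch function mutates and returns the weight vector (collins_train and n_epochs are illustrative names, not part of the original code):

import random

def collins_train(train_labels, train_sentences, w0, n_epochs=5):
    # shuffle labels and sentences jointly so they stay aligned
    data = list(zip(train_labels, train_sentences))
    w = w0
    for epoch in range(n_epochs):
        random.shuffle(data)
        labels, sentences = zip(*data)
        w = collins_epoch(list(labels), list(sentences), w)
    return w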
Example #3
import itertools as it
import numpy as np

import ffs  # project module with the feature functions (sent_precheck, metaff)

def g(w, x):
    """
    Calculates g functions for each pair of tags y in a sentence x.
    
    w - List of weights associated with ffs.
    x - Sequence (sentence) over which to evaluate g.
    
    Returns: an (N-1) x M x M array, where N is the length of the sentence
    and M is the number of possible tags for each word.
    """
    
    M = 8  # number of possible tags
    N = len(x)  # length of sentence
    
    g_mat = np.zeros(shape=(N-1,M,M))  # avoid the reserved __dunder__ naming
    
    # preprocess the sentence
    x_info = ffs.sent_precheck(x)
    
    for i,m1,m2 in it.product(range(1,N),range(0,M),range(0,M)):
        # get the nonzero feature function indices for this tag pair
        trueFF = ffs.metaff(m1,m2,x_info,i)
        # fill in the nonzero elements of g
        for j in trueFF:
            g_mat[i-1,m1,m2] += w[j]
    
    return g_mat
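To make the indexing concrete: g_mat[i-1, m1, m2] is the log-potential for the tag pair (m1, m2) at word positions (i-1, i), so a full tag sequence y is scored by summing one entry per adjacent tag pair. A small helper illustrating this (label_score is not part of the original code):

def label_score(g_mat, y):
    # unnormalized log-score of tag sequence y: one g entry per tag pair
    return sum(g_mat[i-1, y[i-1], y[i]] for i in range(1, len(y)))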
Example #4
import numpy as np

import ff  # project module with the feature functions (sent_precheck, metaff)
import sr  # project module with the CRF subroutines (g, alpha_mat, beta_mat, Z)

def compute_gradient(x, y, w, dw):
    """
    Computes the gradient of the log conditional likelihood for a single
    training example (x, y), writing the result into dw in place.
    """
    dw *= 0.0  # reset the gradient buffer in place

    # get info about this training example
    x_info = ff.sent_precheck(x)
    N = len(x)

    # compute some necessary arrays of factors from the subroutines module
    g = sr.g(w, x)
    e_g = np.exp(g)
    alpha = sr.alpha_mat(g)
    beta = sr.beta_mat(g)
    z = sr.Z(alpha, beta)
    # iterate over position in sentence and tag values, getting a list of indices
    # of feature functions to update at for each position and tag pair value.
    for i in range(np.shape(g)[0]):
        for m1 in range(8):
            for m2 in range(8):
                factor = alpha[i, m1] * beta[i + 1, m2] * e_g[i, m1, m2] / z
                # get list of non-zero (and thus =1) f_j for (i,m1,m2)
                # print m1,m2,x_info,i
                trueFF = ff.metaff(m1, m2, x_info, i + 1)
                # add the weighting factor to them
                for j in trueFF:
                    dw[j] -= factor

    # now use the data in y to add the "true" feature counts F_j, once more
    # iterating over i but taking (m1, m2) from the supplied label y
    for i in range(1, N):
        trueFF = ff.metaff(y[i - 1], y[i], x_info, i)
        for j in trueFF:
            dw[j] += 1
    return dw
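Since dw holds the true feature counts minus the expected counts, it is the gradient of the per-example log conditional likelihood, and training ascends it. A sketch of one stochastic pass (sgd_epoch and rate are illustrative names, not part of the original code):

import numpy as np

def sgd_epoch(train_sentences, train_labels, w, rate=0.01):
    dw = np.zeros_like(w)
    for x, y in zip(train_sentences, train_labels):
        dw = compute_gradient(x, y, w, dw)
        w += rate * dw  # gradient ascent on the log-likelihood
    return w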
Example #5
import math

import ff  # project module with the feature functions (sent_precheck, metaff)
import sr  # project module with the CRF subroutines (g, alpha_mat, beta_mat, Z)

def LCL_single(x,y,w):
    """
    Computes the log conditional likelihood (LCL) for a single training
    example: log p(y|x) = sum_j w_j F_j(x,y) - log Z(x).
    """
    x_info = ff.sent_precheck(x)
    
    g = sr.g(w,x)
    alpha = sr.alpha_mat(g)
    beta = sr.beta_mat(g)
    z = sr.Z(alpha,beta)
    N = len(x)
    sum_on_j = 0.0
    
    for i in range(1,N):
        trueFF = ff.metaff(y[i-1],y[i],x_info,i)
        for j in trueFF:
            sum_on_j += w[j]
    # sum_on_j is sum_j w_j F_j(x,y); subtracting log Z gives log p(y|x)
    LCL = sum_on_j - math.log(z)
    return LCL
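Summing this quantity over a dataset gives the total LCL, which should increase during training under a well-behaved optimizer. A one-line helper for monitoring that (LCL_total is an illustrative name, not part of the original code):

def LCL_total(sentences, labels, w):
    # total log conditional likelihood over a dataset
    return sum(LCL_single(x, y, w) for x, y in zip(sentences, labels))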