def expectation_check(training_sentences,training_labels,w):
    """
    Accumulates, over the whole training set, two per-feature totals that can
    be compared against each other.

    training_sentences - Sequence of sentences, indexed in parallel with
                         training_labels.
    training_labels    - Sequence of tag sequences, one per sentence.
    w                  - Weight vector; np.size(w) fixes the length of the
                         returned arrays and w is passed on to sr.g.

    Returns: (lhs_array, rhs_array)
        lhs_array[j] - number of times feature function j fires on the
                       supplied (true) tag pairs.
        rhs_array[j] - sum of the weighting factor
                       alpha * beta * exp(g) / z over every position and tag
                       pair on which feature function j fires.
    """
    num_tags = 8  # tag values iterated for every position (0..7)

    lhs_array = np.zeros(np.size(w))
    rhs_array = np.zeros(np.size(w))

    for idx, y in enumerate(training_labels):
        x = training_sentences[idx]
        x_info = ff.sent_precheck(x)

        # Left-hand side: count the feature functions that fire on the
        # labelled tag pairs (y[k-1], y[k]) at positions 1..len(y)-1.
        for k in range(1, len(y)):
            for j in ff.metaff(y[k - 1], y[k], x_info, k):
                lhs_array[j] += 1

        # Quantities needed for the weighting factor.
        # NOTE(review): alpha/beta/z are presumably the forward matrix,
        # backward matrix and partition function -- confirm in sr.
        g = sr.g(w, x)
        e_g = np.exp(g)
        alpha = sr.alpha_mat(g)
        beta = sr.beta_mat(g)
        z = sr.Z(alpha, beta)

        # Right-hand side: weight every (position, m1, m2) combination.
        for k in range(np.shape(g)[0]):
            for m1 in range(num_tags):
                for m2 in range(num_tags):
                    factor = alpha[k, m1] * beta[k + 1, m2] * e_g[k, m1, m2] / z
                    # Row k of g describes the tag pair ending at sentence
                    # position k + 1 (positions start at 1, rows at 0), the
                    # same convention compute_gradient uses.  Passing the raw
                    # row index k would look up the features one position
                    # too early.
                    for j in ff.metaff(m1, m2, x_info, k + 1):
                        rhs_array[j] += factor

    return lhs_array, rhs_array
def collins_epoch(train_labels, train_sentences, w0):
    """
    This function is a single epoch of the Collins perceptron. An epoch ends
    after every example in the training data has been visited once, in the
    order given (this function does not shuffle; do that beforehand if
    required).

    train_labels - A list containing ALL of the labels in the training data.
    train_sentences - A list of ALL sentences in the training data.
    w0 - The initial parameter values, at the start of the epoch. NOTE: w0 is
         updated IN PLACE and the same object is returned.

    Returns: w0, after the updates from every training example.
    """
    Ntrain = len(train_sentences)  # number of training examples
    assert Ntrain == len(train_labels)

    # track average number of true feature functions per tag pair
    av_true = 0.0
    nevals = 0.0

    for nex, (sentence, label) in enumerate(zip(train_sentences, train_labels)):
        # progress report every 1000 examples
        if (nex + 1) % 1000 == 0:
            print(nex + 1)

        # score the sentence under the current weights and decode the best
        # label sequence
        g_ex = sr.g(w0, sentence)
        U_ex = sr.U(g_ex)
        y_best = sr.bestlabel(U_ex, g_ex)

        x_info = ffs.sent_precheck(sentence)

        # walk the true and predicted tag pairs side by side; pair number i
        # (0-based) ends at sentence position i + 1
        tag_pairs = zip(label[:-1], label[1:], y_best[:-1], y_best[1:])
        for i, (m1, m2, b1, b2) in enumerate(tag_pairs):
            trueFF = ffs.metaff(m1, m2, x_info, i + 1)
            bestFF = ffs.metaff(b1, b2, x_info, i + 1)

            av_true += float(len(trueFF))
            nevals += 1.0

            # perceptron update: reward features of the true pair, penalise
            # features of the predicted pair (they cancel when both agree)
            for j in trueFF:
                w0[j] += 1
            for j in bestFF:
                w0[j] -= 1

    # With no training data (or only one-word sentences) no tag pair is ever
    # evaluated; report 0.0 instead of dividing by zero.
    if nevals:
        average_true = av_true / nevals
    else:
        average_true = 0.0
    print("Average number of true FF's:  %s" % average_true)

    return w0
def g(w, x):
    """
    Calculates g functions for each pair of tags y in a sentence x.

    w - Weights associated with the feature functions, indexable by
        feature-function index.
    x - Sequence (sentence) over which to evaluate g.

    Returns: a (N-1) X M X M matrix where N is the length of the sentence,
    and there are M possible tags for each word. Entry [i-1, m1, m2] is the
    sum of the weights of the feature functions that ffs.metaff reports for
    tags (m1, m2) at position i. An empty sentence yields an empty
    (0, M, M) matrix.
    """
    M = 8  # number of possible tags
    N = len(x)  # length of sentence

    # An empty sentence has no tag pairs. Without this guard the first
    # dimension would be -1 and np.zeros would raise ValueError.
    if N == 0:
        return np.zeros(shape=(0, M, M))

    scores = np.zeros(shape=(N - 1, M, M))

    # preprocess the sentence
    x_info = ffs.sent_precheck(x)

    for i, m1, m2 in it.product(range(1, N), range(M), range(M)):
        # indices of the feature functions that fire for this tag pair;
        # position i is stored at row i - 1
        for j in ffs.metaff(m1, m2, x_info, i):
            scores[i - 1, m1, m2] += w[j]

    return scores
def compute_gradient(x, y, w, dw):
    """
    Fills dw with the gradient contribution of one training example and
    returns it.

    x  - The sentence.
    y  - The tag sequence supplied for x.
    w  - Current weights, passed on to sr.g.
    dw - Output buffer, indexable by feature-function index. It is reset and
         overwritten IN PLACE; the same object is returned.

    For each feature function j the result is
        (number of times j fires on the supplied tags y)
      - (sum of alpha * beta * exp(g) / z over every position and tag pair
         on which j fires).
    """
    # reset the caller's buffer without rebinding it
    dw *= 0.0

    # per-sentence information consumed by ff.metaff
    x_info = ff.sent_precheck(x)
    sentence_length = len(x)

    # factor arrays from the subroutines module
    # NOTE(review): presumably forward / backward matrices and the partition
    # function -- confirm in sr.
    g = sr.g(w, x)
    exp_g = np.exp(g)
    alpha = sr.alpha_mat(g)
    beta = sr.beta_mat(g)
    z = sr.Z(alpha, beta)

    # Model term: visit every (row, first tag, second tag) combination in
    # row-major order. Row `row` of g belongs to sentence position row + 1.
    for row, tag_a, tag_b in np.ndindex(np.shape(g)[0], 8, 8):
        weight = alpha[row, tag_a] * beta[row + 1, tag_b] * exp_g[row, tag_a, tag_b] / z
        # every feature function returned here has value 1 for this
        # combination, so each one receives the full weight
        for j in ff.metaff(tag_a, tag_b, x_info, row + 1):
            dw[j] -= weight

    # Data term: only the tag pairs actually present in y contribute.
    for pos in range(1, sentence_length):
        for j in ff.metaff(y[pos - 1], y[pos], x_info, pos):
            dw[j] += 1

    return dw
def LCL_single(x,y,w):
    """
    Computes the LCL (log conditional likelihood) for a single training
    example.

    x - The sentence.
    y - The tag sequence supplied for x.
    w - Weights, indexable by feature-function index.

    Returns: (sum of w[j] over every feature function j that fires on the
    tag pairs of y) minus log(z), where z comes from sr.Z.
    """
    x_info = ff.sent_precheck(x)

    # NOTE(review): z is presumably the partition function obtained from the
    # forward / backward matrices -- confirm in sr.
    g = sr.g(w, x)
    alpha = sr.alpha_mat(g)
    beta = sr.beta_mat(g)
    z = sr.Z(alpha, beta)

    # score of the supplied tag sequence: add the weight of every feature
    # function that fires on each consecutive tag pair
    sum_on_j = 0.0
    for i in range(1, len(x)):
        for j in ff.metaff(y[i - 1], y[i], x_info, i):
            sum_on_j += w[j]

    # The sequence score used to be computed and then dropped, so the
    # function returned only -log(z) regardless of y.
    return sum_on_j - math.log(z)