def _accumulate_full_grads(examples, label, vocab, W1, W2, b1, b2, Wlabel,
                           normalized, pars, alpha, dW, dU, dV, dVocab):
    """Accumulate, in place, the gradients contributed by every example.

    Builds the parse tree for each example, backpropagates through it, and
    adds the resulting derivatives into dW/dU/dV/dVocab (mutated in place).
    """
    for example in examples:
        tree_stuff = tm.build_tree(example, label, vocab, W1, W2, b1, b2,
                                   Wlabel, normalized)
        treeinfo = tree_stuff[0]
        tree_meanings = tree_stuff[1]
        Dlist = backprop_full(tree_meanings, treeinfo, label, pars, alpha,
                              normalized)
        dW += Dlist[0]
        dU += Dlist[1]
        dV += Dlist[2]
        dx = Dlist[3]
        # Word ids in `example` are 1-based, hence the -1 when indexing the
        # vocabulary gradient table.
        for i, word_id in enumerate(example):
            dVocab[word_id - 1, :] += dx[i, :]


def full_grad_full(W1, b1, W2, b2, Wlabel, alpha, neg_list, pos_list, vocab,
                   normalized):
    """Compute the mean gradient over the whole training set.

    Packs (W1, b1) and (W2, b2) into augmented matrices (weights with a bias
    column appended), accumulates per-example gradients from the negative
    examples (label [0, 1]) and positive examples (label [1, 0]), then
    averages by the total number of examples.

    Returns:
        (dW1, db1, dW2, db2, dWlabel, dVocab) — gradients matching the
        shapes of the corresponding input parameters.
    """
    d = np.shape(W1)[0]
    # W = [W1 | b1]: encoder weights with bias as the last column.
    W = np.zeros((d, (2 * d) + 1))
    W[:, :(2 * d)] = W1
    W[:, (2 * d)] = b1
    # U = [W2 | b2]: decoder weights with bias as the last column.
    U = np.zeros((2 * d, d + 1))
    U[:, :d] = W2
    U[:, d] = b2
    k = np.shape(Wlabel)[0]
    # V is an explicit copy of Wlabel (assumed shape (k, d)).
    V = np.zeros((k, d))
    V[:, :d] = Wlabel
    pars = (W, U, V)

    # Gradient accumulators, mutated in place by the helper.
    dW = np.zeros(np.shape(W))
    dU = np.zeros(np.shape(U))
    dV = np.zeros(np.shape(Wlabel))
    dVocab = np.zeros(np.shape(vocab))

    _accumulate_full_grads(neg_list, np.array([0.0, 1.0]), vocab, W1, W2, b1,
                           b2, Wlabel, normalized, pars, alpha, dW, dU, dV,
                           dVocab)
    _accumulate_full_grads(pos_list, np.array([1.0, 0.0]), vocab, W1, W2, b1,
                           b2, Wlabel, normalized, pars, alpha, dW, dU, dV,
                           dVocab)

    # Average over the full training set.
    n_examples = float(len(neg_list) + len(pos_list))
    dW /= n_examples
    dU /= n_examples
    dV /= n_examples
    dVocab /= n_examples

    # Unpack the augmented matrices back into weight/bias pieces.
    return dW[:, :(2 * d)], dW[:, (2 * d)], dU[:, :d], dU[:, d], dV, dVocab
def _rank_examples(examples, label, vocab, W1, W2, b1, b2, Wlabel, normalized,
                   phrase_list, score_list, meaning_list):
    """Score every tree node of every example; extend the result lists in place.

    For each example the tree has 2N-1 nodes: the N leaves (single words)
    followed by the N-1 merged internal nodes.  Each node contributes its
    word-id phrase, a log-odds sentiment score, and its meaning vector.
    """
    for example in examples:
        N = len(example)
        num_nodes = 2 * N - 1
        tree_stuff = tm.build_tree(example, label, vocab, W1, W2, b1, b2,
                                   Wlabel, normalized)
        tree_info = tree_stuff[0]
        tree_meanings = tree_stuff[1]

        # Leaves first: one single-word phrase per input token.
        sub_phrases = [[word] for word in example]
        # Internal nodes: concatenation of the two child phrases.
        # NOTE(review): assumes tree_info rows 0/1 hold integer child
        # indices — confirm tree_info dtype in tm.build_tree.
        for node in range(N, num_nodes):
            left = tree_info[0, node]
            right = tree_info[1, node]
            sub_phrases.append(sub_phrases[left] + sub_phrases[right])

        for j in range(num_nodes):
            predicted = tm.make_d(tree_meanings[:, j], Wlabel)
            # Log-odds of the positive class; higher = more positive.
            score_list.append(math.log(predicted[0] / predicted[1]))
            meaning_list.append(tree_meanings[:, j])
        phrase_list += sub_phrases


def phrase_ranking(neg_examples, pos_examples, vocab, W1, W2, b1, b2, Wlabel,
                   normalized):
    """Rank every phrase (tree node) of every example by sentiment score.

    Negative examples are processed first with label [0, 1], then positive
    examples with label [1, 0].

    Returns:
        (phrase_list, score_list, meaning_list) — parallel lists with one
        entry per tree node across all examples.
    """
    phrase_list = []
    score_list = []
    meaning_list = []
    _rank_examples(neg_examples, np.array([0.0, 1.0]), vocab, W1, W2, b1, b2,
                   Wlabel, normalized, phrase_list, score_list, meaning_list)
    _rank_examples(pos_examples, np.array([1.0, 0.0]), vocab, W1, W2, b1, b2,
                   Wlabel, normalized, phrase_list, score_list, meaning_list)
    return phrase_list, score_list, meaning_list
def compare_predict(W1, b1, W2, b2, Wlabel, pos_neg, example, vocab,
                    normalized):
    """Check whether the tree's root prediction agrees with the true label.

    Builds the parse tree for *example*, reads the prediction at the root
    (last column of the prediction array), and compares it against the
    one-hot label selected by *pos_neg* (1 -> positive [1, 0], otherwise
    negative [0, 1]).

    Returns:
        1.0 if the predicted probability mass on the true class is at
        least 0.5, else 0.0.
    """
    label = np.array([1.0, 0.0]) if pos_neg == 1 else np.array([0.0, 1.0])
    tree_data = tm.build_tree(example, label, vocab, W1, W2, b1, b2, Wlabel,
                              normalized)
    root_prediction = tree_data[3][:, -1]
    confidence = np.dot(label, root_prediction)
    return 1.0 if confidence >= 0.5 else 0.0