def trainer_initialization(m_trainer, trees, params, d, c, len_voc, rel_list, name_term, name_word):
    """Append one CRF training sequence per tree to ``m_trainer``.

    For every tree the node vectors are loaded from the embedding matrix,
    ``prop.forward_prop`` is run, and the per-node hidden vectors (``node.p``)
    together with a 15-dimensional POS vector per node are converted into CRF
    features by ``sent2features``.

    Args:
        m_trainer: object exposing ``append(features, labels)``; presumably a
            CRF trainer -- confirm against the caller.
        trees: iterable of trees exposing ``get_nodes()``, ``nodes`` and
            ``get(i)``.
        params: packed parameters, unpacked by ``unroll_params_noWcrf``.
        d: width of the word / hidden vectors.
        c, len_voc, rel_list: forwarded to ``unroll_params_noWcrf`` and
            ``prop.forward_prop``.
        name_term, name_word: forwarded unchanged to ``sent2features``.

    Returns:
        The same ``m_trainer`` object, after all sequences were appended.
    """
    param_list = unroll_params_noWcrf(params, d, c, len_voc, rel_list)
    (rel_dict, Wv, b, L) = param_list

    for tree in trees:
        # load the current embedding column of every node before the forward pass
        for node in tree.get_nodes():
            node.vec = L[:, node.ind].reshape((d, 1))

        prop.forward_prop(param_list, tree, d, c)

        # node 0 is skipped below, so every per-sentence buffer has one row less
        n_tokens = len(tree.nodes) - 1
        sent = []
        h_input = np.ones((n_tokens, d))
        y_label = np.zeros((n_tokens,), dtype=int)
        pos_mat = np.zeros((n_tokens, 15))

        for ind, node in enumerate(tree.nodes):
            if ind == 0:
                continue
            row = ind - 1

            # first 15 entries of the POS vector of this node
            pos_mat[row, :] = np.ravel(pos2vec(node.pos))[:15]

            if tree.get(ind).is_word == 0:
                # non-word token: label 0, no surface form, all-zero hidden row
                y_label[row] = 0
                sent.append(None)
                h_input[row, :] = 0
            else:
                y_label[row] = node.trueLabel
                sent.append(node.word)
                # first d entries of the hidden vector written by forward_prop
                h_input[row, :] = np.ravel(node.p)[:d]

        crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
        crf_sent_labels = [str(item) for item in y_label]
        m_trainer.append(crf_sent_features, crf_sent_labels)

    return m_trainer
def trainer_initialization(m_trainer, trees, params, d, c, len_voc, rel_list):
    """Append one CRF training sequence per tree to ``m_trainer``.

    For every tree the node vectors are loaded from the embedding matrix,
    ``prop.forward_prop`` is run, and the per-node hidden vectors (``node.p``)
    are converted into CRF features by ``sent2features``.

    Args:
        m_trainer: object exposing ``append(features, labels)``; presumably a
            CRF trainer -- confirm against the caller.
        trees: iterable of trees exposing ``get_nodes()``, ``nodes`` and
            ``get(i)``.
        params: packed parameters, unpacked by ``unroll_params_noWcrf``.
        d: width of the word / hidden vectors.
        c, len_voc, rel_list: forwarded to ``unroll_params_noWcrf`` and
            ``prop.forward_prop``.

    Returns:
        The same ``m_trainer`` object, after all sequences were appended.
    """
    param_list = unroll_params_noWcrf(params, d, c, len_voc, rel_list)
    (rel_dict, Wv, b, L) = param_list

    for tree in trees:
        # load the current embedding column of every node before the forward pass
        for node in tree.get_nodes():
            node.vec = L[:, node.ind].reshape((d, 1))

        prop.forward_prop(param_list, tree, d, c)

        # node 0 is skipped below, so the buffers hold len(tree.nodes) - 1 rows
        n_tokens = len(tree.nodes) - 1
        sent = []
        # input feature matrix to the CRF for this sentence
        h_input = np.ones((n_tokens, d))
        y_label = np.zeros((n_tokens,), dtype=int)

        for ind, node in enumerate(tree.nodes):
            if ind == 0:
                continue
            row = ind - 1

            if tree.get(ind).is_word == 0:
                # non-word token: label 0, no surface form, all-zero hidden row
                y_label[row] = 0
                sent.append(None)
                h_input[row, :] = 0
            else:
                y_label[row] = node.trueLabel
                sent.append(node.word)
                # first d entries of the hidden vector written by forward_prop
                h_input[row, :] = np.ravel(node.p)[:d]

        crf_sent_features = sent2features(d, sent, h_input)
        crf_sent_labels = [str(item) for item in y_label]
        m_trainer.append(crf_sent_features, crf_sent_labels)

    return m_trainer
def par_objective(epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Compute regularised cost and gradients for a single training tree.

    Steps: run the forward pass on the tree, rebuild the CRF input features
    from the resulting hidden vectors, run one ``trainer.train`` call for
    instance ``num``, route the feature gradient back to ``node.grad_h`` and
    run ``prop.backprop``; finally add L2 regularisation.

    Args:
        epoch: used (with ``num``) to build the model file name.
        data: the tree of this instance.
        rel_dict, Wv, b, L: network parameters (relation matrices, transform
            matrix, bias, embedding matrix).
        Wcrf: CRF weights, passed to ``trainer.train`` and returned as is.
        d: width of the word / hidden vectors.
        c, len_voc, rel_list: forwarded to the ``prop`` helpers and
            ``init_crfrnn_grads``.
        lambdas: two-element sequence ``[lambda_W, lambda_L]``.
        trainer: object exposing ``modify(features, index)`` and ``train(...)``.
        num: index of this instance inside the trainer.
        eta, dec: forwarded to ``trainer.train`` as ``eta`` and ``decay``.
        boolean: when it compares equal to ``True`` a model file name is
            passed to ``trainer.train``; otherwise the empty string is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.
    """
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)
    # one-element array handed to trainer.train as ``loss``; it is read back
    # below, so the trainer is expected to write the loss into it in place
    error_sum = np.zeros(1)

    # forward pass for this single instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))
    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # CRF input: one row per node except node 0; rows of non-word tokens stay zero
    n_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((n_tokens, d))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        if tree.get(ind).is_word == 0:
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[ind - 1, :] = np.ravel(node.p)[:d]

    crf_sent_features = sent2features(d, sent, h_input)
    # hidden vectors changed with the parameters, so refresh instance ``num``
    trainer.modify(crf_sent_features, num)

    # CRF feature dimension per token
    attr_size = 3 * (d + 1) + 3
    # gradient w.r.t. the CRF input features; zero here and read back after
    # training, so trainer.train is expected to fill it in place
    delta_features = np.zeros(n_tokens * attr_size)

    # ``== True`` is kept on purpose so exactly the same values trigger saving
    if boolean == True:
        model_name = str(epoch) + str(num) + 'crf.model'
    else:
        model_name = ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # negated per-token chunks of the feature gradient
    grad_h = [-delta_features[k * attr_size:(k + 1) * attr_size]
              for k in range(n_tokens)]

    # slice of a token's own chunk, and the slices read from the chunks of the
    # following / preceding token
    own_part = slice(1, d + 1)
    part_in_next = slice(2 * d + 2, 3 * d + 2)
    part_in_prev = slice(d + 2, 2 * d + 2)

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue
        node.grad_h = grad_h[ind - 1][own_part].reshape(d, 1)
        # a one-token sentence has no neighbours to collect from
        if n_tokens < 2:
            continue
        # NOTE(review): ``ind < n_tokens - 1`` means the second-to-last token
        # never collects from its following token -- confirm this is intended.
        look_ahead = ind == 1 or ind < n_tokens - 1
        look_back = ind != 1
        if look_ahead and tree.get(ind + 1).is_word != 0:
            node.grad_h += grad_h[ind][part_in_next].reshape(d, 1)
        if look_back and tree.get(ind - 1).is_word != 0:
            node.grad_h += grad_h[ind - 2][part_in_prev].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    [lambda_W, lambda_L] = lambdas
    reg_cost = 0.0
    # NOTE(review): ``sum`` is whatever that name resolves to at module level;
    # the builtin would only reduce over the first axis of a 2-D array --
    # confirm numpy's ``sum`` is the one in scope.
    # regularization for relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key]**2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # regularization for transformation matrix; the bias is only averaged
    reg_cost += 0.5 * lambda_W * sum(Wv**2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # regularization for word embedding
    reg_cost += 0.5 * lambda_L * sum(L**2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost
    return cost, grads, Wcrf
def evaluate(epoch, inst_ind, data, rel_dict, Wv, b, We, vocab, rel_list, d, c, mixed=False):
    """Tag every test tree with the stored CRF model and collect the labels.

    The model file ``str(epoch) + str(inst_ind) + 'crf.model'`` is opened with
    ``pycrfsuite.Tagger``. Word vectors come from ``We`` for words in
    ``vocab``, from the word2vec sample file for words found there, and are
    drawn at random otherwise.

    NOTE(review): as written this function has no ``return``; ``true``,
    ``predict`` and ``count`` are filled but never handed back -- confirm
    whether the scoring / return code is missing from this file.

    Args:
        epoch, inst_ind: used to build the CRF model file name.
        data: sequence of test trees.
        rel_dict, Wv, b, We: network parameters passed to ``prop.forward_prop``.
        vocab: container of known (lower-cased) words.
        rel_list: not used in the visible body.
        d: width of the word / hidden vectors.
        c: forwarded to ``prop.forward_prop``.
        mixed: selects the branch that extends a dictionary vector with 50
            random values (see the review note in the body).
    """
    # output labels
    tagger = pycrfsuite.Tagger()
    tagger.open(str(epoch) + str(inst_ind) + 'crf.model')

    # word2vec lookup: the d fields before the last one are the vector,
    # everything in front of them (re-joined with ',') is the word
    dictionary = {}
    with open('util/data_semEval/w2v_sample.txt', 'r') as dic_file:
        for line in dic_file:
            fields = line.split(",")
            n_fields = len(fields)
            word = ','.join(fields[:n_fields - d - 1])
            dictionary[word] = np.asarray(
                [float(element) for element in fields[n_fields - d - 1:n_fields - 1]])

    # drop trees whose node 0 has is_word == 0
    test_trees = data
    bad_trees = []
    for ind, tree in enumerate(test_trees):
        if tree.get(0).is_word == 0:
            bad_trees.append(ind)
    # delete from the back so the remaining indices stay valid
    for ind in bad_trees[::-1]:
        test_trees = np.delete(test_trees, ind)

    true = []
    predict = []
    count = 0  # incremented for words that received a random vector

    for tree in test_trees:
        nodes = tree.get_nodes()
        n_tokens = len(tree.nodes) - 1
        sent = []
        h_input = np.zeros((n_tokens, d))
        y_label = np.zeros((n_tokens,), dtype=int)

        for node in nodes:
            token = node.word.lower()
            if token in vocab:
                node.vec = We[:, node.ind].reshape((d, 1))
            elif token in dictionary:
                if mixed:
                    # NOTE(review): numpy arrays have no ``append`` method, so
                    # this line raises AttributeError; and a d-element vector
                    # plus 50 values cannot be reshaped to (d, 1). Left
                    # unchanged because the intended layout is not visible here.
                    node.vec = (dictionary[token].append(
                        2 * np.random.rand(50) - 1)).reshape((d, 1))
                else:
                    node.vec = dictionary[token].reshape(d, 1)
            else:
                node.vec = np.random.rand(d, 1)
                count += 1

        prop.forward_prop([rel_dict, Wv, b, We], tree, d, c, labels=False)

        # rows of non-word tokens stay zero and keep label 0
        for index, node in enumerate(tree.nodes):
            if index == 0:
                continue
            if tree.get(index).is_word == 0:
                sent.append(None)
            else:
                y_label[index - 1] = node.trueLabel
                sent.append(node.word)
                h_input[index - 1, :] = np.ravel(node.p)[:d]

        crf_sent_features = sent2features(d, sent, h_input)
        true.extend(str(item) for item in y_label)

        # predict
        prediction = tagger.tag(crf_sent_features)
        predict.extend(prediction)
def par_objective(seed_i, name_term, name_word, epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Compute regularised cost and gradients for a single training tree.

    Steps: run the forward pass on the tree, rebuild the CRF input features
    (hidden vectors plus a 15-dimensional POS vector per token), run one
    ``trainer.train`` call for instance ``num``, route the feature gradient
    back to ``node.grad_h`` and run ``prop.backprop``; finally add L2
    regularisation.

    Args:
        seed_i: used (with ``epoch``) to build the model file name.
        name_term, name_word: forwarded unchanged to ``sent2features``.
        epoch: used (with ``seed_i``) to build the model file name.
        data: the tree of this instance.
        rel_dict, Wv, b, L: network parameters (relation matrices, transform
            matrix, bias, embedding matrix).
        Wcrf: CRF weights, passed to ``trainer.train`` and returned as is.
        d: width of the word / hidden vectors.
        c, len_voc, rel_list: forwarded to the ``prop`` helpers and
            ``init_crfrnn_grads``.
        lambdas: two-element sequence ``[lambda_W, lambda_L]``.
        trainer: object exposing ``modify(features, index)`` and ``train(...)``.
        num: index of this instance inside the trainer.
        eta, dec: forwarded to ``trainer.train`` as ``eta`` and ``decay``.
        boolean: when it compares equal to ``True`` a model file name is
            passed to ``trainer.train``; otherwise the empty string is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.
    """
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)
    # one-element array handed to trainer.train as ``loss``; it is read back
    # below, so the trainer is expected to write the loss into it in place
    error_sum = np.zeros(1)

    # forward pass for this single instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))
    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # CRF input: one row per node except node 0; rows of non-word tokens stay zero
    n_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((n_tokens, d))
    pos_mat = np.zeros((n_tokens, 15))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        # first 15 entries of the POS vector of this node
        pos_mat[ind - 1, :] = np.ravel(pos2vec(node.pos))[:15]
        if tree.get(ind).is_word == 0:
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[ind - 1, :] = np.ravel(node.p)[:d]

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    # hidden vectors changed with the parameters, so refresh instance ``num``
    trainer.modify(crf_sent_features, num)

    # CRF feature dimension per token
    attr_size = 3 * (d + 2 + 15 + 1) + 3
    # gradient w.r.t. the CRF input features; zero here and read back after
    # training, so trainer.train is expected to fill it in place
    delta_features = np.zeros(n_tokens * attr_size)

    # ``== True`` is kept on purpose so exactly the same values trigger saving
    if boolean == True:
        model_name = str(epoch) + str(seed_i) + 'crf.model'
    else:
        model_name = ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # negated per-token chunks of the feature gradient
    grad_h = [-delta_features[k * attr_size:(k + 1) * attr_size]
              for k in range(n_tokens)]

    # slice of a token's own chunk, and the slices read from the chunks of the
    # following / preceding token
    own_part = slice(1, d + 1)
    part_in_next = slice(2 * d + 2 + 2 * 17, 3 * d + 2 + 2 * 17)
    part_in_prev = slice(d + 2 + 17, 2 * d + 2 + 17)

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue
        node.grad_h = grad_h[ind - 1][own_part].reshape(d, 1)
        # a one-token sentence has no neighbours to collect from
        if n_tokens < 2:
            continue
        # NOTE(review): ``ind < n_tokens - 1`` means the second-to-last token
        # never collects from its following token -- confirm this is intended.
        look_ahead = ind == 1 or ind < n_tokens - 1
        look_back = ind != 1
        if look_ahead and tree.get(ind + 1).is_word != 0:
            node.grad_h += grad_h[ind][part_in_next].reshape(d, 1)
        if look_back and tree.get(ind - 1).is_word != 0:
            node.grad_h += grad_h[ind - 2][part_in_prev].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    [lambda_W, lambda_L] = lambdas
    reg_cost = 0.0
    # NOTE(review): ``sum`` is whatever that name resolves to at module level;
    # the builtin would only reduce over the first axis of a 2-D array --
    # confirm numpy's ``sum`` is the one in scope.
    # regularization for relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # regularization for transformation matrix; the bias is only averaged
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # regularization for word embedding
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost
    return cost, grads, Wcrf
def par_objective(name_term, name_word, epoch, seed_i, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc, \
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Compute regularised cost and gradients for a single training tree.

    Variant whose gradient routing reads from up to two tokens on either side
    of the current token. Runs the forward pass, rebuilds the CRF input
    features (hidden vectors plus a 15-dimensional POS vector per token),
    calls ``trainer.train`` once for instance ``num``, routes the feature
    gradient back to ``node.grad_h``, runs ``prop.backprop`` and adds L2
    regularisation.

    Args:
        name_term, name_word: forwarded unchanged to ``sent2features``.
        epoch, seed_i: used to build the model file name.
        data: the tree of this instance.
        rel_dict, Wv, b, L: network parameters.
        Wcrf: CRF weights, passed to ``trainer.train`` and returned as is.
        d: width of the word / hidden vectors.
        c, len_voc, rel_list: forwarded to the ``prop`` helpers and
            ``init_crfrnn_grads``.
        lambdas: two-element sequence ``[lambda_W, lambda_L]``.
        trainer: object exposing ``modify(features, index)`` and ``train(...)``.
        num: index of this instance inside the trainer.
        eta, dec: forwarded to ``trainer.train`` as ``eta`` and ``decay``.
        boolean: when equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise the empty string is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.
    """
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)
    # one-element array passed as ``loss``; read back as error_sum[0] at the end
    error_sum = np.zeros(1)
    num_nodes = 0  # never read in this function
    tree_size = 0

    # compute error and gradient for this tree
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape( (d, 1) )

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size += len(nodes)

    # after the network forward pass, build the CRF input
    sent = []
    h_input = np.zeros((len(tree.nodes) - 1, d))
    y_label = np.zeros((len(tree.nodes) - 1,), dtype = int)
    # POS matrix: one 15-entry row per token
    pos_mat = np.zeros((len(tree.nodes) - 1, 15))

    # node 0 is skipped; row ``ind - 1`` belongs to node ``ind``
    for ind, node in enumerate(tree.nodes):
        if ind != 0:
            # get pos vector
            pos = node.pos
            pos_vec = pos2vec(pos)
            for i in range(15):
                pos_mat[ind - 1, i] = pos_vec[i]

            if tree.get(ind).is_word == 0:
                # non-word token: label 0, no surface form, zero hidden row
                y_label[ind - 1] = 0
                sent.append(None)
                for i in range(d):
                    h_input[ind - 1][i] = 0
            else:
                y_label[ind - 1] = node.trueLabel
                sent.append(node.word)
                for i in range(d):
                    h_input[ind - 1][i] = node.p[i]

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    crf_sent_labels = [str(item) for item in y_label]  # never read in this function

    # refresh the stored features of instance ``num``
    trainer.modify(crf_sent_features, num)

    # per-token CRF feature dimension and total gradient length
    attr_size = 5 * (d + 2 + 15 + 1) + 5
    d_size = (len(tree.nodes) - 1) * attr_size
    # zero here and read back below, so trainer.train is expected to fill it in place
    delta_features = np.zeros(d_size)

    # check if we need to store the model
    if boolean == True:
        trainer.train(model=str(epoch)+str(seed_i)+'crf.model', weight=Wcrf, delta=delta_features, inst=num, eta=eta, decay=dec, loss=error_sum, check=1)
    else:
        trainer.train(model='', weight=Wcrf, delta=delta_features, inst=num, eta=eta, decay=dec, loss=error_sum, check=1)

    # split the negated feature gradient into one chunk of attr_size per token
    grad_h = []
    start = 0
    for ind, node in enumerate(tree.nodes):
        if ind != 0:
            grad_h.append(-delta_features[start: start + attr_size])
            start += attr_size

    # route the chunks to node.grad_h; grad_h[k] is the chunk of node k + 1
    for ind, node in enumerate(tree.nodes):
        if ind != 0:
            if tree.get(ind).is_word != 0:
                # contribution from the token's own chunk
                node.grad_h = grad_h[ind - 1][1: d + 1].reshape(d, 1)
                # check if the sentence only contains one word
                if len(tree.nodes) > 2:
                    if ind == 1:
                        # first token: only following tokens are consulted
                        if tree.get(ind + 1).is_word != 0:
                            node.grad_h += grad_h[ind][3 * d + 2 + 3 * 17: 4 * d + 2 + 3 * 17].reshape(d, 1)
                        # NOTE(review): this slice starts at 4 * d + 3 while the
                        # others use ... + 2 -- confirm against sent2features.
                        if len(tree.nodes) > 3 and tree.get(ind + 2).is_word != 0:
                            node.grad_h += grad_h[ind + 1][4 * d + 3 + 4 * 17: 5 * d + 3 + 4 * 17].reshape(d, 1)
                    # NOTE(review): ``ind < len(sent) - 1`` sends the
                    # second-to-last token to the ``else`` branch, which never
                    # looks at the following token -- confirm this is intended.
                    elif ind < len(sent) - 1:
                        if tree.get(ind + 1).is_word != 0:
                            node.grad_h += grad_h[ind][3 * d + 2 + 3 * 17: 4 * d + 2 + 3 * 17].reshape(d, 1)
                        if tree.get(ind - 1).is_word != 0:
                            node.grad_h += grad_h[ind - 2][d + 2 + 17: 2 * d + 2 + 17].reshape(d, 1)
                        if ind > 2 and tree.get(ind - 2).is_word != 0:
                            node.grad_h += grad_h[ind - 3][2 * d + 2 + 2 * 17: 3 * d + 2 + 2 * 17].reshape(d, 1)
                        if ind < len(sent) - 2 and tree.get(ind + 2).is_word != 0:
                            node.grad_h += grad_h[ind + 1][4 * d + 3 + 4 * 17: 5 * d + 3 + 4 * 17].reshape(d, 1)
                    else:
                        # remaining tokens: only preceding tokens are consulted
                        if tree.get(ind - 1).is_word != 0:
                            node.grad_h += grad_h[ind - 2][d + 2 + 17: 2 * d + 2 + 17].reshape(d, 1)
                        # NOTE(review): guarded by ``len(nodes) > 3`` rather than
                        # ``ind > 2`` as in the branch above; for ind == 2 this
                        # reads tree.get(0) and grad_h[-1], which wraps to the
                        # last token's chunk -- confirm.
                        if len(nodes) > 3 and tree.get(ind - 2).is_word != 0:
                            node.grad_h += grad_h[ind - 3][2 * d + 2 + 2 * 17: 3 * d + 2 + 2 * 17].reshape(d,1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    [lambda_W, lambda_L] = lambdas

    reg_cost = 0.0
    # NOTE(review): ``sum`` is whatever that name resolves to at module level;
    # the builtin would only reduce over the first axis of a 2-D array --
    # confirm numpy's ``sum`` is the one in scope.
    # relation matrices: average the gradient and add the L2 term
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # transformation matrix gets the L2 term; the bias gradient is only averaged
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # word embedding matrix, weighted by lambda_L
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf