def par_objective(seed_i, name_term, name_word, epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Return the regularized cost and gradients for one tree (3-slot CRF features).

    Runs a forward pass over the tree, hands the per-token hidden vectors and
    POS vectors to the CRF trainer, routes the resulting feature deltas back
    onto ``node.grad_h`` of every word token, back-propagates through the
    tree, and finally adds L2 regularization.

    Args:
        seed_i: used only to build the file name of a stored CRF model.
        name_term: forwarded unchanged to ``sent2features``.
        name_word: forwarded unchanged to ``sent2features``.
        epoch: used only to build the file name of a stored CRF model.
        data: the tree to process; must provide ``get_nodes()``, ``nodes``
            and ``get(index)``. Entry 0 of ``nodes`` is skipped throughout.
        rel_dict: mapping from relation key to its parameter array.
        Wv: parameter array forwarded to forward/backward propagation.
        b: parameter forwarded to forward/backward propagation.
        L: array whose column ``node.ind`` becomes ``node.vec``.
        Wcrf: CRF weights; passed to ``trainer.train`` and returned as is.
        d: length of a hidden vector.
        c: forwarded to ``init_crfrnn_grads`` and the propagation calls.
        len_voc: forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: keys of ``rel_dict`` to regularize.
        lambdas: pair ``(lambda_W, lambda_L)`` of regularization strengths.
        trainer: object offering ``modify(features, num)`` and ``train(...)``.
        num: instance index given to the trainer.
        eta: forwarded to ``trainer.train`` as ``eta``.
        dec: forwarded to ``trainer.train`` as ``decay``.
        boolean: a model file name is passed to the trainer only when this
            compares equal to ``True``.

    Returns:
        Tuple ``(cost, grads, Wcrf)`` where ``cost`` is the loss slot written
        by the trainer plus the regularization cost.
    """
    pos_dim = 15  # number of POS entries copied per token
    # Stride constant of the context-slot offsets below; presumably the 17
    # non-hidden features of one slot -- confirm against sent2features.
    slot_extra = 17

    grads = init_crfrnn_grads(rel_list, d, c, len_voc)
    error_sum = np.zeros(1)

    # forward pass for this single instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))
    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # CRF input: one row per token, entry 0 of tree.nodes is skipped
    n_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((n_tokens, d))
    pos_mat = np.zeros((n_tokens, pos_dim))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        row = ind - 1
        # flattened so that a column vector works as well as a flat sequence
        pos_mat[row, :] = np.asarray(pos2vec(node.pos)).reshape(-1)[:pos_dim]
        if tree.get(ind).is_word == 0:
            # non-word token: no surface form, hidden row stays all zero
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[row, :] = np.asarray(node.p).reshape(-1)[:d]

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    # hidden vectors change with the parameters, so refresh the CRF input
    trainer.modify(crf_sent_features, num)

    attr_size = 3 * (d + 2 + 15 + 1) + 3
    # flat buffer for the deltas w.r.t. the CRF input features; presumably
    # filled in place by trainer.train (it is only read afterwards)
    delta_features = np.zeros(n_tokens * attr_size)

    # `== True` is kept on purpose: truthy values other than True/1 do not
    # trigger storing the model.
    model_name = str(epoch) + str(seed_i) + 'crf.model' if boolean == True else ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # one row of negated deltas per token
    grad_h = (-delta_features).reshape(n_tokens, attr_size)

    own_slot = slice(1, d + 1)
    # slot read from the previous token's row
    in_prev_row = slice(d + 2 + slot_extra, 2 * d + 2 + slot_extra)
    # slot read from the next token's row
    in_next_row = slice(2 * d + 2 + 2 * slot_extra, 3 * d + 2 + 2 * slot_extra)

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue
        node.grad_h = grad_h[ind - 1][own_slot].reshape(d, 1)
        if n_tokens < 2:
            # single-token sentence: there are no neighbours
            continue
        # NOTE(review): `ind` is 1-based, so `ind < n_tokens - 1` also treats
        # the second-to-last token as having no next neighbour. Looks like an
        # off-by-one (expected `ind < n_tokens`) -- confirm against
        # sent2features before changing, since it alters the gradients.
        use_next = ind == 1 or ind < n_tokens - 1
        use_prev = ind != 1
        if use_next and tree.get(ind + 1).is_word != 0:
            node.grad_h += grad_h[ind][in_next_row].reshape(d, 1)
        if use_prev and tree.get(ind - 1).is_word != 0:
            node.grad_h += grad_h[ind - 2][in_prev_row].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    lambda_W, lambda_L = lambdas
    reg_cost = 0.0
    # NOTE(review): `sum` is whatever the module has in scope. The builtin
    # would give per-column sums for a 2-D array instead of a scalar --
    # confirm that numpy's sum is the one imported here.

    # relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # Wv is regularized, the gradient in slot 2 is only averaged
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # L (the matrix that seeds node.vec)
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost
    return cost, grads, Wcrf
def par_objective(epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Return the regularized cost and gradients for one tree (hidden-vector CRF features).

    Runs a forward pass over the tree, hands the per-token hidden vectors to
    the CRF trainer, routes the resulting feature deltas back onto
    ``node.grad_h`` of every word token, back-propagates through the tree,
    and finally adds L2 regularization.

    Args:
        epoch: used only to build the file name of a stored CRF model.
        data: the tree to process; must provide ``get_nodes()``, ``nodes``
            and ``get(index)``. Entry 0 of ``nodes`` is skipped throughout.
        rel_dict: mapping from relation key to its parameter array.
        Wv: parameter array forwarded to forward/backward propagation.
        b: parameter forwarded to forward/backward propagation.
        L: array whose column ``node.ind`` becomes ``node.vec``.
        Wcrf: CRF weights; passed to ``trainer.train`` and returned as is.
        d: length of a hidden vector.
        c: forwarded to ``init_crfrnn_grads`` and the propagation calls.
        len_voc: forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: keys of ``rel_dict`` to regularize.
        lambdas: pair ``(lambda_W, lambda_L)`` of regularization strengths.
        trainer: object offering ``modify(features, num)`` and ``train(...)``.
        num: index of the instance; given to the trainer and also used in
            the file name of a stored CRF model.
        eta: forwarded to ``trainer.train`` as ``eta``.
        dec: forwarded to ``trainer.train`` as ``decay``.
        boolean: a model file name is passed to the trainer only when this
            compares equal to ``True``.

    Returns:
        Tuple ``(cost, grads, Wcrf)`` where ``cost`` is the loss slot written
        by the trainer plus the regularization cost.
    """
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)
    error_sum = np.zeros(1)

    # forward pass for this single instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))
    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # CRF input: one row per token, entry 0 of tree.nodes is skipped
    n_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((n_tokens, d))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        if tree.get(ind).is_word == 0:
            # non-word token: no surface form, hidden row stays all zero
            sent.append(None)
        else:
            sent.append(node.word)
            # flattened so that a (d, 1) column works as well as a flat vector
            h_input[ind - 1, :] = np.asarray(node.p).reshape(-1)[:d]

    crf_sent_features = sent2features(d, sent, h_input)
    # hidden vectors change with the parameters, so refresh the CRF input
    # features of instance `num`
    trainer.modify(crf_sent_features, num)

    # CRF feature dimension per token
    attr_size = 3 * (d + 1) + 3
    # flat buffer for the deltas w.r.t. the CRF input features; presumably
    # filled in place by trainer.train (it is only read afterwards)
    delta_features = np.zeros(n_tokens * attr_size)

    # `== True` is kept on purpose: truthy values other than True/1 do not
    # trigger storing the model.
    model_name = str(epoch) + str(num) + 'crf.model' if boolean == True else ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # one row of negated deltas per token, to be back-propagated into the tree
    grad_h = (-delta_features).reshape(n_tokens, attr_size)

    own_slot = slice(1, d + 1)
    in_prev_row = slice(d + 2, 2 * d + 2)      # slot read from the previous token's row
    in_next_row = slice(2 * d + 2, 3 * d + 2)  # slot read from the next token's row

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue
        node.grad_h = grad_h[ind - 1][own_slot].reshape(d, 1)
        if n_tokens < 2:
            # single-token sentence: there are no neighbours
            continue
        # NOTE(review): `ind` is 1-based, so `ind < n_tokens - 1` also treats
        # the second-to-last token as having no next neighbour. Looks like an
        # off-by-one (expected `ind < n_tokens`) -- confirm against
        # sent2features before changing, since it alters the gradients.
        use_next = ind == 1 or ind < n_tokens - 1
        use_prev = ind != 1
        if use_next and tree.get(ind + 1).is_word != 0:
            node.grad_h += grad_h[ind][in_next_row].reshape(d, 1)
        if use_prev and tree.get(ind - 1).is_word != 0:
            node.grad_h += grad_h[ind - 2][in_prev_row].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    lambda_W, lambda_L = lambdas
    reg_cost = 0.0
    # NOTE(review): `sum` is whatever the module has in scope. The builtin
    # would give per-column sums for a 2-D array instead of a scalar --
    # confirm that numpy's sum is the one imported here.

    # relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # Wv is regularized, the gradient in slot 2 is only averaged
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # L (the matrix that seeds node.vec)
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost
    return cost, grads, Wcrf
def par_objective(name_term, name_word, epoch, seed_i, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Return the regularized cost and gradients for one tree (5-slot CRF features).

    Runs a forward pass over the tree, hands the per-token hidden vectors and
    POS vectors to the CRF trainer, routes the resulting feature deltas back
    onto ``node.grad_h`` of every word token (own slot plus the slots held by
    the rows of up to two tokens on either side), back-propagates through the
    tree, and finally adds L2 regularization.

    Args:
        name_term: forwarded unchanged to ``sent2features``.
        name_word: forwarded unchanged to ``sent2features``.
        epoch: used only to build the file name of a stored CRF model.
        seed_i: used only to build the file name of a stored CRF model.
        data: the tree to process; must provide ``get_nodes()``, ``nodes``
            and ``get(index)``. Entry 0 of ``nodes`` is skipped throughout.
        rel_dict: mapping from relation key to its parameter array.
        Wv: parameter array forwarded to forward/backward propagation.
        b: parameter forwarded to forward/backward propagation.
        L: array whose column ``node.ind`` becomes ``node.vec``.
        Wcrf: CRF weights; passed to ``trainer.train`` and returned as is.
        d: length of a hidden vector.
        c: forwarded to ``init_crfrnn_grads`` and the propagation calls.
        len_voc: forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: keys of ``rel_dict`` to regularize.
        lambdas: pair ``(lambda_W, lambda_L)`` of regularization strengths.
        trainer: object offering ``modify(features, num)`` and ``train(...)``.
        num: instance index given to the trainer.
        eta: forwarded to ``trainer.train`` as ``eta``.
        dec: forwarded to ``trainer.train`` as ``decay``.
        boolean: a model file name is passed to the trainer only when this
            compares equal to ``True``.

    Returns:
        Tuple ``(cost, grads, Wcrf)`` where ``cost`` is the loss slot written
        by the trainer plus the regularization cost.
    """
    pos_dim = 15  # number of POS entries copied per token
    # Stride constant of the context-slot offsets below; presumably the 17
    # non-hidden features of one slot -- confirm against sent2features.
    slot_extra = 17

    grads = init_crfrnn_grads(rel_list, d, c, len_voc)
    error_sum = np.zeros(1)

    # forward pass for this single tree
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))
    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # CRF input: one row per token, entry 0 of tree.nodes is skipped
    n_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((n_tokens, d))
    pos_mat = np.zeros((n_tokens, pos_dim))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        row = ind - 1
        # flattened so that a column vector works as well as a flat sequence
        pos_mat[row, :] = np.asarray(pos2vec(node.pos)).reshape(-1)[:pos_dim]
        if tree.get(ind).is_word == 0:
            # non-word token: no surface form, hidden row stays all zero
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[row, :] = np.asarray(node.p).reshape(-1)[:d]

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    trainer.modify(crf_sent_features, num)

    attr_size = 5 * (d + 2 + 15 + 1) + 5
    # flat buffer for the deltas w.r.t. the CRF input features; presumably
    # filled in place by trainer.train (it is only read afterwards)
    delta_features = np.zeros(n_tokens * attr_size)

    # `== True` is kept on purpose: truthy values other than True/1 do not
    # trigger storing the model.
    model_name = str(epoch) + str(seed_i) + 'crf.model' if boolean == True else ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # one row of negated deltas per token
    grad_h = (-delta_features).reshape(n_tokens, attr_size)

    own_slot = slice(1, d + 1)
    # slot read from the row of the token one / two positions before
    in_prev1_row = slice(d + 2 + slot_extra, 2 * d + 2 + slot_extra)
    in_prev2_row = slice(2 * d + 2 + 2 * slot_extra, 3 * d + 2 + 2 * slot_extra)
    # slot read from the row of the token one / two positions after
    in_next1_row = slice(3 * d + 2 + 3 * slot_extra, 4 * d + 2 + 3 * slot_extra)
    in_next2_row = slice(4 * d + 3 + 4 * slot_extra, 5 * d + 3 + 4 * slot_extra)

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue
        node.grad_h = grad_h[ind - 1][own_slot].reshape(d, 1)
        if n_tokens < 2:
            # single-token sentence: there are no neighbours
            continue

        if ind == 1:
            # first token: only following neighbours exist
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][in_next1_row].reshape(d, 1)
            if n_tokens > 2 and tree.get(ind + 2).is_word != 0:
                node.grad_h += grad_h[ind + 1][in_next2_row].reshape(d, 1)
        # NOTE(review): `ind` is 1-based, so `ind < n_tokens - 1` also sends
        # the second-to-last token to the "no next neighbour" branch. Looks
        # like an off-by-one (expected `ind < n_tokens`) -- confirm against
        # sent2features before changing, since it alters the gradients.
        elif ind < n_tokens - 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][in_next1_row].reshape(d, 1)
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][in_prev1_row].reshape(d, 1)
            if ind > 2 and tree.get(ind - 2).is_word != 0:
                node.grad_h += grad_h[ind - 3][in_prev2_row].reshape(d, 1)
            if ind < n_tokens - 2 and tree.get(ind + 2).is_word != 0:
                node.grad_h += grad_h[ind + 1][in_next2_row].reshape(d, 1)
        else:
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][in_prev1_row].reshape(d, 1)
            # `ind > 2` (rather than a check on the sentence length) keeps
            # `ind - 3` from going negative: with exactly three tokens,
            # ind == 2 lands in this branch, and a negative row index would
            # wrap around to the last token's row.
            if ind > 2 and tree.get(ind - 2).is_word != 0:
                node.grad_h += grad_h[ind - 3][in_prev2_row].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    lambda_W, lambda_L = lambdas
    reg_cost = 0.0
    # NOTE(review): `sum` is whatever the module has in scope. The builtin
    # would give per-column sums for a 2-D array instead of a scalar --
    # confirm that numpy's sum is the one imported here.

    # relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # Wv is regularized, the gradient in slot 2 is only averaged
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # L (the matrix that seeds node.vec)
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost
    return cost, grads, Wcrf