def par_objective(seed_i, name_term, name_word, epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc, \
    rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Return the regularised cost and gradients for one training tree.

    The tree is forward-propagated through the recursive network, the hidden
    vectors of its tokens (plus POS indicators) are installed in the CRF
    trainer as the features of instance ``num``, one CRF training call is
    made, and the feature deltas it reports are routed back onto the tree
    nodes before ``prop.backprop`` accumulates the network gradients.

    Args:
        seed_i: Only used to build the saved CRF model file name.
        name_term: Forwarded unchanged to ``sent2features``.
        name_word: Forwarded unchanged to ``sent2features``.
        epoch: Only used to build the saved CRF model file name.
        data: Tree of one instance; must offer ``get_nodes()``, ``nodes`` and
            ``get(index)``.  ``nodes[0]`` is skipped everywhere (presumably an
            artificial root -- confirm against the tree class).
        rel_dict: Mapping from relation key to its weight array.
        Wv: Transformation matrix handed to ``prop``; L2-regularised.
        b: Bias handed to ``prop``; not regularised.
        L: Embedding matrix; column ``node.ind`` becomes the ``(d, 1)`` input
            vector of a node.
        Wcrf: CRF weights handed to ``trainer.train`` and returned (presumably
            updated in place by the trainer -- confirm).
        d: Dimension of the embedding / hidden vectors.
        c: Forwarded to ``init_crfrnn_grads`` and ``prop``.
        len_voc: Forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: Keys of ``rel_dict`` whose matrices are regularised.
        lambdas: Pair ``(lambda_W, lambda_L)`` of L2 strengths for the weight
            matrices and for the embeddings.
        trainer: CRF trainer exposing ``modify(features, num)`` and
            ``train(...)``.
        num: Index of this instance inside the trainer.
        eta: Forwarded to ``trainer.train`` as ``eta``.
        dec: Forwarded to ``trainer.train`` as ``decay``.
        boolean: When equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise an empty name is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.  ``cost`` is the loss reported by the
        trainer plus the L2 penalty.  ``grads`` is the structure created by
        ``init_crfrnn_grads``: ``grads[0]`` (per relation key), ``grads[1]``
        and ``grads[3]`` are divided by the tree size and receive the L2
        term of ``rel_dict``, ``Wv`` and ``L`` respectively; ``grads[2]`` is
        only divided by the tree size.
    """
    n_pos = 15              # number of POS slots copied from pos2vec
    n_extra = n_pos + 2     # the "17" stride used by the slice offsets below

    def window(vec, lo):
        # d consecutive entries of a per-token delta vector, as a column
        return vec[lo: lo + d].reshape(d, 1)

    #initialize gradients
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)

    # one-element array so that trainer.train can hand the loss back
    # (presumably written in place -- it is read again for the cost below)
    error_sum = np.zeros(1)

    #compute for one instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    #after a nn forward pass, compute crf
    n_tokens = len(tree.nodes) - 1
    sent = []
    #input matrix composed of hidden vector from RNN
    h_input = np.zeros((n_tokens, d))
    #add pos matrix
    pos_mat = np.zeros((n_tokens, n_pos))

    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        row = ind - 1

        # Row assignment instead of a per-element loop; ravel() accepts both
        # flat and column-shaped vectors (assumes at least n_pos / d entries,
        # exactly as the element-wise copy did).
        pos_mat[row, :] = np.asarray(pos2vec(node.pos)).ravel()[:n_pos]

        if tree.get(ind).is_word == 0:
            #current token is punctuation: its h_input row stays all-zero
            sent.append(None)
        else:
            #current token is a word
            sent.append(node.word)
            h_input[row, :] = np.asarray(node.p).ravel()[:d]

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    #when parameters are updated, hidden vectors are also updated for crf input
    trainer.modify(crf_sent_features, num)

    #attr_size = 3 * d + 3
    attr_size = 3 * (d + 2 + n_pos + 1) + 3
    #delta for hidden matrix
    delta_features = np.zeros(n_tokens * attr_size)

    #check if we need to store the model
    # (explicit comparison kept so only a value equal to True saves a model)
    if boolean == True:
        model_name = str(epoch) + str(seed_i) + 'crf.model'
    else:
        model_name = ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features,
                  inst=num, eta=eta, decay=dec, loss=error_sum, check=1)

    #pass delta h to separate feature vectors (one negated slice per token)
    grad_h = [-delta_features[k * attr_size: (k + 1) * attr_size]
              for k in range(n_tokens)]

    # Offsets of this token's hidden vector inside a neighbour's delta
    # vector.  They depend on the attribute layout produced by
    # sent2features, which is not visible here -- keep them in sync.
    prev_lo = d + 2 + n_extra              # read from the previous token
    next_lo = 2 * d + 2 + 2 * n_extra      # read from the next token

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue

        node.grad_h = window(grad_h[ind - 1], 1)

        #check if the sentence only contains one word
        if len(tree.nodes) <= 2:
            continue

        is_first = ind == 1
        # NOTE(review): ind is 1-based and len(sent) is the token count, so
        # this bound also treats the second-to-last token as having no
        # successor.  Looks like an off-by-one (ind < len(sent)?) -- confirm
        # against sent2features before changing; behaviour is preserved here.
        is_interior = ind < len(sent) - 1

        if (is_first or is_interior) and tree.get(ind + 1).is_word != 0:
            node.grad_h += window(grad_h[ind], next_lo)
        if not is_first and tree.get(ind - 1).is_word != 0:
            node.grad_h += window(grad_h[ind - 2], prev_lo)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)
    lambda_W, lambda_L = lambdas

    # np.sum (not the builtin) so every penalty term is a scalar even for
    # 2-D parameter arrays.
    reg_cost = 0.0
    #regularization for relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * np.sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size + lambda_W * rel_dict[key]
    #regularization for transformation matrix and bias
    reg_cost += 0.5 * lambda_W * np.sum(Wv ** 2)
    grads[1] = grads[1] / tree_size + lambda_W * Wv
    grads[2] = grads[2] / tree_size
    #regularization for word embedding
    reg_cost += 0.5 * lambda_L * np.sum(L ** 2)
    grads[3] = grads[3] / tree_size + lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf
# Example #2
0
def par_objective(epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc, \
    rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Return the regularised cost and gradients for one training tree.

    The tree is forward-propagated through the recursive network, the hidden
    vectors of its tokens are installed in the CRF trainer as the features
    of instance ``num``, one CRF training call is made, and the feature
    deltas it reports are routed back onto the tree nodes before
    ``prop.backprop`` accumulates the network gradients.

    Args:
        epoch: Only used to build the saved CRF model file name.
        data: Tree of one instance; must offer ``get_nodes()``, ``nodes`` and
            ``get(index)``.  ``nodes[0]`` is skipped everywhere (presumably an
            artificial root -- confirm against the tree class).
        rel_dict: Mapping from relation key to its weight array.
        Wv: Transformation matrix handed to ``prop``; L2-regularised.
        b: Bias handed to ``prop``; not regularised.
        L: Embedding matrix; column ``node.ind`` becomes the ``(d, 1)`` input
            vector of a node.
        Wcrf: CRF weights handed to ``trainer.train`` and returned (presumably
            updated in place by the trainer -- confirm).
        d: Dimension of the embedding / hidden vectors.
        c: Forwarded to ``init_crfrnn_grads`` and ``prop``.
        len_voc: Forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: Keys of ``rel_dict`` whose matrices are regularised.
        lambdas: Pair ``(lambda_W, lambda_L)`` of L2 strengths for the weight
            matrices and for the embeddings.
        trainer: CRF trainer exposing ``modify(features, num)`` and
            ``train(...)``.
        num: Index of this instance inside the trainer; also part of the
            saved model file name.
        eta: Forwarded to ``trainer.train`` as ``eta``.
        dec: Forwarded to ``trainer.train`` as ``decay``.
        boolean: When equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise an empty name is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.  ``cost`` is the loss reported by the
        trainer plus the L2 penalty.  ``grads`` is the structure created by
        ``init_crfrnn_grads``: ``grads[0]`` (per relation key), ``grads[1]``
        and ``grads[3]`` are divided by the tree size and receive the L2
        term of ``rel_dict``, ``Wv`` and ``L`` respectively; ``grads[2]`` is
        only divided by the tree size.
    """

    def window(vec, lo):
        # d consecutive entries of a per-token delta vector, as a column
        return vec[lo:lo + d].reshape(d, 1)

    #initialize gradients
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)

    # one-element array so that trainer.train can hand the loss back
    # (presumably written in place -- it is read again for the cost below)
    error_sum = np.zeros(1)

    #compute for one instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    #after a rnn forward pass, compute crf
    n_tokens = len(tree.nodes) - 1
    sent = []
    #input matrix composed of hidden vector from RNN
    h_input = np.zeros((n_tokens, d))

    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue

        if tree.get(ind).is_word == 0:
            #current token is punctuation: its h_input row stays all-zero
            sent.append(None)
        else:
            #current token is a word
            sent.append(node.word)
            # Row assignment instead of a per-element loop; ravel() accepts
            # both flat and column-shaped vectors (assumes node.p holds at
            # least d values, exactly as the element-wise copy did).
            h_input[ind - 1, :] = np.asarray(node.p).ravel()[:d]

    crf_sent_features = sent2features(d, sent, h_input)
    #when parameters are updated, hidden vectors are also updated for crf input
    #this is for updating CRF input features, num is the index of the instance
    trainer.modify(crf_sent_features, num)

    # crf feature dimension
    attr_size = 3 * (d + 1) + 3
    #delta for hidden matrix from crf
    delta_features = np.zeros(n_tokens * attr_size)

    #check if we need to store the model
    # (explicit comparison kept so only a value equal to True saves a model)
    if boolean == True:
        model_name = str(epoch) + str(num) + 'crf.model'
    else:
        model_name = ''
    trainer.train(model=model_name,
                  weight=Wcrf,
                  delta=delta_features,
                  inst=num,
                  eta=eta,
                  decay=dec,
                  loss=error_sum,
                  check=1)

    #pass delta h to separate feature vectors to backpropagate to rnn
    grad_h = [-delta_features[k * attr_size:(k + 1) * attr_size]
              for k in range(n_tokens)]

    # Offsets of this token's hidden vector inside a neighbour's delta
    # vector.  They depend on the attribute layout produced by
    # sent2features, which is not visible here -- keep them in sync.
    prev_lo = d + 2          # read from the previous token
    next_lo = 2 * d + 2      # read from the next token

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue

        node.grad_h = window(grad_h[ind - 1], 1)

        #check if the sentence only contains one word
        if len(tree.nodes) <= 2:
            continue

        is_first = ind == 1
        # NOTE(review): ind is 1-based and len(sent) is the token count, so
        # this bound also treats the second-to-last token as having no
        # successor.  Looks like an off-by-one (ind < len(sent)?) -- confirm
        # against sent2features before changing; behaviour is preserved here.
        is_interior = ind < len(sent) - 1

        if (is_first or is_interior) and tree.get(ind + 1).is_word != 0:
            node.grad_h += window(grad_h[ind], next_lo)
        if not is_first and tree.get(ind - 1).is_word != 0:
            node.grad_h += window(grad_h[ind - 2], prev_lo)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)
    lambda_W, lambda_L = lambdas

    # np.sum (not the builtin) so every penalty term is a scalar even for
    # 2-D parameter arrays.
    reg_cost = 0.0
    #regularization for relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * np.sum(rel_dict[key]**2)
        grads[0][key] = grads[0][key] / tree_size + lambda_W * rel_dict[key]
    #regularization for transformation matrix and bias
    reg_cost += 0.5 * lambda_W * np.sum(Wv**2)
    grads[1] = grads[1] / tree_size + lambda_W * Wv
    grads[2] = grads[2] / tree_size
    #regularization for word embedding
    reg_cost += 0.5 * lambda_L * np.sum(L**2)
    grads[3] = grads[3] / tree_size + lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf
def par_objective(name_term, name_word, epoch, seed_i, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc, \
    rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Return the regularised cost and gradients for one training tree.

    Variant whose CRF attribute vector has five blocks per token
    (``attr_size = 5 * (d + 18) + 5``), so a token's hidden vector can also
    receive gradient from the delta vectors of tokens up to two positions
    away.  The tree is forward-propagated, the token features are installed
    in the CRF trainer as instance ``num``, one CRF training call is made,
    and the reported feature deltas are routed back onto the tree nodes
    before ``prop.backprop`` accumulates the network gradients.

    Args:
        name_term: Forwarded unchanged to ``sent2features``.
        name_word: Forwarded unchanged to ``sent2features``.
        epoch: Only used to build the saved CRF model file name.
        seed_i: Only used to build the saved CRF model file name.
        data: Tree of one instance; must offer ``get_nodes()``, ``nodes`` and
            ``get(index)``.  ``nodes[0]`` is skipped everywhere (presumably an
            artificial root -- confirm against the tree class).
        rel_dict: Mapping from relation key to its weight array.
        Wv: Transformation matrix handed to ``prop``; L2-regularised.
        b: Bias handed to ``prop``; not regularised.
        L: Embedding matrix; column ``node.ind`` becomes the ``(d, 1)`` input
            vector of a node.
        Wcrf: CRF weights handed to ``trainer.train`` and returned (presumably
            updated in place by the trainer -- confirm).
        d: Dimension of the embedding / hidden vectors.
        c: Forwarded to ``init_crfrnn_grads`` and ``prop``.
        len_voc: Forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: Keys of ``rel_dict`` whose matrices are regularised.
        lambdas: Pair ``(lambda_W, lambda_L)`` of L2 strengths for the weight
            matrices and for the embeddings.
        trainer: CRF trainer exposing ``modify(features, num)`` and
            ``train(...)``.
        num: Index of this instance inside the trainer.
        eta: Forwarded to ``trainer.train`` as ``eta``.
        dec: Forwarded to ``trainer.train`` as ``decay``.
        boolean: When equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise an empty name is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.  ``cost`` is the loss reported by the
        trainer plus the L2 penalty.  ``grads`` is the structure created by
        ``init_crfrnn_grads``: ``grads[0]`` (per relation key), ``grads[1]``
        and ``grads[3]`` are divided by the tree size and receive the L2
        term of ``rel_dict``, ``Wv`` and ``L`` respectively; ``grads[2]`` is
        only divided by the tree size.
    """
    n_pos = 15              # number of POS slots copied from pos2vec
    n_extra = n_pos + 2     # the "17" stride used by the slice offsets below

    def window(vec, lo):
        # d consecutive entries of a per-token delta vector, as a column
        return vec[lo: lo + d].reshape(d, 1)

    grads = init_crfrnn_grads(rel_list, d, c, len_voc)

    # one-element array so that trainer.train can hand the loss back
    # (presumably written in place -- it is read again for the cost below)
    error_sum = np.zeros(1)

    # compute error and gradient for each tree
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    #after a nn forward pass, compute crf
    n_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((n_tokens, d))
    #add pos matrix
    pos_mat = np.zeros((n_tokens, n_pos))

    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        row = ind - 1

        # Row assignment instead of a per-element loop; ravel() accepts both
        # flat and column-shaped vectors (assumes at least n_pos / d entries,
        # exactly as the element-wise copy did).
        pos_mat[row, :] = np.asarray(pos2vec(node.pos)).ravel()[:n_pos]

        if tree.get(ind).is_word == 0:
            # non-word token: its h_input row stays all-zero
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[row, :] = np.asarray(node.p).ravel()[:d]

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    trainer.modify(crf_sent_features, num)

    attr_size = 5 * (d + 2 + n_pos + 1) + 5
    delta_features = np.zeros(n_tokens * attr_size)

    #check if we need to store the model
    # (explicit comparison kept so only a value equal to True saves a model)
    if boolean == True:
        model_name = str(epoch) + str(seed_i) + 'crf.model'
    else:
        model_name = ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features,
                  inst=num, eta=eta, decay=dec, loss=error_sum, check=1)

    # one negated delta slice per token
    grad_h = [-delta_features[k * attr_size: (k + 1) * attr_size]
              for k in range(n_tokens)]

    # Offsets of this token's hidden vector inside a neighbour's delta
    # vector.  They depend on the attribute layout produced by
    # sent2features, which is not visible here -- keep them in sync.
    prev1_lo = d + 2 + n_extra                # read from token ind - 1
    prev2_lo = 2 * d + 2 + 2 * n_extra        # read from token ind - 2
    next1_lo = 3 * d + 2 + 3 * n_extra        # read from token ind + 1
    next2_lo = 4 * d + 3 + 4 * n_extra        # read from token ind + 2

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue

        node.grad_h = window(grad_h[ind - 1], 1)

        #check if the sentence only contains one word
        if len(tree.nodes) <= 2:
            continue

        if ind == 1:
            # first token: only successors can contribute
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += window(grad_h[ind], next1_lo)
            if len(tree.nodes) > 3 and tree.get(ind + 2).is_word != 0:
                node.grad_h += window(grad_h[ind + 1], next2_lo)

        # NOTE(review): ind is 1-based and len(sent) is the token count, so
        # "ind < len(sent) - 1" (and "ind < len(sent) - 2" below) stop one
        # token earlier than the sentence allows.  Looks like an off-by-one
        # -- confirm against sent2features before changing; preserved here.
        elif ind < len(sent) - 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += window(grad_h[ind], next1_lo)
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += window(grad_h[ind - 2], prev1_lo)
            if ind > 2 and tree.get(ind - 2).is_word != 0:
                node.grad_h += window(grad_h[ind - 3], prev2_lo)
            if ind < len(sent) - 2 and tree.get(ind + 2).is_word != 0:
                node.grad_h += window(grad_h[ind + 1], next2_lo)

        else:
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += window(grad_h[ind - 2], prev1_lo)
            # "ind > 2" mirrors the interior branch: without it ind == 2
            # would index grad_h[-1] and silently pull the gradient of the
            # last token instead of a real ind - 2 neighbour.
            if ind > 2 and len(nodes) > 3 and tree.get(ind - 2).is_word != 0:
                node.grad_h += window(grad_h[ind - 3], prev2_lo)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    lambda_W, lambda_L = lambdas

    # np.sum (not the builtin) so every penalty term is a scalar even for
    # 2-D parameter arrays.
    reg_cost = 0.0
    # relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * np.sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size + lambda_W * rel_dict[key]

    # transformation matrix (regularised) and bias (scaled only)
    reg_cost += 0.5 * lambda_W * np.sum(Wv ** 2)
    grads[1] = grads[1] / tree_size + lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # word embeddings
    reg_cost += 0.5 * lambda_L * np.sum(L ** 2)
    grads[3] = grads[3] / tree_size + lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf