def trainer_initialization(m_trainer, trees, params, d, c, len_voc, rel_list, name_term, name_word):
    """Append one CRF training sequence per tree to ``m_trainer``.

    For every tree the word embeddings are loaded from the unrolled
    parameters, ``prop.forward_prop`` is run, and the per-token hidden
    vectors (``node.p``), POS vectors and labels are handed to
    ``sent2features`` / ``m_trainer.append``.

    Args:
        m_trainer: object exposing ``append(features, labels)``.
        trees: iterable of trees (each exposes ``get_nodes()``, ``nodes``
            and ``get(i)``).
        params: flat parameters, unrolled by ``unroll_params_noWcrf``.
        d: length of the per-token vector copied from ``node.p``.
        c: forwarded unchanged to ``prop.forward_prop``.
        len_voc: forwarded to ``unroll_params_noWcrf``.
        rel_list: forwarded to ``unroll_params_noWcrf``.
        name_term: forwarded to ``sent2features``.
        name_word: forwarded to ``sent2features``.

    Returns:
        The same ``m_trainer`` object, after all sequences were appended.
    """
    # Width of the vector returned by pos2vec that is copied per token.
    pos_dim = 15

    param_list = unroll_params_noWcrf(params, d, c, len_voc, rel_list)
    # Only the last entry (indexed below as L[:, node.ind]) is needed here;
    # the full list is forwarded to forward_prop.
    _, _, _, L = param_list

    for tree in trees:
        for node in tree.get_nodes():
            node.vec = L[:, node.ind].reshape((d, 1))

        prop.forward_prop(param_list, tree, d, c)

        # Node 0 is skipped below, so there is one row per remaining node.
        num_tokens = len(tree.nodes) - 1
        sent = []
        # Zero-initialised: rows of non-word tokens stay all-zero and label 0.
        h_input = np.zeros((num_tokens, d))
        y_label = np.zeros((num_tokens,), dtype=int)
        pos_mat = np.zeros((num_tokens, pos_dim))

        for ind, node in enumerate(tree.nodes):
            if ind == 0:
                continue
            row = ind - 1

            pos_mat[row, :] = np.reshape(pos2vec(node.pos)[:pos_dim], pos_dim)

            if tree.get(ind).is_word == 0:
                sent.append(None)
            else:
                y_label[row] = node.trueLabel
                sent.append(node.word)
                # First d entries of the hidden vector, flattened to a row.
                h_input[row, :] = np.reshape(node.p[:d], d)

        crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
        crf_sent_labels = [str(item) for item in y_label]
        m_trainer.append(crf_sent_features, crf_sent_labels)

    return m_trainer
# Example #2
# 0
def trainer_initialization(m_trainer, trees, params, d, c, len_voc, rel_list):
    """Append one CRF training sequence per tree to ``m_trainer``.

    For every tree the word embeddings are loaded from the unrolled
    parameters, ``prop.forward_prop`` is run, and the per-token hidden
    vectors (``node.p``) and labels are handed to ``sent2features`` /
    ``m_trainer.append``.

    Args:
        m_trainer: object exposing ``append(features, labels)``.
        trees: iterable of trees (each exposes ``get_nodes()``, ``nodes``
            and ``get(i)``).
        params: flat parameters, unrolled by ``unroll_params_noWcrf``.
        d: length of the per-token vector copied from ``node.p``.
        c: forwarded unchanged to ``prop.forward_prop``.
        len_voc: forwarded to ``unroll_params_noWcrf``.
        rel_list: forwarded to ``unroll_params_noWcrf``.

    Returns:
        The same ``m_trainer`` object, after all sequences were appended.
    """
    param_list = unroll_params_noWcrf(params, d, c, len_voc, rel_list)
    # Only the last entry (indexed below as L[:, node.ind]) is needed here;
    # the full list is forwarded to forward_prop.
    _, _, _, L = param_list

    for tree in trees:
        for node in tree.get_nodes():
            node.vec = L[:, node.ind].reshape((d, 1))

        prop.forward_prop(param_list, tree, d, c)

        # Node 0 is skipped below, so there is one row per remaining node.
        num_tokens = len(tree.nodes) - 1
        sent = []
        # Input feature matrix to the CRF for this sentence. Zero-initialised:
        # rows of non-word tokens stay all-zero and keep label 0.
        h_input = np.zeros((num_tokens, d))
        y_label = np.zeros((num_tokens,), dtype=int)

        for ind, node in enumerate(tree.nodes):
            if ind == 0:
                continue
            row = ind - 1

            if tree.get(ind).is_word == 0:
                sent.append(None)
            else:
                y_label[row] = node.trueLabel
                sent.append(node.word)
                # First d entries of the hidden vector, flattened to a row.
                h_input[row, :] = np.reshape(node.p[:d], d)

        crf_sent_features = sent2features(d, sent, h_input)
        crf_sent_labels = [str(item) for item in y_label]
        m_trainer.append(crf_sent_features, crf_sent_labels)

    return m_trainer
# Example #3
# 0
def par_objective(epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Compute cost and gradients of the RNN+CRF objective for one tree.

    The tree is forward-propagated, its per-token hidden vectors are pushed
    into the CRF trainer as updated features, the trainer is stepped on that
    instance, and the feature deltas it reports are routed back onto the
    tree nodes (``node.grad_h``) before ``prop.backprop`` is called.
    Regularisation terms are then added to the cost and the gradients.

    Args:
        epoch: used only to build the model file name.
        data: a single tree (exposes ``get_nodes()``, ``nodes``, ``get(i)``).
        rel_dict: mapping from relation key to parameter array.
        Wv: parameter array, regularised with ``lambda_W``.
        b: parameter passed to forward/backward propagation.
        L: embedding matrix, read as ``L[:, node.ind]``.
        Wcrf: passed to ``trainer.train`` as ``weight`` and returned.
        d: hidden/embedding size.
        c: forwarded unchanged to ``prop``.
        len_voc: forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: keys of ``rel_dict`` to regularise.
        lambdas: pair ``(lambda_W, lambda_L)``.
        trainer: object exposing ``modify(features, index)`` and ``train(...)``.
        num: index of this instance inside ``trainer``.
        eta: forwarded to ``trainer.train`` as ``eta``.
        dec: forwarded to ``trainer.train`` as ``decay``.
        boolean: when equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise an empty name is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.
    """
    # initialize gradients
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)

    # One-element array handed to trainer.train as `loss` and read back
    # afterwards; presumably filled in place by the trainer -- confirm.
    error_sum = np.zeros(1)

    # compute for one instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # After the RNN forward pass, build the CRF input: one row per node
    # except node 0. Rows of non-word (punctuation) tokens stay zero.
    num_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((num_tokens, d))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue
        if tree.get(ind).is_word == 0:
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[ind - 1, :] = np.reshape(node.p[:d], d)

    crf_sent_features = sent2features(d, sent, h_input)
    # The hidden vectors change whenever the parameters change, so the CRF
    # input features of instance `num` are refreshed before training on it.
    trainer.modify(crf_sent_features, num)

    # crf feature dimension
    attr_size = 3 * (d + 1) + 3
    # delta for hidden matrix from crf; passed to trainer.train as `delta`
    # and read back below (presumably written in place -- confirm).
    delta_features = np.zeros(num_tokens * attr_size)

    # Only store a model file when requested. `== True` is kept so that the
    # accepted values of `boolean` stay exactly as before.
    model_name = str(epoch) + str(num) + 'crf.model' if boolean == True else ''
    trainer.train(model=model_name,
                  weight=Wcrf,
                  delta=delta_features,
                  inst=num,
                  eta=eta,
                  decay=dec,
                  loss=error_sum,
                  check=1)

    # Split the negated flat delta into one feature-sized chunk per token.
    grad_h = [-delta_features[k * attr_size:(k + 1) * attr_size]
              for k in range(num_tokens)]

    # Offsets of the hidden-vector slots inside a token's feature chunk that
    # refer to the previous / next token; they mirror the layout produced by
    # sent2features, which is not visible here.
    prev_lo = d + 2
    next_lo = 2 * d + 2

    # pass delta h to separate feature vectors to backpropagate to rnn
    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue

        node.grad_h = grad_h[ind - 1][1:d + 1].reshape(d, 1)

        # nothing to add when the sentence only contains one word
        if len(tree.nodes) <= 2:
            continue

        if ind == 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][next_lo:next_lo + d].reshape(d, 1)

        # NOTE(review): with this bound the token at ind == len(sent) - 1
        # falls into the `else` branch and receives no next-token term even
        # though token ind + 1 exists -- possible off-by-one, confirm before
        # changing because it alters training numerics.
        elif ind < len(sent) - 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][next_lo:next_lo + d].reshape(d, 1)
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][prev_lo:prev_lo + d].reshape(d, 1)
        else:
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][prev_lo:prev_lo + d].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)
    lambda_W, lambda_L = lambdas

    # NOTE(review): `sum` is left exactly as written. If it is the builtin
    # (not numpy's) it reduces only the first axis of a 2-D array, making
    # `cost` an array rather than a scalar -- check the file's imports.
    reg_cost = 0.0
    # regularization for relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key]**2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]
    # regularization for transformation matrix and bias
    reg_cost += 0.5 * lambda_W * sum(Wv**2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size
    # regularization for word embedding
    reg_cost += 0.5 * lambda_L * sum(L**2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf
# Example #4
# 0
def evaluate(epoch,
             inst_ind,
             data,
             rel_dict,
             Wv,
             b,
             We,
             vocab,
             rel_list,
             d,
             c,
             mixed=False):
    """Tag every test tree with the stored CRF model and collect labels.

    Args:
        epoch: with ``inst_ind``, selects the model file
            ``str(epoch) + str(inst_ind) + 'crf.model'``.
        inst_ind: see ``epoch``.
        data: iterable of test trees.
        rel_dict: parameter forwarded to ``prop.forward_prop``.
        Wv: parameter forwarded to ``prop.forward_prop``.
        b: parameter forwarded to ``prop.forward_prop``.
        We: embedding matrix, read as ``We[:, node.ind]`` for known words.
        vocab: container of known lower-cased words.
        rel_list: not used in the visible body.
        d: embedding/hidden size.
        c: forwarded unchanged to ``prop.forward_prop``.
        mixed: when true, vectors taken from the word2vec file are extended
            with 50 uniform random values in [-1, 1) before use.

    Returns:
        None in the visible code.
        NOTE(review): ``true``, ``predict`` and ``count`` are built but
        never consumed or returned, so the tail of this function looks to
        be missing -- confirm against the original source.
    """
    # output labels
    tagger = pycrfsuite.Tagger()
    tagger.open(str(epoch) + str(inst_ind) + 'crf.model')

    # word2vec: each line is "<word>,<d floats>,<trailing field>"; the word
    # itself may contain commas, so the split point is counted from the end.
    dictionary = {}
    with open('util/data_semEval/w2v_sample.txt', 'r') as dic_file:
        for line in dic_file:
            word_vector = line.split(",")
            split_at = len(word_vector) - d - 1
            word = ','.join(word_vector[:split_at])
            dictionary[word] = np.asarray(
                [float(element)
                 for element in word_vector[split_at:len(word_vector) - 1]])

    # Drop trees whose node 0 is not a word. A plain filter is used because
    # only iteration over the remaining trees is needed below.
    test_trees = [tree for tree in data if tree.get(0).is_word != 0]

    true = []
    predict = []

    # Number of words found neither in vocab nor in the word2vec file.
    count = 0

    for tree in test_trees:
        nodes = tree.get_nodes()
        sent = []
        h_input = np.zeros((len(tree.nodes) - 1, d))
        y_label = np.zeros((len(tree.nodes) - 1, ), dtype=int)

        for node in nodes:
            key = node.word.lower()
            if key in vocab:
                node.vec = We[:, node.ind].reshape((d, 1))
            elif key in dictionary:
                if mixed:
                    # ndarray has no .append method; np.append concatenates.
                    # NOTE(review): the reshape only succeeds if the stored
                    # vector has d - 50 entries -- confirm the file format
                    # used in mixed mode.
                    node.vec = np.append(
                        dictionary[key],
                        2 * np.random.rand(50) - 1).reshape((d, 1))
                else:
                    node.vec = dictionary[key].reshape(d, 1)
            else:
                node.vec = np.random.rand(d, 1)
                count += 1

        prop.forward_prop([rel_dict, Wv, b, We], tree, d, c, labels=False)

        # One row per node except node 0; rows of non-word tokens stay zero
        # and keep label 0.
        for index, node in enumerate(tree.nodes):
            if index == 0:
                continue
            if tree.get(index).is_word == 0:
                sent.append(None)
            else:
                y_label[index - 1] = node.trueLabel
                sent.append(node.word)
                h_input[index - 1, :] = np.reshape(node.p[:d], d)

        crf_sent_features = sent2features(d, sent, h_input)
        true.extend(str(item) for item in y_label)

        # predict
        predict.extend(tagger.tag(crf_sent_features))
def par_objective(seed_i, name_term, name_word, epoch, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Compute cost and gradients of the RNN+CRF objective for one tree.

    The tree is forward-propagated, its per-token hidden vectors and POS
    vectors are pushed into the CRF trainer as updated features, the trainer
    is stepped on that instance, and the feature deltas it reports are
    routed back onto the tree nodes (``node.grad_h``) before
    ``prop.backprop`` is called. Regularisation terms are then added to the
    cost and the gradients.

    Args:
        seed_i: with ``epoch``, used only to build the model file name.
        name_term: forwarded to ``sent2features``.
        name_word: forwarded to ``sent2features``.
        epoch: see ``seed_i``.
        data: a single tree (exposes ``get_nodes()``, ``nodes``, ``get(i)``).
        rel_dict: mapping from relation key to parameter array.
        Wv: parameter array, regularised with ``lambda_W``.
        b: parameter passed to forward/backward propagation.
        L: embedding matrix, read as ``L[:, node.ind]``.
        Wcrf: passed to ``trainer.train`` as ``weight`` and returned.
        d: hidden/embedding size.
        c: forwarded unchanged to ``prop``.
        len_voc: forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: keys of ``rel_dict`` to regularise.
        lambdas: pair ``(lambda_W, lambda_L)``.
        trainer: object exposing ``modify(features, index)`` and ``train(...)``.
        num: index of this instance inside ``trainer``.
        eta: forwarded to ``trainer.train`` as ``eta``.
        dec: forwarded to ``trainer.train`` as ``decay``.
        boolean: when equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise an empty name is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.
    """
    # Width of the vector returned by pos2vec that is copied per token.
    pos_dim = 15

    # initialize gradients
    grads = init_crfrnn_grads(rel_list, d, c, len_voc)

    # One-element array handed to trainer.train as `loss` and read back
    # afterwards; presumably filled in place by the trainer -- confirm.
    error_sum = np.zeros(1)

    # compute for one instance
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # After the forward pass, build the CRF input: one row per node except
    # node 0. Hidden rows of non-word (punctuation) tokens stay zero.
    num_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((num_tokens, d))
    pos_mat = np.zeros((num_tokens, pos_dim))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue

        pos_mat[ind - 1, :] = np.reshape(pos2vec(node.pos)[:pos_dim], pos_dim)

        if tree.get(ind).is_word == 0:
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[ind - 1, :] = np.reshape(node.p[:d], d)

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    # The hidden vectors change whenever the parameters change, so the CRF
    # input features of instance `num` are refreshed before training on it.
    trainer.modify(crf_sent_features, num)

    # Length of one token's CRF feature chunk.
    attr_size = 3 * (d + 2 + 15 + 1) + 3
    # delta for hidden matrix; passed to trainer.train as `delta` and read
    # back below (presumably written in place -- confirm).
    delta_features = np.zeros(num_tokens * attr_size)

    # Only store a model file when requested. `== True` is kept so that the
    # accepted values of `boolean` stay exactly as before.
    model_name = str(epoch) + str(seed_i) + 'crf.model' if boolean == True else ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # Split the negated flat delta into one feature-sized chunk per token.
    grad_h = [-delta_features[k * attr_size:(k + 1) * attr_size]
              for k in range(num_tokens)]

    # Offsets of the hidden-vector slots inside a token's feature chunk that
    # refer to the previous / next token; they mirror the layout produced by
    # sent2features, which is not visible here.
    prev_lo = d + 2 + 17
    next_lo = 2 * d + 2 + 2 * 17

    # pass delta h to separate feature vectors
    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue

        node.grad_h = grad_h[ind - 1][1:d + 1].reshape(d, 1)

        # nothing to add when the sentence only contains one word
        if len(tree.nodes) <= 2:
            continue

        if ind == 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][next_lo:next_lo + d].reshape(d, 1)

        # NOTE(review): with this bound the token at ind == len(sent) - 1
        # falls into the `else` branch and receives no next-token term even
        # though token ind + 1 exists -- possible off-by-one, confirm before
        # changing because it alters training numerics.
        elif ind < len(sent) - 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][next_lo:next_lo + d].reshape(d, 1)
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][prev_lo:prev_lo + d].reshape(d, 1)
        else:
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][prev_lo:prev_lo + d].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)
    lambda_W, lambda_L = lambdas

    # NOTE(review): `sum` is left exactly as written. If it is the builtin
    # (not numpy's) it reduces only the first axis of a 2-D array, making
    # `cost` an array rather than a scalar -- check the file's imports.
    reg_cost = 0.0
    # regularization for relation matrices
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]
    # regularization for transformation matrix and bias
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size
    # regularization for word embedding
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf
def par_objective(name_term, name_word, epoch, seed_i, data, rel_dict, Wv, b, L, Wcrf, d, c, len_voc,
                  rel_list, lambdas, trainer, num, eta, dec, boolean):
    """Compute cost and gradients of the RNN+CRF objective for one tree.

    Variant whose CRF feature chunk holds five hidden-vector slots per
    token (the token itself plus up to two neighbours on each side, judging
    from the routing below). The tree is forward-propagated, its per-token
    hidden and POS vectors are pushed into the CRF trainer, the trainer is
    stepped on that instance, and the reported feature deltas are routed
    back onto the tree nodes (``node.grad_h``) before ``prop.backprop`` is
    called. Regularisation terms are then added to cost and gradients.

    Args:
        name_term: forwarded to ``sent2features``.
        name_word: forwarded to ``sent2features``.
        epoch: with ``seed_i``, used only to build the model file name.
        seed_i: see ``epoch``.
        data: a single tree (exposes ``get_nodes()``, ``nodes``, ``get(i)``).
        rel_dict: mapping from relation key to parameter array.
        Wv: parameter array, regularised with ``lambda_W``.
        b: parameter passed to forward/backward propagation.
        L: embedding matrix, read as ``L[:, node.ind]``.
        Wcrf: passed to ``trainer.train`` as ``weight`` and returned.
        d: hidden/embedding size.
        c: forwarded unchanged to ``prop``.
        len_voc: forwarded to ``init_crfrnn_grads`` and ``prop.backprop``.
        rel_list: keys of ``rel_dict`` to regularise.
        lambdas: pair ``(lambda_W, lambda_L)``.
        trainer: object exposing ``modify(features, index)`` and ``train(...)``.
        num: index of this instance inside ``trainer``.
        eta: forwarded to ``trainer.train`` as ``eta``.
        dec: forwarded to ``trainer.train`` as ``decay``.
        boolean: when equal to ``True`` a model file name is passed to
            ``trainer.train``; otherwise an empty name is passed.

    Returns:
        Tuple ``(cost, grads, Wcrf)``.
    """
    # Width of the vector returned by pos2vec that is copied per token.
    pos_dim = 15

    grads = init_crfrnn_grads(rel_list, d, c, len_voc)

    # One-element array handed to trainer.train as `loss` and read back
    # afterwards; presumably filled in place by the trainer -- confirm.
    error_sum = np.zeros(1)

    # compute error and gradient for this tree
    tree = data
    nodes = tree.get_nodes()
    for node in nodes:
        node.vec = L[:, node.ind].reshape((d, 1))

    prop.forward_prop([rel_dict, Wv, b, L], tree, d, c)
    tree_size = len(nodes)

    # After the forward pass, build the CRF input: one row per node except
    # node 0. Hidden rows of non-word tokens stay zero.
    num_tokens = len(tree.nodes) - 1
    sent = []
    h_input = np.zeros((num_tokens, d))
    pos_mat = np.zeros((num_tokens, pos_dim))
    for ind, node in enumerate(tree.nodes):
        if ind == 0:
            continue

        pos_mat[ind - 1, :] = np.reshape(pos2vec(node.pos)[:pos_dim], pos_dim)

        if tree.get(ind).is_word == 0:
            sent.append(None)
        else:
            sent.append(node.word)
            h_input[ind - 1, :] = np.reshape(node.p[:d], d)

    crf_sent_features = sent2features(d, sent, h_input, pos_mat, name_term, name_word)
    # Refresh the CRF input features of instance `num` before training on it.
    trainer.modify(crf_sent_features, num)

    # Length of one token's CRF feature chunk.
    attr_size = 5 * (d + 2 + 15 + 1) + 5
    # Passed to trainer.train as `delta` and read back below (presumably
    # written in place -- confirm).
    delta_features = np.zeros(num_tokens * attr_size)

    # Only store a model file when requested. `== True` is kept so that the
    # accepted values of `boolean` stay exactly as before.
    model_name = str(epoch) + str(seed_i) + 'crf.model' if boolean == True else ''
    trainer.train(model=model_name, weight=Wcrf, delta=delta_features, inst=num,
                  eta=eta, decay=dec, loss=error_sum, check=1)

    # Split the negated flat delta into one feature-sized chunk per token.
    grad_h = [-delta_features[k * attr_size:(k + 1) * attr_size]
              for k in range(num_tokens)]

    # Offsets of the hidden-vector slots inside a token's feature chunk that
    # refer to neighbouring tokens; they mirror the layout produced by
    # sent2features, which is not visible here.
    prev1_lo = d + 2 + 17
    prev2_lo = 2 * d + 2 + 2 * 17
    next1_lo = 3 * d + 2 + 3 * 17
    next2_lo = 4 * d + 3 + 4 * 17

    for ind, node in enumerate(tree.nodes):
        if ind == 0 or tree.get(ind).is_word == 0:
            continue

        node.grad_h = grad_h[ind - 1][1:d + 1].reshape(d, 1)

        # nothing to add when the sentence only contains one word
        if len(tree.nodes) <= 2:
            continue

        if ind == 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][next1_lo:next1_lo + d].reshape(d, 1)

            if len(tree.nodes) > 3 and tree.get(ind + 2).is_word != 0:
                node.grad_h += grad_h[ind + 1][next2_lo:next2_lo + d].reshape(d, 1)

        # NOTE(review): with this bound the token at ind == len(sent) - 1
        # falls into the `else` branch and receives no next-token term even
        # though token ind + 1 exists -- possible off-by-one, confirm before
        # changing because it alters training numerics.
        elif ind < len(sent) - 1:
            if tree.get(ind + 1).is_word != 0:
                node.grad_h += grad_h[ind][next1_lo:next1_lo + d].reshape(d, 1)
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][prev1_lo:prev1_lo + d].reshape(d, 1)

            if ind > 2 and tree.get(ind - 2).is_word != 0:
                node.grad_h += grad_h[ind - 3][prev2_lo:prev2_lo + d].reshape(d, 1)
            if ind < len(sent) - 2 and tree.get(ind + 2).is_word != 0:
                node.grad_h += grad_h[ind + 1][next2_lo:next2_lo + d].reshape(d, 1)

        else:
            if tree.get(ind - 1).is_word != 0:
                node.grad_h += grad_h[ind - 2][prev1_lo:prev1_lo + d].reshape(d, 1)

            # NOTE(review): this guard uses len(nodes) (from get_nodes())
            # while the others use len(tree.nodes). Also, when ind == 2 is
            # reached here, ind - 3 == -1 indexes the LAST chunk and
            # tree.get(0) is node 0 -- looks unintended, confirm.
            if len(nodes) > 3 and tree.get(ind - 2).is_word != 0:
                node.grad_h += grad_h[ind - 3][prev2_lo:prev2_lo + d].reshape(d, 1)

    prop.backprop([rel_dict, Wv, b], tree, d, c, len_voc, grads)

    lambda_W, lambda_L = lambdas

    # NOTE(review): `sum` is left exactly as written. If it is the builtin
    # (not numpy's) it reduces only the first axis of a 2-D array, making
    # `cost` an array rather than a scalar -- check the file's imports.
    reg_cost = 0.0
    # regularization for the arrays in rel_dict
    for key in rel_list:
        reg_cost += 0.5 * lambda_W * sum(rel_dict[key] ** 2)
        grads[0][key] = grads[0][key] / tree_size
        grads[0][key] += lambda_W * rel_dict[key]

    # Wv is regularised; grads[2] is only averaged, with no penalty term
    reg_cost += 0.5 * lambda_W * sum(Wv ** 2)
    grads[1] = grads[1] / tree_size
    grads[1] += lambda_W * Wv
    grads[2] = grads[2] / tree_size

    # regularization for the embedding matrix L
    reg_cost += 0.5 * lambda_L * sum(L ** 2)
    grads[3] = grads[3] / tree_size
    grads[3] += lambda_L * L

    cost = error_sum[0] + reg_cost

    return cost, grads, Wcrf