Example #1
def constructTree(tree):
    ## tree: {index1: {'parent': ..., 'maxL': ..., 'vec': ...}, ...}
    ## 1. init tree nodes
    index2node = {}
    for i in tree:
        node = TD_RvNN.Node_tweet(idx=i)
        index2node[i] = node
    ## 2. construct tree
    for j in tree:
        indexC = j
        indexP = tree[j]['parent']
        nodeC = index2node[indexC]
        wordFreq, wordIndex = str2matrix(tree[j]['vec'], tree[j]['maxL'])
        #print tree[j]['maxL']
        nodeC.index = wordIndex
        nodeC.word = wordFreq
        #nodeC.time = tree[j]['post_t']
        ## not root node ##
        if indexP != 'None':
            nodeP = index2node[int(indexP)]
            nodeC.parent = nodeP
            nodeP.children.append(nodeC)
        ## root node ##
        else:
            root = nodeC
    ## 3. convert tree to DNN input
    ## j still holds the last key from the loop above; 'parent_num' and 'maxL'
    ## are assumed to be the same for every node of this tree
    parent_num = tree[j]['parent_num']
    ini_x, ini_index = str2matrix("0:0", tree[j]['maxL'])
    #x_word, x_index, tree = tree_gru_u2b.gen_nn_inputs(root, ini_x, ini_index)
    x_word, x_index, tree = TD_RvNN.gen_nn_inputs(root, ini_x)
    return x_word, x_index, tree, parent_num
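The helper str2matrix is not shown in these examples. Below is a minimal sketch of what it presumably does, assuming 'vec' holds space-separated "wordIndex:frequency" pairs and maxL is the padded output length; the real TD_RvNN helper may differ in padding and types.

def str2matrix(vec_str, max_len):
    # assumed input format: "12:1 40:2" -> indices [12, 40], frequencies [1.0, 2.0]
    word_freq, word_index = [], []
    for pair in vec_str.split(' '):
        idx, freq = pair.split(':')
        word_index.append(int(idx))
        word_freq.append(float(freq))
    # pad both lists with zeros up to max_len so every node yields equal-length vectors
    pad = int(max_len) - len(word_index)
    word_index += [0] * pad
    word_freq += [0.0] * pad
    return word_freq, word_index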
Example #2
def construct_tree(tree):
    # tree: {index1: {'parent': ..., 'maxL': ..., 'vec': ...}, ...}

    # 1. init tree nodes
    index2node = {}
    for i in tree:
        node = TD_RvNN.NodeTweet(idx=i)
        index2node[i] = node

    # 2. construct tree
    _j = 0
    root = None
    for _j in tree:
        index_c = _j
        index_p = tree[_j]['parent']
        node_c = index2node[index_c]
        word_freq, word_index = str2matrix(tree[_j]['vec'], tree[_j]['maxL'])
        node_c.index = word_index
        node_c.word = word_freq

        # not root node
        if index_p != 'None':
            node_p = index2node[int(index_p)]
            node_c.parent = node_p
            node_p.children.append(node_c)

        # root node
        else:
            root = node_c

    # 3. convert tree to DNN input
    parent_num = tree[_j]['parent_num']
    ini_x, ini_index = str2matrix("0:0", tree[_j]['maxL'])
    x_word, x_index, tree = TD_RvNN.gen_nn_inputs(root, ini_x)
    return x_word, x_index, tree, parent_num
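For illustration only, here is a toy call to construct_tree with a hand-made two-node tree in the dict format the function reads; the field values are invented rather than real dataset entries, and TD_RvNN (NodeTweet, gen_nn_inputs) plus str2matrix must be importable.

# hypothetical tree: node 1 is the root, node 2 replies to it
toy_tree = {
    1: {'parent': 'None', 'maxL': 3, 'vec': '12:1 40:2', 'parent_num': 1},
    2: {'parent': '1', 'maxL': 3, 'vec': '7:1', 'parent_num': 1},
}
x_word, x_index, nn_tree, parent_num = construct_tree(toy_tree)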
Example #3
          str(parent_num_train[0]))
    #print index_train[0]
    #print word_train[0]
    #print tree_train[0]
    #exit(0)
    return tree_train, word_train, index_train, parent_num_train, y_train, tree_test, word_test, index_test, parent_num_test, y_test


##################################### MAIN ####################################
## 1. load tree & word & index & label
tree_train, word_train, index_train, parent_num_train, y_train, tree_test, word_test, index_test, parent_num_test, y_test = loadData(
)

## 2. init RNN model
t0 = time.time()
model = TD_RvNN.RvNN(vocabulary_size, hidden_dim, Nclass)
t1 = time.time()
print('Recursive model established, ' + str((t1 - t0) / 60))

#if os.path.isfile(modelPath):
#   load_model_Recursive_gruEmb(modelPath, model)
#   lr = 0.0001

######debug here######
#print len(tree_test[121]), len(index_test[121]), len(word_test[121])
#print tree_test[121]
#exit(0)
#loss, pred_y = model.train_step_up(word_test[121], index_test[121], tree_test[121], y_test[121], lr)
#print loss, pred_y
#exit(0)
'''i=568
Example #4
def run(_vocabulary_size, _hidden_dim, _n_class, _n_epoch, _learning_rate,
        _label_path, _tree_path, _train_path, _test_path, _eid_pool):

    # 1. load tree & word & index & label
    tree_train, word_train, index_train, parent_num_train, y_train, \
    tree_test, word_test, index_test, parent_num_test, y_test = load_data(
        _label_path, _tree_path, _train_path, _test_path, _eid_pool
    )

    # 2. init RNN model
    t0 = time.time()
    model = TD_RvNN.RvNN(_vocabulary_size, _hidden_dim, _n_class)
    t1 = time.time()
    print('Recursive model established,', (t1 - t0) / 60)

    # if os.path.isfile(modelPath):
    #   load_model_Recursive_gruEmb(modelPath, model)
    #   lr = 0.0001

    # debug here
    # print len(tree_test[121]), len(index_test[121]), len(word_test[121])
    # print tree_test[121]
    # exit(0)
    # loss, pred_y = model.train_step_up(word_test[121], index_test[121], tree_test[121], y_test[121], lr)
    # print loss, pred_y
    # exit(0)
    '''i=568
    loss, pred_y = model.train_step_up(word_train[i], index_train[i], parent_num_train[i], tree_train[i], y_train[i], lr)
    print loss, pred_y
    print len(tree_train[i]), len(word_train[i]), parent_num_train[i]
    print tree_train[i]
    print word_train[i]
    print 'final_state:',model._evaluate(word_train[i], index_train[i], parent_num_train[i], tree_train[i])
    tree_states=model._evaluate2(word_train[i], index_train[i], parent_num_train[i], tree_train[i])
    print 'tree_states:', tree_states
    print tree_states[-1:].mean(axis=0)
    tree_states_test=model._evaluate3(word_train[i], index_train[i], tree_train[i])
    print 'l:',len(tree_states_test)
    print 'lo:',tree_states_test[parent_num_train[i]:]'''
    #

    # 3. looping SGD
    losses_5, losses = [], []
    num_examples_seen = 0
    for epoch in range(_n_epoch):

        # one SGD
        indexes = list(range(len(y_train)))
        for i in indexes:
            '''print i,":", len(tree_train[i])
            print tree_train[i]
            tree_state = model._state(word_train[i], index_train[i], child_num_train[i], tree_train[i])
            print len(tree_state)
            print tree_state
            evl = model._evaluate(word_train[i], index_train[i], child_num_train[i], tree_train[i])
            print len(evl) 
            print evl'''
            loss, pred_y = model.train_step_up(word_train[i], index_train[i],
                                               parent_num_train[i],
                                               tree_train[i], y_train[i],
                                               _learning_rate)
            # print loss, pred_y
            losses.append(round(float(loss), 2))
            '''if math.isnan(loss):
            #   continue 
               print loss, pred_y
               print i
               print len(tree_train[i]), len(word_train[i]), parent_num_train[i]
               print tree_train[i]
               print word_train[i]
               print 'final_state:',model._evaluate(word_train[i], index_train[i], parent_num_train[i], tree_train[i])'''
            num_examples_seen += 1
        print("epoch=%d: loss=%f" % (epoch, np.mean(losses)))
        # floss.write(str(time)+": epoch="+str(epoch)+" loss="+str(loss) +'\n')
        sys.stdout.flush()
        # print losses
        # exit(0)

        # cal loss & evaluate
        if epoch % 5 == 0:
            losses_5.append((num_examples_seen, np.mean(losses)))
            time_now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("%s: Loss after num_examples_seen=%d epoch=%d: %f" %
                  (time_now, num_examples_seen, epoch, np.mean(losses)))
            # floss.write(str(time)+": epoch="+str(epoch)+" loss="+str(loss) +'\n')
            # floss.flush()
            sys.stdout.flush()
            prediction = []
            for j in range(len(y_test)):
                # print j
                prediction.append(
                    model.predict_up(word_test[j], index_test[j],
                                     parent_num_test[j], tree_test[j]))
            res = evaluation_4class(prediction, y_test)
            print('results:', res)
            # floss.write(str(res)+'\n')
            # floss.flush()
            sys.stdout.flush()

            # Adjust the learning rate if loss increases
            if len(losses_5) > 1 and losses_5[-1][1] > losses_5[-2][1]:
                _learning_rate = _learning_rate * 0.5
                print("Setting learning rate to %f" % _learning_rate)
                # floss.write("Setting learning rate to:"+str(lr)+'\n')
                # floss.flush()
                sys.stdout.flush()
            # save_model_Recursive_gruEmb(modelPath, model)
        # floss.flush()
        losses = []
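A hypothetical invocation of run(), wiring in the constants and file paths that appear in Example #5 below; the _eid_pool value is a placeholder here, since its expected contents are not shown in these snippets.

run(_vocabulary_size=5000, _hidden_dim=100, _n_class=4, _n_epoch=600,
    _learning_rate=0.005,
    _label_path='resource/Twitter15_label_All.txt',
    _tree_path='resource/data.TD_RvNN.vol_5000.txt',
    _train_path='nfold/RNNtrainSet_Twitter152_tree.txt',
    _test_path='nfold/RNNtestSet_Twitter152_tree.txt',
    _eid_pool=None)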
Example #5
tree_file = 'resource/data.TD_RvNN.vol_5000.txt'
label_file = 'resource/Twitter15_label_All.txt'
train_file = 'nfold/RNNtrainSet_Twitter152_tree.txt'
test_file = 'nfold/RNNtestSet_Twitter152_tree.txt'

vocab_size = 5000
embed_size = 512
hidden_size = 100
num_class = 4
epoches = 600
lr = 0.005
# lr = 1

# tree_train, word_train, index_train, parent_num_train, y_train, tree_test, word_test, index_test, parent_num_test, y_test = TD_RvNN.loadData(label_file, tree_file, train_file, test_file)

tree_train, word_train, index_train, parent_num_train, y_train, tree_test, word_test, index_test, parent_num_test, y_test = TD_RvNN.loadData(
)
# print("train no:", len(tree_train), len(word_train), len(index_train),len(parent_num_train), len(y_train))
# print("test no:", len(tree_test), len(word_test), len(index_test), len(parent_num_test), len(y_test))
# print("dim1 for 0:", len(tree_train[0]), len(word_train[0]), len(index_train[0]))
# print("case 0:", tree_train[0][0], word_train[0][0], index_train[0][0], parent_num_train[0])

model = model.RvNN(vocab_size=vocab_size,
                   embed_size=embed_size,
                   hidden_size=hidden_size,
                   num_class=num_class).to(device)

loss_func = nn.MSELoss(reduction='sum')

model_optimizer = torch.optim.SGD(
    # params=filter(lambda p: p.requires_grad, model.parameters()),
    params=model.parameters(),