Ejemplo n.º 1
0
                         window=5,
                         min_count=5)
        model.save(w2v_fp)
    # Keep a handle on the model's word vectors; they are indexed by word
    # (wv[w]) when the network inputs are built below.
    wv = model.wv
    # Pass the set of vocabulary words to the dataset filter (presumably this
    # drops words that have no vector, so the wv[w] lookups below succeed --
    # TODO confirm against filter_words).
    # NOTE(review): if `model` is a gensim Word2Vec model, `wv.vocab` was
    # removed in gensim 4.0 in favour of `wv.key_to_index` -- confirm the
    # pinned gensim version.
    data.filter_words(set(model.wv.vocab.keys()))
    # Split the data into train/test partitions (balance=True presumably
    # equalises the label classes -- TODO confirm against the data class).
    data.cut_train_and_test(balance=True)

    ## rnn
    # Training hyperparameters.
    batch_size = 1024
    # Number of batches drawn per epoch, as reported by the dataset for this
    # batch size.
    num_batch_per_epoch = data.get_num_of_batch(batch_size)
    hidden_size = 100
    num_epoch = 100
    lr = 0.002

    # Build a GRU-based network. `embedding_size` is defined outside this
    # excerpt (presumably the word2vec vector size -- confirm).
    rnn = RNN("gru", hidden_size, embedding_size, lr=lr)
    rnn.build_graph()

    # TensorFlow 1.x session API. NOTE(review): the session is not closed
    # anywhere in the visible code.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(num_epoch):
        for _ in range(num_batch_per_epoch):
            batch = data.get_train_batch(batch_size)
            # One array per tweet, built from the word vector of each word in
            # the tweet; the list holds one entry per tweet in the batch.
            batch_X = [
                np.array([wv[w] for w in tweet.words]) for tweet in batch
            ]
            # Binary target: 1 when the tweet is labelled "bull", else 0.
            batch_y = [int(tweet.label == "bull") for tweet in batch]
            # train() returns two values; both are discarded here.
            _, _ = rnn.train(batch_X, batch_y, sess)

        ## accuracy check, train, valid, test sets
        train_X = [
            np.array([wv[w] for w in tweet.words]) for tweet in data.train
Ejemplo n.º 2
0
    # Word-to-index mapping from the dataset; its length is used as the
    # vocabulary size of the embedding layer below.
    w2i = data.word2index()
    # Split the data into train/test partitions (balance=True presumably
    # equalises the label classes -- TODO confirm against the data class).
    data.cut_train_and_test(balance=True)

    ## rnn
    # Training hyperparameters.
    batch_size = 128
    # Number of batches drawn per epoch, as reported by the dataset for this
    # batch size.
    num_batch_per_epoch = data.get_num_of_batch(batch_size)
    hidden_size = 100
    num_epoch = 100
    lr = 0.002
    embedding_size = 100

    ## embedding
    # Here the graph is built with embedding=True and fed word indexes,
    # rather than being fed precomputed word vectors.

    rnn = RNN("gru", hidden_size, embedding_size, lr=lr)
    # NOTE(review): the literal embedding_size=100 duplicates the
    # `embedding_size` variable set above; the two must be kept in sync by
    # hand.
    rnn.build_graph(embedding=True, vocab_size=len(w2i), embedding_size=100)

    # TensorFlow 1.x session API. NOTE(review): the session is not closed
    # anywhere in the visible code.
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(num_epoch):
        for _ in range(num_batch_per_epoch):
            batch = data.get_train_batch(batch_size)
            # One array of word indexes per tweet in the batch.
            batch_X = [np.array(tweet.word_indexes) for tweet in batch]
            # Binary target: 1 when the tweet is labelled "bull", else 0.
            batch_y = [int(tweet.label=="bull") for tweet in batch]
            # train() returns two values; both are discarded here.
            _, _= rnn.train(batch_X, batch_y, sess, embedding=True)

        ## accuracy check, train, valid, test sets
        # Only the train-set check is visible in this excerpt. It runs once
        # per epoch over all of data.train in a single call.
        train_X = [np.array(tweet.word_indexes) for tweet in data.train]
        train_y = [int(tweet.label == "bull") for tweet in data.train]
        # cal_accuracy() returns two values (presumably an entropy/loss value
        # and the accuracy, going by the names -- confirm).
        train_ent, train_acc = rnn.cal_accuracy(train_X, train_y, sess, embedding=True)