for x_batch, y_batch in batch_val:
        real_seq_len = seq_length(x_batch)
        feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, 1.0)
        pre_lab = session.run(model.predict, feed_dict=feed_dict)
        pre_label.extend(pre_lab)
    return pre_label


if __name__ == '__main__':
    # Script entry point: fill in the vocabulary-dependent fields of ``pm``,
    # build the model, collect predicted labels from ``val()``, and make sure
    # the output directory is in a clean state before results are written.

    start = time.time()
    # NOTE(review): self-assignment. At module scope this rebinds ``pm`` to
    # itself and changes nothing (it raises NameError if ``pm`` is not already
    # defined). Presumably a leftover from an import alias -- confirm.
    pm = pm
    categories, cat_to_id = read_category()
    wordid = get_wordid(pm.vocab_filename)
    # Vocabulary size is taken from the number of entries returned above.
    pm.vocab_size = len(wordid)
    # Presumably the pre-trained word vectors loaded from the .npz file --
    # TODO confirm against get_word2vec.
    pm.pre_training = get_word2vec(pm.vector_word_npz)

    # ``val()`` takes no arguments, so it presumably reads ``model`` (and the
    # session/data it needs) from module scope -- verify against its definition.
    model = Lstm_CNN()
    pre_label = val()
    # Pair each predicted label with one element of ``pm.contents_finally``
    # flattened by one level. ``zip`` stops at the shorter of the two inputs.
    contents_list = list(zip(pre_label, [num for item in pm.contents_finally for num in item]))

    # Create the output files under the current directory.
    # ``i`` and ``j`` start at 1; their use lies beyond the visible code.
    i, j = 1, 1
    finally_data_dir = './data/Finally_Data'
    # Delete output files left over from a previous run. No_Value.txt is opened
    # in append mode right after this block, so a stale file would otherwise
    # accumulate old results.
    if os.path.exists(os.path.join(finally_data_dir, 'No_Value.txt')):
        os.remove(os.path.join(finally_data_dir, 'No_Value.txt'))
    if os.path.exists(os.path.join(finally_data_dir, 'Value.txt')):
        os.remove(os.path.join(finally_data_dir, 'Value.txt'))
    # Make sure the output directory exists before any file is opened in it.
    if not os.path.exists(finally_data_dir):
        os.makedirs(finally_data_dir)
    with open(os.path.join(finally_data_dir, 'No_Value.txt'), "a", encoding='utf-8') as f1:
예제 #2
0
파일: Dpcnn.py 프로젝트: NLPxiaoxu/Easy_NLP
# Training hyperparameters.
num_epochs = 5  # number of epochs
batch_size = 64  # number of samples per batch

keep_p = 0.5  # keep probability (keep_prob); presumably for dropout -- confirm at use site

# Dataset and vocabulary/embedding file locations (relative to the working directory).
train_filename = './data/cnews.train.txt'  # training data
test_filename = './data/cnews.test.txt'  # test data
val_filename = './data/cnews.val.txt'  # validation data
vocab_filename = './data/vocab_word.txt'  # vocabulary file
vector_word_filename = './data/vector_word.txt'  # word vectors trained by word2vec
vector_word_npz = './data/vector_word.npz'  # word vectors saved as a numpy .npz file

# These calls are plain module-level statements, so they run as soon as the
# module is executed and their results are shared as module-level names.
categories, cat_to_id = read_category()
wordid = get_wordid(vocab_filename)
# Presumably the embedding matrix loaded from the .npz file -- TODO confirm
# against get_word2vec.
embeddings = get_word2vec(vector_word_npz)

############################################################


class data_loader():
    def __init__(self):

        self.train_x, self.train_y = process(train_filename,
                                             wordid,
                                             cat_to_id,
                                             max_length=300)
        self.test_x, self.test_y = process(test_filename,
                                           wordid,
                                           cat_to_id,
                                           max_length=300)