Example #1
0
    def __init__(self):
        """Load the trained TextCNN model and prepare a TF session for inference.

        Reads the category list and the vocabulary from `vocab_dir`, builds the
        model graph, then restores the trained weights from `save_path`.
        NOTE(review): `vocab_dir` and `save_path` are module-level globals —
        presumably defined at file top; verify against the full file.
        """
        self.config = TCNNConfig()
        self.categories, self.cat_to_id = read_category()
        self.words, self.word_to_id = read_vocab(vocab_dir)
        # The configured vocab size may differ from the file's actual size;
        # use the real size so the embedding matrix matches the vocabulary.
        self.config.vocab_size = len(self.words)
        self.model = TextCNN(self.config)

        self.session = tf.Session()
        # Initialize variables first, then overwrite them with the checkpoint.
        self.session.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess=self.session, save_path=save_path)  # restore the saved model
Example #2
0
    print(
        metrics.classification_report(y_test_cls,
                                      y_pred_cls,
                                      target_names=categories))

    # Confusion matrix
    print('Confusion Matrix...')
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print('Time usage:', time_dif)


if __name__ == '__main__':
    # Validate CLI usage up front: sys.argv[1] is read below, so running the
    # script without an argument would otherwise crash with a bare IndexError.
    # (This check exists elsewhere in the project for run_cnn.py; restore it
    # here for consistency and a clear error message.)
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError('usage: python run_rnn.py [train / test]')

    print('Configuring RNN model...')
    config = TRNNConfig()
    if not os.path.exists(vocab_dir):
        # Build the vocabulary file on first run.
        build_vocab(train_dir, vocab_dir, config.vocab_size)
    categories, cat_to_id = read_category()
    words, word_to_id = read_vocab(vocab_dir)
    # The vocabulary file may hold fewer words than the configured cap;
    # use the actual count so the embedding matrix matches.
    config.vocab_size = len(words)
    model = TextRNN(config)

    if sys.argv[1] == 'train':
        train()
    else:
        test()
Example #3
0
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)


if __name__ == '__main__':
    # Entry point for the character+word dual-input CNN runner.
    # Validate CLI usage before touching sys.argv[1].
    if len(sys.argv) != 2 or sys.argv[1] not in ['train', 'test']:
        raise ValueError("""usage: python run_cnn.py [train / test]""")

    print('Configuring CNN model...')
    config = TCNNConfig()
    if not os.path.exists(vocab_dir_c):  # rebuild the character vocab if it does not exist
        data_loader.build_vocab(train_dir, vocab_dir_c, config.vocab_size_c)
    if not os.path.exists(vocab_dir_w):  # rebuild the word vocab if it does not exist
        data_loader_wordlevel.build_vocab(train_dir, vocab_dir_w,
                                          config.vocab_size_w)
    categories, cat_to_id = data_loader.read_category()
    characters, character_to_id = data_loader.read_vocab(vocab_dir_c)
    words, word_to_id = data_loader_wordlevel.read_vocab(vocab_dir_w)
    # Use the actual vocabulary sizes so the embedding matrices match the files.
    config.vocab_size_c = len(characters)
    config.vocab_size_w = len(words)
    # max_train = data_loader.get_maxlength(train_dir)
    # max_val = data_loader.get_maxlength(val_dir)
    # # use the largest sequence length found across all splits
    # temp_val = max(max_train, max_val)
    # # if any split exceeds 1014, just cap the length at 1014
    # print("最长长度: %i" % temp_val)
    # # config.seq_length = min(temp_val, 1500)
    # Hard-coded sequence lengths for character- and word-level inputs;
    # the commented-out code above derived them from the data instead.
    config.seq_length_c = 1500
    config.seq_length_w = 800
    # config.seq_length_c = 1014
    # config.seq_length_w = 1014