Esempio n. 1
0
    # Build word- and character-level vocabularies from the dataset and log
    # their sizes before and after dropping rare entries.
    logger = logging.getLogger("brc")
    logger.setLevel(logging.INFO)
    brc_data = DatasetReader(
        test_file=args.input,
        bert_dir='/home/wujindou/chinese_L-12_H-768_A-12',
        prefix='bert_meizhuang',
    )
    # Function-scope imports are kept exactly where the original placed them.
    from data.vocab import Vocab
    vocab = Vocab(lower=True)
    import sys

    # Register every word yielded by the reader, plus each of its characters.
    for word in brc_data.word_iter(None):
        vocab.add(word)
        for char in word:
            vocab.add_char(char)
    logger.info(' char size {}'.format(vocab.get_char_vocab_size()))
    # NOTE(review): the label says "vocab size" but the value logged is
    # whatever get_word_vocab() returns; confirm whether vocab.size() was
    # intended here.
    logger.info(' vocab size {} '.format(vocab.get_word_vocab()))

    # Remember the sizes before filtering so the number of removed entries
    # can be reported afterwards.
    unfiltered_vocab_size = vocab.size()
    unfiltered_char_size = vocab.get_char_vocab_size()
    vocab.filter_tokens_by_cnt(min_cnt=2)
    vocab.filter_chars_by_cnt(min_cnt=2)

    filtered_token_num = unfiltered_vocab_size - vocab.size()
    logger.info('After filter {} tokens, the final vocab size is {}'.format(
        filtered_token_num, vocab.size()))

    # Report character statistics with their own wording; the original reused
    # the token message, which mislabelled these numbers.
    filtered_char_num = unfiltered_char_size - vocab.get_char_vocab_size()
    logger.info(
        'After filter {} chars, the final char vocab size is {}'.format(
            filtered_char_num, vocab.get_char_vocab_size()))

    # NOTE(review): no embedding load is visible in this fragment; the message
    # is kept as-is. Confirm it still matches what the function does.
    logger.info('after load embedding vocab size is {}'.format(vocab.size()))
Esempio n. 2
0
# Service bootstrap: create the Flask app and load the vocabulary.
# Statement order matters here (the path tweak must precede project imports),
# so the original ordering is preserved.
# `Flask` is imported explicitly because `Flask(__name__)` is called below.
from flask import Flask, render_template

from tensorflow.python.keras.backend import set_session
import requests
import sys
# Presumably added so the project-local `data` and `model` packages in the
# parent directory can be imported below; verify against the repo layout.
sys.path.append('../')
import os
app = Flask(__name__)
from data.vocab import Vocab
# NOTE(review): a single-space value looks intended to hide all GPUs from the
# process; confirm that this is the desired device setting.
os.environ["CUDA_VISIBLE_DEVICES"] = " "
vocab_file = '../examples/politic_vocab5.txt'
vocab = Vocab(lower=True)
from data.data_reader_new import DatasetReader
from model.text_cnn import TextCNN
# Load the saved vocabulary only when the file exists; otherwise `vocab` keeps
# whatever state `Vocab(lower=True)` starts with.
if os.path.exists(vocab_file):
    vocab.load_from_file(vocab_file)
print(vocab.get_word_vocab())


@app.route('/')
def search_index():
    """Handle requests to the root URL by rendering ``index.html``."""
    page = render_template('index.html')
    return page


# Build the two-class TextCNN on top of the loaded vocabulary, passing the
# vocabulary's `embeddings` attribute as the pretrained word embedding.
model = TextCNN(
    vocab,
    num_class=2,
    pretrained_word_embedding=vocab.embeddings,
    word_embedding_size=300,
)
# Load the saved weights from the hard-coded checkpoint path.
model.load(
    "/search/odin/jdwu/classification/cls_checkpoints/politic/best_weights"
)