예제 #1
0
class Test:
    """Print-based smoke checks for the vocab, batching, loader and network.

    Construction builds a ``Vocab`` from a fixed word list and has a
    ``DataLoader`` load ``'20.txt'`` with ``batch_size=2``. The ``test_*``
    methods print what the project objects produce; none of them asserts.
    """

    def __init__(self):
        words = ["a", "b", "c", "d", "z", "y", "x", "w"]
        vocab = Vocab()
        vocab.add_wordlist(words)
        loader = DataLoader()
        loader.load_all('20.txt', batch_size=2)

        self.word_list = words
        self.vocab = vocab
        self.dl = loader

    def _make_net(self):
        """Build a ``PIXNETNET`` on the loader's vocab with every size at 50."""
        return PIXNETNET(
            self.dl.vocab,
            embedding_dim=50,
            title_gru_units=50,
            body_gru_units=50,
            response_gru_units=50,
        )

    def test_sentences(self):
        """Print vocab, encodings, decodings, sentences and their tensors.

        Returns:
            The list of ``Sentence`` objects built from three sample texts.
        """
        print('test sentences')
        vocab = self.vocab
        texts = ["a x d z w w a", "a b c d w x y z", "w w x x a a"]
        # All sentences are built before any tensor is requested.
        sentences = [Sentence(raw, vocab) for raw in texts]
        tensors = [item.to_tensor() for item in sentences]

        print('vocab')
        print(vocab.w2id)

        print('encode')
        tokens = [vocab.encode(raw) for raw in texts]
        print(tokens)

        print('decode')
        decoded = [vocab.decode(ids) for ids in tokens]
        print(decoded)

        print('sentences', sentences, 'tensors', tensors, sep='\n')
        return sentences

    def test_batch_sentence(self):
        """Print two consecutive batches from a ``BatchSentence`` built with 3."""
        print('test batch sentnece')
        batch = BatchSentence(self.test_sentences(), 3)
        print('Batch')
        for _ in range(2):
            print(batch.next_batch())

    def test_batch_labels(self):
        """Print two consecutive batches from a ``BatchLabel`` built with 3."""
        print('test batch labels')
        batch = BatchLabel([1, 0, 1, 1, 0], 3)
        for _ in range(2):
            print(batch.next_batch())

    def test_load_data_from_file(self, path):
        """Placeholder: does nothing and returns ``None``; ``path`` is unused.

        The disabled sketch below is kept from the original for reference.
        """
        # print('test load data from file')
        # dl = DataLoader()
        # datas, vocab = dl.load(path)
        # print('vocab')
        # print(vocab)
        # print('num data')
        # print(len(datas))
        # print(datas[0:2])
        return None

    def test_load_batch_from_file(self):
        """Print the loader's vocab and the batches it prepared.

        Labels and titles are each iterated twice under separate headings;
        body and comment are each advanced once with ``next``.
        """
        # The columns are not used afterwards; the unpacking raises unless
        # transposing ``preprocessed_datas`` yields exactly four of them.
        columns = tuple(zip(*self.dl.preprocessed_datas))
        _labels, _titles, _bodys, _comments = columns

        vocab = self.dl.vocab
        batch_labels, batch_title, batch_body, batch_comment = self.dl.batch_data

        print('vocab')
        print(vocab.w2id)
        print('batchs')
        print('- ' * 30)

        twice_iterated = (
            ('labels', batch_labels, ('b', 'bb')),
            ('titles', batch_title, ('t', 'tt')),
        )
        for heading, batches, tags in twice_iterated:
            print(heading)
            for tag in tags:
                print(tag)
                for item in batches:
                    print(item)

        print('body')
        print(next(batch_body))
        print('comment')
        print(next(batch_comment))

    def test_build_net(self):
        """Construct the network once; the instance is discarded."""
        self._make_net()

    def test_forward(self):
        """Run one forward pass on the next title/body/comment batches and print it."""
        net = self._make_net()
        _, titles, bodies, comments = self.dl.batch_data
        prediction = net.forward(
            titles.next_batch(),
            bodies.next_batch(),
            comments.next_batch(),
        )
        print(prediction)

    def test_train(self):
        """Hand the network to a ``Trainer`` with an Adam optimizer.

        The same inputs and labels are passed in both X/y positions of
        ``Trainer.train`` (presumably train and evaluation data; confirm
        against ``Trainer``).
        """
        save_root_dir = './tmp'
        print('test train')
        net = self._make_net()
        optimizer = optim.Adam(net.parameters())

        labels, titles, bodies, comments = self.dl.batch_data
        inputs = BatchX(titles, bodies, comments)

        trainer = Trainer(net, optimizer)
        trainer.train(
            10,
            inputs,
            labels,
            inputs,
            labels,
            save_every=10,
            save_dir='%s/model' % (save_root_dir),
            evaluate_every=10,
        )
예제 #2
0
import numpy as np

# Restore the model stored under ``weight/``; the file name comes from
# ``args.model``.
model = load_model(os.path.join('weight', args.model))

# Text and emoji vocabularies.
# NOTE(review): the original trailing comment on Vocab read "100k" although
# 20000 is what is passed; confirm which is intended.
vocab = Vocab(20000)
emoji_vocab = EmojiVocab(40)
# corpus = Corpus(vocab, emoji_vocab, debug=True)
print(emoji_vocab.w2i.keys())
# punc_dict = set(['、', '。', '「', '」', '・', ')', '(', ',', '?', '!', '…', '〜', '.', '‐', '『', '』', '―', ':', '“', '”'])

# Sample text, split on whitespace and encoded one token at a time.
original = (
    "<eos> on behalf of smartnet. we are very sorry for the inconvenience this causes you. i can assure you "
    "that we are analyzing how this issue occurred and ways to completely prevent recurrence in the future. "
    "thank you for your patience <eos> "
)
# NOTE(review): ``input`` shadows the builtin; the name is kept because code
# outside this span may read it.
input = original.split()
encoded_input = list(map(vocab.encode, input))
print(encoded_input)

# The encoded tokens are fed to the model as a single row.
pred = model.predict(np.array(encoded_input).reshape(1, -1))

# For each position along axis 1 of ``pred``, keep the last five argsort
# indices in reverse order (with NumPy arrays: the five highest-scoring
# indices, best first).
y = [pred[0][i].argsort()[-5:][::-1] for i in range(pred.shape[1])]

# Rebuild the text token by token. For every input position, print the emoji
# decoding of the first entry of y[i] and append a single space to the output
# when that decoding is "<blank>".
# NOTE(review): indexes y by input position, so this assumes y has at least
# len(encoded_input) entries; confirm against the model's output shape.
decoded = []
for i in range(len(encoded_input)):
    decoded.append(vocab.decode(encoded_input[i]))
    # Disabled alternative: append the decodings of every candidate in y[i].
    # decoded += [emoji_vocab.decode(x) for x in y[i]]
    # y[i][0] is the first element of the reversed argsort slice built above
    # (presumably the top-scoring emoji index; confirm).
    print(emoji_vocab.decode(y[i][0]))
    if emoji_vocab.decode(y[i][0]) == "<blank>":
        decoded += [" "]