Example #1
File: train.py Project: jarfo/kchar
import os
import pickle
from math import exp

# BatchLoaderUnk (with its Train/Validation/Test split ids), LSTMCNN and
# load_model are provided by the kchar project's own modules.

def main(opt):
    loader = BatchLoaderUnk(opt.tokens, opt.data_dir, opt.batch_size, opt.seq_length, opt.max_word_l, opt.n_words, opt.n_chars)
    opt.word_vocab_size = min(opt.n_words, len(loader.idx2word))
    opt.char_vocab_size = min(opt.n_chars, len(loader.idx2char))
    opt.max_word_l = loader.max_word_l
    print('Word vocab size:', opt.word_vocab_size,
        ', Char vocab size:', opt.char_vocab_size,
        ', Max word length (incl. padding):', opt.max_word_l)

    # define the model
    if not opt.skip_train:
        print('creating an LSTM-CNN with', opt.num_layers, 'layers')
        model = LSTMCNN(opt)
        # make sure the output directory exists
        if not os.path.exists(opt.checkpoint_dir):
            os.makedirs(opt.checkpoint_dir)
        with open('{}/{}.pkl'.format(opt.checkpoint_dir, opt.savefile), 'wb') as f:
            pickle.dump(opt, f)
        model.save('{}/{}.json'.format(opt.checkpoint_dir, opt.savefile))
        model.fit_generator(loader.next_batch(Train), loader.split_sizes[Train], opt.max_epochs,
                            loader.next_batch(Validation), loader.split_sizes[Validation], opt)
        model.save_weights('{}/{}.h5'.format(opt.checkpoint_dir, opt.savefile), overwrite=True)
    else:
        model = load_model('{}/{}.json'.format(opt.checkpoint_dir, opt.savefile))
        model.load_weights('{}/{}.h5'.format(opt.checkpoint_dir, opt.savefile))
        print(model.summary())

    # evaluate on full test set.
    test_perp = model.evaluate_generator(loader.next_batch(Test), loader.split_sizes[Test])
    print('Perplexity on test set:', exp(test_perp))
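
For context, here is a minimal sketch of driving main() by hand. The option object and every value on it are assumptions inferred from the attributes the function reads; the real project builds opt from command-line flags instead.

from types import SimpleNamespace

# Hypothetical options: the field names mirror the attributes main()
# touches, the values are illustrative only.
opt = SimpleNamespace(
    tokens='tokens.txt', data_dir='data/ptb', batch_size=20, seq_length=35,
    max_word_l=65, n_words=30000, n_chars=100, num_layers=2,
    skip_train=False, checkpoint_dir='cv', savefile='char-lstm',
    max_epochs=25)
main(opt)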
Example #2
import pickle
import numpy as np

# LSTMCNN, LSTMCNN_print and Vocabulary come from the kchar project's modules.

class evaluator:
    def __init__(self, name, vocabulary, init, extract, layer):
        self.opt = pickle.load(open('{}.pkl'.format(name), "rb"))
        self.opt.batch_size = 1
        self.opt.seq_length = 1
        self.reader = Vocabulary(self.opt.tokens, vocabulary, max_word_l=self.opt.max_word_l)
        if extract:
            self.model = LSTMCNN_print(self.opt, extract, layer)
            print(self.model.summary())
            self.model.load_weights('{}.h5'.format(name), by_name=True)
        else:
            self.model = LSTMCNN(self.opt)
            self.model.load_weights('{}.h5'.format(name))
            print(self.model.summary())
        if init:
            self.state_mean = np.load(init)
        else:
            self.state_mean = None

    def logprob(self, line):
        x, y = self.reader.get_input(line)
        nwords = len(y)
        if self.state_mean is not None:
            self.model.set_states_value(self.state_mean)
        return self.model.evaluate(x, y, batch_size=1, verbose=0), nwords

    def get_embedding(self, line):
        x, y = self.reader.get_input(line)
        if self.state_mean is not None:
            self.model.set_states_value(self.state_mean)
        return self.model.predict(x, batch_size=1, verbose=0)
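
A hedged usage sketch for the extraction path: wrap a saved checkpoint and pull one layer's activations for a line of text. The checkpoint name, vocabulary file and layer name below are all placeholder assumptions.

# All paths and the layer name are hypothetical.
ev = evaluator('cv/char-lstm', 'vocab.txt', init=None,
               extract=True, layer='lstm_1')
emb = ev.get_embedding('the cat sat on the mat')
print(emb.shape)  # activations returned by model.predict for the line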
Example #3
import pickle
import numpy as np

# LSTMCNN and Vocabulary come from the kchar project's modules.

class evaluator:
    def __init__(self, name, vocabulary, init):
        self.opt = pickle.load(open('{}.pkl'.format(name), "rb"))
        self.opt.batch_size = 1
        self.opt.seq_length = 1
        self.reader = Vocabulary(self.opt.tokens, vocabulary, max_word_l=self.opt.max_word_l)
        self.model = LSTMCNN(self.opt)
        self.model.load_weights('{}.h5'.format(name))
        if init:
            self.state_mean = np.load(init)
        else:
            self.state_mean = None

    def logprob(self, line):
        x, y = self.reader.get_input(line)
        nwords = len(y)
        if self.state_mean is not None:
            self.model.set_states_value(self.state_mean)
        return self.model.evaluate(x, y, batch_size=1, verbose=0), nwords
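
Since logprob() returns the model's mean loss over a line, a perplexity figure can be recovered the same way the training script does for the test set. A minimal sketch, assuming the loss is the average cross-entropy in nats and using placeholder paths:

from math import exp

ev = evaluator('cv/char-lstm', 'vocab.txt', init=None)  # placeholder paths
loss, nwords = ev.logprob('the cat sat on the mat')
print('words:', nwords, 'perplexity:', exp(loss))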
Example #4
import pickle
import numpy as np

# LSTMCNN and Vocabulary come from the kchar project's modules.

class Evaluator:
    def __init__(self, name, vocabulary, init, k):
        self.opt = pickle.load(open('{}.pkl'.format(name), "rb"))
        self.opt.seq_length = 1
        # round the beam width k up to the next power of two, so the batch
        # holds one row per active hypothesis
        self.logk = 0
        while (1 << self.logk) < k:
            self.logk += 1
        self.opt.batch_size = 1 << self.logk
        self.reader = Vocabulary(self.opt.tokens,
                                 vocabulary,
                                 max_word_l=self.opt.max_word_l)
        self.model = LSTMCNN(self.opt)
        self.model.load_weights('{}.h5'.format(name))

        self.hyp_score = np.zeros(k, dtype='float32')
        self.hyp_samples = np.empty((k, 0), dtype='int32')
        self.hyp_prob = np.empty((k, 0), dtype='float32')
        if init:
            self.state_mean = np.load(init)
        else:
            self.state_mean = None

    def clear(self, reset):
        self.hyp_score.fill(0)
        self.hyp_samples.resize((self.opt.batch_size, 0))
        self.hyp_prob.resize((self.opt.batch_size, 0))
        if reset:
            self.model.reset_states()
            if self.state_mean is not None:
                self.model.set_states_value(self.state_mean)

    @property
    def delay(self):
        return self.opt.delay

    def gen_sample(self, word, prb=None):
        x = self.reader.get_input(word, self.opt.batch_size, prb)

        # previous speaker id of each hypothesis; on the first step, seed
        # the batch with alternating labels
        ncol = self.hyp_samples.shape[1]
        if ncol > 0:
            spk = self.hyp_samples[:, -1].astype('float32')
        else:
            spk = np.zeros(self.opt.batch_size, dtype='float32')
            spk[1::2] = 1
        x['spk'] = spk[:, np.newaxis]

        # spk prediction
        y = self.model.predict(x, batch_size=self.opt.batch_size)[:, 0, 0]

        # sort new scores
        if ncol < self.logk - 1:
            # still filling the beam: enumerate both speaker labels by
            # alternating them in blocks across the batch rows
            spk_indices = (np.arange(self.opt.batch_size) % (1 << (ncol + 2))
                           >= (1 << (ncol + 1)))
            spk_prob = y.copy()
            spk_prob[spk_indices == 0] = 1 - y[spk_indices == 0]
            # update states
            self.hyp_samples = np.append(self.hyp_samples,
                                         spk_indices[:, np.newaxis],
                                         axis=1)
            self.hyp_prob = np.append(self.hyp_prob,
                                      spk_prob[:, np.newaxis],
                                      axis=1)
            self.hyp_score += -np.log(spk_prob)
        else:
            # beam is full: score both speaker labels for every hypothesis
            # and keep the opt.batch_size best candidates
            spk_prob = np.concatenate((1 - y, y))
            cand_score = np.concatenate(
                (self.hyp_score, self.hyp_score)) - np.log(spk_prob)
            ranks = cand_score.argsort()[:self.opt.batch_size]
            batch_indices = ranks % self.opt.batch_size
            spk_indices = ranks // self.opt.batch_size  # integer division: 0 or 1
            # update states
            self.model.reindex_states(batch_indices)
            self.hyp_samples = np.append(self.hyp_samples[batch_indices],
                                         spk_indices[:, np.newaxis],
                                         axis=1)
            self.hyp_prob = np.append(self.hyp_prob[batch_indices],
                                      spk_prob[ranks, np.newaxis],
                                      axis=1)
            self.hyp_score = cand_score[ranks]

    def get_prob(self, word, prb):
        assert (self.opt.batch_size == 1)
        x = self.reader.get_input(word, self.opt.batch_size, prb)
        y = self.model.predict(x, batch_size=self.opt.batch_size)[0, 0, 0]
        self.hyp_samples = np.append(self.hyp_samples, [[y > 0.5]], axis=1)
        self.hyp_prob = np.append(self.hyp_prob, [[y]], axis=1)
        self.hyp_score += -np.log(np.maximum([y], [1 - y]))
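
Putting the pieces together, a sketch of one decoding pass: feed the words of an utterance through gen_sample(), then read off the speaker-label sequence of the best-scoring hypothesis. The checkpoint path, vocabulary file and beam width are assumptions.

ev = Evaluator('cv/char-lstm', 'vocab.txt', init=None, k=8)  # placeholder paths
ev.clear(reset=True)
for word in 'hello how are you today'.split():
    ev.gen_sample(word)
best = ev.hyp_score.argmin()   # lowest cumulative negative log-probability
print(ev.hyp_samples[best])    # one speaker label per word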