Example #1
    def read_lstm_model(self, params, train):

        assert train == False  # reading a model to continue training is currently not supported

        words_file = params['config_path'] + params['words_file']
        model_file = params['config_path'] + params['model_file']
        unit = int(params['unit'])
        deep = (params['deep'] == 'yes')
        drop_ratio = float(params['drop_ratio'])

        # read and normalize target word embeddings
        w, word2index, index2word = self.read_words(words_file)
        s = numpy.sqrt((w * w).sum(1))
        s[s == 0.] = 1.
        w /= s.reshape((s.shape[0], 1))  # normalize

        context_word_units = unit
        lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * unit
        target_word_units = IN_TO_OUT_UNITS_RATIO * unit

        cs = [1 for _ in range(len(word2index))]  # dummy word counts - not used for eval
        loss_func = L.NegativeSampling(
            target_word_units, cs, NEGATIVE_SAMPLING_NUM)  # dummy loss func - not used for eval

        model = BiLstmContext(deep, self.gpu, word2index, context_word_units,
                              lstm_hidden_units, target_word_units, loss_func,
                              train, drop_ratio)
        S.load_npz(model_file, model)

        return w, word2index, index2word, model
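
Because the rows of the returned matrix `w` are normalized to unit length, cosine similarity between target words reduces to a plain dot product. A minimal usage sketch built on the returned values (the `most_similar` helper is illustrative, not part of the example):

import numpy

def most_similar(word, w, word2index, index2word, k=5):
    # rows of w are unit length, so w.dot(v) gives cosine similarities to `word`
    scores = w.dot(w[word2index[word]])
    ranked = numpy.argsort(-scores)
    return [(index2word[i], float(scores[i])) for i in ranked if index2word[i] != word][:k]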
Example #2
# if args.gpu >= 0:
#     cuda.check_cuda_available()
#     cuda.get_device(args.gpu).use()
# xp = cuda.cupy if args.gpu >= 0 else np
xp = np
    
reader = SentenceReaderDir(args.indir, args.trimfreq, args.batchsize)
print('n_vocab: %d' % (len(reader.word2index)-3)) # excluding the three special tokens
print('corpus size: %d' % (reader.total_words))

cs = [reader.trimmed_word2count[w] for w in range(len(reader.trimmed_word2count))]
loss_func = L.NegativeSampling(target_word_units, cs, NEGATIVE_SAMPLING_NUM, args.ns_power)

if args.context == 'lstm':
    model = BiLstmContext(args.deep, args.gpu, reader.word2index, context_word_units, lstm_hidden_units, target_word_units, loss_func, True, args.dropout)
else:
    raise Exception('Unknown context type: {}'.format(args.context))

optimizer = O.Adam(alpha=args.alpha)
optimizer.setup(model)

if args.grad_clip:
    optimizer.add_hook(GradientClipping(args.grad_clip))

STATUS_INTERVAL = 1000000

for epoch in range(args.epoch):
    begin_time = time.time()
    cur_at = begin_time
    word_count = 0
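
In this snippet `cs` holds the per-word corpus counts and is handed to Chainer's `L.NegativeSampling` together with `args.ns_power`; negatives are then drawn from the count distribution raised to that power (word2vec-style smoothing). A small illustrative sketch of that distribution, not part of the original code:

import numpy as np

counts = np.asarray(cs, dtype=np.float64)
p = counts ** args.ns_power  # smoothed unigram counts
p /= p.sum()                 # probability of each word being drawn as a negative sample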
Example #3
target_word_units = IN_TO_OUT_UNITS_RATIO*args.unit

if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
xp = cuda.cupy if args.gpu >= 0 else np
    
reader = SentenceReaderDir(args.indir, args.trimfreq, args.batchsize)
print('n_vocab: %d' % (len(reader.word2index)-3)) # excluding the three special tokens
print('corpus size: %d' % (reader.total_words))

cs = [reader.trimmed_word2count[w] for w in range(len(reader.trimmed_word2count))]
loss_func = L.NegativeSampling(target_word_units, cs, NEGATIVE_SAMPLING_NUM, args.ns_power)

if args.context == 'lstm':
    model = BiLstmContext(args.deep, args.gpu, reader.word2index, context_word_units, lstm_hidden_units, target_word_units, loss_func, True, args.dropout)
else:
    raise Exception('Unknown context type: {}'.format(args.context))

optimizer = O.Adam()
optimizer.setup(model)

STATUS_INTERVAL = 1000000

for epoch in range(args.epoch):
    begin_time = time.time()
    cur_at = begin_time
    word_count = 0
    next_count = STATUS_INTERVAL
    accum_loss = 0.0
    last_accum_loss = 0.0
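
Both training snippets assume an argparse namespace plus module-level constants (IN_TO_OUT_UNITS_RATIO, NEGATIVE_SAMPLING_NUM, and the context/hidden unit sizes) defined elsewhere in the script. A rough sketch of the flags they rely on, with names inferred from the attribute accesses above and purely illustrative types and defaults:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--indir', type=str)                   # corpus directory for SentenceReaderDir
parser.add_argument('--trimfreq', type=int)                # minimum word frequency kept by the reader
parser.add_argument('--batchsize', type=int)
parser.add_argument('--unit', type=int)                    # context word units; hidden/target units scale by IN_TO_OUT_UNITS_RATIO
parser.add_argument('--deep', type=str)                    # passed straight through to BiLstmContext
parser.add_argument('--dropout', type=float)
parser.add_argument('--context', type=str, default='lstm')
parser.add_argument('--ns_power', type=float, default=0.75)
parser.add_argument('--epoch', type=int)
parser.add_argument('--gpu', type=int, default=-1)
parser.add_argument('--alpha', type=float)                 # Adam learning rate (Example #2 only)
parser.add_argument('--grad_clip', type=float)             # gradient clipping threshold (Example #2 only)
args = parser.parse_args()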