def read_lstm_model(self, params, train):
    assert train == False  # reading a model to continue training is currently not supported
    words_file = params['config_path'] + params['words_file']
    model_file = params['config_path'] + params['model_file']
    unit = int(params['unit'])
    deep = (params['deep'] == 'yes')
    drop_ratio = float(params['drop_ratio'])

    # read and normalize target word embeddings
    w, word2index, index2word = self.read_words(words_file)
    s = numpy.sqrt((w * w).sum(1))
    s[s == 0.] = 1.
    w /= s.reshape((s.shape[0], 1))  # normalize each embedding to unit L2 norm

    context_word_units = unit
    lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * unit
    target_word_units = IN_TO_OUT_UNITS_RATIO * unit

    cs = [1 for _ in range(len(word2index))]  # dummy word counts - not used for eval
    loss_func = L.NegativeSampling(target_word_units, cs, NEGATIVE_SAMPLING_NUM)  # dummy loss func - not used for eval

    model = BiLstmContext(deep, self.gpu, word2index, context_word_units, lstm_hidden_units,
                          target_word_units, loss_func, train, drop_ratio)
    S.load_npz(model_file, model)

    return w, word2index, index2word, model
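# Hedged usage sketch (not from the original source): how read_lstm_model might
# be called from an evaluation script. The params keys mirror the lookups above;
# the file names, values, and the 'reader' receiver are illustrative assumptions.
#
#     params = {
#         'config_path': 'models/context2vec/',  # hypothetical path
#         'words_file':  'model.words',          # hypothetical file name
#         'model_file':  'model.npz',            # hypothetical file name
#         'unit':        '300',
#         'deep':        'yes',
#         'drop_ratio':  '0.0',
#     }
#     w, word2index, index2word, model = reader.read_lstm_model(params, train=False)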
# layer sizes derived from --unit (cf. read_lstm_model above)
context_word_units = args.unit
lstm_hidden_units = IN_TO_OUT_UNITS_RATIO * args.unit
target_word_units = IN_TO_OUT_UNITS_RATIO * args.unit

if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
xp = cuda.cupy if args.gpu >= 0 else np

reader = SentenceReaderDir(args.indir, args.trimfreq, args.batchsize)
print('n_vocab: %d' % (len(reader.word2index) - 3))  # excluding the three special tokens
print('corpus size: %d' % reader.total_words)

# unigram counts of the trimmed vocabulary, used for the negative-sampling distribution
cs = [reader.trimmed_word2count[w] for w in range(len(reader.trimmed_word2count))]
loss_func = L.NegativeSampling(target_word_units, cs, NEGATIVE_SAMPLING_NUM, args.ns_power)

if args.context == 'lstm':
    model = BiLstmContext(args.deep, args.gpu, reader.word2index, context_word_units,
                          lstm_hidden_units, target_word_units, loss_func, True, args.dropout)
else:
    raise Exception('Unknown context type: {}'.format(args.context))

optimizer = O.Adam(alpha=args.alpha)
optimizer.setup(model)
if args.grad_clip:
    optimizer.add_hook(GradientClipping(args.grad_clip))

STATUS_INTERVAL = 1000000

for epoch in range(args.epoch):
    begin_time = time.time()
    cur_at = begin_time
    word_count = 0
    next_count = STATUS_INTERVAL
    accum_loss = 0.0
    last_accum_loss = 0.0
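    # Hedged sketch (an assumption, not code from the original file) of how the
    # epoch body could consume the reader and drive the optimizer; 'next_batch()'
    # and the per-sentence batch shape are illustrative names only:
    #
    #     for sent in reader.next_batch():
    #         model.zerograds()              # clear gradients from the previous step
    #         loss = model(sent)             # forward pass; returns negative-sampling loss
    #         accum_loss += float(loss.data)
    #         loss.backward()                # backprop through the bidirectional LSTM
    #         optimizer.update()             # Adam parameter update
    #         word_count += len(sent)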