def init_state(self, model=None, vocab=None):
    """Attach a trained model and vocabulary, then reset the RNN state.

    Stores ``model``/``vocab`` on the instance, builds a fresh initial
    hidden state sized from the model's embedding width, and fills the
    reverse vocabulary ``self.ivocab`` (index -> token).

    NOTE(review): assumes ``self.ivocab`` already exists (e.g. set in
    __init__) — this method only adds entries to it; confirm at call site.
    """
    self.model = model
    self.vocab = vocab
    # Embedding width doubles as the hidden-unit count for the saved model.
    n_units = self.model.embed.W.data.shape[1]
    self.state = make_initial_state(n_units, batchsize=1, train=False)
    # Invert vocab (token -> index) into ivocab (index -> token).
    # (Original wrapped this in enumerate() but never used the counter.)
    for c, i in self.vocab.items():
        self.ivocab[i] = c
def prediction(args, vocab="", model=""):
    """Generate ``args.length`` tokens from a trained char-RNN.

    Args:
        args: namespace with seed, vocabulary, model, gpu, primetext,
              length and sample attributes.
        vocab: optional preloaded vocabulary dict (token -> index);
               loaded from ``args.vocabulary`` when left as "".
        model: optional preloaded network; loaded from ``args.model``
               when left as "".

    Returns:
        The generated text (primetext included) as a single string.
    """
    output = ""
    np.random.seed(args.seed)

    # Load vocabulary unless the caller supplied one.
    # (Fixed: close the pickle files instead of leaking the handles.)
    if vocab == "":
        with open(args.vocabulary, 'rb') as f:
            vocab = pickle.load(f)
    # Reverse mapping: index -> token, for decoding model output.
    ivocab = {i: c for c, i in vocab.items()}

    # Load model unless the caller supplied one.
    if model == "":
        with open(args.model, 'rb') as f:
            model = pickle.load(f)
    n_units = model.embed.W.shape[1]

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Initialize the generator state.
    state = make_initial_state(n_units, batchsize=1, train=False)
    if args.gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    prev_char = np.array([0], dtype=np.int32)
    if args.gpu >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # Feed the prime text through the network to warm up the state.
    if len(args.primetext) > 0:
        for ch in args.primetext:
            output += ch
            prev_char = np.ones((1,), dtype=np.int32) * vocab[ch]
            if args.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)
            state, prob = model.predict(prev_char, state)

    # Sampling loop. (Fixed: py2-only xrange -> range; the rest of this
    # function is already py3-compatible.)
    for _ in range(args.length):
        state, prob = model.predict(prev_char, state)
        if args.sample > 0:
            # Stochastic sampling from the softmax distribution.
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)  # renormalize after cast
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            # Greedy: always take the most probable token.
            index = np.argmax(cuda.to_cpu(prob.data))
        output += ivocab[index]
        prev_char = np.array([index], dtype=np.int32)
        if args.gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)
    return output
def prediction(args, vocab="", model=""):
    """Generate ``args.length`` tokens from a trained char-RNN.

    Duplicate of the ``prediction`` variant above; kept with the same
    interface. Loads vocab/model from ``args`` paths when not supplied,
    warms the state with ``args.primetext``, then samples.

    Returns:
        Generated text (primetext included) as one string.
    """
    output = ""
    np.random.seed(args.seed)

    # Load vocabulary; use a context manager so the file is closed
    # (original leaked the handle via pickle.load(open(...))).
    if vocab == "":
        with open(args.vocabulary, 'rb') as f:
            vocab = pickle.load(f)
    ivocab = {i: c for c, i in vocab.items()}  # index -> token

    # Load model the same way.
    if model == "":
        with open(args.model, 'rb') as f:
            model = pickle.load(f)
    n_units = model.embed.W.shape[1]

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Fresh generator state, moved to GPU if requested.
    state = make_initial_state(n_units, batchsize=1, train=False)
    if args.gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    prev_char = np.array([0], dtype=np.int32)
    if args.gpu >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # Prime the network character by character.
    if len(args.primetext) > 0:
        for ch in args.primetext:
            output += ch
            prev_char = np.ones((1,), dtype=np.int32) * vocab[ch]
            if args.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)
            state, prob = model.predict(prev_char, state)

    # Main generation loop (xrange -> range for py3 compatibility).
    for _ in range(args.length):
        state, prob = model.predict(prev_char, state)
        if args.sample > 0:
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))
        output += ivocab[index]
        prev_char = np.array([index], dtype=np.int32)
        if args.gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)
    return output
# NOTE(review): truncated, whitespace-collapsed script fragment — it ends
# mid-statement on a dangling `if args.gpu >= 0:` with no body, so it is left
# byte-identical rather than rewritten. What it does: loads vocab and model
# from pickles (file handles are never closed — fix when reassembled), builds
# the reverse ivocab map, moves the model/state to GPU via the legacy
# cuda.init() API, wraps sys.stdout in a UTF-8 codec writer (py2 idiom, as is
# unicode()), and starts feeding the prime text through the sampler.
# load vocabulary vocab = pickle.load(open(args.vocabulary, 'rb')) ivocab = {} for c, i in vocab.items(): ivocab[i] = c # load model model = pickle.load(open(args.model, 'rb')) n_units = model.embed.W.shape[1] if args.gpu >= 0: cuda.init() model.to_gpu() # initialize generator state = make_initial_state(n_units, batchsize=1, train=False) if args.gpu >= 0: for key, value in state.items(): value.data = cuda.to_gpu(value.data) prev_char = np.array([0]) if args.gpu >= 0: prev_char = cuda.to_gpu(prev_char) sys.stdout = codecs.getwriter('utf_8')(sys.stdout) if len(args.primetext) > 0: for i in unicode(args.primetext, 'utf-8'): sys.stdout.write(i) prev_char = np.ones((1, )).astype(np.int32) * vocab[i] if args.gpu >= 0:
# NOTE(review): incomplete training-setup fragment — it begins at a bare
# `else:` (the matching `if` is outside this view) and ends on a dangling
# `if args.gpu >=0:`; left byte-identical. It builds a fresh CharRNN, sets up
# an RMSprop optimizer, computes the batch stride `jump`, initializes the RNN
# state and the accumulated-loss Variable (on GPU or CPU), then starts the
# truncated-BPTT loop slicing x/y batches out of train_data. Py3-style
# (print(), range(), int division for jump).
else: model = CharRNN(len(vocab), n_units) if args.gpu >= 0: cuda.get_device(args.gpu).use() model.to_gpu() optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8) optimizer.setup(model) whole_len = train_data.shape[0] jump = int(whole_len / batchsize) epoch = 0 start_at = time.time() cur_at = start_at state = make_initial_state(n_units, batchsize=batchsize) if args.gpu >= 0: accum_loss = Variable(cuda.zeros(())) for key, value in list(state.items()): value.data = cuda.to_gpu(value.data) else: accum_loss = Variable(np.zeros((), dtype=np.float32)) print('going to train {} iterations'.format(jump * n_epochs)) for i in range(jump * n_epochs): x_batch = np.array([train_data[(jump * j + i) % whole_len] for j in range(batchsize)]) y_batch = np.array([train_data[(jump * j + i + 1) % whole_len] for j in range(batchsize)]) if args.gpu >=0:
# NOTE(review): incomplete py2 variant of the training-setup fragment above —
# starts at a bare `else:` and ends on a dangling `if args.gpu >=0:`; left
# byte-identical. Differences from the py3 variant: legacy cuda.init(),
# `jump = whole_len / batchsize` (py2 int division; would yield a float on
# py3 and break xrange below), print statement, and xrange. Fix division and
# file handles when the full function is reassembled.
else: model = CharRNN(len(vocab), n_units) if args.gpu >= 0: cuda.init() model.to_gpu() optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8) optimizer.setup(model) whole_len = train_data.shape[0] jump = whole_len / batchsize epoch = 0 start_at = time.time() cur_at = start_at state = make_initial_state(n_units, batchsize=batchsize) if args.gpu >= 0: accum_loss = Variable(cuda.zeros(())) for key, value in state.items(): value.data = cuda.to_gpu(value.data) else: accum_loss = Variable(np.zeros(()).astype(np.float32)) print 'going to train {} iterations'.format(jump * n_epochs) for i in xrange(jump * n_epochs): x_batch = np.array([train_data[(jump * j + i) % whole_len] for j in xrange(batchsize)]) y_batch = np.array([train_data[(jump * j + i + 1) % whole_len] for j in xrange(batchsize)]) if args.gpu >=0:
# NOTE(review): mid-function fragment (references state/prob/vocab/ivocab/
# model/n_units defined outside this view) — left byte-identical. It primes
# the sampler with the token "iPhone", seeds a `beam` dict mapping the
# generated prefix to [deep-copied state, prob, cumulative score 1.], then for
# 30 steps sorts all (index, prob, token) triples by descending probability
# and greedily follows the top candidate. The Japanese bare string
# `stateを初期化` means "initialize the state". Several locals
# (prev_char_stack, chosen_p1) appear unused in the visible span — presumably
# used past the cut; verify when reassembled.
cuda.get_device(args.gpu).use() model.to_gpu() # initialize generator if args.gpu >= 0: for key, value in list(state.items()): value.data = cuda.to_gpu(value.data) if args.gpu >= 0: prev_char = cuda.to_gpu(prev_char) # print('\n dumping...', i, end= " ") from copy import deepcopy as DC """ stateを初期化 """ i = vocab["iPhone"] prev_char = np.array([i], dtype=np.int32) prev_char_stack = [] prev_ichar_stack = [ivocab[i]] state = make_initial_state(n_units, batchsize=1, train=False) state, prob = model.forward_one_step(prev_char, prev_char, state, train=False) beam = {ivocab[i]: [DC(state), DC(prob), 1.]} for i in range(30): probability = cuda.to_cpu(prob.data)[0].astype(np.float64) probability /= np.sum(probability) prob_with_index = [] for e, p in enumerate(probability): prob_with_index.append( [e, p, ivocab[e] ] ) prob_with_index.sort(key=lambda x:-1 * x[1] ) index1 = prob_with_index[0][0] chosen_p1 = probability[index1] prev_char1 = np.array([index1], dtype=np.int32) prev_char = prev_char1
def sample(model, vocabulary, seed, sample, primetext, length, gpu):
    """Sample ``length`` tokens from a trained char-RNN (Python 2 code path).

    Args:
        model: path to the pickled model file. NOTE(review): this parameter
               is immediately shadowed by the unpickled network object —
               the name is kept for interface compatibility.
        vocabulary: path to the pickled vocab dict (token -> index).
        seed: numpy RNG seed.
        sample: >0 for stochastic sampling, otherwise greedy argmax.
        primetext: UTF-8 byte string used to warm up the state.
        length: number of tokens to generate.
        gpu: device id; negative for CPU.

    Returns:
        Generated text truncated at the last complete sentence
        (last ideographic full stop).
    """
    np.random.seed(seed)
    # Load vocabulary; with-block closes the file
    # (original leaked the handle via pickle.load(open(...))).
    with open(vocabulary, 'rb') as f:
        vocab = pickle.load(f)
    ivocab = {}
    for c, i in vocab.items():
        ivocab[i] = c
    # Load model — replaces the path argument with the network object.
    with open(model, 'rb') as f:
        model = pickle.load(f)
    n_units = model.embed.W.data.shape[1]
    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()

    # Initialize generator state.
    state = make_initial_state(n_units, batchsize=1, train=False)
    if gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    prev_char = np.array([0], dtype=np.int32)
    if gpu >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # Warm up the state with the prime text.
    if len(primetext) > 0:
        for i in unicode(primetext, 'utf-8'):
            prev_char = np.ones((1,), dtype=np.int32) * vocab[i]
            if gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)
            state, prob = model.forward_one_step(prev_char, prev_char,
                                                 state, train=False)

    strtxt = ''
    count = 0    # number of sentence terminators generated
    checker = 0
    para = 0
    for i in xrange(length):
        state, prob = model.forward_one_step(prev_char, prev_char,
                                             state, train=False)
        if sample > 0:
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))
        strtxt = strtxt + ivocab[index]
        # '\xe3\x80\x82' is the UTF-8 encoding of the ideographic full stop.
        if ivocab[index] == unicode('\xe3\x80\x82','utf-8'):
            count = count + 1
        prev_char = np.array([index], dtype=np.int32)
        if gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)

    # Truncate at the position of the count-th full stop so the output
    # ends on a complete sentence.
    for i in strtxt:
        para = para + 1
        if i == unicode('\xe3\x80\x82','utf-8'):
            checker = checker + 1
        if checker == count:
            break
    strtxt = strtxt[0:para]
    return strtxt
def main():
    """Train a CharRNN with truncated BPTT, checkpointing periodically."""
    # ---- arguments -------------------------------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    # NOTE(review): argparse type=bool is a known pitfall — any non-empty
    # string (including "False") parses as True. Kept as-is to preserve the
    # CLI; replace with a str-to-bool converter when changing the interface.
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length      # truncated-BPTT window
    grad_clip = args.grad_clip
    xp = cuda.cupy if args.gpu >= 0 else np

    # ---- data & model ----------------------------------------------------
    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    # Fixed: close pickle files instead of leaking handles.
    with open('%s/vocab.bin' % args.data_dir, 'wb') as f:
        pickle.dump(vocab, f)

    if len(args.init_from) > 0:
        with open(args.init_from, 'rb') as f:
            model = pickle.load(f)
    else:
        model = CharRNN(len(vocab), n_units)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate, eps=1e-8)
    optimizer.setup(model)
    # Cap the gradient norm before each update.
    optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))

    # ---- training loop ---------------------------------------------------
    whole_len = train_data.shape[0]
    jump = int(whole_len / batchsize)   # stride between batch rows
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    # Both original branches built the same zero Variable; collapsed to one.
    accum_loss = Variable(xp.zeros(()).astype(np.float32))
    if args.gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        # Fixed: xrange -> range; this function is otherwise py3
        # (print(), cleargrads()) and xrange would raise NameError.
        x_batch = xp.array([train_data[(jump * j + i) % whole_len]
                            for j in range(batchsize)])
        y_batch = xp.array([train_data[(jump * j + i + 1) % whole_len]
                            for j in range(batchsize)])
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        state, loss_i = model.forward_one_step(x_batch, y_batch, state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate the history
            accum_loss = Variable(xp.zeros(()).astype(np.float32))
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel'
                      % (args.checkpoint_dir, float(i) / jump))
                # deepcopy so .to_cpu() doesn't pull the live model off GPU.
                with open(fn, 'wb') as f:
                    pickle.dump(copy.deepcopy(model).to_cpu(), f)
                with open('%s/latest.chainermodel'
                          % (args.checkpoint_dir), 'wb') as f:
                    pickle.dump(copy.deepcopy(model).to_cpu(), f)

        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
# NOTE(review): incomplete training-setup fragment configured by RNN_*
# module constants instead of argparse — starts at a bare `else:` and ends on
# a dangling `if RNN_GPU >=0:`; left byte-identical. Py2 code (print
# statement, xrange, true-division-free `jump = whole_len / RNN_BATCHSIZE`,
# which becomes a float and breaks xrange under py3). Builds CharRNN +
# RMSprop, initializes state/accum_loss, and begins the BPTT batch loop.
else: model = CharRNN(len(vocab), RNN_RNN_SIZE) if RNN_GPU >= 0: cuda.get_device(RNN_GPU).use() model.to_gpu() optimizer = optimizers.RMSprop(lr=RNN_LEARNING_RATE, alpha=RNN_DECAY_RATE, eps=1e-8) optimizer.setup(model) whole_len = train_data.shape[0] jump = whole_len / RNN_BATCHSIZE epoch = 0 start_at = time.time() cur_at = start_at state = make_initial_state(RNN_RNN_SIZE, batchsize=RNN_BATCHSIZE) if RNN_GPU >= 0: accum_loss = Variable(cuda.zeros(())) for key, value in state.items(): value.data = cuda.to_gpu(value.data) else: accum_loss = Variable(np.zeros((), dtype=np.float32)) print 'going to train {} iterations'.format(jump * RNN_EPOCHS) for i in xrange(jump * RNN_EPOCHS): x_batch = np.array([train_data[(jump * j + i) % whole_len] for j in xrange(RNN_BATCHSIZE)]) y_batch = np.array([train_data[(jump * j + i + 1) % whole_len] for j in xrange(RNN_BATCHSIZE)]) if RNN_GPU >=0:
def init_state():
    """Reset TextList's shared RNN state to a fresh single-sample state.

    Uses the module-level ``n_units``; presumably ``TextList`` is a
    module-level holder class — confirm against the surrounding file.
    """
    fresh_state = make_initial_state(n_units, batchsize=1, train=False)
    TextList.state = fresh_state
def run():
    """Generate RNN_LENGTH tokens to stdout using module-level RNN_* config.

    Python 2 code path (unicode(), xrange, bare print). Loads vocab and
    model from VOCAB_PATH / MODEL_FILE_PATH, warms the state with
    RNN_PRIMETEXT, then samples; in "Word" mode a space is written after
    every token.
    """
    np.random.seed(RNN_SEED)
    # Load vocabulary; with-block closes the file
    # (original leaked the handle via pickle.load(open(...))).
    with open(VOCAB_PATH, 'rb') as f:
        vocab = pickle.load(f)
    ivocab = {}
    for c, i in vocab.items():
        ivocab[i] = c
    # Load model the same way.
    with open(MODEL_FILE_PATH, 'rb') as f:
        model = pickle.load(f)
    n_units = model.embed.W.data.shape[1]
    if RNN_GPU >= 0:
        cuda.get_device(RNN_GPU).use()
        model.to_gpu()

    # Initialize generator state.
    state = make_initial_state(n_units, batchsize=1, train=False)
    if RNN_GPU >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    prev_char = np.array([0], dtype=np.int32)
    if RNN_GPU >= 0:
        prev_char = cuda.to_gpu(prev_char)

    # Echo and feed the prime text to warm up the state.
    if len(RNN_PRIMETEXT) > 0:
        for i in unicode(RNN_PRIMETEXT, 'utf-8'):
            sys.stdout.write(i)
            if RNN_TRAIN_MODE == "Word":
                sys.stdout.write(" ")
            prev_char = np.ones((1,), dtype=np.int32) * vocab[i]
            if RNN_GPU >= 0:
                prev_char = cuda.to_gpu(prev_char)
            state, prob = model.forward_one_step(prev_char, prev_char,
                                                 state, train=False)

    # Sampling loop.
    for i in xrange(RNN_LENGTH):
        state, prob = model.forward_one_step(prev_char, prev_char,
                                             state, train=False)
        if RNN_SAMPLE > 0:
            probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
            probability /= np.sum(probability)
            index = np.random.choice(range(len(probability)), p=probability)
        else:
            index = np.argmax(cuda.to_cpu(prob.data))
        sys.stdout.write(ivocab[index])
        if RNN_TRAIN_MODE == "Word":
            sys.stdout.write(" ")
        prev_char = np.array([index], dtype=np.int32)
        if RNN_GPU >= 0:
            prev_char = cuda.to_gpu(prev_char)
    # py2 bare print: emit the trailing newline.
    print
# Ensemble setup: load four trained models, build per-model initial states,
# map the tokenized `inputs` through each model's vocabulary, and construct
# the reverse (index -> token) maps. Relies on vocab1..vocab4, inputs and
# inputs_index1..inputs_index3 defined earlier in the file.
inputs_index4 = []
ivocab1 = {}
ivocab2 = {}
ivocab3 = {}
ivocab4 = {}

# Fixed: close the pickle files instead of leaking handles.
with open('./cv/latest_ensembl1_128.chainermodel', 'rb') as f:
    model1 = pickle.load(f)
with open('./cv/latest_ensembl2_128.chainermodel', 'rb') as f:
    model2 = pickle.load(f)
with open('./cv/latest_ensembl3_128.chainermodel', 'rb') as f:
    model3 = pickle.load(f)
with open('./cv/latest_ensembl4_128.chainermodel', 'rb') as f:
    model4 = pickle.load(f)

# Hidden-unit counts come from each model's embedding width.
n_units1 = model1.embed.W.data.shape[1]
n_units2 = model2.embed.W.data.shape[1]
n_units3 = model3.embed.W.data.shape[1]
n_units4 = model4.embed.W.data.shape[1]

# Initialize one generator state per ensemble member.
state1 = make_initial_state(n_units1, batchsize=1, train=False)
state2 = make_initial_state(n_units2, batchsize=1, train=False)
state3 = make_initial_state(n_units3, batchsize=1, train=False)
state4 = make_initial_state(n_units4, batchsize=1, train=False)

# Translate the input tokens to per-vocabulary indices.
for vocab, inputs_index in [(vocab1, inputs_index1), (vocab2, inputs_index2),
                            (vocab3, inputs_index3), (vocab4, inputs_index4)]:
    for word in inputs:
        # Single lookup instead of the original double vocab.get();
        # `is not None` instead of `!= None`.
        idx = vocab.get(word)
        if idx is not None:
            inputs_index.append(idx)
        else:
            # NOTE(review): appends the *string* 'UNK' into a list of int
            # indices — confirm downstream consumers handle this sentinel.
            inputs_index.append('UNK')

# Build reverse maps (index -> token); the original enumerate() counter
# was unused and has been dropped.
for vocab, ivocab in [(vocab1, ivocab1), (vocab2, ivocab2),
                      (vocab3, ivocab3), (vocab4, ivocab4)]:
    for c, i in vocab.items():
        ivocab[i] = c