def main():
    """Main entry point: parse arguments, build the model, then train or sample."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--state', required=True, help='train or eval')
    parser.add_argument('--txt', required=True, help='text file used for training')
    parser.add_argument('--batch', default=128, type=int, help='training batch size')
    parser.add_argument('--epoch', default=5000, type=int, help='number of epochs to run')
    parser.add_argument('--len', default=100, type=int, help='sequence length fed to the model')
    parser.add_argument('--max_vocab', default=5000, type=int, help='maximum number of characters kept in the vocabulary')
    parser.add_argument('--embed', default=512, type=int, help='embedding dimension')
    parser.add_argument('--hidden', default=512, type=int, help='RNN hidden size')
    parser.add_argument('--n_layer', default=2, type=int, help='number of RNN layers')
    parser.add_argument('--dropout', default=0.5, type=float, help='dropout probability inside the RNN')
    parser.add_argument('--begin', default='我', help='priming text for generation')
    parser.add_argument('--pred_len', default=20, type=int, help='length of the generated text')
    parser.add_argument('--checkpoint', help='path of the checkpoint to load')
    opt = parser.parse_args()
    print(opt)

    convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
    model = CharRNN(convert.vocab_size, opt.embed, opt.hidden, opt.n_layer, opt.dropout)
    model.initialize(ctx=ctx)

    if opt.state == 'train':
        dataset = TextData(opt.txt, opt.len, convert.text_to_arr)
        dataloader = g.data.DataLoader(dataset, opt.batch, shuffle=True)
        lr_sch = mx.lr_scheduler.FactorScheduler(int(1000 * len(dataloader)), factor=0.1)
        optimizer = g.Trainer(model.collect_params(), 'adam', {
            'learning_rate': 1e-3,
            'clip_gradient': 3,
            'lr_scheduler': lr_sch
        })
        cross_entropy = g.loss.SoftmaxCrossEntropyLoss()
        train(opt.epoch, model, dataloader, optimizer, cross_entropy)
    elif opt.state == 'eval':
        pred_text = sample(model, opt.checkpoint, convert.word_to_int,
                           convert.arr_to_text, opt.begin, opt.pred_len)
        print(pred_text)
        with open('./generate.txt', 'a') as f:
            f.write(pred_text)
            f.write('\n')
    else:
        print('Error state, must choose from train and eval!')
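# The TextConverter used above is not shown in this fragment. A minimal sketch of the
# character/integer mapping it is assumed to provide (method names taken from the calls
# above; the internals here are an illustration, not the original implementation):
import numpy as np


class TextConverter:
    def __init__(self, txt_path, max_vocab=5000):
        with open(txt_path, encoding='utf-8') as f:
            text = f.read()
        # keep the most frequent characters, up to max_vocab
        counts = {}
        for ch in text:
            counts[ch] = counts.get(ch, 0) + 1
        vocab = sorted(counts, key=counts.get, reverse=True)[:max_vocab]
        self.word_to_int = {ch: i for i, ch in enumerate(vocab)}
        self.int_to_word = {i: ch for ch, i in self.word_to_int.items()}
        self.vocab_size = len(vocab)

    def text_to_arr(self, text):
        # unknown characters are simply skipped in this sketch
        return np.array([self.word_to_int[ch] for ch in text if ch in self.word_to_int])

    def arr_to_text(self, arr):
        return ''.join(self.int_to_word[int(i)] for i in arr)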
def main():
    """Main entry point: parse arguments, build the model, then train or sample."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--state', required=True, help='train or eval')
    parser.add_argument('--txt', required=True, help='text file used for training')
    parser.add_argument('--batch', default=128, type=int, help='training batch size')
    parser.add_argument('--epoch', default=5000, type=int, help='number of epochs to run')
    parser.add_argument('--len', default=100, type=int, help='sequence length fed to the model')
    parser.add_argument('--max_vocab', default=5000, type=int, help='maximum number of characters kept in the vocabulary')
    parser.add_argument('--embed', default=512, type=int, help='embedding dimension')
    parser.add_argument('--hidden', default=512, type=int, help='RNN hidden size')
    parser.add_argument('--n_layer', default=2, type=int, help='number of RNN layers')
    parser.add_argument('--dropout', default=0.5, type=float, help='dropout probability inside the RNN')
    parser.add_argument('--begin', default='我', help='priming text for generation')
    parser.add_argument('--pred_len', default=20, type=int, help='length of the generated text')
    parser.add_argument('--checkpoint', help='path of the checkpoint to load')
    opt = parser.parse_args()
    print(opt)

    convert = TextConverter(opt.txt, max_vocab=opt.max_vocab)
    model = CharRNN(convert.vocab_size, opt.embed, opt.hidden, opt.n_layer, opt.dropout)
    # resume from an existing checkpoint
    model.load_state_dict(torch.load('./poetry_checkpoints/model_300.pth'))
    if use_gpu:
        model = model.cuda()

    if opt.state == 'train':
        model.train()
        dataset = TextData(opt.txt, opt.len, convert.text_to_arr)
        dataloader = data.DataLoader(dataset, opt.batch, shuffle=True, num_workers=4)
        optimizer = optim.Adam(model.parameters(), lr=1e-4)
        # size_average=False sums the loss over the batch
        # (deprecated in recent PyTorch; reduction='sum' is the modern equivalent)
        criterion = nn.CrossEntropyLoss(size_average=False)
        train(opt.epoch, model, dataloader, optimizer, criterion)
    elif opt.state == 'eval':
        pred_text = sample(model, opt.checkpoint, convert.word_to_int,
                           convert.arr_to_text, opt.begin, opt.pred_len)
        print(pred_text)
        with open('./generate.txt', 'a') as f:
            f.write(pred_text)
            f.write('\n')
    else:
        print('Error state, must choose from train and eval!')
def __init__(self, target_ids, model=None, rnn_size=128, gpu=-1):
    self.twitter = Twitter(target_ids)
    self.train_count = 0
    self.gpu = gpu
    self.model_path = "dada/model{}.pkl".format(target_ids)
    self.tweet_path = "TimeLine/TimeLine" + target_ids
    self.vocab_path = "data/vocab{}.bin".format(target_ids)
    self.vocab = (pickle.load(open(self.vocab_path, "rb"))
                  if os.path.exists(self.vocab_path) else {})
    if os.path.exists(self.model_path):
        self.model = pickle.load(open(self.model_path, 'rb'))
    else:
        self.model = CharRNN(len(self.vocab), rnn_size)
if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len = train_data.shape[0]
jump = int(whole_len / batchsize)
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
if not os.path.exists(args.checkpoint_dir):
    os.mkdir(args.checkpoint_dir)

n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len = train_data.shape[0]
jump = whole_len // batchsize
cur_log_perp = cuda.zeros(())
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
loss_file = open('%s/loss.txt' % args.checkpoint_dir, 'w')

n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len = train_data.shape[0]
jump = whole_len // batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
# Encode the text: map each character to an integer and vice versa.
# 1. int2char maps integers to characters
# 2. char2int maps characters to integers
chars = tuple(set(text))
int2char = dict(enumerate(chars))
char2int = {ch: ii for ii, ch in int2char.items()}
encoded = np.array([char2int[i] for i in text])

# Check if a GPU is available
train_on_gpu = torch.cuda.is_available()

n_hidden = 512
n_layers = 2
net = CharRNN(chars, n_hidden, n_layers)
net.load_state_dict(torch.load('weights.ckpt'))
print(net)

# Declaring the hyperparameters
batch_size = 128
seq_length = 100


# Defining a method to generate the next character
def predict(net, char, h=None, top_k=None):
    ''' Given a character, predict the next character.
        Returns the predicted character and the hidden state.
    '''
    # tensor inputs
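# The snippet above breaks off inside predict(). A sketch of how this function is
# typically completed for this kind of PyTorch char-RNN, assuming the char2int/int2char
# mappings defined above and a net(input, hidden) forward signature; the one_hot_encode
# helper and net.init_hidden are assumptions, not necessarily the original code:
import numpy as np
import torch
import torch.nn.functional as F


def one_hot_encode(arr, n_labels):
    # integer array of shape (batch, seq) -> float one-hot array of shape (batch, seq, n_labels)
    one_hot = np.zeros((arr.size, n_labels), dtype=np.float32)
    one_hot[np.arange(arr.size), arr.flatten()] = 1.0
    return one_hot.reshape((*arr.shape, n_labels))


def predict(net, char, h=None, top_k=None):
    ''' Given a character, predict the next character.
        Returns the predicted character and the hidden state.
    '''
    # tensor inputs: one-hot encode the single input character
    x = np.array([[char2int[char]]])
    x = one_hot_encode(x, len(net.chars))
    inputs = torch.from_numpy(x)
    if train_on_gpu:
        inputs = inputs.cuda()

    # detach the hidden state from its history so gradients do not flow back
    if h is None:
        h = net.init_hidden(1)  # assumed helper on CharRNN
    h = tuple([each.data for each in h])
    out, h = net(inputs, h)

    # turn the output scores into character probabilities
    p = F.softmax(out, dim=1).data
    if train_on_gpu:
        p = p.cpu()

    # optionally restrict sampling to the top_k most likely characters
    if top_k is None:
        top_ch = np.arange(len(net.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    # sample the next character from the (renormalised) distribution
    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p / p.sum())
    return int2char[char], h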
loss_file = open('%s/loss.txt' % args.checkpoint_dir, 'w')

n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len = train_data.shape[0]
jump = whole_len // batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
def main():
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
    # optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))  # set an upper bound on the gradient norm

    whole_len = train_data.shape[0]
    jump = int(whole_len / batchsize)
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)

    if args.gpu >= 0:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))

    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len] for j in range(batchsize)
        ])
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch, y_batch, state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()  # optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            if args.gpu >= 0:
                accum_loss = Variable(xp.zeros(()).astype(np.float32))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            # optimizer.clip_grads(grad_clip)
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(copy.deepcopy(model).to_cpu(),
                            open('%s/latest.chainermodel' % (args.checkpoint_dir), 'wb'))

        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
pickle.dump(model_copy, open(CHECKPOINT_PATH + "CharRNN-Epoch-%s.model" % epoch, 'wb'))
pickle.dump(model_copy, open(MODEL_PATH + "CharRNN-Latest.model", 'wb'))

print(RNN_TRAIN_MODE)
train_data, words, vocab = load_data(mode=RNN_TRAIN_MODE)
pickle.dump(vocab, open(VOCAB_PATH, 'wb'))

if len(RNN_INIT_FROM) > 0:
    model = pickle.load(open(RNN_INIT_FROM, 'rb'))
else:
    model = CharRNN(len(vocab), RNN_RNN_SIZE)

if RNN_GPU >= 0:
    cuda.get_device(RNN_GPU).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=RNN_LEARNING_RATE, alpha=RNN_DECAY_RATE, eps=1e-8)
optimizer.setup(model)

whole_len = train_data.shape[0]
jump = whole_len // RNN_BATCHSIZE
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(RNN_RNN_SIZE, batchsize=RNN_BATCHSIZE)
if RNN_GPU >= 0:
class Clone:
    def __init__(self, target_ids, model=None, rnn_size=128, gpu=-1):
        self.twitter = Twitter(target_ids)
        self.train_count = 0
        self.gpu = gpu
        self.model_path = "dada/model{}.pkl".format(target_ids)
        self.tweet_path = "TimeLine/TimeLine" + target_ids
        self.vocab_path = "data/vocab{}.bin".format(target_ids)
        self.vocab = (pickle.load(open(self.vocab_path, "rb"))
                      if os.path.exists(self.vocab_path) else {})
        if os.path.exists(self.model_path):
            self.model = pickle.load(open(self.model_path, 'rb'))
        else:
            self.model = CharRNN(len(self.vocab), rnn_size)

    def train_loop(self, num=100):
        for i in range(num):
            if (i % 5) == 0:
                self.make_dataset()
            self.train()
            self.tweet()

    def train(self, **kwargs):
        print("Start {} times learning.".format(self.train_count))
        self._train(**kwargs)
        print("{} times learning done.".format(self.train_count))
        self.train_count += 1

    def make_dataset(self):
        self.twitter.save_timeline()
        dataset, result, vocab = TextTools.make_dataset(self.tweet_path, self.vocab)
        self.dataset = dataset
        self.result = result
        self.vocab = vocab
        count = 0
        pickle.dump(self.vocab, open(self.vocab_path, 'wb'))
        # grow the output layer until it covers the (possibly enlarged) vocabulary
        while len(self.model.l3.b) < len(self.vocab):
            self.model.add_unit()
            count += 1
        if count:
            print(count, "units added")

    def tweet(self):
        if time.localtime()[3] > 6:
            pass
        twit = self.Speak()
        twit = twit.rsplit(u"。")
        tNum = np.random.randint(1, min(4, len(twit)))
        tList = random.sample(range(len(twit)), tNum)
        ttt = [twit[i] for i in tList]
        tweet = ttt[0]
        try:
            Twitter.twit_post(tweet)
        except ReadTimeout:
            print("ReadTimeout")
            print("waiting 5 mins")
            time.sleep(5 * 60)
        except ConnectionError:
            print("ConnectionError")
            print("waiting 5 mins")
            time.sleep(5 * 60)

    def Speak(self, seed=1, sample=1, length=200):
        ivocab = {}
        for c, i in self.vocab.items():
            ivocab[i[0]] = c

        n_units = self.model.embed.W.data.shape[1]
        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_gpu()

        # initialize generator
        state = self.model.make_initial_state(batchsize=1, train=False)
        if self.gpu >= 0:
            for key, value in state.items():
                value.data = cuda.to_gpu(value.data)

        prev_char = np.array([0], dtype=np.int32)
        if self.gpu >= 0:
            prev_char = cuda.to_gpu(prev_char)

        primetext = random.sample(list(self.vocab.keys()), 1)
        if len(primetext) > 0:
            sys.stdout.write(primetext[0])
            prev_char = np.array([self.vocab[primetext[0]][0]], dtype=np.int32)
            if self.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)

        twit = ""
        for i in range(length):
            state, prob = self.model.forward_one_step(prev_char, prev_char, state, train=False)
            if sample > 0:
                probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
                probability /= np.sum(probability)
                index = np.random.choice(range(len(probability)), p=probability)
            else:
                # pick one of the three most likely characters at random
                index = np.argmax(cuda.to_cpu(prob.data))
                prob.data[index] = 0
                index2 = np.argmax(cuda.to_cpu(prob.data))
                prob.data[index2] = 0
                index3 = np.argmax(cuda.to_cpu(prob.data))
                prob.data[index3] = 0
                index = random.sample([index, index2, index3], 1)[0]
            if index != 0:
                twit += ivocab[index]
            prev_char = np.array([index], dtype=np.int32)
            if self.gpu >= 0:
                prev_char = cuda.to_gpu(prev_char)
        return twit

    def _train(self, **kwargs):
        gpu = kwargs.get("gpu", -1)
        lr = kwargs.get("lr", 2e-3)
        lr_decay = kwargs.get("lr_decay", 0.97)
        lr_decay_after = kwargs.get("lr_decay_after", 10)
        decay_rate = kwargs.get("decay_rate", 0.95)
        dropout = kwargs.get("dropout", 0.0)
        bprop_len = kwargs.get("bprop_len", 50)
        batchsize = kwargs.get("batchsize", 50)
        grad_clip = kwargs.get("grad_clip", 5)
        n_epochs = kwargs.get("epochs", 5)

        if gpu >= 0:
            cuda.get_device(gpu).use()
            self.model.to_gpu()

        optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
        optimizer.setup(self.model)

        train_data = self.dataset
        whole_len = train_data.shape[0]
        jump = whole_len // batchsize
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = self.model.make_initial_state(batchsize=batchsize)
        if gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for key, value in state.items():
                value.data = cuda.to_gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print('going to train {} iterations'.format(jump * n_epochs))
        for i in range(jump * n_epochs):
            x_batch = np.array([train_data[(jump * j + i) % whole_len]
                                for j in range(batchsize)])
            y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                                for j in range(batchsize)])
            if gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)

            state, loss_i = self.model.forward_one_step(x_batch, y_batch, state,
                                                        dropout_ratio=dropout)
            accum_loss += loss_i

            if (i + 1) % bprop_len == 0:  # Run truncated BPTT
                now = time.time()
                sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format(
                    (i + 1) // bprop_len, (jump * n_epochs) // bprop_len,
                    accum_loss.data / bprop_len, now - cur_at))
                sys.stderr.flush()
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                if gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))

                optimizer.clip_grads(grad_clip)
                optimizer.update()

            if (i + 1) % 10000 == 0:
                pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))

            if (i + 1) % jump == 0:
                epoch += 1
                if epoch >= lr_decay_after:
                    optimizer.lr *= lr_decay
                    print('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
                sys.stdout.flush()

        pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
parser.add_argument('--init_from', type=str, default='')
args = parser.parse_args()

n_epochs = 1000
n_units = 1024
batchsize = 50
bprop_len = 50
grad_clip = 5

dataset, word2num, num2word = load_data()

if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(word2num) + 1, n_units)

if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len = dataset.shape[0]
jump = whole_len // batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
print('No GPU available, training on CPU; consider making n_epochs very small.')

# define the network with PyTorch
from CharRNN import CharRNN
# the training function
from train import train

# instantiate the model: set model hyperparameters
n_hidden = 256
n_layers = 2
net = CharRNN(chars, n_hidden, n_layers)
print(net)

# set training hyperparameters
batch_size = 10
seq_length = 100
n_epochs = 20

# train the model
train(net, encoded, epochs=n_epochs, batch_size=batch_size,
      seq_length=seq_length, lr=0.001, print_every=10)
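# The earlier fragment reloads weights with net.load_state_dict(torch.load('weights.ckpt')),
# so the trained weights have to be written out after train() returns. A minimal sketch of
# one way to do that; the file names and the stored hyperparameters are assumptions:
import torch

torch.save(net.state_dict(), 'weights.ckpt')

# Optionally keep the hyperparameters next to the weights so the model can be rebuilt later
# (assumes CharRNN stores n_hidden, n_layers and chars as attributes):
checkpoint = {
    'n_hidden': net.n_hidden,
    'n_layers': net.n_layers,
    'tokens': net.chars,
    'state_dict': net.state_dict(),
}
torch.save(checkpoint, 'charrnn_checkpoint.ckpt')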
    break

for l in range(len(keyList)):
    key = keyList[l]
    img = original[:, colListDefault[l][0]:colListDefault[l][1], :]
    y = discrimination_one_image(img)
    word = unichr(int(num2code[key], 16))
    wordList.append(word)
    imgMatrix.append(img)
    yMatrix.append(max(y))
    accMatrix.append(max(max(y)))

## Context-based correction with the character-level language model
word2num = pickle.load(open('word2num.pkl', 'rb'))
model = CharRNN(len(word2num) + 1, 1024)
serializers.load_npz("latestmodel20171108.npz", model)

for i in range(len(wordList) - 1):
    x_batch = np.array([word2num[wordList[i]]])
    y_batch = np.array([word2num[wordList[i + 1]]])
    x = chainer.Variable(x_batch)
    t = chainer.Variable(y_batch)
    y = model(x, train=False)
    loss = F.softmax_cross_entropy(y, t).data
    if loss < 10:
        continue
    if accMatrix[i + 1] > 0.99:
        continue
    if loss < 15 and accMatrix[i + 1] > 0.9:
        continue
    yArgSort = np.argsort(yMatrix[i + 1])[::-1]
# -*- coding: utf-8 -*-
from chainer import cuda, Variable, optimizers
import chainer.functions as F
from chainer import serializers

from CharRNN import CharRNN

model = CharRNN(len(word2num) + 1, n_units)
serializers.load_npz("latestmodel20171108.npz", model)

sentence = ['す', 'る', '検', '討', '会', '」', 'は', '2', '日', '、', '孑']

loss = 0
for i in range(len(sentence) - 1):
    sent = sentence[i]
    sentNext = sentence[i + 1]
    x_batch = np.array([word2num[sent.decode('utf-8')]])
    y_batch = np.array([word2num[sentNext.decode('utf-8')]])
    x = Variable(x_batch)
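    # The fragment breaks off here. A minimal sketch of how the loop plausibly continues,
    # mirroring the loss computation in the previous snippet (an assumption, not the
    # original author's code):
    t = Variable(y_batch)
    y = model(x, train=False)
    loss += F.softmax_cross_entropy(y, t).data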