def main(_):
    """Entry point: choose a device, build hyperparameters, and dispatch.

    Runs training or evaluation depending on FLAGS.mode. The unused
    positional argument absorbs argv passed by tf.app.run().
    """
    # Device selection. NOTE(review): the "/gpu:1" branch is only reachable
    # for eval with num_gpus >= 2; original priority order preserved.
    if FLAGS.num_gpus >= 1 and FLAGS.mode == "train":
        dev = "/gpu:0"
    elif FLAGS.num_gpus >= 2 and FLAGS.mode == "eval":
        dev = "/gpu:1"
    else:
        dev = "/cpu:0"

    if FLAGS.mode == "train":
        batch_size = FLAGS.batch_size
    elif FLAGS.mode == "eval":
        # Fixed eval batch size so evaluation metrics are comparable.
        batch_size = 100
    else:
        # Fix: the original left batch_size unbound for any other mode and
        # crashed with a NameError at the HParams call below; fail fast
        # with a clear message instead.
        raise ValueError(
            "unknown mode: %r (expected 'train' or 'eval')" % FLAGS.mode)

    num_classes = 10  # presumably CIFAR-10 — confirm against the data pipeline

    hps = HParams(
        batch_size=batch_size,
        num_classes=num_classes,
        min_lrn_rate=0.0001,
        lrn_rate=0.1,
        num_units=5,
        weight_decay_rate=0.0002,
        relu_leakiness=0.1,
        optimizer="mom",
    )

    with tf.device(dev):
        if FLAGS.mode == "train":
            train(hps)
        elif FLAGS.mode == "eval":
            evaluate(hps)
def __init__(self):
    """Load vocab/corpora and assemble train-mode hyperparameters.

    Reads the training corpus via self.load_data and the development
    corpus from <data_dir>/text_dev.pkl, then builds self.hps.
    """
    self.FLAGS = FLAGS
    self.batch_size = self.FLAGS.batch_size
    # training data
    self.vocab, self.ivocab, self.data = self.load_data(self.FLAGS.data_dir)
    self.dic_size = len(self.vocab)
    # Special-token ids; assumes these exact keys exist in the vocabulary.
    self.PAD_ID = self.vocab['PAD']
    self.GO_ID = self.vocab['GO']
    self.EOS_ID = self.vocab['</S>']
    self.UNK_ID = self.vocab['UNK']
    # development data — fix: context manager closes the file even on error
    with open(self.FLAGS.data_dir + '/text_dev.pkl', 'rb') as pkl_file:
        self.dev_data = pickle.load(pkl_file)
    print(np.shape(self.data))
    # Per-example bookkeeping arrays over the training corpus.
    self.batch_check = np.zeros(len(self.data))
    self.batch_check2 = np.zeros(len(self.data), dtype=np.float32)
    # construct HParams
    self.hps = HParams(vocab_size=len(self.vocab),
                       emb_size=self.FLAGS.emb_size,
                       hidden_size=self.FLAGS.hidden_size,
                       device=self.FLAGS.device,
                       learning_rate=self.FLAGS.learning_rate,
                       max_gradient_norm=self.FLAGS.max_gradient_norm,
                       buckets=[(8, 9)],
                       batch_size=self.FLAGS.batch_size,
                       num_topic=self.FLAGS.num_topic,
                       mode='train')
    print("Params sets: ")
    print("___________________")
    print("learning_rate:%s max_gradient_norm:%s " %
          (str(self.FLAGS.learning_rate), self.FLAGS.max_gradient_norm))
    print("batch_size:%d" % (self.FLAGS.batch_size))
    print("hidden_size:%d emb_size:%d " %
          (self.FLAGS.hidden_size, self.FLAGS.emb_size))
    print("steps_per_checkpoint:%d" % (self.FLAGS.steps_per_checkpoint))
    print("steps_per_sample:%d" % (self.FLAGS.steps_per_sample))
    print("sample_num:%d" % (self.FLAGS.sample_num))
    print("device:%s" % (self.FLAGS.device))
    print("Vocabulary size: %d data size: %d " %
          (len(self.vocab), len(self.data)))
    print("___________________")
    # Fix: was the accidental double assignment
    # `self.buckets = self.buckets = [(8, 9)]`.
    self.buckets = [(8, 9)]
def __init__(self):
    """Set up decoding: load the vocabulary and build decode-mode HParams."""
    self.FLAGS = FLAGS
    print(self.FLAGS.data_dir)
    # Vocabulary and its inverse (id -> token) mapping.
    self.vocab, self.ivocab = self.load_dic(self.FLAGS.data_dir)
    self.dic_size = len(self.vocab)
    # Collect the hyperparameters first, then build the HParams record.
    hp_kwargs = dict(
        vocab_size=len(self.vocab),
        emb_size=self.FLAGS.emb_size,
        hidden_size=self.FLAGS.hidden_size,
        device=self.FLAGS.device,
        learning_rate=self.FLAGS.learning_rate,
        max_gradient_norm=self.FLAGS.max_gradient_norm,
        buckets=[(8, 9)],
        batch_size=self.FLAGS.batch_size,
        num_topic=self.FLAGS.num_topic,
        mode='decode',
    )
    self.hps = HParams(**hp_kwargs)
    self.tool = PoetryTool()
    # Model parameters have not been restored into a session yet.
    self.load_already = False
def main():
    """Fine-tune a GPT-2 style language model on a tokenized .npz dataset.

    Resolves hyperparameters (hparams.json or size preset), builds the LM
    loss graph, restores the base checkpoint, and trains until Ctrl-C or
    forever, checkpointing every args.save_every steps.
    """
    args = parser.parse_args()

    # Prefer an explicit hparams.json in the base model dir; otherwise fall
    # back to the well-known size presets inferred from the model name.
    if os.path.isfile(args.base_model + '/hparams.json'):
        with open(args.base_model + '/hparams.json', encoding='utf-8') as f:
            params = json.loads(f.read())
        hparams = HParams(**params)
    elif 'small' in args.base_model:
        hparams = HParams(**{
            "n_vocab": n_vocab,
            "n_ctx": 1024,
            "n_embd": 768,
            "n_head": 12,
            "n_layer": 12
        })
    elif 'medium' in args.base_model:
        hparams = HParams(**{
            "n_vocab": n_vocab,
            "n_ctx": 1024,
            "n_embd": 1024,
            "n_head": 16,
            "n_layer": 24
        })
    elif 'large' in args.base_model:
        hparams = HParams(**{
            "n_vocab": n_vocab,
            "n_ctx": 1024,
            "n_embd": 1280,
            "n_head": 20,
            "n_layer": 36
        })
    else:
        raise ValueError('invalid model name.')

    config = tf.ConfigProto()
    if int(args.gpu) >= 0:
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = args.gpu

    with tf.Session(config=config, graph=tf.Graph()) as sess:
        context = tf.placeholder(tf.int32, [None, None])
        output = model.model(hparams=hparams, X=context, past=None,
                             reuse=tf.AUTO_REUSE)
        # Next-token LM loss: logits at position t predict token t+1.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        train_vars = tf.trainable_variables()
        global_step = tf.Variable(0, trainable=False)
        if args.warmup_steps > 0:
            learning_rate = tf.compat.v1.train.polynomial_decay(
                learning_rate=1e-10,
                end_learning_rate=args.learning_rate,
                global_step=global_step,
                decay_steps=args.warmup_steps)
            # Fix: the original did `global_step = global_step + 1` inside
            # the training loop, which only rebinds the Python name to an
            # unevaluated tensor — the variable feeding polynomial_decay
            # never advanced, so the warmup LR stayed at ~1e-10 forever.
            # Increment the variable in-graph instead.
            incr_global_step = tf.assign_add(global_step, 1)
        else:
            learning_rate = args.learning_rate
            incr_global_step = None

        if args.optim == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                         beta1=0.9, beta2=0.98, epsilon=1e-7)
        elif args.optim == 'adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        elif args.optim == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        else:
            raise ValueError('invalid optimizer name.')

        opt_grads = tf.gradients(loss, train_vars)
        opt_grads = list(zip(opt_grads, train_vars))
        opt_apply = opt.apply_gradients(opt_grads)
        summaries = tf.summary.scalar('loss', loss)

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))
        saver = tf.train.Saver(
            var_list=train_vars,
            max_to_keep=5,
            keep_checkpoint_every_n_hours=2)

        # Initialize everything (e.g. global_step, optimizer slots), then
        # restore the trainable weights from the base checkpoint. (The
        # original also created a Saver and restored once *before* the
        # initializer; that restore was entirely overwritten by
        # global_variables_initializer and has been dropped.)
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(args.base_model)
        saver.restore(sess, ckpt)
        print('Loading checkpoint', ckpt)

        print('Loading dataset...')
        # Split every array in the .npz into fixed n_ctx-token chunks; a
        # trailing partial chunk per array is discarded.
        global_chunks = []
        with np.load(args.dataset) as npz:
            for item in npz.files:
                token_chunk = npz[item]
                current_token = []
                for tok in token_chunk:
                    current_token.append(np.uint16(tok))
                    if len(current_token) == hparams.n_ctx:
                        global_chunks.append(current_token)
                        current_token = []
        global_chunk_index = np.random.permutation(len(global_chunks))
        global_chunk_step = 0

        print('Training...')

        def sample_feature():
            """Sample one batch of full-length chunks (FULL-SENTENCES)."""
            nonlocal global_chunk_index, global_chunk_step
            p_input_ids = []
            for _ in range(args.batch_size):
                idx = global_chunk_index[global_chunk_step]
                global_chunk_step += 1
                if global_chunk_step >= len(global_chunk_index):
                    # Epoch boundary: reshuffle and restart.
                    global_chunk_step = 0
                    global_chunk_index = np.random.permutation(
                        len(global_chunks))
                # Copy so later mutation can't corrupt the stored chunk.
                p_input_ids.append(copy(global_chunks[idx]))
            return {context: p_input_ids}

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        hparams_path = os.path.join(CHECKPOINT_DIR, args.run_name,
                                    'hparams.json')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r', encoding='utf-8') as fp:
                counter = int(fp.read()) + 1
        maketree(os.path.join(CHECKPOINT_DIR, args.run_name))

        def save():
            """Write model-<counter> plus the counter and hparams files."""
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w', encoding='utf-8') as fp:
                fp.write(str(counter) + '\n')
            with open(hparams_path, 'w', encoding='utf-8') as fp:
                fp.write(json.dumps({
                    "n_vocab": int(hparams.n_vocab),
                    "n_ctx": int(hparams.n_ctx),
                    "n_embd": int(hparams.n_embd),
                    "n_head": int(hparams.n_head),
                    "n_layer": int(hparams.n_layer),
                }))

        avg_loss = (0.0, 0.0)  # exponentially-decayed (sum, weight) pair
        start_time = time.time()
        try:
            while True:
                if counter % args.save_every == 0:
                    save()
                (_, v_loss, v_summary) = sess.run(
                    (opt_apply, loss, summaries),
                    feed_dict=sample_feature())
                summary_log.add_summary(v_summary, counter)
                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)
                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(
                        counter=counter,
                        time=time.time() - start_time,
                        loss=v_loss,
                        avg=avg_loss[0] / avg_loss[1]))
                counter = counter + 1
                if incr_global_step is not None:
                    # See warmup fix above: advance the schedule variable.
                    sess.run(incr_global_step)
        except KeyboardInterrupt:
            print('interrupted')
            save()
bpe = f.read().split('\n') with open('emoji.json', encoding='utf-8') as f: emoji = json.loads(f.read()) enc = BPEEncoder_ja(bpe, emoji) n_vocab = len(enc) eot_token = enc.encode('<|endoftext|>')[0] sep_token = enc.encode('<|byte0|>')[0] temperature = args.temperature top_k = args.top_k top_p = args.top_p min_answer_len = args.min_answer_len if os.path.isfile(args.model + '/hparams.json'): with open(args.model + '/hparams.json') as f: params = json.loads(f.read()) hparams = HParams(**params) n_prediction = params['n_prediction'] elif 'small' in args.model: hparams = HParams( **{ "n_vocab": n_vocab, "n_ctx": 1024, "n_embd": 768, "n_head": 12, "n_layer": 12 }) n_prediction = args.max_answer_len elif 'medium' in args.model: hparams = HParams( **{ "n_vocab": n_vocab,
parser.add_argument('--gpu', type=str, default='0') args = parser.parse_args() with open('ja-bpe.txt', encoding='utf-8') as f: bpe = f.read().split('\n') with open('emoji.json', encoding='utf-8') as f: emoji = json.loads(f.read()) enc = BPEEncoder_ja(bpe, emoji) n_vocab = len(enc) if os.path.isfile(args.model + '/hparams.json'): with open(args.model + '/hparams.json') as f: params = json.loads(f.read()) hparams = HParams(**params) max_length = params['n_prediction'] else: raise ValueError('invalid model name.') length = hparams.n_ctx - max_length - 1 temperature = args.temperature top_k = args.top_k top_p = args.top_p SEP_TOKEN = enc.encode('<|byte0|>')[0] def generate_one(sess, output): context_tokens = enc.encode(args.context) if len(context_tokens) > length: context_tokens = context_tokens[:length]
class GeneratorUI(object):
    """Front-end for the poem generator: interactive CLI and batch file modes."""

    def __init__(self):
        """Load the vocabulary and assemble decode-mode hyperparameters."""
        self.FLAGS = FLAGS
        print(self.FLAGS.data_dir)
        # Vocabulary and its inverse (id -> token) mapping.
        self.vocab, self.ivocab = self.load_dic(self.FLAGS.data_dir)
        self.dic_size = len(self.vocab)
        self.hps = HParams(
            vocab_size=len(self.vocab),
            emb_size=self.FLAGS.emb_size,
            hidden_size=self.FLAGS.hidden_size,
            device=self.FLAGS.device,
            learning_rate=self.FLAGS.learning_rate,
            max_gradient_norm=self.FLAGS.max_gradient_norm,
            buckets=[(8, 9)],
            batch_size=self.FLAGS.batch_size,
            num_topic=self.FLAGS.num_topic,
            mode='decode')
        self.tool = PoetryTool()
        # Model parameters have not been restored into a session yet.
        self.load_already = False

    def load_dic(self, file_dir):
        """Load the vocab and inverse-vocab pickles from file_dir.

        Returns (vocab, ivocab) as loaded from vocab.pkl / ivocab.pkl.
        """
        # Fix: context managers guarantee the files are closed on error too.
        with open(file_dir + '/vocab.pkl', 'rb') as vocab_file:
            dic = pickle.load(vocab_file, encoding='utf8')
        with open(file_dir + '/ivocab.pkl', 'rb') as ivocab_file:
            idic = pickle.load(ivocab_file, encoding='utf8')
        return dic, idic

    def load_model(self, session, beam_size):
        """Load model parameters into session; batch size = beam size."""
        decode_hps = self.hps._replace(batch_size=beam_size)
        model = PoemModel(decode_hps)
        ckpt = tf.train.get_checkpoint_state("model/")
        if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path):
            print("Reading model parameters from %s"
                  % ckpt.model_checkpoint_path)
            model.saver.restore(session, ckpt.model_checkpoint_path)
        else:
            # Fix: when no checkpoint state exists, ckpt is None and the
            # original error path raised AttributeError on
            # ckpt.model_checkpoint_path instead of the intended ValueError.
            missing = ckpt.model_checkpoint_path if ckpt else "model/"
            raise ValueError("%s not found! " % missing)
        return model

    def generate_one(self, all_topic):
        """Interactive loop: read first sentences from stdin, print poems."""
        beam_size = input("please input beam size>")
        beam_size = int(beam_size)
        self.sess = tf.InteractiveSession(graph=tf.Graph())
        self.model = self.load_model(self.sess, beam_size)
        self.generator = Generator(
            self.vocab, self.ivocab, self.hps, self.model, self.sess)
        while True:
            sys.stdout.write("> ")
            sys.stdout.flush()
            sentence = sys.stdin.readline()
            ans, info = self.generator.generate_one(sentence,
                                                    beam_size=beam_size,
                                                    all_topic=all_topic,
                                                    manu=False)
            if len(ans) == 0:
                print("generation failed!")
                print(info)
                continue
            for sen in ans:
                print(sen)

    def generate_whole_file(self, infile, outfile, all_topic, beam_size):
        """Generate poems for each first sentence in infile, 10 manu passes."""
        # NOTE(review): `g1` is not defined anywhere in this class —
        # presumably a module-level graph; confirm (generate_one builds a
        # fresh tf.Graph() instead).
        self.sess = tf.InteractiveSession(graph=g1)
        self.model = self.load_model(self.sess, beam_size)
        self.generator = Generator(
            self.vocab, self.ivocab, self.hps, self.model, self.sess)
        # Fix: use context managers for input/output files.
        with open(infile, 'r') as fin:
            lines = fin.readlines()
        for manu in range(10):
            with open(outfile + str(manu) + ".txt", 'w') as fout:
                for line in lines:
                    line = line.strip()
                    # Skip stubs and failure markers from earlier runs.
                    if len(line) < 5:
                        continue
                    if line == "failed!":
                        continue
                    ans, info = self.generator.generate_one(
                        line, manu, beam_size, all_topic, False)
                    if len(ans) == 0:
                        fout.write(info + "\n")
                    else:
                        fout.write(" ".join(ans) + "\n")
                    fout.flush()
def main():
    """Fine-tune GPT-2 for question->answer (or answer->question) generation.

    Uses a masked-LM loss over the answer span only; the question side acts
    as conditioning context. Trains until Ctrl-C or args.max_train_steps.
    """
    args = parser.parse_args()
    if 'small' in args.base_model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 768,
                "n_head": 12,
                "n_layer": 12
            })
    elif 'medium' in args.base_model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 1024,
                "n_head": 16,
                "n_layer": 24
            })
    elif 'large' in args.base_model:
        hparams = HParams(
            **{
                "n_vocab": n_vocab,
                "n_ctx": 1024,
                "n_embd": 1280,
                "n_head": 20,
                "n_layer": 36
            })
    else:
        raise ValueError('invalid model name.')

    max_answer_len = args.max_answer_len
    batch_size = args.batch_size
    max_seq_length = hparams.n_ctx
    # Training direction decides which element of each pair is the prompt
    # (index_q) and which is the generation target (index_a).
    if args.train_type == 'QtoA':
        index_q = 0
        index_a = 1
        max_q = max_seq_length - args.max_answer_len
        max_a = args.max_answer_len
    elif args.train_type == 'AtoQ':
        index_q = 1
        index_a = 0
        max_q = args.max_answer_len
        max_a = max_seq_length - args.max_answer_len
    else:
        raise ValueError('invalid train type.')

    config = tf.ConfigProto()
    if int(args.gpu) >= 0:
        config.gpu_options.allow_growth = True
        config.gpu_options.visible_device_list = args.gpu

    with tf.Session(config=config) as sess:
        input_ids = tf.placeholder(tf.int32, [batch_size, None])
        masked_lm_positions = tf.placeholder(tf.int32, [batch_size, None])
        masked_lm_ids = tf.placeholder(tf.int32, [batch_size, None])
        masked_lm_weights = tf.placeholder(tf.float32, [batch_size, None])
        output = model.model(hparams=hparams, X=input_ids, past=None,
                             reuse=tf.AUTO_REUSE)
        # Loss only over the answer positions (weight 0 entries are ignored).
        (loss, _, _) = get_masked_lm_output(hparams=hparams,
                                            logits=output['logits'],
                                            positions=masked_lm_positions,
                                            label_ids=masked_lm_ids,
                                            label_weights=masked_lm_weights)
        train_vars = tf.trainable_variables()
        global_step = tf.Variable(0, trainable=False)
        if args.warmup_steps > 0:
            learning_rate = tf.compat.v1.train.polynomial_decay(
                learning_rate=1e-10,
                end_learning_rate=args.learning_rate,
                global_step=global_step,
                decay_steps=args.warmup_steps)
            # Fix: `global_step = global_step + 1` in the loop only rebound
            # the Python name to an unevaluated tensor; the variable driving
            # the warmup schedule never advanced. Increment in-graph instead.
            incr_global_step = tf.assign_add(global_step, 1)
        else:
            learning_rate = args.learning_rate
            incr_global_step = None
        if args.optim == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                         beta1=0.9, beta2=0.99, epsilon=1e-7)
        elif args.optim == 'adagrad':
            opt = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        elif args.optim == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        else:
            raise ValueError('invalid optimizer name.')

        opt_grads = tf.gradients(loss, train_vars)
        opt_grads = list(zip(opt_grads, train_vars))
        opt_apply = opt.apply_gradients(opt_grads)
        summaries = tf.summary.scalar('loss', loss)
        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))
        saver = tf.train.Saver(var_list=train_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(args.base_model)
        saver.restore(sess, ckpt)
        print('Loading checkpoint', ckpt)

        print('Loading dataset...')
        global_chunks = []
        for fn in glob.glob(args.dataset):
            with open(fn, 'rb') as f:
                for p in pickle.load(f):
                    # Fix: the original tested `len([1]) > 0`, which is
                    # always true, so pairs with an empty answer side were
                    # never filtered out (and crashed on p[1][-1] below).
                    if len(p[0]) > 0 and len(p[1]) > 0:
                        # Ensure both sides end with the end-of-text token.
                        if p[0][-1] != eot_token:
                            p[0].append(eot_token)
                        if p[1][-1] != eot_token:
                            p[1].append(eot_token)
                        global_chunks.append(p)
        global_chunk_index = np.random.permutation(len(global_chunks))
        global_chunk_step = 0
        print('There is', len(global_chunks), 'chunks.')
        print('Training...')

        def sample_feature():
            """Build one feed_dict batch of (prompt, masked-answer) pairs."""
            nonlocal global_chunk_index, global_chunk_step
            p_input_ids = []
            p_masked_lm_positions = []
            p_masked_lm_ids = []
            p_masked_lm_weights = []
            for _ in range(batch_size):
                idx = global_chunk_index[global_chunk_step]
                global_chunk_step += 1
                if global_chunk_step >= len(global_chunk_index):
                    # Epoch boundary: reshuffle and restart.
                    global_chunk_step = 0
                    global_chunk_index = np.random.permutation(
                        len(global_chunks))
                sampled_tokens = global_chunks[idx]
                # Prompt side, truncated to max_q.
                ids = copy(sampled_tokens[index_q])
                if len(ids) > max_q:
                    ids = ids[:max_q]
                    # NOTE(review): the separator is written only on the
                    # truncated path here; confirm whether it was intended
                    # unconditionally to match the inference-time prompt.
                    ids[-1] = sep_token
                # Target side, truncated to max_a.
                lm_ids = copy(sampled_tokens[index_a])
                if len(lm_ids) > max_a:
                    lm_ids = lm_ids[:max_a]
                lm_weights = [1.0] * len(lm_ids)
                # Label positions: logits at position t predict token t+1.
                lm_positions = list(
                    range(len(ids) - 1, len(ids) - 1 + len(lm_ids), 1))
                # Pad the label arrays to fixed width; weight 0.0 = ignored.
                while len(lm_positions) < max_answer_len:
                    lm_positions.append(0)
                    lm_ids.append(0)
                    lm_weights.append(0.0)
                ids = ids + lm_ids
                while len(ids) < max_seq_length:
                    ids.append(eot_token)
                p_input_ids.append(ids)
                p_masked_lm_positions.append(lm_positions)
                p_masked_lm_ids.append(lm_ids)
                p_masked_lm_weights.append(lm_weights)
            return {
                input_ids: p_input_ids,
                masked_lm_positions: p_masked_lm_positions,
                masked_lm_ids: p_masked_lm_ids,
                masked_lm_weights: p_masked_lm_weights
            }

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        hparams_path = os.path.join(CHECKPOINT_DIR, args.run_name,
                                    'hparams.json')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            """Checkpoint the model plus counter and hparams sidecar files."""
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')
            with open(hparams_path, 'w') as fp:
                fp.write(
                    json.dumps({
                        "n_vocab": int(hparams.n_vocab),
                        "n_ctx": int(hparams.n_ctx),
                        "n_embd": int(hparams.n_embd),
                        "n_head": int(hparams.n_head),
                        "n_layer": int(hparams.n_layer),
                        "n_prediction": int(max_answer_len),
                    }))

        avg_loss = (0.0, 0.0)  # exponentially-decayed (sum, weight) pair
        start_time = time.time()
        try:
            while True:
                if counter % args.save_every == 0:
                    save()
                (_, v_loss, v_summary) = sess.run((opt_apply, loss, summaries),
                                                  feed_dict=sample_feature())
                summary_log.add_summary(v_summary, counter)
                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)
                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))
                counter = counter + 1
                if incr_global_step is not None:
                    # See warmup fix above: advance the schedule variable.
                    sess.run(incr_global_step)
                if args.max_train_steps > 0 and args.max_train_steps <= counter:
                    save()
                    break
        except KeyboardInterrupt:
            print('interrupted')
            save()