def pretrain(self, sess, generator, train_discriminator):
    """Pretrains the generator via MLE, then pretrains the discriminator."""
    # samples = generate_samples(sess, BATCH_SIZE, generated_num)
    self.gen_loader.create_batches(self.positive_samples)
    results = OrderedDict({'exp_name': self.PREFIX})

    # pre-train generator
    print('Start pre-training...')
    start = time.time()
    for epoch in tqdm(range(self.PRE_EPOCH_NUM)):
        print(' gen pre-train')
        loss = self.pre_train_epoch(sess, generator, self.gen_loader)
        if epoch == 10 or epoch % 40 == 0:
            samples = self.generate_samples(sess, generator,
                                            self.BATCH_SIZE, self.SAMPLE_NUM)
            self.gen_loader.create_batches(samples)
            print('\t train_loss {}'.format(loss))
            mm.compute_results(samples, self.train_samples,
                               self.ord_dict, results)

    samples = self.generate_samples(sess, generator,
                                    self.BATCH_SIZE, self.SAMPLE_NUM)
    self.gen_loader.create_batches(samples)

    print('Start training discriminator...')
    for i in tqdm(range(self.dis_alter_epoch)):
        print(' discriminator pre-train')
        d_loss, acc = train_discriminator()

    end = time.time()
    print('Total time was {:.4f}s'.format(end - start))
    return
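
# `generate_samples` is called throughout this file but is not defined in this
# excerpt (and the call signatures vary: some callers pass only a sample count).
# The helper below is an assumption of what a SeqGAN/ORGAN-style sampler usually
# looks like, kept as a hypothetical sketch rather than the repository's actual
# implementation: it repeatedly draws batches from the generator and collects them.
def _generate_samples_sketch(sess, trainable_model, batch_size, generated_num):
    generated_samples = []
    # Draw generated_num // batch_size batches of token sequences.
    for _ in range(int(generated_num / batch_size)):
        generated_samples.extend(trainable_model.generate(sess))
    return generated_samples
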
def train(self, ckpt_dir='checkpoints/'):
    """Trains the model. If necessary, also includes pretraining."""

    if not self.PRETRAINED and not self.SESS_LOADED:

        self.sess.run(tf.global_variables_initializer())
        self.pretrain()

        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)
        ckpt_file = os.path.join(ckpt_dir,
                                 '{}_pretrain_ckpt'.format(self.PREFIX))
        saver = tf.train.Saver()
        path = saver.save(self.sess, ckpt_file)
        if self.verbose:
            print('Pretrain saved at {}'.format(path))

    if not hasattr(self, 'rollout'):
        self.rollout = Rollout(self.generator, 0.8, self.PAD_NUM)

    if self.verbose:
        print('\nSTARTING TRAINING')
        print('============================\n')

    results_rows = []
    for nbatch in tqdm(range(self.TOTAL_BATCH)):

        results = OrderedDict({'exp_name': self.PREFIX})

        metric = self.EDUCATION[nbatch]
        if metric in self.AV_METRICS.keys():
            reward_func = self.AV_METRICS[metric]
        else:
            raise ValueError('Metric {} not found!'.format(metric))

        if self.kwargs[metric] is not None:

            def batch_reward(samples):
                decoded = [mm.decode(sample, self.ord_dict)
                           for sample in samples]
                pct_unique = len(list(set(decoded))) / float(len(decoded))
                rewards = reward_func(decoded, self.train_samples,
                                      **self.kwargs[metric])
                weights = np.array([pct_unique / float(decoded.count(sample))
                                    for sample in decoded])
                return rewards * weights

        else:

            def batch_reward(samples):
                decoded = [mm.decode(sample, self.ord_dict)
                           for sample in samples]
                pct_unique = len(list(set(decoded))) / float(len(decoded))
                rewards = reward_func(decoded, self.train_samples)
                weights = np.array([pct_unique / float(decoded.count(sample))
                                    for sample in decoded])
                return rewards * weights

        if nbatch % 10 == 0:
            gen_samples = self.generate_samples(self.BIG_SAMPLE_NUM)
        else:
            gen_samples = self.generate_samples(self.SAMPLE_NUM)
        self.gen_loader.create_batches(gen_samples)
        results['Batch'] = nbatch
        print('Batch n. {}'.format(nbatch))
        print('============================\n')

        # results
        mm.compute_results(gen_samples, self.train_samples,
                           self.ord_dict, results)

        for it in range(self.GEN_ITERATIONS):
            samples = self.generator.generate(self.sess)
            rewards = self.rollout.get_reward(self.sess, samples, 16,
                                              self.discriminator,
                                              batch_reward, self.LAMBDA)
            nll = self.generator.generator_step(self.sess, samples, rewards)

            print('Rewards')
            print('~~~~~~~~~~~~~~~~~~~~~~~~\n')
            np.set_printoptions(precision=3, suppress=True)
            mean_r, std_r = np.mean(rewards), np.std(rewards)
            min_r, max_r = np.min(rewards), np.max(rewards)
            print('Mean: {:.3f}'.format(mean_r))
            print(' +/-  {:.3f}'.format(std_r))
            print('Min:  {:.3f}'.format(min_r))
            print('Max:  {:.3f}'.format(max_r))
            np.set_printoptions(precision=8, suppress=False)
            results['neg-loglike'] = nll
        self.rollout.update_params()

        # generate for discriminator
        if self.LAMBDA != 0:
            print('\nDISCRIMINATOR TRAINING')
            print('============================\n')
            for i in range(self.DIS_EPOCHS):
                print('Discriminator epoch {}...'.format(i + 1))

                negative_samples = self.generate_samples(self.POSITIVE_NUM)
                dis_x_train, dis_y_train = self.dis_loader.load_train_data(
                    self.positive_samples, negative_samples)
                dis_batches = self.dis_loader.batch_iter(
                    zip(dis_x_train, dis_y_train),
                    self.DIS_BATCH_SIZE, self.DIS_EPOCHS)

                for batch in dis_batches:
                    x_batch, y_batch = zip(*batch)
                    feed = {
                        self.discriminator.input_x: x_batch,
                        self.discriminator.input_y: y_batch,
                        self.discriminator.dropout_keep_prob: self.DIS_DROPOUT
                    }
                    _, step, d_loss, accuracy = self.sess.run(
                        [self.dis_train_op, self.dis_global_step,
                         self.discriminator.loss, self.discriminator.accuracy],
                        feed)

                results['D_loss_{}'.format(i)] = d_loss
                results['Accuracy_{}'.format(i)] = accuracy
            print('\nDiscriminator trained.')

        results_rows.append(results)
        if nbatch % self.EPOCH_SAVES == 0 or \
           nbatch == self.TOTAL_BATCH - 1:

            if results_rows is not None:
                df = pd.DataFrame(results_rows)
                df.to_csv('{}_results.csv'.format(self.folder), index=False)
            if nbatch is None:
                label = 'final'
            else:
                label = str(nbatch)

            # save models
            model_saver = tf.train.Saver()
            ckpt_dir = os.path.join(self.CHK_PATH, self.folder)
            if not os.path.exists(ckpt_dir):
                os.makedirs(ckpt_dir)
            ckpt_file = os.path.join(ckpt_dir,
                                     '{}_{}.ckpt'.format(self.PREFIX, label))
            path = model_saver.save(self.sess, ckpt_file)
            print('\nModel saved at {}'.format(path))

    print('\n######### FINISHED #########')
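
# The `batch_reward` closures above weight each sample's reward by how unique it
# is within the batch (pct_unique / count), which down-weights samples the
# generator repeats. The standalone function below is an illustrative sketch of
# that weighting only; `reward_func` here is a hypothetical stand-in for any
# per-sample objective, not one of the metrics registered in AV_METRICS.
def _uniqueness_weighted_rewards_sketch(decoded, reward_func):
    # Fraction of distinct samples in the batch.
    pct_unique = len(set(decoded)) / float(len(decoded))
    rewards = np.asarray(reward_func(decoded))
    # Duplicates split the uniqueness credit of their group.
    weights = np.array([pct_unique / float(decoded.count(sample))
                        for sample in decoded])
    return rewards * weights
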
def train(self):
    """Adversarial training: alternates RL updates of the generator with
    discriminator epochs, logging results every batch."""
    print('#########################################################################')
    print('Start Reinforcement Training Generator...')
    results_rows = []
    for nbatch in tqdm(range(self.TOTAL_BATCH)):
        results = OrderedDict({'exp_name': self.PREFIX})
        batch_reward = self.make_reward(self.train_samples, nbatch)

        # nbatch % 1 == 0 is always true, so fresh samples are drawn every batch
        if nbatch % 1 == 0 or nbatch == self.TOTAL_BATCH - 1:
            print('* Making samples')
            if nbatch % 10 == 0:
                gen_samples = self.generate_samples(
                    self.sess, self.generator,
                    self.BATCH_SIZE, self.BIG_SAMPLE_NUM)
            else:
                gen_samples = self.generate_samples(
                    self.sess, self.generator,
                    self.BATCH_SIZE, self.SAMPLE_NUM)
            self.gen_loader.create_batches(gen_samples)
            print('batch_num: {}'.format(nbatch))
            results['Batch'] = nbatch

            # results
            mm.compute_results(gen_samples, self.train_samples,
                               self.ord_dict, results)

        print('#########################################################################')
        print('-> Training generator with RL.')
        print('G Epoch {}'.format(nbatch))

        for it in range(self.TRAIN_ITER):
            samples = self.generator.generate(self.sess)
            rewards = self.rollout.get_reward(self.sess, samples, 16,
                                              self.discriminator,
                                              batch_reward, self.D_WEIGHT)
            nll = self.generator.generator_step(self.sess, samples, rewards)
            # results
            self.print_rewards(rewards)
            print('neg-loglike: {}'.format(nll))
            results['neg-loglike'] = nll
        self.rollout.update_params()

        # generate for discriminator
        print('-> Training Discriminator')
        for i in range(self.D):
            print('D_Epoch {}'.format(i))
            d_loss, accuracy = self.train_discriminator()
            results['D_loss_{}'.format(i)] = d_loss
            results['Accuracy_{}'.format(i)] = accuracy
        print('results')
        results_rows.append(results)
        if nbatch % self.params["EPOCH_SAVES"] == 0:
            self.save_results(self.sess, self.PREFIX,
                              self.PREFIX + '_model', results_rows, nbatch)

    # write results
    self.save_results(self.sess, self.PREFIX,
                      self.PREFIX + '_model', results_rows)
    print('\n:*** FINISHED ***')