# Build Model and Reward from config
import tensorflow as tf
from tqdm import tqdm
# Actor, Solver, DataGenerator, config and print_config come from the
# project's own modules.

actor = Actor(config)

print("Starting training...")
with tf.Session() as sess:
    # Pass config=tf.ConfigProto(log_device_placement=True) to log device placement.
    tf.global_variables_initializer().run()  # tf.initialize_all_variables() is the deprecated pre-0.12 name
    print_config()

    solver = Solver(actor.max_length)
    training_set = DataGenerator(solver)

    nb_epoch = 2
    for i in tqdm(range(nb_epoch)):  # epoch i
        # Get feed_dict
        coord_batch = training_set.next_batch(actor.batch_size, actor.max_length,
                                              actor.input_dimension, seed=1)
        feed = {actor.input_coordinates: coord_batch}

        # Fetch everything in a single run so the printed permutation, reward,
        # log-probabilities and loss all come from the same sampled tour
        # (separate sess.run calls would each re-sample the tour).
        distances, reward, permutation, lp, loss1 = sess.run(
            [actor.distances, actor.reward, actor.positions,
             actor.log_softmax, actor.loss1],
            feed_dict=feed)
        # mask = sess.run(actor.ptr.masked_distribution, feed_dict=feed)
        # print(' Masked distribution (last) \n', mask)

        print(' Permutation \n', permutation)
        print(' Reward \n', reward)
        print(' LP \n', lp)
        print(' Loss1 \n', loss1)
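# For reference, a minimal numpy sketch of the quantity actor.reward is
# assumed to encode: the Euclidean length of the closed tour defined by each
# sampled permutation over the coordinate batch. The helper name
# tour_length_np is hypothetical, not part of the project's API.
import numpy as np

def tour_length_np(coords, perm):
    """coords: [batch, n, 2] city coordinates; perm: [batch, n] orderings."""
    ordered = np.take_along_axis(coords, perm[..., None], axis=1)
    # Vector from each city to the next, wrapping back to the start city.
    diffs = ordered - np.roll(ordered, -1, axis=1)
    return np.sqrt((diffs ** 2).sum(-1)).sum(-1)  # [batch]

# If actor.reward is indeed the raw tour length, this should match, e.g.:
#   np.allclose(tour_length_np(coord_batch, permutation), reward)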
print("Starting training...") with tf.Session() as sess: tf.global_variables_initializer().run() #tf.initialize_all_variables().run() print_config() nb_epoch = 1000 average_loss = 0 test_loss = 0 for i in tqdm(range(nb_epoch)): # Generate instances training_set = DataGenerator(solver) coord_batch, dist_batch, input_batch, initial_tour_length = training_set.next_batch(batch_size, max_length, input_dimension, scale, n_components) optimal_tour_length = training_set.solve_batch(coord_batch) # Construct feed_dict feed = {critic.input_description: input_batch, critic.initial_tour_length: initial_tour_length, critic.optimal_tour_length: optimal_tour_length} # Run session predicted_tour_length, loss, train_step = sess.run([critic.predicted_tour_length, critic.loss, critic.train_step], feed_dict = feed) #predicted_improvement, optimal_improvement, loss, train_step = sess.run([critic.predicted_improvement, critic.optimal_improvement, critic.loss, critic.train_step], feed_dict=feed) #predicted_reward, optimal_reward, loss, train_step = sess.run([critic.predicted_reward, critic.optimal_reward, critic.loss, critic.train_step], feed_dict=feed) average_loss += loss[0] if i > 898: test_loss += loss[0]
def train(self):
    self.build_graph('train')
    self.loss = self.compute_loss(self.preds, self.labels)

    # The learning rate is held in a non-trainable variable so it can be
    # updated from Python via the new_lr placeholder.
    self.learning_rate = tf.Variable(0.0, trainable=False)
    new_lr = tf.placeholder(tf.float32, shape=[], name='new_learning_rate')
    lr_update = tf.assign(self.learning_rate, new_lr)

    self.get_train_op()
    self.collect_summaries()
    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                         for v in tf.trainable_variables()])
    self.saver = tf.train.Saver(tf.trainable_variables() + [self.global_step])
    sv = tf.train.Supervisor(logdir=self.opt.checkpoint_dir,
                             save_summaries_secs=0, saver=None)
    with sv.managed_session() as sess:
        print('Trainable variables:')
        for var in tf.trainable_variables():
            print(var.name)
        print('parameter count =', sess.run(parameter_count))

        if self.opt.continue_train:
            print('Resume training from previous checkpoint')
            checkpoint = tf.train.latest_checkpoint(self.opt.checkpoint_dir)
            self.saver.restore(sess, checkpoint)

        start_time = time.time()
        acc_loss = 0
        lr_value = self.opt.learning_rate
        data_generator = DataGenerator(self.opt)
        self.total = data_generator.total
        self.opt.steps_per_epoch = int(self.total // self.opt.batch_size)

        for step in range(1, self.opt.max_steps):
            fetches = {
                'train': self.train_op,
                'global_step': self.global_step,
                'incr_global_step': self.incr_global_step,
                'loss': self.loss,
                'lr': lr_update,
            }
            if step % self.opt.summary_freq == 0:
                fetches['summary'] = sv.summary_op

            feed_input, feed_label = data_generator.next_batch()
            results = sess.run(fetches,
                               feed_dict={new_lr: lr_value,
                                          self.input: feed_input,
                                          self.labels: feed_label})
            gs = results['global_step']
            acc_loss += results['loss']

            if step % self.opt.summary_freq == 0:
                sv.summary_writer.add_summary(results['summary'], gs)
                train_epoch = int(math.ceil(gs / self.opt.steps_per_epoch))
                train_step = gs - (train_epoch - 1) * self.opt.steps_per_epoch
                avg_loss = acc_loss / self.opt.summary_freq
                print('Epoch: {:2d} {:5d}/{:5d} time: {:4.4f}s/iter loss: {:.3f}'
                      .format(train_epoch, train_step, self.opt.steps_per_epoch,
                              (time.time() - start_time) / self.opt.summary_freq,
                              avg_loss))
                start_time = time.time()
                acc_loss = 0

            # Step decay: shrink the learning rate every lr_step global steps.
            if gs % self.opt.lr_step == 0:
                lr_value = self.opt.learning_rate * (
                    self.opt.learning_rate_decay ** int(gs / self.opt.lr_step))
                print('[*] Learning rate updated to', lr_value)

            if step % self.opt.save_latest_freq == 0:
                self.save(sess, self.opt.checkpoint_dir, gs)

        self.save(sess, self.opt.checkpoint_dir, gs + 1)
        print('optimize done')
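# The learning-rate update above is a plain step decay: every lr_step global
# steps the rate is multiplied by learning_rate_decay. A standalone sketch of
# the same schedule (the numeric values below are illustrative, not the
# project's defaults):
def step_decay(base_lr, decay, lr_step, global_step):
    return base_lr * decay ** (global_step // lr_step)

# e.g. with base_lr=1e-3, decay=0.5, lr_step=10000:
#   steps     0..9999  -> 1.0e-3
#   steps 10000..19999 -> 5.0e-4
#   steps 20000..29999 -> 2.5e-4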