    # Build Model and Reward from config
    actor = Actor(config)

    print("Starting training...")
    with tf.Session() as sess:  # optionally: tf.Session(config=tf.ConfigProto(log_device_placement=True))
        tf.global_variables_initializer().run()
        print_config()

        solver = Solver(actor.max_length)
        training_set = DataGenerator(solver)

        nb_epoch = 2
        for i in tqdm(range(nb_epoch)): # epoch i

            # Get feed_dict
            coord_batch = training_set.next_batch(actor.batch_size, actor.max_length, actor.input_dimension, seed=1)
            feed = {actor.input_coordinates: coord_batch}

            #permutation = sess.run(actor.positions,feed_dict=feed)
            #print('\n Permutation \n',permutation)
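            # Note: each sess.run call below re-executes the graph, so if the decoder
            # samples actor.positions stochastically, values fetched in separate runs may
            # belong to different tours; fetch them in a single sess.run when consistency matters.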
            distances, reward = sess.run([actor.distances, actor.reward], feed_dict=feed)
            permutation = sess.run(actor.positions, feed_dict=feed)
            lp = sess.run(actor.log_softmax, feed_dict=feed)
            loss1 = sess.run(actor.loss1, feed_dict=feed)

            #mask = sess.run(actor.ptr.masked_distribution, feed_dict=feed)
            #print(' Masked distribution (last) \n',mask)
            print(' Permutation \n',permutation)
            print(' Reward \n',reward)
            print(' LP \n',lp)
            print(' Loss1 \n',loss1)
    print("Starting training...")
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        print_config()

        nb_epoch = 1000

        average_loss = 0
        test_loss = 0

        for i in tqdm(range(nb_epoch)):

            # Generate instances
            training_set = DataGenerator(solver)
            coord_batch, dist_batch, input_batch, initial_tour_length = training_set.next_batch(
                batch_size, max_length, input_dimension, scale, n_components)
            optimal_tour_length = training_set.solve_batch(coord_batch)


            # Construct feed_dict
            feed = {critic.input_description: input_batch,
                    critic.initial_tour_length: initial_tour_length,
                    critic.optimal_tour_length: optimal_tour_length}

            # Run session
            predicted_tour_length, loss, train_step = sess.run(
                [critic.predicted_tour_length, critic.loss, critic.train_step], feed_dict=feed)
            #predicted_improvement, optimal_improvement, loss, train_step = sess.run([critic.predicted_improvement, critic.optimal_improvement, critic.loss, critic.train_step], feed_dict=feed)
            #predicted_reward, optimal_reward, loss, train_step = sess.run([critic.predicted_reward, critic.optimal_reward, critic.loss, critic.train_step], feed_dict=feed)

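            # Accumulate the running training loss; the last ~100 of the 1000
            # epochs are also summed separately as a rough held-out estimate.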
            average_loss += loss[0]
            if i > 898:
                test_loss += loss[0]
Example #3
    def train(self):
        self.build_graph('train')
        self.loss = self.compute_loss(self.preds, self.labels)

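        # The learning rate is kept in a non-trainable variable; feeding new_lr and
        # fetching lr_update lets the training loop change it on the fly.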
        self.learning_rate = tf.Variable(0.0, trainable=False)
        new_lr = tf.placeholder(tf.float32, shape=[], name='new_learning_rate')
        lr_update = tf.assign(self.learning_rate, new_lr)

        self.get_train_op()
        self.collect_summaries()
        with tf.name_scope("parameter_count"):
            parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) \
                                            for v in tf.trainable_variables()])
        self.saver = tf.train.Saver([var for var in tf.trainable_variables()] + \
                                    [self.global_step])
        sv = tf.train.Supervisor(logdir=self.opt.checkpoint_dir,
                                 save_summaries_secs=0,
                                 saver=None)
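        # save_summaries_secs=0 and saver=None disable the Supervisor's automatic
        # summary and checkpoint threads; both are handled manually below.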
        with sv.managed_session() as sess:
            print('Trainable variables:')
            for var in tf.trainable_variables():
                print(var.name, end=' ')
            print()
            print('parameter count =', sess.run(parameter_count))
            if self.opt.continue_train:
                print('Resume training from previous checkpoint')
                checkpoint = tf.train.latest_checkpoint(
                    self.opt.checkpoint_dir)
                self.saver.restore(sess, checkpoint)
            start_time = time.time()
            acc_loss = 0
            lr_value = self.opt.learning_rate
            data_generator = DataGenerator(self.opt)
            self.total = data_generator.total
            self.opt.steps_per_epoch = int(self.total // self.opt.batch_size)

            for step in range(1, self.opt.max_steps):
                fetches = {
                    'train': self.train_op,
                    'global_step': self.global_step,
                    'incr_global_step': self.incr_global_step,
                    'loss': self.loss,
                    'lr': lr_update
                }

                if step % self.opt.summary_freq == 0:
                    fetches['summary'] = sv.summary_op
                # fetches['label'] = self.labels
                # fetches['name'] = self.image_paths
                # fetches['preds'] = self.preds

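                # Pull the next batch and run one optimization step, feeding the
                # current learning-rate value along with inputs and labels.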
                feed_input, feed_label = data_generator.next_batch()

                results = sess.run(fetches,
                                   feed_dict={
                                       new_lr: lr_value,
                                       self.input: feed_input,
                                       self.labels: feed_label
                                   })
                gs = results['global_step']
                acc_loss += results['loss']

                if step % self.opt.summary_freq == 0:
                    sv.summary_writer.add_summary(results['summary'], gs)

                    train_epoch = int(math.ceil(gs / self.opt.steps_per_epoch))
                    train_step = gs - (train_epoch - 1) * self.opt.steps_per_epoch

                    avg_loss = acc_loss / self.opt.summary_freq

                    print('Epoch: {:2d} {:5d}/{:5d}  time: {:4.4f}s/iter  loss: {:.3f}'
                          .format(train_epoch, train_step, self.opt.steps_per_epoch,
                                  (time.time() - start_time) / self.opt.summary_freq,
                                  avg_loss))

                    start_time = time.time()
                    acc_loss = 0

                # print results['label']
                # print self.preprocess_label(results['label'],self.opt.label_alpha,self.opt.label_beta)
                # print results['preds']

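                # Step-wise exponential decay: every lr_step global steps, the base
                # learning rate is scaled by another factor of learning_rate_decay.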
                if gs % self.opt.lr_step == 0:
                    lr_value = self.opt.learning_rate * (
                        self.opt.learning_rate_decay**int(
                            gs / self.opt.lr_step))
                    print('[*] Learning Rate Update to', lr_value)

                if step % self.opt.save_latest_freq == 0:
                    self.save(sess, self.opt.checkpoint_dir, gs)
                #if step % self.opt.steps_per_epoch == 0:
                #    self.save(sess, self.opt.checkpoint_dir, gs)
            self.save(sess, self.opt.checkpoint_dir, gs + 1)
        print('optimize done')