Example #1
def main(_):
  """Run td3/ddpg evaluation."""
  contrib_eager_python_tfe.enable_eager_execution()

  if FLAGS.use_gpu:
    tf.device('/device:GPU:0').__enter__()

  tf.gfile.MakeDirs(FLAGS.log_dir)
  summary_writer = contrib_summary.create_file_writer(
      FLAGS.log_dir, flush_millis=10000)

  env = gym.make(FLAGS.env)
  if FLAGS.wrap_for_absorbing:
    env = lfd_envs.AbsorbingWrapper(env)

  obs_shape = env.observation_space.shape
  act_shape = env.action_space.shape

  with tf.variable_scope('actor'):
    actor = Actor(obs_shape[0], act_shape[0])

  random_reward, _ = do_rollout(
      env, actor, None, num_trajectories=10, sample_random=True)

  reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')
  saver = contrib_eager_python_tfe.Saver(actor.variables + [reward_scale])

  last_checkpoint = tf.train.latest_checkpoint(FLAGS.load_dir)
  with summary_writer.as_default():
    while True:
      last_checkpoint = wait_for_next_checkpoint(FLAGS.load_dir,
                                                 last_checkpoint)

      total_numsteps = int(last_checkpoint.split('-')[-1])

      saver.restore(last_checkpoint)

      average_reward, average_length = do_rollout(
          env, actor, None, noise_scale=0.0, num_trajectories=FLAGS.num_trials)

      logging.info(
          'Evaluation: average episode length %d, average episode reward %f',
          average_length, average_reward)

      print('Evaluation: average episode length {}, average episode reward {}'.
            format(average_length, average_reward))

      with contrib_summary.always_record_summaries():
        if reward_scale.numpy() != 1.0:
          contrib_summary.scalar(
              'reward/scaled', (average_reward - random_reward) /
              (reward_scale.numpy() - random_reward),
              step=total_numsteps)
        contrib_summary.scalar('reward', average_reward, step=total_numsteps)
        contrib_summary.scalar('length', average_length, step=total_numsteps)
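
The helper wait_for_next_checkpoint used above is not defined in this excerpt. A minimal sketch of such a helper, assuming it only needs to poll tf.train.latest_checkpoint until something newer than the last restored checkpoint appears (the sleep interval is an arbitrary choice):

import time

def wait_for_next_checkpoint(load_dir, last_checkpoint, sleep_secs=10):
  """Polls load_dir until a checkpoint newer than last_checkpoint appears."""
  while True:
    checkpoint = tf.train.latest_checkpoint(load_dir)
    if checkpoint is not None and checkpoint != last_checkpoint:
      return checkpoint
    time.sleep(sleep_secs)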
Example #2
    def restore_model(self, checkpoint_name=None):
        """Restore a trained model from the checkpoint directory."""
        with tf.device(self._device):
            # Run a dummy forward pass so that all variables are created
            # before the checkpoint is restored into them.
            dummy_input = tf.constant(
                tf.zeros((5, 37, 37, self._num_input_channels)))
            dummy_pred = self.call(dummy_input, dummy_input, training=False)
            checkpoint_path = join(self._exp_dir, 'checkpoints')
            if checkpoint_name is None:
                checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)
            else:
                checkpoint_path = join(checkpoint_path, checkpoint_name)
            tfe.Saver(self.variables).restore(checkpoint_path)
Example #3
def main(_):
    tf.enable_eager_execution()

    envs = [
        'HalfCheetah-v1', 'Hopper-v1', 'Ant-v1', 'Walker2d-v1', 'Reacher-v1'
    ]
    for ienv, env in enumerate(envs):
        print('Processing environment %d of %d: %s' %
              (ienv + 1, len(envs), env))
        h5_filename = os.path.join(FLAGS.src_data_dir, '%s.h5' % env)
        trajectories = h5py.File(h5_filename, 'r')

        if (set(trajectories.keys()) != set(
            ['a_B_T_Da', 'len_B', 'obs_B_T_Do', 'r_B_T'])):
            raise ValueError('Unexpected key set in file %s' % h5_filename)

        replay_buffer = ReplayBuffer()

        if env.find('Reacher') > -1:
            max_len = 50
        else:
            max_len = 1000

        for i in range(50):
            print('  Processing trajectory %d of 50 (len = %d)' %
                  (i + 1, trajectories['len_B'][i]))
            for j in range(trajectories['len_B'][i]):
                mask = 1
                if j + 1 == trajectories['len_B'][i]:
                    if trajectories['len_B'][i] == max_len:
                        mask = 1
                    else:
                        mask = 0
                replay_buffer.push_back(
                    trajectories['obs_B_T_Do'][i][j],
                    trajectories['a_B_T_Da'][i][j],
                    trajectories['obs_B_T_Do'][i][(j + 1) %
                                                  trajectories['len_B'][i]],
                    [trajectories['r_B_T'][i][j]], [mask],
                    j == trajectories['len_B'][i] - 1)

        replay_buffer_var = contrib_eager_python_tfe.Variable(
            '', name='expert_replay_buffer')
        saver = contrib_eager_python_tfe.Saver([replay_buffer_var])
        odir = os.path.join(FLAGS.dst_data_dir, env)
        print('Saving results to checkpoint in directory: %s' % odir)
        tf.gfile.MakeDirs(odir)
        replay_buffer_var.assign(pickle.dumps(replay_buffer))
        saver.save(os.path.join(odir, 'expert_replay_buffer'))
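
The checkpoint written above holds a single string variable containing the pickled buffer. A minimal sketch of reading it back, assuming odir points at one of the per-environment output directories created above (this mirrors how the training example later on this page restores its expert buffer):

# Sketch: load the expert buffer saved above.
expert_replay_buffer_var = contrib_eager_python_tfe.Variable(
    '', name='expert_replay_buffer')
contrib_eager_python_tfe.Saver([expert_replay_buffer_var]).restore(
    os.path.join(odir, 'expert_replay_buffer'))
expert_replay_buffer = pickle.loads(expert_replay_buffer_var.numpy())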
Example #4
def main(_):
    tfe.enable_eager_execution()

    if not FLAGS.data_path:
        raise ValueError("Must specify --data_path")
    corpus = Corpus(FLAGS.data_path)
    # TODO(ashankar): Remove _batchify and _get_batch and use the Datasets API
    # instead.
    train_data = _batchify(corpus.train, FLAGS.batch_size)
    eval_data = _batchify(corpus.valid, 10)

    have_gpu = tfe.num_gpus() > 0
    use_cudnn_rnn = not FLAGS.no_use_cudnn_rnn and have_gpu

    with tfe.restore_variables_on_create(
            tf.train.latest_checkpoint(FLAGS.logdir)):
        with tf.device("/device:GPU:0" if have_gpu else None):
            # Make learning_rate a Variable so it can be included in the checkpoint
            # and we can resume training with the last saved learning_rate.
            learning_rate = tfe.Variable(20.0, name="learning_rate")
            sys.stderr.write("learning_rate=%f\n" % learning_rate.numpy())
            model = PTBModel(corpus.vocab_size(), FLAGS.embedding_dim,
                             FLAGS.hidden_dim, FLAGS.num_layers, FLAGS.dropout,
                             use_cudnn_rnn)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)

            best_loss = None
            for _ in range(FLAGS.epoch):
                train(model, optimizer, train_data, FLAGS.seq_len, FLAGS.clip)
                eval_loss = evaluate(model, eval_data)
                if not best_loss or eval_loss < best_loss:
                    if FLAGS.logdir:
                        tfe.Saver(model.trainable_weights +
                                  [learning_rate]).save(
                                      os.path.join(FLAGS.logdir, "ckpt"))
                    best_loss = eval_loss
                else:
                    learning_rate.assign(learning_rate / 4.0)
                    sys.stderr.write(
                        "eval_loss did not reduce in this epoch, "
                        "changing learning rate to %f for the next epoch\n" %
                        learning_rate.numpy())
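
Example #4 restores variables implicitly through tfe.restore_variables_on_create as they are created. The same checkpoint could also be restored explicitly once the model has been built, in the style of the other examples on this page. A minimal sketch, assuming model and learning_rate already exist:

# Sketch: explicit restore of the checkpoint saved above.
tfe.Saver(model.trainable_weights + [learning_rate]).restore(
    tf.train.latest_checkpoint(FLAGS.logdir))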
Example #5
        x = self.block1(inputs)
        x = self.block2(x)

        x = self.pool(x)
        output = self.classifier(x)
        output = tf.keras.activations.softmax(output)

        return output


if __name__ == '__main__':
    model = CNN(num_classes)

    model.compile(optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train,
              y_train_ohe,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test_ohe),
              verbose=1)

    scores = model.evaluate(x_test,
                            y_test_ohe,
                            batch_size=batch_size,
                            verbose=1)
    print('Final test loss and accuracy:', scores)

    saver = tfe.Saver(model.variables)
    saver.save('weights/k_04_02_cnn_block/weights.ckpt')
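
The script above only saves the weights. A minimal sketch of restoring them into a freshly constructed model, assuming the same CNN class and checkpoint path; note that the model has to be run once so its variables exist before tfe.Saver can restore into them:

# Sketch: restore the weights saved above into a new model instance.
restored_model = CNN(num_classes)
restored_model(tf.convert_to_tensor(x_test[:1]))  # build the variables first
tfe.Saver(restored_model.variables).restore(
    'weights/k_04_02_cnn_block/weights.ckpt')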
Example #6
def main(_):
    """Run td3/ddpg training."""
    contrib_eager_python_tfe.enable_eager_execution()

    if FLAGS.use_gpu:
        tf.device('/device:GPU:0').__enter__()

    tf.gfile.MakeDirs(FLAGS.log_dir)
    summary_writer = contrib_summary.create_file_writer(FLAGS.log_dir,
                                                        flush_millis=10000)

    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    random.seed(FLAGS.seed)

    env = gym.make(FLAGS.env)
    env.seed(FLAGS.seed)

    if FLAGS.env in ['HalfCheetah-v2', 'Ant-v1']:
        rand_actions = int(1e4)
    else:
        rand_actions = int(1e3)

    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    if FLAGS.algo == 'td3':
        model = ddpg_td3.DDPG(obs_shape[0],
                              act_shape[0],
                              use_td3=True,
                              policy_update_freq=2,
                              actor_lr=1e-3)
    else:
        model = ddpg_td3.DDPG(obs_shape[0],
                              act_shape[0],
                              use_td3=False,
                              policy_update_freq=1,
                              actor_lr=1e-4)

    replay_buffer_var = contrib_eager_python_tfe.Variable('',
                                                          name='replay_buffer')
    gym_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='gym_random_state')
    np_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='np_random_state')
    py_random_state_var = contrib_eager_python_tfe.Variable(
        '', name='py_random_state')

    saver = contrib_eager_python_tfe.Saver(
        model.variables + [replay_buffer_var] +
        [gym_random_state_var, np_random_state_var, py_random_state_var])
    tf.gfile.MakeDirs(FLAGS.save_dir)

    reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')
    eval_saver = contrib_eager_python_tfe.Saver(model.actor.variables +
                                                [reward_scale])
    tf.gfile.MakeDirs(FLAGS.eval_save_dir)

    last_checkpoint = tf.train.latest_checkpoint(FLAGS.save_dir)
    if last_checkpoint is None:
        replay_buffer = ReplayBuffer()
        total_numsteps = 0
        prev_save_timestep = 0
        prev_eval_save_timestep = 0
    else:
        saver.restore(last_checkpoint)
        replay_buffer = pickle.loads(zlib.decompress(
            replay_buffer_var.numpy()))
        total_numsteps = int(last_checkpoint.split('-')[-1])
        assert len(replay_buffer) == total_numsteps
        prev_save_timestep = total_numsteps
        prev_eval_save_timestep = total_numsteps
        env.unwrapped.np_random.set_state(
            pickle.loads(gym_random_state_var.numpy()))
        np.random.set_state(pickle.loads(np_random_state_var.numpy()))
        random.setstate(pickle.loads(py_random_state_var.numpy()))

    with summary_writer.as_default():
        while total_numsteps < FLAGS.training_steps:
            rollout_reward, rollout_timesteps = do_rollout(
                env,
                model.actor,
                replay_buffer,
                noise_scale=FLAGS.exploration_noise,
                rand_actions=rand_actions)
            total_numsteps += rollout_timesteps

            logging.info('Training: total timesteps %d, episode reward %f',
                         total_numsteps, rollout_reward)

            print('Training: total timesteps {}, episode reward {}'.format(
                total_numsteps, rollout_reward))

            with contrib_summary.always_record_summaries():
                contrib_summary.scalar('reward',
                                       rollout_reward,
                                       step=total_numsteps)
                contrib_summary.scalar('length',
                                       rollout_timesteps,
                                       step=total_numsteps)

            if len(replay_buffer) >= FLAGS.min_samples_to_start:
                for _ in range(rollout_timesteps):
                    time_step = replay_buffer.sample(
                        batch_size=FLAGS.batch_size)
                    batch = TimeStep(*zip(*time_step))
                    model.update(batch)

                if total_numsteps - prev_save_timestep >= FLAGS.save_interval:
                    replay_buffer_var.assign(
                        zlib.compress(pickle.dumps(replay_buffer)))
                    gym_random_state_var.assign(
                        pickle.dumps(env.unwrapped.np_random.get_state()))
                    np_random_state_var.assign(
                        pickle.dumps(np.random.get_state()))
                    py_random_state_var.assign(pickle.dumps(random.getstate()))

                    saver.save(os.path.join(FLAGS.save_dir, 'checkpoint'),
                               global_step=total_numsteps)
                    prev_save_timestep = total_numsteps

                if total_numsteps - prev_eval_save_timestep >= FLAGS.eval_save_interval:
                    eval_saver.save(os.path.join(FLAGS.eval_save_dir,
                                                 'checkpoint'),
                                    global_step=total_numsteps)
                    prev_eval_save_timestep = total_numsteps
Example #7
    def save_model(self):
        """Save the trained model to the checkpoint directory."""
        tfe.Saver(self.variables).save(
            join(self._exp_dir, 'checkpoints', 'checkpoints'),
            global_step=self._global_step)
Example #8
def main(_):
  """Run td3/ddpg training."""
  contrib_eager_python_tfe.enable_eager_execution()

  if FLAGS.use_gpu:
    tf.device('/device:GPU:0').__enter__()

  tf.gfile.MakeDirs(FLAGS.log_dir)
  summary_writer = contrib_summary.create_file_writer(
      FLAGS.log_dir, flush_millis=10000)

  tf.set_random_seed(FLAGS.seed)
  np.random.seed(FLAGS.seed)
  random.seed(FLAGS.seed)

  env = gym.make(FLAGS.env)
  env.seed(FLAGS.seed)
  if FLAGS.learn_absorbing:
    env = lfd_envs.AbsorbingWrapper(env)

  if FLAGS.env in ['HalfCheetah-v2', 'Ant-v1']:
    rand_actions = int(1e4)
  else:
    rand_actions = int(1e3)

  obs_shape = env.observation_space.shape
  act_shape = env.action_space.shape

  subsampling_rate = env._max_episode_steps // FLAGS.trajectory_size  # pylint: disable=protected-access
  lfd = gail.GAIL(
      obs_shape[0] + act_shape[0],
      subsampling_rate=subsampling_rate,
      gail_loss=FLAGS.gail_loss)

  if FLAGS.algo == 'td3':
    model = ddpg_td3.DDPG(
        obs_shape[0],
        act_shape[0],
        use_td3=True,
        policy_update_freq=2,
        actor_lr=FLAGS.actor_lr,
        get_reward=lfd.get_reward,
        use_absorbing_state=FLAGS.learn_absorbing)
  else:
    model = ddpg_td3.DDPG(
        obs_shape[0],
        act_shape[0],
        use_td3=False,
        policy_update_freq=1,
        actor_lr=FLAGS.actor_lr,
        get_reward=lfd.get_reward,
        use_absorbing_state=FLAGS.learn_absorbing)

  random_reward, _ = do_rollout(
      env, model.actor, None, num_trajectories=10, sample_random=True)

  replay_buffer_var = contrib_eager_python_tfe.Variable(
      '', name='replay_buffer')
  expert_replay_buffer_var = contrib_eager_python_tfe.Variable(
      '', name='expert_replay_buffer')

  # Save and restore the random states of gym/numpy/python so that, if the job
  # is preempted, resuming does not affect the results; runs remain
  # deterministic (on CPU) and reproducible.
  gym_random_state_var = contrib_eager_python_tfe.Variable(
      '', name='gym_random_state')
  np_random_state_var = contrib_eager_python_tfe.Variable(
      '', name='np_random_state')
  py_random_state_var = contrib_eager_python_tfe.Variable(
      '', name='py_random_state')

  reward_scale = contrib_eager_python_tfe.Variable(1, name='reward_scale')

  saver = contrib_eager_python_tfe.Saver(
      model.variables + lfd.variables +
      [replay_buffer_var, expert_replay_buffer_var, reward_scale] +
      [gym_random_state_var, np_random_state_var, py_random_state_var])

  tf.gfile.MakeDirs(FLAGS.save_dir)

  eval_saver = contrib_eager_python_tfe.Saver(model.actor.variables +
                                              [reward_scale])
  tf.gfile.MakeDirs(FLAGS.eval_save_dir)

  last_checkpoint = tf.train.latest_checkpoint(FLAGS.save_dir)
  if last_checkpoint is None:
    expert_saver = contrib_eager_python_tfe.Saver([expert_replay_buffer_var])
    last_checkpoint = os.path.join(FLAGS.expert_dir, 'expert_replay_buffer')
    expert_saver.restore(last_checkpoint)
    expert_replay_buffer = pickle.loads(expert_replay_buffer_var.numpy())
    expert_reward = expert_replay_buffer.get_average_reward()

    logging.info('Expert reward %f', expert_reward)
    print('Expert reward {}'.format(expert_reward))

    reward_scale.assign(expert_reward)
    expert_replay_buffer.subsample_trajectories(FLAGS.num_expert_trajectories)
    if FLAGS.learn_absorbing:
      expert_replay_buffer.add_absorbing_states(env)

    # Subsample after adding absorbing states, because otherwise we can lose
    # final states.

    print('Original dataset size {}'.format(len(expert_replay_buffer)))
    expert_replay_buffer.subsample_transitions(subsampling_rate)
    print('Subsampled dataset size {}'.format(len(expert_replay_buffer)))
    replay_buffer = ReplayBuffer()
    total_numsteps = 0
    prev_save_timestep = 0
    prev_eval_save_timestep = 0
  else:
    saver.restore(last_checkpoint)
    replay_buffer = pickle.loads(zlib.decompress(replay_buffer_var.numpy()))
    expert_replay_buffer = pickle.loads(
        zlib.decompress(expert_replay_buffer_var.numpy()))
    total_numsteps = int(last_checkpoint.split('-')[-1])
    prev_save_timestep = total_numsteps
    prev_eval_save_timestep = total_numsteps
    env.unwrapped.np_random.set_state(
        pickle.loads(gym_random_state_var.numpy()))
    np.random.set_state(pickle.loads(np_random_state_var.numpy()))
    random.setstate(pickle.loads(py_random_state_var.numpy()))

  with summary_writer.as_default():
    while total_numsteps < FLAGS.training_steps:
      # Decay helps to make the model more stable.
      # TODO(agrawalk): Use tf.train.exponential_decay
      model.actor_lr.assign(
          model.initial_actor_lr * pow(0.5, total_numsteps // 100000))
      logging.info('Learning rate %f', model.actor_lr.numpy())
      rollout_reward, rollout_timesteps = do_rollout(
          env,
          model.actor,
          replay_buffer,
          noise_scale=FLAGS.exploration_noise,
          rand_actions=rand_actions,
          sample_random=(model.actor_step.numpy() == 0),
          add_absorbing_state=FLAGS.learn_absorbing)
      total_numsteps += rollout_timesteps

      logging.info('Training: total timesteps %d, episode reward %f',
                   total_numsteps, rollout_reward)

      print('Training: total timesteps {}, episode reward {}'.format(
          total_numsteps, rollout_reward))

      with contrib_summary.always_record_summaries():
        contrib_summary.scalar(
            'reward/scaled', (rollout_reward - random_reward) /
            (reward_scale.numpy() - random_reward),
            step=total_numsteps)
        contrib_summary.scalar('reward', rollout_reward, step=total_numsteps)
        contrib_summary.scalar('length', rollout_timesteps, step=total_numsteps)

      if len(replay_buffer) >= FLAGS.min_samples_to_start:
        for _ in range(rollout_timesteps):
          time_step = replay_buffer.sample(batch_size=FLAGS.batch_size)
          batch = TimeStep(*zip(*time_step))

          time_step = expert_replay_buffer.sample(batch_size=FLAGS.batch_size)
          expert_batch = TimeStep(*zip(*time_step))

          lfd.update(batch, expert_batch)

        for _ in range(FLAGS.updates_per_step * rollout_timesteps):
          time_step = replay_buffer.sample(batch_size=FLAGS.batch_size)
          batch = TimeStep(*zip(*time_step))
          model.update(
              batch,
              update_actor=model.critic_step.numpy() >=
              FLAGS.policy_updates_delay)

        if total_numsteps - prev_save_timestep >= FLAGS.save_interval:
          replay_buffer_var.assign(zlib.compress(pickle.dumps(replay_buffer)))
          expert_replay_buffer_var.assign(
              zlib.compress(pickle.dumps(expert_replay_buffer)))
          gym_random_state_var.assign(
              pickle.dumps(env.unwrapped.np_random.get_state()))
          np_random_state_var.assign(pickle.dumps(np.random.get_state()))
          py_random_state_var.assign(pickle.dumps(random.getstate()))
          saver.save(
              os.path.join(FLAGS.save_dir, 'checkpoint'),
              global_step=total_numsteps)
          prev_save_timestep = total_numsteps

        if total_numsteps - prev_eval_save_timestep >= FLAGS.eval_save_interval:
          eval_saver.save(
              os.path.join(FLAGS.eval_save_dir, 'checkpoint'),
              global_step=total_numsteps)
          prev_eval_save_timestep = total_numsteps
Example #9
def main(_):
    """Run td3/ddpg training."""
    tfe.enable_eager_execution()

    if FLAGS.use_gpu:
        tf.device('/device:GPU:0').__enter__()

    if FLAGS.expert_dir.find(FLAGS.env) == -1:
        raise ValueError('Expert directory must contain the environment name')

    tf.set_random_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    random.seed(FLAGS.seed)

    env = gym.make(FLAGS.env)
    env.seed(FLAGS.seed)

    obs_shape = env.observation_space.shape
    act_shape = env.action_space.shape

    expert_replay_buffer_var = tfe.Variable('', name='expert_replay_buffer')

    saver = tfe.Saver([expert_replay_buffer_var])
    tf.gfile.MakeDirs(FLAGS.save_dir)

    with tf.variable_scope('actor'):
        actor = Actor(obs_shape[0], act_shape[0])
    expert_saver = tfe.Saver(actor.variables)

    best_checkpoint = None
    best_reward = float('-inf')

    checkpoint_state = tf.train.get_checkpoint_state(FLAGS.expert_dir)

    for checkpoint in checkpoint_state.all_model_checkpoint_paths:
        expert_saver.restore(checkpoint)
        expert_reward, _ = do_rollout(env,
                                      actor,
                                      replay_buffer=None,
                                      noise_scale=0.0,
                                      num_trajectories=10)

        if expert_reward > best_reward:
            best_reward = expert_reward
            best_checkpoint = checkpoint

    expert_saver.restore(best_checkpoint)

    expert_replay_buffer = ReplayBuffer()
    expert_reward, _ = do_rollout(
        env,
        actor,
        replay_buffer=expert_replay_buffer,
        noise_scale=0.0,
        num_trajectories=FLAGS.num_expert_trajectories)

    logging.info('Expert reward %f', expert_reward)
    print('Expert reward {}'.format(expert_reward))

    expert_replay_buffer_var.assign(pickle.dumps(expert_replay_buffer))
    saver.save(os.path.join(FLAGS.save_dir, 'expert_replay_buffer'))
Example #10
    def run(self,
            training_dataset,
            validation_dataset,
            optimizer,
            args,
            tensorboard=False):
        # Tensorboard logging is the only supported mode, so bail out early
        # when it is disabled.
        if tensorboard:
            writer = Writer(args, path=args['exp_dir'] + '/runs')
        else:
            print('Please use tensorboard!')
            return 0

        with tf.contrib.summary.record_summaries_every_n_global_steps(1):
            with tf.device(self._device):
                for itx in tqdm(range(
                        0, args['num_iterations'] // args['val_freq']),
                                desc='iterations:'):
                    try:
                        print("training...")
                        # Training iterations.
                        train_loss = 0.0
                        for i in range(args['val_freq']):
                            left_patches, right_patches, labels = training_dataset.iterator.get_next(
                            )
                            batch = Batch(left_patches, right_patches, labels)
                            grads, t_loss = self.compute(batch, training=True)
                            optimizer.apply_gradients(
                                zip(grads, self.variables))
                            train_loss += t_loss

                        writer.log_to_tensorboard('train_loss',
                                                  train_loss /
                                                  args['val_freq'],
                                                  step=self._global_step)

                        print('validating...')
                        # Validation iterations.
                        error_pixel_2 = 0.0
                        error_pixel_3 = 0.0
                        error_pixel_4 = 0.0
                        error_pixel_5 = 0.0
                        for i, idx in enumerate(validation_dataset.sample_ids):
                            left_image = validation_dataset.left_images[idx]
                            right_image = validation_dataset.right_images[idx]
                            disparity_ground_truth = (
                                validation_dataset.disparity_images[idx])
                            paddings = validation_dataset.get_paddings()
                            left_image = tf.pad(tf.expand_dims(left_image, 0),
                                                paddings, "CONSTANT")
                            right_image = tf.pad(
                                tf.expand_dims(right_image, 0), paddings,
                                "CONSTANT")
                            batch = Batch(left_image, right_image,
                                          disparity_ground_truth)
                            disparity_prediction = self.compute(batch,
                                                                training=False)

                            valid_gt_pixels = (disparity_ground_truth !=
                                               0).astype('float')
                            masked_prediction_valid = disparity_prediction * valid_gt_pixels
                            num_valid_gt_pixels = valid_gt_pixels.sum()

                            error_pixel_2 += (np.abs(masked_prediction_valid -
                                                     disparity_ground_truth) >
                                              2).sum() / num_valid_gt_pixels
                            error_pixel_3 += (np.abs(masked_prediction_valid -
                                                     disparity_ground_truth) >
                                              3).sum() / num_valid_gt_pixels
                            error_pixel_4 += (np.abs(masked_prediction_valid -
                                                     disparity_ground_truth) >
                                              4).sum() / num_valid_gt_pixels
                            error_pixel_5 += (np.abs(masked_prediction_valid -
                                                     disparity_ground_truth) >
                                              5).sum() / num_valid_gt_pixels

                        #print('----------validation summary--------------')
                        #print(error_pixel_2 / len(validation_dataset.sample_ids))
                        #print(error_pixel_3 / len(validation_dataset.sample_ids))
                        #print(error_pixel_4 / len(validation_dataset.sample_ids))
                        #print(error_pixel_5 / len(validation_dataset.sample_ids))

                        writer.log_to_tensorboard(
                            'error', (error_pixel_2, error_pixel_3,
                                      error_pixel_4, error_pixel_5,
                                      len(validation_dataset.sample_ids)),
                            step=self._global_step)

                        print('Saving one random prediction...')
                        random_img_idx = np.random.choice(
                            validation_dataset.sample_ids)
                        sample_left_image = validation_dataset.left_images[
                            random_img_idx]
                        sample_right_image = validation_dataset.right_images[
                            random_img_idx]
                        disparity_prediction = validation_dataset.disparity_images[
                            random_img_idx]
                        paddings = validation_dataset.get_paddings()
                        sample_left_image = tf.pad(
                            tf.expand_dims(sample_left_image, 0), paddings,
                            "CONSTANT")
                        sample_right_image = tf.pad(
                            tf.expand_dims(sample_right_image, 0), paddings,
                            "CONSTANT")
                        batch = Batch(sample_left_image, sample_right_image,
                                      disparity_prediction)
                        disparity_prediction = self.compute(batch,
                                                            training=False)
                        writer.log_to_tensorboard(
                            'qualitative',
                            (disparity_prediction, sample_left_image,
                             sample_right_image),
                            step=self._global_step)
                        self._global_step.assign_add(args['val_freq'])

                        # Save checkpoint.
                        tfe.Saver(self.variables).save(
                            join(self._exp_dir, 'checkpoints', 'checkpoints'),
                            global_step=self._global_step)
                    except tf.errors.OutOfRangeError:
                        break
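
The Writer class used in this example is not defined in the excerpt. A minimal scalar-only stand-in built on tf.contrib.summary might look like the sketch below; the constructor arguments and method signature are assumptions taken from the calls above, and the real Writer also handles the error tuples and image triples logged here.

# Hypothetical minimal Writer: only handles scalar values.
class Writer(object):

    def __init__(self, args, path):
        del args  # unused in this sketch
        self._writer = tf.contrib.summary.create_file_writer(path)
        self._writer.set_as_default()

    def log_to_tensorboard(self, tag, value, step):
        with tf.contrib.summary.always_record_summaries():
            tf.contrib.summary.scalar(tag, value, step=step)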
Example #11
                break

        # Evaluate after each epoch. A one-shot iterator won't repeat any
        # values from the test set.
        iterator = test_dataset.make_one_shot_iterator()
        for x, y in iterator:
            preds = model(x)
            loss = tf.losses.mean_squared_error(y, preds[:, 0])

            test_loss(loss)

        print("Epoch %d: Train Loss = %0.4f | Test Loss = %0.4f\n" %
              (e + 1, train_loss.result(), test_loss.result()))

    # Make sure to add not just the "model" variables, but also the custom
    # variables we added!
    saver = tfe.Saver(model.variables + list(model.custom_variables.values()))
    saver.save('weights/10_01_custom_models/weights.ckpt')
    print("Model saved")

    # Here we need to reset the keras internal backend first
    tf.keras.backend.clear_session()

    # Now we restore the model and predict again on test set
    model2 = CustomRegressor()

    # We need to run the model at least once to build all of the variables,
    # including the custom variables. Make sure to build the model the same
    # way, otherwise it won't find the weights in the checkpoint properly.
    # The safest option is to call model.call(tf_input_batch) explicitly.
    model2.call(dummy_x)

    # ensure that you are loading both the Keras variables AND the custom variables