def setup_datasets(model_hparams, batch_size, seed, train_dataset, val_dataset,
                   take):
    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train',
                                                  shuffle_files=True,
                                                  take=take)
    val_tf_dataset = val_dataset.get_datasets(mode='val',
                                              shuffle_files=True,
                                              take=take)

    train_tf_dataset = train_tf_dataset.shuffle(
        model_hparams['shuffle_buffer_size'],
        seed=seed,
        reshuffle_each_iteration=True)

    # balance the number of positive and negative examples
    train_tf_dataset = balance(train_tf_dataset)
    val_tf_dataset = balance(val_tf_dataset)

    train_tf_dataset = batch_tf_dataset(train_tf_dataset,
                                        batch_size,
                                        drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset,
                                      batch_size,
                                      drop_remainder=True)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return train_tf_dataset, val_tf_dataset
def setup_datasets(model_hparams, batch_size, seed, train_dataset, val_dataset,
                   take):
    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train', take=take)
    val_tf_dataset = val_dataset.get_datasets(mode='val')

    # mix up examples before batching
    train_tf_dataset = train_tf_dataset.shuffle(
        model_hparams['shuffle_buffer_size'], seed=seed)

    train_tf_dataset = batch_tf_dataset(train_tf_dataset,
                                        batch_size,
                                        drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset,
                                      batch_size,
                                      drop_remainder=True)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return train_tf_dataset, val_tf_dataset
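# Usage sketch (illustrative, not from the original source): the directory,
# loader arguments, and hyperparameter values below are hypothetical
# placeholders for whatever the caller already has on hand.
def example_setup_datasets_usage():
    model_hparams = {'shuffle_buffer_size': 512}
    train_loader = DynamicsDatasetLoader([pathlib.Path('data/my_dataset')])
    val_loader = DynamicsDatasetLoader([pathlib.Path('data/my_dataset')])
    return setup_datasets(model_hparams,
                          batch_size=32,
                          seed=0,
                          train_dataset=train_loader,
                          val_dataset=val_loader,
                          take=None)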
def generate_classifier_examples(fwd_model: BaseDynamicsFunction,
                                 tf_dataset: tf.data.Dataset,
                                 dataset: DynamicsDatasetLoader,
                                 labeling_params: Dict, batch_size: int):
    classifier_horizon = labeling_params['classifier_horizon']
    assert classifier_horizon >= 2
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=False)
    sc = dataset.scenario

    # count the batches once so progress can be reported below
    n_total_batches = 0
    for _ in tf_dataset:
        n_total_batches += 1

    t0 = perf_counter()
    for idx, example in enumerate(tf_dataset):
        dt = perf_counter() - t0
        print(f"{idx + 1} / {n_total_batches} batches in {dt:.3f} seconds")
        actual_batch_size = int(example['traj_idx'].shape[0])

        valid_out_examples = []
        for start_t in range(0,
                             dataset.steps_per_traj - classifier_horizon + 1,
                             labeling_params['start_step']):
            prediction_end_t = dataset.steps_per_traj
            actual_prediction_horizon = prediction_end_t - start_t
            actual_states_from_start_t = {
                k: example[k][:, start_t:prediction_end_t]
                for k in dataset.state_keys
            }
            actions_from_start_t = {
                k: example[k][:, start_t:prediction_end_t - 1]
                for k in dataset.action_keys
            }

            predictions_from_start_t, _ = fwd_model.propagate_differentiable_batched(
                environment={},
                state=actual_states_from_start_t,
                actions=actions_from_start_t)
            prediction_actual = PredictionActualExample(
                example=example,
                actions=actions_from_start_t,
                actual_states=actual_states_from_start_t,
                predictions=predictions_from_start_t,
                start_t=start_t,
                labeling_params=labeling_params,
                actual_prediction_horizon=actual_prediction_horizon,
                batch_size=actual_batch_size)
            valid_out_examples_for_start_t = generate_classifier_examples_from_batch(
                sc, prediction_actual)
            valid_out_examples.extend(valid_out_examples_for_start_t)

        yield valid_out_examples
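# Consumption sketch (assumption, not from the original source): the generator
# above yields one list of classifier examples per batch, so a caller might
# drain it like this. `write_example` is a hypothetical sink.
def consume_classifier_examples(fwd_model, tf_dataset, dataset,
                                labeling_params, batch_size):
    n_written = 0
    for valid_out_examples in generate_classifier_examples(
            fwd_model, tf_dataset, dataset, labeling_params, batch_size):
        for out_example in valid_out_examples:
            # write_example(out_example)  # hypothetical sink
            n_written += 1
    print(f"generated {n_written} classifier examples")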
def eval_main(dataset_dirs: List[pathlib.Path],
              mode: str,
              batch_size: int,
              use_gt_rope: bool,
              threshold: Optional[float] = None,
              old_compat: bool = False,
              take: Optional[int] = None,
              checkpoint: Optional[pathlib.Path] = None,
              trials_directory: Optional[pathlib.Path] = None,
              **kwargs):
    ###############
    # Model
    ###############
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    model_class = link_bot_classifiers.get_model(params['model_class'])

    ###############
    # Dataset
    ###############
    dataset = ClassifierDatasetLoader(dataset_dirs,
                                      load_true_states=True,
                                      use_gt_rope=use_gt_rope,
                                      old_compat=old_compat,
                                      threshold=threshold)
    tf_dataset = dataset.get_datasets(mode=mode, take=take)
    tf_dataset = balance(tf_dataset)

    ###############
    # Evaluate
    ###############
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=True)

    model = model_class(hparams=params,
                        batch_size=batch_size,
                        scenario=dataset.scenario)
    # This call to model runner restores the model
    runner = ModelRunner(model=model,
                         training=False,
                         params=params,
                         checkpoint=checkpoint,
                         trial_path=trial_path,
                         key_metric=AccuracyMetric,
                         batch_metadata=dataset.batch_metadata)

    metrics = runner.val_epoch(tf_dataset)
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name:30s}: {metric_value}")
    return metrics
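# Invocation sketch (illustrative): every path below is a hypothetical
# placeholder; eval_main only needs the checkpoint's parent directory to be a
# valid trial directory.
metrics = eval_main(dataset_dirs=[pathlib.Path('classifier_data')],
                    mode='test',
                    batch_size=16,
                    use_gt_rope=True,
                    checkpoint=pathlib.Path('trials/my_trial/best_checkpoint'),
                    trials_directory=pathlib.Path('trials'))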
def load_dataset_and_models(args):
    comparison_info = json.load(args.comparison.open("r"))
    models = {}
    for name, model_info in comparison_info.items():
        model_dir = paths_from_json(model_info['model_dir'])
        model, _ = dynamics_utils.load_generic_model(model_dir)
        models[name] = model

    dataset = DynamicsDatasetLoader(args.dataset_dirs)
    tf_dataset = dataset.get_datasets(mode=args.mode,
                                      shard=args.shard,
                                      take=args.take)
    tf_dataset = batch_tf_dataset(tf_dataset, 1)

    return tf_dataset, dataset, models
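# Consumption sketch (illustrative): iterate the unit-batched dataset and run
# every loaded model, following the from_example call pattern used by
# viz_dataset further down; treating that call as valid for these models is an
# assumption.
def example_compare_models(args):
    tf_dataset, dataset, models = load_dataset_and_models(args)
    for batch in tf_dataset:
        batch.update(dataset.batch_metadata)
        for name, model in models.items():
            outputs, _ = model.from_example(batch, training=False)
            print(name, {k: v.shape for k, v in outputs.items()})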
def eval_main(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    batch_size: int,
    use_gt_rope: bool,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    model = state_space_dynamics.get_model(params['model_class'])
    net = model(hparams=params,
                batch_size=batch_size,
                scenario=test_dataset.scenario)

    runner = ModelRunner(model=net,
                         training=False,
                         checkpoint=checkpoint,
                         batch_metadata=test_dataset.batch_metadata,
                         trial_path=trial_path,
                         params=params)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)
    validation_metrics = runner.val_epoch(test_tf_dataset)
    for name, value in validation_metrics.items():
        print(f"{name}: {value}")

    # more metrics that can't be expressed as just an average over metrics on each batch
    all_errors = None
    for batch in test_tf_dataset:
        outputs = runner.model(batch, training=False)
        errors_for_batch = test_dataset.scenario.classifier_distance(
            outputs, batch)
        if all_errors is not None:
            all_errors = tf.concat([all_errors, errors_for_batch], axis=0)
        else:
            all_errors = errors_for_batch
    print(f"90th percentile {np.percentile(all_errors.numpy(), 90)}")
    print(f"95th percentile {np.percentile(all_errors.numpy(), 95)}")
    print(f"99th percentile {np.percentile(all_errors.numpy(), 99)}")
    print(f"max {np.max(all_errors.numpy())}")
def viz_main(args):
    dataset_dirs = args.dataset_dirs
    checkpoint = args.checkpoint

    trial_path, params = load_trial(checkpoint.parent.absolute())

    dataset = DynamicsDatasetLoader(dataset_dirs)
    scenario = dataset.scenario

    tf_dataset = dataset.get_datasets(mode='val')
    tf_dataset = batch_tf_dataset(tf_dataset,
                                  batch_size=1,
                                  drop_remainder=True)

    model = CFM(hparams=params, batch_size=1, scenario=scenario)
    ckpt = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(ckpt, args.checkpoint, max_to_keep=1)
    status = ckpt.restore(manager.latest_checkpoint).expect_partial()
    if manager.latest_checkpoint:
        print(Fore.CYAN + "Restored from {}".format(manager.latest_checkpoint))
        status.assert_existing_objects_matched()
    else:
        raise RuntimeError("Failed to restore!!!")

    for example_idx, example in enumerate(tf_dataset):
        stepper = RvizAnimationController(n_time_steps=dataset.steps_per_traj)
        # the model output does not depend on t, so compute it once per example
        output = model(
            model.preprocess_no_gradient(example, training=False))
        for t in range(dataset.steps_per_traj):

            actual_t = numpify(
                remove_batch(scenario.index_time_batched_predicted(example,
                                                                   t)))
            # NOTE: this reuses the state indexer for the action; an
            # action-specific indexer is presumably intended here
            action_t = numpify(
                remove_batch(scenario.index_time_batched_predicted(example,
                                                                   t)))
            scenario.plot_state_rviz(actual_t, label='actual', color='red')
            scenario.plot_action_rviz(actual_t, action_t, color='gray')
            prediction_t = remove_batch(
                scenario.index_time_batched_predicted(output, t))
            scenario.plot_state_rviz(prediction_t,
                                     label='predicted',
                                     color='blue')

            stepper.step()
def compute_classifier_threshold(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    batch_size: int,
    use_gt_rope: bool,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    model = state_space_dynamics.get_model(params['model_class'])
    net = model(hparams=params,
                batch_size=batch_size,
                scenario=test_dataset.scenario)

    runner = ModelRunner(model=net,
                         training=False,
                         checkpoint=checkpoint,
                         batch_metadata=test_dataset.batch_metadata,
                         trial_path=trial_path,
                         params=params)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)

    all_errors = None
    for batch in test_tf_dataset:
        outputs = runner.model(batch, training=False)
        errors_for_batch = test_dataset.scenario.classifier_distance(
            batch, outputs)
        if all_errors is not None:
            all_errors = tf.concat([all_errors, errors_for_batch], axis=0)
        else:
            all_errors = errors_for_batch

    classifier_threshold = np.percentile(all_errors.numpy(), 90)
    rospy.loginfo(f"90th percentile {classifier_threshold}")
    return classifier_threshold
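# Usage sketch (illustrative): the returned 90th-percentile dynamics error is
# the kind of value the classifier dataset loaders above accept as
# `threshold`; all paths are hypothetical placeholders.
threshold = compute_classifier_threshold(
    dataset_dirs=[pathlib.Path('dynamics_data')],
    checkpoint=pathlib.Path('trials/dynamics_trial/best_checkpoint'),
    mode='val',
    batch_size=32,
    use_gt_rope=True)
classifier_dataset = ClassifierDatasetLoader([pathlib.Path('classifier_data')],
                                             load_true_states=True,
                                             use_gt_rope=True,
                                             threshold=threshold)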
def viz_dataset(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    viz_func: Callable,
    use_gt_rope: bool,
    **kwargs,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset, 1, drop_remainder=True)

    model, _ = dynamics_utils.load_generic_model([checkpoint])

    for i, batch in enumerate(test_tf_dataset):
        batch.update(test_dataset.batch_metadata)
        outputs, _ = model.from_example(batch, training=False)

        viz_func(batch, outputs, test_dataset, model)
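# Sketch of a matching viz_func (its signature is inferred from the call site
# above: batched inputs, model outputs, the dataset loader, and the model).
# Printing shapes stands in for real plotting.
def print_shapes_viz(batch, outputs, test_dataset, model):
    for k in test_dataset.state_keys:
        print(k, outputs[k].shape)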
def eval_main(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    batch_size: int,
    **kwargs,
):
    ###############
    # Model
    ###############
    trial_path = checkpoint.parent.absolute()
    trials_directory = pathlib.Path('recovery_trials').absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    scenario = get_scenario(params['scenario'])
    net = NNRecoveryModel(hparams=params,
                          scenario=scenario,
                          batch_size=batch_size)

    ###############
    # Dataset
    ###############
    test_dataset = RecoveryDatasetLoader(dataset_dirs)
    test_tf_dataset = test_dataset.get_datasets(mode=mode)

    ###############
    # Evaluate
    ###############
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)

    runner = ModelRunner(model=net,
                         training=False,
                         params=params,
                         checkpoint=checkpoint,
                         trial_path=trial_path,
                         batch_metadata=test_dataset.batch_metadata)
    validation_metrics = runner.val_epoch(test_tf_dataset)
    for name, value in validation_metrics.items():
        print(f"{name}: {value:.3f}")
def viz_ensemble_main(dataset_dir: pathlib.Path,
                      checkpoints: List[pathlib.Path], mode: str,
                      batch_size: int, only_errors: bool, use_gt_rope: bool,
                      **kwargs):
    dynamics_stdev_pub_ = rospy.Publisher("dynamics_stdev",
                                          Float32,
                                          queue_size=10)
    classifier_stdev_pub_ = rospy.Publisher("classifier_stdev",
                                            Float32,
                                            queue_size=10)
    accept_probability_pub_ = rospy.Publisher("accept_probability_viz",
                                              Float32,
                                              queue_size=10)
    traj_idx_pub_ = rospy.Publisher("traj_idx_viz", Float32, queue_size=10)

    ###############
    # Model
    ###############
    model = load_generic_model(checkpoints)

    ###############
    # Dataset
    ###############
    test_dataset = ClassifierDatasetLoader([dataset_dir],
                                           load_true_states=True,
                                           use_gt_rope=use_gt_rope)
    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)
    scenario = test_dataset.scenario

    ###############
    # Evaluate
    ###############

    # Iterate over test set
    all_accuracies_over_time = []
    all_stdevs = []
    all_labels = []
    classifier_ensemble_stdevs = []
    for batch_idx, test_batch in enumerate(test_tf_dataset):
        test_batch.update(test_dataset.batch_metadata)

        mean_predictions, stdev_predictions = model.check_constraint_from_example(
            test_batch)
        mean_probabilities = mean_predictions['probabilities']
        stdev_probabilities = stdev_predictions['probabilities']

        labels = tf.expand_dims(test_batch['is_close'][:, 1:], axis=2)

        all_labels = tf.concat(
            (all_labels, tf.reshape(test_batch['is_close'][:, 1:], [-1])),
            axis=0)
        all_stdevs = tf.concat(
            (all_stdevs, tf.reshape(test_batch[add_predicted('stdev')], [-1])),
            axis=0)

        accuracy_over_time = tf.keras.metrics.binary_accuracy(
            y_true=labels, y_pred=mean_probabilities)
        all_accuracies_over_time.append(accuracy_over_time)

        # Visualization
        test_batch.pop("time")
        test_batch.pop("batch_size")
        decisions = mean_probabilities > 0.5
        classifier_is_correct = tf.squeeze(tf.equal(decisions,
                                                    tf.cast(labels, tf.bool)),
                                           axis=-1)
        for b in range(batch_size):
            example = index_dict_of_batched_tensors_tf(test_batch, b)

            classifier_ensemble_stdev = stdev_probabilities[b].numpy().squeeze(
            )
            classifier_ensemble_stdevs.append(classifier_ensemble_stdev)

            # if the classifier is correct at all time steps, ignore
            if only_errors and tf.reduce_all(classifier_is_correct[b]):
                continue

            # if only_collision
            predicted_rope_states = tf.reshape(
                example[add_predicted('link_bot')][1], [-1, 3])
            xs = predicted_rope_states[:, 0]
            ys = predicted_rope_states[:, 1]
            zs = predicted_rope_states[:, 2]
            in_collision = bool(
                batch_in_collision_tf_3d(environment=example,
                                         xs=xs,
                                         ys=ys,
                                         zs=zs,
                                         inflate_radius_m=0)[0].numpy())
            accept = decisions[b, 0, 0].numpy()
            # optional filter: keep only examples that are both in collision
            # and accepted by the classifier
            # if not (in_collision and accept):
            #     continue

            scenario.plot_environment_rviz(example)

            classifier_stdev_msg = Float32()
            classifier_stdev_msg.data = stdev_probabilities[b].numpy().squeeze()
            classifier_stdev_pub_.publish(classifier_stdev_msg)

            actual_0 = scenario.index_state_time(example, 0)
            actual_1 = scenario.index_state_time(example, 1)
            pred_0 = scenario.index_predicted_state_time(example, 0)
            pred_1 = scenario.index_predicted_state_time(example, 1)
            action = scenario.index_action_time(example, 0)
            label = example['is_close'][1]
            scenario.plot_state_rviz(actual_0,
                                     label='actual',
                                     color='#FF0000AA',
                                     idx=0)
            scenario.plot_state_rviz(actual_1,
                                     label='actual',
                                     color='#E00016AA',
                                     idx=1)
            scenario.plot_state_rviz(pred_0,
                                     label='predicted',
                                     color='#0000FFAA',
                                     idx=0)
            scenario.plot_state_rviz(pred_1,
                                     label='predicted',
                                     color='#0553FAAA',
                                     idx=1)
            scenario.plot_action_rviz(pred_0, action)
            scenario.plot_is_close(label)

            dynamics_stdev_t = example[add_predicted('stdev')][1, 0].numpy()
            dynamics_stdev_msg = Float32()
            dynamics_stdev_msg.data = dynamics_stdev_t
            dynamics_stdev_pub_.publish(dynamics_stdev_msg)

            accept_probability_t = mean_probabilities[b, 0, 0].numpy()
            accept_probability_msg = Float32()
            accept_probability_msg.data = accept_probability_t
            accept_probability_pub_.publish(accept_probability_msg)

            traj_idx_msg = Float32()
            traj_idx_msg.data = batch_idx * batch_size + b
            traj_idx_pub_.publish(traj_idx_msg)

            # stepper = RvizSimpleStepper()
            # stepper.step()

        print(f"running mean classifier ensemble stdev: "
              f"{np.mean(classifier_ensemble_stdevs):.4f}")

    all_accuracies_over_time = tf.concat(all_accuracies_over_time, axis=0)
    mean_accuracies_over_time = tf.reduce_mean(all_accuracies_over_time,
                                               axis=0)
    std_accuracies_over_time = tf.math.reduce_std(all_accuracies_over_time,
                                                  axis=0)
    print(f"mean accuracy over time: {mean_accuracies_over_time}")
    print(f"stdev of accuracy over time: {std_accuracies_over_time}")
    mean_classifier_ensemble_stdev = tf.reduce_mean(classifier_ensemble_stdevs)
    print(f"mean classifier ensemble stdev: {mean_classifier_ensemble_stdev}")
def viz_main(dataset_dirs: List[pathlib.Path],
             checkpoint: pathlib.Path,
             mode: str,
             batch_size: int,
             only_errors: bool,
             use_gt_rope: bool,
             old_compat: bool = False,
             **kwargs):
    stdev_pub_ = rospy.Publisher("stdev", Float32, queue_size=10)
    traj_idx_pub_ = rospy.Publisher("traj_idx_viz", Float32, queue_size=10)

    ###############
    # Model
    ###############
    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)

    ###############
    # Dataset
    ###############
    dataset = ClassifierDatasetLoader(
        dataset_dirs,
        load_true_states=True,
        use_gt_rope=use_gt_rope,
        threshold=params['classifier_dataset_hparams']['labeling_params']
        ['threshold'],
        old_compat=old_compat)
    tf_dataset = dataset.get_datasets(mode=mode)
    scenario = dataset.scenario

    ###############
    # Evaluate
    ###############
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=True)

    model = classifier_utils.load_generic_model([checkpoint])

    for batch_idx, example in enumerate(
            progressbar(tf_dataset, widgets=base_dataset.widgets)):
        example.update(dataset.batch_metadata)
        predictions, _ = model.check_constraint_from_example(example,
                                                             training=False)

        labels = tf.expand_dims(example['is_close'][:, 1:], axis=2)

        probabilities = predictions['probabilities']

        # Visualization
        example.pop("time")
        example.pop("batch_size")
        decisions = probabilities > 0.5
        classifier_is_correct = tf.squeeze(tf.equal(decisions,
                                                    tf.cast(labels, tf.bool)),
                                           axis=-1)
        for b in range(batch_size):
            example_b = index_dict_of_batched_tensors_tf(example, b)

            # if the classifier is correct at all time steps, ignore
            if only_errors and tf.reduce_all(classifier_is_correct[b]):
                continue

            def _custom_viz_t(scenario: Base3DScenario, e: Dict, t: int):
                if t > 0:
                    accept_probability_t = predictions['probabilities'][
                        b, t - 1, 0].numpy()
                else:
                    accept_probability_t = -999
                scenario.plot_accept_probability(accept_probability_t)

                traj_idx_msg = Float32()
                traj_idx_msg.data = batch_idx * batch_size + b
                traj_idx_pub_.publish(traj_idx_msg)

            anim = RvizAnimation(scenario=scenario,
                                 n_time_steps=dataset.horizon,
                                 init_funcs=[
                                     init_viz_env,
                                     dataset.init_viz_action(),
                                 ],
                                 t_funcs=[
                                     _custom_viz_t,
                                     dataset.classifier_transition_viz_t(),
                                     ExperimentScenario.plot_stdev_t,
                                 ])
            with open("debugging.hjson", 'w') as f:
                my_hdump(numpify(example_b), f)
            anim.play(example_b)
def train_main(
    dataset_dirs: List[pathlib.Path],
    model_hparams: pathlib.Path,
    classifier_checkpoint: pathlib.Path,
    log: str,
    batch_size: int,
    epochs: int,
    seed: int,
    checkpoint: Optional[pathlib.Path] = None,
    ensemble_idx: Optional[int] = None,
    trials_directory: Optional[pathlib.Path] = None,
    **kwargs,
):
    ###############
    # Datasets
    ###############
    train_dataset = RecoveryDatasetLoader(dataset_dirs)
    val_dataset = RecoveryDatasetLoader(dataset_dirs)

    ###############
    # Model
    ###############
    model_hparams = json.load((model_hparams).open('r'))
    model_hparams['recovery_dataset_hparams'] = train_dataset.hparams
    model_hparams['batch_size'] = batch_size
    model_hparams['seed'] = seed
    model_hparams['datasets'] = paths_to_json(dataset_dirs)
    model_hparams['latest_training_time'] = int(time.time())
    scenario = get_scenario(model_hparams['scenario'])

    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train')
    val_tf_dataset = val_dataset.get_datasets(mode='val')

    # mix up examples before batching, so each batch draws from many trajectories
    train_tf_dataset = train_tf_dataset.shuffle(buffer_size=512, seed=seed)

    train_tf_dataset = batch_tf_dataset(train_tf_dataset,
                                        batch_size,
                                        drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset,
                                      batch_size,
                                      drop_remainder=True)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    model = NNRecoveryModel(hparams=model_hparams,
                            scenario=scenario,
                            batch_size=batch_size)

    ############
    # Initialize weights from classifier model by "restoring" from checkpoint
    ############
    if not checkpoint:
        # load in the weights for the conv & dense layers of the classifier's encoder, skip the last few layers
        classifier_model = tf.train.Checkpoint(conv_layers=model.conv_layers)
        classifier_root = tf.train.Checkpoint(model=classifier_model)
        classifier_checkpoint_manager = tf.train.CheckpointManager(
            classifier_root, classifier_checkpoint.as_posix(), max_to_keep=1)

        status = classifier_root.restore(
            classifier_checkpoint_manager.latest_checkpoint)
        status.expect_partial()
        status.assert_existing_objects_matched()
        assert classifier_checkpoint_manager.latest_checkpoint is not None
        print(Fore.MAGENTA + "Restored {}".format(
            classifier_checkpoint_manager.latest_checkpoint) + Fore.RESET)
    ############

    trial_path = None
    checkpoint_name = None
    if checkpoint:
        trial_path = checkpoint.parent.absolute()
        checkpoint_name = checkpoint.name
    trials_directory = pathlib.Path('recovery_trials').absolute()
    group_name = log if trial_path is None else None
    trial_path, _ = filepath_tools.create_or_load_trial(
        group_name=group_name,
        params=model_hparams,
        trial_path=trial_path,
        trials_directory=trials_directory,
        write_summary=False)
    runner = ModelRunner(model=model,
                         training=True,
                         params=model_hparams,
                         trial_path=trial_path,
                         val_every_n_batches=1,
                         mid_epoch_val_batches=100,
                         validate_first=True,
                         checkpoint=checkpoint,
                         batch_metadata=train_dataset.batch_metadata)

    # Train
    runner.train(train_tf_dataset, val_tf_dataset, num_epochs=epochs)

    return trial_path
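# Invocation sketch (illustrative): every path below is a hypothetical
# placeholder; `log` names the new trial group when no checkpoint is given.
trial_path = train_main(
    dataset_dirs=[pathlib.Path('recovery_data')],
    model_hparams=pathlib.Path('hparams/recovery.json'),
    classifier_checkpoint=pathlib.Path('trials/classifier_trial/best_checkpoint'),
    log='recovery_from_classifier',
    batch_size=32,
    epochs=10,
    seed=0)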