Code example #1
    def __init__(self,
                 training,
                 group_name=None,
                 trial_path=None,
                 params=None,
                 write_summary=True):
        self.side_length = 64
        self.num_voxels = self.side_length**3
        self.training = training

        self.trial_path, self.params = filepath_tools.create_or_load_trial(
            group_name=group_name,
            params=params,
            trial_path=trial_path,
            write_summary=write_summary)
        self.group_name = self.trial_path.parts[-2]

        self.batch_size = 16
        if not self.training:
            self.batch_size = 1

        self.train_summary_writer = tf.summary.create_file_writer(
            (self.trial_path / "logs/train").as_posix())
        self.test_summary_writer = tf.summary.create_file_writer(
            (self.trial_path / "logs/test").as_posix())

        if self.params['network'] == 'VoxelCNN':
            self.model = VoxelCNN(self.params, batch_size=self.batch_size)
        # if self.params['network'] == 'StackedVoxelCNN':
        #     self.model = StackedVoxelCNN(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'AutoEncoder':
            self.model = AutoEncoder(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'VAE':
            self.model = VAE(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'VAE_GAN':
            self.model = VAE_GAN(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'Augmented_VAE':
            self.model = Augmented_VAE(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'Conditional_VCNN':
            self.model = ConditionalVCNN(self.params,
                                         batch_size=self.batch_size)
        elif self.params['network'] == 'AE_VCNN':
            self.model = AE_VCNN(self.params, batch_size=self.batch_size)
        else:
            raise Exception(f"Unknown Model Type: {self.params['network']}")

        self.num_batches = None

        self.ckpt = tf.train.Checkpoint(step=tf.Variable(1),
                                        epoch=tf.Variable(0),
                                        train_time=tf.Variable(0.0),
                                        optimizer=self.model.opt,
                                        net=self.model.get_model())
        self.checkpoint_path = self.trial_path / "training_checkpoints/"
        self.manager = tf.train.CheckpointManager(
            self.ckpt, self.checkpoint_path.as_posix(), max_to_keep=1)
        self.restore()
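
The if/elif ladder above selects a model class from params['network']. The same dispatch can also be written as a lookup table; a sketch (not the project's code) reusing the class names from the snippet:

# Sketch: the network dispatch expressed as a lookup table.
# Class names are taken from the example above; everything else is illustrative.
MODEL_CLASSES = {
    'VoxelCNN': VoxelCNN,
    'AutoEncoder': AutoEncoder,
    'VAE': VAE,
    'VAE_GAN': VAE_GAN,
    'Augmented_VAE': Augmented_VAE,
    'Conditional_VCNN': ConditionalVCNN,
    'AE_VCNN': AE_VCNN,
}

def build_model(params, batch_size):
    try:
        model_class = MODEL_CLASSES[params['network']]
    except KeyError:
        raise Exception(f"Unknown Model Type: {params['network']}")
    return model_class(params, batch_size=batch_size)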
Code example #2
    def test_get_trial_directory_load_existing(self):
        expected_trial_path = pathlib.Path('.testing/new_trial/subdir')
        params_filename = expected_trial_path / 'params.json'
        expected_params = filepath_tools.get_default_params()
        expected_params['a'] = 2
        expected_trial_path.mkdir(parents=True, exist_ok=True)
        with params_filename.open("w") as params_file:
            json.dump(expected_params, params_file)

        trial_path, loaded_params = filepath_tools.create_or_load_trial(
            trial_path=expected_trial_path, write_summary=False)
        self.assertEqual(expected_trial_path, trial_path)
        self.assertEqual(loaded_params, expected_params)
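
Example #2 loads an existing trial by passing trial_path; example #3 below creates a fresh one from a group_name. A minimal sketch of the two call patterns, inferred from these tests (directory names and params are hypothetical):

import pathlib

# filepath_tools is assumed to be imported as in the tests above.
# Pattern 1: create a new trial; a new subdirectory is created under
# trials_directory / group_name (its exact name is chosen by the library).
trial_path, params = filepath_tools.create_or_load_trial(
    group_name='my_experiment',
    params={'batch_size': 16},
    trials_directory=pathlib.Path('trials'),
    write_summary=False)

# Pattern 2: reload that trial later; its params.json is read back.
trial_path, params = filepath_tools.create_or_load_trial(
    trial_path=trial_path,
    write_summary=False)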
Code example #3
    def test_get_trial_directory_new_train(self):
        base_dir = pathlib.Path('.testing')
        group_name = 'new_trial'
        trial_directory, params = filepath_tools.create_or_load_trial(
            group_name=group_name,
            params={},
            trials_directory=base_dir,
            write_summary=False)
        self.assertTrue(trial_directory.exists())
        self.assertEqual(trial_directory.parent,
                         pathlib.Path(base_dir) / group_name)
        arc_utilities.path_utils.rm_tree(trial_directory)
Code example #4
def eval_main(dataset_dirs: List[pathlib.Path],
              mode: str,
              batch_size: int,
              use_gt_rope: bool,
              threshold: Optional[float] = None,
              old_compat: bool = False,
              take: Optional[int] = None,
              checkpoint: Optional[pathlib.Path] = None,
              trials_directory: Optional[pathlib.Path] = None,
              **kwargs):
    ###############
    # Model
    ###############
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    model_class = link_bot_classifiers.get_model(params['model_class'])

    ###############
    # Dataset
    ###############
    dataset = ClassifierDatasetLoader(dataset_dirs,
                                      load_true_states=True,
                                      use_gt_rope=use_gt_rope,
                                      old_compat=old_compat,
                                      threshold=threshold)
    tf_dataset = dataset.get_datasets(mode=mode, take=take)
    tf_dataset = balance(tf_dataset)

    ###############
    # Evaluate
    ###############
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=True)

    model = model_class(hparams=params,
                        batch_size=batch_size,
                        scenario=dataset.scenario)
    # Constructing the ModelRunner restores the model from the checkpoint
    runner = ModelRunner(model=model,
                         training=False,
                         params=params,
                         checkpoint=checkpoint,
                         trial_path=trial_path,
                         key_metric=AccuracyMetric,
                         batch_metadata=dataset.batch_metadata)

    metrics = runner.val_epoch(tf_dataset)
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name:30s}: {metric_value}")
    return metrics
Code example #5
def eval_main(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    batch_size: int,
    use_gt_rope: bool,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    model = state_space_dynamics.get_model(params['model_class'])
    net = model(hparams=params,
                batch_size=batch_size,
                scenario=test_dataset.scenario)

    runner = ModelRunner(model=net,
                         training=False,
                         checkpoint=checkpoint,
                         batch_metadata=test_dataset.batch_metadata,
                         trial_path=trial_path,
                         params=params)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)
    validation_metrics = runner.val_epoch(test_tf_dataset)
    for name, value in validation_metrics.items():
        print(f"{name}: {value}")

    # Additional metrics that can't be expressed as an average of per-batch metrics
    all_errors = None
    for batch in test_tf_dataset:
        outputs = runner.model(batch, training=False)
        errors_for_batch = test_dataset.scenario.classifier_distance(
            outputs, batch)
        if all_errors is not None:
            all_errors = tf.concat([all_errors, errors_for_batch], axis=0)
        else:
            all_errors = errors_for_batch
    print(f"90th percentile {np.percentile(all_errors.numpy(), 90)}")
    print(f"95th percentile {np.percentile(all_errors.numpy(), 95)}")
    print(f"99th percentile {np.percentile(all_errors.numpy(), 99)}")
    print(f"max {np.max(all_errors.numpy())}")
Code example #6
def setup_training_paths(checkpoint, ensemble_idx, log, model_hparams,
                         trials_directory):
    trial_path = None
    checkpoint_name = None
    if checkpoint:
        trial_path = checkpoint.parent.absolute()
        checkpoint_name = checkpoint.name
    group_name = log if trial_path is None else None
    if group_name is not None and ensemble_idx is not None:
        group_name = f"{group_name}_{ensemble_idx}"
    trial_path, _ = filepath_tools.create_or_load_trial(
        group_name=group_name,
        params=model_hparams,
        trial_path=trial_path,
        trials_directory=trials_directory,
        write_summary=False)
    return checkpoint_name, trial_path
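
A hypothetical call, based only on the parameters shown above: passing a checkpoint resumes an existing trial, while passing log (and optionally ensemble_idx) names a new one. All argument values here are made up:

import pathlib

# Resume case: the trial directory is recovered from the checkpoint path.
name, trial_path = setup_training_paths(
    checkpoint=pathlib.Path('trials/my_run/some_trial/my_ckpt'),
    ensemble_idx=None,
    log=None,
    model_hparams={'batch_size': 16},
    trials_directory=pathlib.Path('trials'))

# Fresh case: a new trial named after the log label plus the ensemble index.
name, trial_path = setup_training_paths(
    checkpoint=None,
    ensemble_idx=0,
    log='my_run',
    model_hparams={'batch_size': 16},
    trials_directory=pathlib.Path('trials'))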
Code example #7
def compute_classifier_threshold(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    batch_size: int,
    use_gt_rope: bool,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    model = state_space_dynamics.get_model(params['model_class'])
    net = model(hparams=params,
                batch_size=batch_size,
                scenario=test_dataset.scenario)

    runner = ModelRunner(model=net,
                         training=False,
                         checkpoint=checkpoint,
                         batch_metadata=test_dataset.batch_metadata,
                         trial_path=trial_path,
                         params=params)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)

    all_errors = None
    for batch in test_tf_dataset:
        outputs = runner.model(batch, training=False)
        errors_for_batch = test_dataset.scenario.classifier_distance(
            batch, outputs)
        if all_errors is not None:
            all_errors = tf.concat([all_errors, errors_for_batch], axis=0)
        else:
            all_errors = errors_for_batch

    classifier_threshold = np.percentile(all_errors.numpy(), 90)
    rospy.loginfo(f"90th percentile {classifier_threshold}")
    return classifier_threshold
Code example #8
def eval_main(
    dataset_dirs: List[pathlib.Path],
    checkpoint: pathlib.Path,
    mode: str,
    batch_size: int,
    **kwargs,
):
    ###############
    # Model
    ###############
    trial_path = checkpoint.parent.absolute()
    trials_directory = pathlib.Path('recovery_trials').absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)
    scenario = get_scenario(params['scenario'])
    net = NNRecoveryModel(hparams=params, scenario=scenario, batch_size=1)

    ###############
    # Dataset
    ###############
    test_dataset = RecoveryDatasetLoader(dataset_dirs)
    test_tf_dataset = test_dataset.get_datasets(mode=mode)

    ###############
    # Evaluate
    ###############
    test_tf_dataset = batch_tf_dataset(test_tf_dataset,
                                       batch_size,
                                       drop_remainder=True)

    runner = ModelRunner(model=net,
                         training=False,
                         params=params,
                         checkpoint=checkpoint,
                         trial_path=trial_path,
                         batch_metadata=test_dataset.batch_metadata)
    validation_metrics = runner.val_epoch(test_tf_dataset)
    for name, value in validation_metrics.items():
        print(f"{name}: {value:.3f}")
def viz_main(dataset_dirs: List[pathlib.Path],
             checkpoint: pathlib.Path,
             mode: str,
             batch_size: int,
             only_errors: bool,
             use_gt_rope: bool,
             old_compat: bool = False,
             **kwargs):
    stdev_pub_ = rospy.Publisher("stdev", Float32, queue_size=10)
    traj_idx_pub_ = rospy.Publisher("traj_idx_viz", Float32, queue_size=10)

    ###############
    # Model
    ###############
    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(
        trial_path=trial_path, trials_directory=trials_directory)

    ###############
    # Dataset
    ###############
    dataset = ClassifierDatasetLoader(
        dataset_dirs,
        load_true_states=True,
        use_gt_rope=use_gt_rope,
        threshold=params['classifier_dataset_hparams']['labeling_params']
        ['threshold'],
        old_compat=old_compat)
    tf_dataset = dataset.get_datasets(mode=mode)
    scenario = dataset.scenario

    ###############
    # Evaluate
    ###############
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=True)

    model = classifier_utils.load_generic_model([checkpoint])

    for batch_idx, example in enumerate(
            progressbar(tf_dataset, widgets=base_dataset.widgets)):
        example.update(dataset.batch_metadata)
        predictions, _ = model.check_constraint_from_example(example,
                                                             training=False)

        labels = tf.expand_dims(example['is_close'][:, 1:], axis=2)

        probabilities = predictions['probabilities']

        # Visualization
        example.pop("time")
        example.pop("batch_size")
        decisions = probabilities > 0.5
        classifier_is_correct = tf.squeeze(tf.equal(decisions,
                                                    tf.cast(labels, tf.bool)),
                                           axis=-1)
        for b in range(batch_size):
            example_b = index_dict_of_batched_tensors_tf(example, b)

            # if the classifier is correct at all time steps, ignore
            if only_errors and tf.reduce_all(classifier_is_correct[b]):
                continue

            def _custom_viz_t(scenario: Base3DScenario, e: Dict, t: int):
                if t > 0:
                    accept_probability_t = predictions['probabilities'][
                        b, t - 1, 0].numpy()
                else:
                    accept_probability_t = -999
                scenario.plot_accept_probability(accept_probability_t)

                traj_idx_msg = Float32()
                traj_idx_msg.data = batch_idx * batch_size + b
                traj_idx_pub_.publish(traj_idx_msg)

            anim = RvizAnimation(scenario=scenario,
                                 n_time_steps=dataset.horizon,
                                 init_funcs=[
                                     init_viz_env,
                                     dataset.init_viz_action(),
                                 ],
                                 t_funcs=[
                                     _custom_viz_t,
                                     dataset.classifier_transition_viz_t(),
                                     ExperimentScenario.plot_stdev_t,
                                 ])
            with open("debugging.hjson", 'w') as f:
                my_hdump(numpify(example_b), f)
            anim.play(example_b)
Code example #10
def train_main(
    dataset_dirs: List[pathlib.Path],
    model_hparams: pathlib.Path,
    classifier_checkpoint: pathlib.Path,
    log: str,
    batch_size: int,
    epochs: int,
    seed: int,
    checkpoint: Optional[pathlib.Path] = None,
    ensemble_idx: Optional[int] = None,
    trials_directory: Optional[pathlib.Path] = None,  # reassigned below
    **kwargs,
):
    ###############
    # Datasets
    ###############
    train_dataset = RecoveryDatasetLoader(dataset_dirs)
    val_dataset = RecoveryDatasetLoader(dataset_dirs)

    ###############
    # Model
    ###############
    model_hparams = json.load(model_hparams.open('r'))
    model_hparams['recovery_dataset_hparams'] = train_dataset.hparams
    model_hparams['batch_size'] = batch_size
    model_hparams['seed'] = seed
    model_hparams['datasets'] = paths_to_json(dataset_dirs)
    model_hparams['latest_training_time'] = int(time.time())
    scenario = get_scenario(model_hparams['scenario'])

    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train')
    val_tf_dataset = val_dataset.get_datasets(mode='val')

    train_tf_dataset = batch_tf_dataset(train_tf_dataset,
                                        batch_size,
                                        drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset,
                                      batch_size,
                                      drop_remainder=True)

    train_tf_dataset = train_tf_dataset.shuffle(buffer_size=512, seed=seed)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    model = NNRecoveryModel(hparams=model_hparams,
                            scenario=scenario,
                            batch_size=batch_size)

    ############
    # Initialize weights from classifier model by "restoring" from checkpoint
    ############
    if not checkpoint:
        # load in the weights for the conv & dense layers of the classifier's encoder, skip the last few layers
        classifier_model = tf.train.Checkpoint(conv_layers=model.conv_layers)
        classifier_root = tf.train.Checkpoint(model=classifier_model)
        classifier_checkpoint_manager = tf.train.CheckpointManager(
            classifier_root, classifier_checkpoint.as_posix(), max_to_keep=1)

        assert classifier_checkpoint_manager.latest_checkpoint is not None
        status = classifier_root.restore(
            classifier_checkpoint_manager.latest_checkpoint)
        status.expect_partial()
        status.assert_existing_objects_matched()
        print(Fore.MAGENTA + "Restored {}".format(
            classifier_checkpoint_manager.latest_checkpoint) + Fore.RESET)
    ############

    trial_path = None
    checkpoint_name = None
    if checkpoint:
        trial_path = checkpoint.parent.absolute()
        checkpoint_name = checkpoint.name
    trials_directory = pathlib.Path('recovery_trials').absolute()
    group_name = log if trial_path is None else None
    trial_path, _ = filepath_tools.create_or_load_trial(
        group_name=group_name,
        params=model_hparams,
        trial_path=trial_path,
        trials_directory=trials_directory,
        write_summary=False)
    runner = ModelRunner(model=model,
                         training=True,
                         params=model_hparams,
                         trial_path=trial_path,
                         val_every_n_batches=1,
                         mid_epoch_val_batches=100,
                         validate_first=True,
                         checkpoint=checkpoint,
                         batch_metadata=train_dataset.batch_metadata)

    # Train
    runner.train(train_tf_dataset, val_tf_dataset, num_epochs=epochs)

    return trial_path
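
The weight-initialization block above works by nesting tf.train.Checkpoint objects so that only the model.conv_layers edge of the saved object graph is restored, skipping everything else. A self-contained sketch of the same trick with a toy layer (paths and shapes are made up):

import os
import tensorflow as tf

os.makedirs('/tmp/partial_restore_demo', exist_ok=True)

# Donor: save a checkpoint whose object graph contains `model.conv_layers`.
donor_layer = tf.keras.layers.Dense(4)
donor_layer.build((None, 2))
donor = tf.train.Checkpoint(
    model=tf.train.Checkpoint(conv_layers=[donor_layer]))
save_path = donor.save('/tmp/partial_restore_demo/ckpt')

# Recipient: declare only the edges we want filled in; the rest of the
# saved graph is deliberately ignored.
recipient_layer = tf.keras.layers.Dense(4)
recipient_layer.build((None, 2))
recipient = tf.train.Checkpoint(
    model=tf.train.Checkpoint(conv_layers=[recipient_layer]))
status = recipient.restore(save_path)
status.expect_partial()                   # ok: we load a subset on purpose
status.assert_existing_objects_matched()  # every declared object must match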
Code example #11
    def __init__(self,
                 training,
                 group_name=None,
                 trial_path=None,
                 params=None,
                 write_summary=True,
                 exists_required=False):
        """
        @type training: bool
        @param training: 
        @param group_name: 
        @param trial_path: 
        @param params: 
        @param write_summary:
        @param exists_required: If True, will fail if checkpoint does not already exist
        """
        self.side_length = 64
        self.num_voxels = self.side_length**3
        self.training = training

        self.trial_path, self.params = filepath_tools.create_or_load_trial(
            group_name=group_name,
            params=params,
            trial_path=trial_path,
            write_summary=write_summary)
        self.exists_required = exists_required
        self.group_name = self.trial_path.parts[-2]

        self.batch_size = 1 if not self.training else self.params['batch_size']

        self.train_summary_writer = tf.summary.create_file_writer(
            (self.trial_path / "logs/train").as_posix())
        self.test_summary_writer = tf.summary.create_file_writer(
            (self.trial_path / "logs/test").as_posix())

        if self.params['network'] == 'VoxelCNN':
            self.model = VoxelCNN(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'AutoEncoder':
            self.model = AutoEncoder(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'VAE':
            self.model = VAE(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'VAE_GAN':
            self.model = VAE_GAN(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'Augmented_VAE':
            self.model = Augmented_VAE(self.params, batch_size=self.batch_size)
        elif self.params['network'] == 'Conditional_VCNN':
            self.model = ConditionalVCNN(self.params,
                                         batch_size=self.batch_size)
        elif self.params['network'] == 'AE_VCNN':
            self.model = AE_VCNN(self.params, batch_size=self.batch_size)
        elif self.params['network'] == "RealNVP":
            self.model = RealNVP(hparams=self.params,
                                 batch_size=self.batch_size,
                                 training=training)
        elif self.params['network'] == "PSSNet" or \
                self.params['network'] == "NormalizingAE":  # NormalizingAE was legacy name
            self.model = PSSNet(self.params, batch_size=self.batch_size)
            self.model.flow = ModelRunner(training=False,
                                          trial_path=self.params['flow'],
                                          exists_required=True).model.flow
        elif self.params['network'] == "3D_rec_gan":
            self.model = ThreeD_rec_gan(self.params,
                                        batch_size=self.batch_size)
        else:
            raise Exception(f"Unknown Model Type: {self.params['network']}")

        self.num_batches = None

        self.latest_ckpt = tf.train.Checkpoint(step=tf.Variable(1),
                                               epoch=tf.Variable(0),
                                               train_time=tf.Variable(0.0),
                                               net=self.model)
        self.latest_checkpoint_path = self.trial_path / "latest_checkpoints/"
        self.latest_checkpoint_manager = tf.train.CheckpointManager(
            self.latest_ckpt,
            self.latest_checkpoint_path.as_posix(),
            max_to_keep=1)

        self.best_ckpt = tf.train.Checkpoint(step=tf.Variable(1),
                                             epoch=tf.Variable(0),
                                             train_time=tf.Variable(0.0),
                                             best_key_metric_value=tf.Variable(
                                                 10e10, dtype=tf.float32),
                                             net=self.model)
        self.best_checkpoint_path = self.trial_path / "best_checkpoint/"
        self.best_checkpoint_manager = tf.train.CheckpointManager(
            self.best_ckpt,
            self.best_checkpoint_path.as_posix(),
            max_to_keep=1)
        self.restore()
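
This version of ModelRunner keeps two independent CheckpointManagers: one always tracks the most recent state, the other is updated only when the key metric improves (best_key_metric_value starts at a large sentinel because lower is better here). A sketch of how a training loop might drive them; the actual save logic lives elsewhere in the class:

# Sketch: save the latest state every time, the best state only on improvement.
def save_checkpoints(runner, key_metric_value):
    # Always overwrite the single retained latest checkpoint.
    runner.latest_checkpoint_manager.save()
    # Only advance the best checkpoint when the metric improves.
    if key_metric_value < runner.best_ckpt.best_key_metric_value:
        runner.best_ckpt.best_key_metric_value.assign(key_metric_value)
        runner.best_checkpoint_manager.save()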