def setup_datasets(model_hparams, batch_size, seed, train_dataset, val_dataset, take):
    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train', shuffle_files=True, take=take)
    val_tf_dataset = val_dataset.get_datasets(mode='val', shuffle_files=True, take=take)

    train_tf_dataset = train_tf_dataset.shuffle(model_hparams['shuffle_buffer_size'],
                                                reshuffle_each_iteration=True)

    # rospy.logerr_once("NOT BALANCING!")
    train_tf_dataset = balance(train_tf_dataset)
    val_tf_dataset = balance(val_tf_dataset)

    train_tf_dataset = batch_tf_dataset(train_tf_dataset, batch_size, drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset, batch_size, drop_remainder=True)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return train_tf_dataset, val_tf_dataset
def setup_datasets(model_hparams, batch_size, seed, train_dataset, val_dataset, take):
    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train', take=take)
    val_tf_dataset = val_dataset.get_datasets(mode='val')

    # mix up examples before batching
    train_tf_dataset = train_tf_dataset.shuffle(model_hparams['shuffle_buffer_size'])

    train_tf_dataset = batch_tf_dataset(train_tf_dataset, batch_size, drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset, batch_size, drop_remainder=True)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    return train_tf_dataset, val_tf_dataset
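# A minimal, self-contained sketch of the shuffle -> batch -> prefetch ordering used in
# both setup_datasets variants above, on a plain tf.data pipeline with no
# project-specific loaders. Shuffling before batching mixes individual examples;
# shuffling after batching would only mix whole batches.
import tensorflow as tf

dataset = tf.data.Dataset.range(100)
dataset = dataset.shuffle(buffer_size=100, reshuffle_each_iteration=True)
dataset = dataset.batch(8, drop_remainder=True)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
for batch in dataset.take(2):
    print(batch.numpy())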
def generate_classifier_examples(fwd_model: BaseDynamicsFunction,
                                 tf_dataset: tf.data.Dataset,
                                 dataset: DynamicsDatasetLoader,
                                 labeling_params: Dict,
                                 batch_size: int):
    classifier_horizon = labeling_params['classifier_horizon']
    assert classifier_horizon >= 2
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=False)
    sc = dataset.scenario

    # count the batches so progress can be reported below
    # (the original `n_total_batches = idx` after an enumerate loop undercounted by one)
    n_total_batches = sum(1 for _ in tf_dataset)

    t0 = perf_counter()
    for idx, example in enumerate(tf_dataset):
        dt = perf_counter() - t0
        print(f"{idx} / {n_total_batches} batches in {dt:.3f} seconds")

        actual_batch_size = int(example['traj_idx'].shape[0])
        valid_out_examples = []
        for start_t in range(0, dataset.steps_per_traj - classifier_horizon + 1,
                             labeling_params['start_step']):
            prediction_end_t = dataset.steps_per_traj
            actual_prediction_horizon = prediction_end_t - start_t
            actual_states_from_start_t = {k: example[k][:, start_t:prediction_end_t]
                                          for k in dataset.state_keys}
            actions_from_start_t = {k: example[k][:, start_t:prediction_end_t - 1]
                                    for k in dataset.action_keys}

            predictions_from_start_t, _ = fwd_model.propagate_differentiable_batched(
                environment={},
                state=actual_states_from_start_t,
                actions=actions_from_start_t)

            prediction_actual = PredictionActualExample(
                example=example,
                actions=actions_from_start_t,
                actual_states=actual_states_from_start_t,
                predictions=predictions_from_start_t,
                start_t=start_t,
                labeling_params=labeling_params,
                actual_prediction_horizon=actual_prediction_horizon,
                batch_size=actual_batch_size)
            valid_out_examples_for_start_t = generate_classifier_examples_from_batch(
                sc, prediction_actual)
            valid_out_examples.extend(valid_out_examples_for_start_t)

        yield valid_out_examples
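# A small self-contained illustration of the per-key time slicing done above: each
# entry of `example` is a [batch, time, ...] tensor, and a sub-window [start_t, end_t)
# is taken from every state key at once. The names here (example, state_keys) are
# stand-ins, not the project's loaders.
import tensorflow as tf

example = {'position': tf.random.uniform([4, 10, 3]),
           'velocity': tf.random.uniform([4, 10, 3])}
state_keys = ['position', 'velocity']
start_t, end_t = 2, 7
window = {k: example[k][:, start_t:end_t] for k in state_keys}
print({k: v.shape.as_list() for k, v in window.items()})  # [4, 5, 3] for each key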
def eval_main(dataset_dirs: List[pathlib.Path],
              mode: str,
              batch_size: int,
              use_gt_rope: bool,
              threshold: Optional[float] = None,
              old_compat: bool = False,
              take: Optional[int] = None,
              checkpoint: Optional[pathlib.Path] = None,
              trials_directory=pathlib.Path,
              **kwargs):
    ###############
    # Model
    ###############
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(trial_path=trial_path,
                                                    trials_directory=trials_directory)
    model_class = link_bot_classifiers.get_model(params['model_class'])

    ###############
    # Dataset
    ###############
    dataset = ClassifierDatasetLoader(dataset_dirs,
                                      load_true_states=True,
                                      use_gt_rope=use_gt_rope,
                                      old_compat=old_compat,
                                      threshold=threshold)
    tf_dataset = dataset.get_datasets(mode=mode, take=take)
    tf_dataset = balance(tf_dataset)

    ###############
    # Evaluate
    ###############
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=True)

    model = model_class(hparams=params, batch_size=batch_size, scenario=dataset.scenario)
    # This call to ModelRunner restores the model from the checkpoint
    runner = ModelRunner(model=model,
                         training=False,
                         params=params,
                         checkpoint=checkpoint,
                         trial_path=trial_path,
                         key_metric=AccuracyMetric,
                         batch_metadata=dataset.batch_metadata)

    metrics = runner.val_epoch(tf_dataset)
    for metric_name, metric_value in metrics.items():
        print(f"{metric_name:30s}: {metric_value}")
    return metrics
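# balance() above is project code. As a hedged sketch of one common way to
# class-balance a tf.data dataset of labeled examples (not necessarily what
# balance() actually does): split by label and interleave the two streams evenly.
import tensorflow as tf

examples = tf.data.Dataset.from_tensor_slices({
    'x': tf.range(10.0),
    'is_close': tf.constant([1., 1., 1., 1., 1., 1., 1., 0., 0., 0.]),
})
positives = examples.filter(lambda e: e['is_close'] > 0.5).repeat()
negatives = examples.filter(lambda e: e['is_close'] < 0.5).repeat()
balanced = tf.data.Dataset.zip((positives, negatives)).flat_map(
    lambda p, n: tf.data.Dataset.from_tensors(p).concatenate(tf.data.Dataset.from_tensors(n)))
for e in balanced.take(4):
    print(e['is_close'].numpy())  # alternates 1, 0, 1, 0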
def load_dataset_and_models(args):
    comparison_info = json.load(args.comparison.open("r"))
    models = {}
    for name, model_info in comparison_info.items():
        model_dir = paths_from_json(model_info['model_dir'])
        model, _ = dynamics_utils.load_generic_model(model_dir)
        models[name] = model

    dataset = DynamicsDatasetLoader(args.dataset_dirs)
    tf_dataset = dataset.get_datasets(mode=args.mode, shard=args.shard, take=args.take)
    tf_dataset = batch_tf_dataset(tf_dataset, 1)
    return tf_dataset, dataset, models
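# A hypothetical sketch of the comparison file consumed above: a JSON object mapping a
# display name for each model to the info needed to load it. The exact schema
# ('model_dir' as a list of path strings, inferred from the paths_from_json() call)
# is an assumption, not a confirmed format.
import json
import pathlib

comparison = {
    "baseline": {"model_dir": ["trials/baseline/best_checkpoint"]},
    "ours": {"model_dir": ["trials/ours/best_checkpoint"]},
}
pathlib.Path("comparison.json").write_text(json.dumps(comparison, indent=2))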
def eval_main(
        dataset_dirs: List[pathlib.Path],
        checkpoint: pathlib.Path,
        mode: str,
        batch_size: int,
        use_gt_rope: bool,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(trial_path=trial_path,
                                                    trials_directory=trials_directory)
    model = state_space_dynamics.get_model(params['model_class'])
    net = model(hparams=params, batch_size=batch_size, scenario=test_dataset.scenario)

    runner = ModelRunner(model=net,
                         training=False,
                         checkpoint=checkpoint,
                         batch_metadata=test_dataset.batch_metadata,
                         trial_path=trial_path,
                         params=params)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset, batch_size, drop_remainder=True)
    validation_metrics = runner.val_epoch(test_tf_dataset)
    for name, value in validation_metrics.items():
        print(f"{name}: {value}")

    # more metrics that can't be expressed as just an average over metrics on each batch
    all_errors = None
    for batch in test_tf_dataset:
        outputs = runner.model(batch, training=False)
        errors_for_batch = test_dataset.scenario.classifier_distance(outputs, batch)
        if all_errors is not None:
            all_errors = tf.concat([all_errors, errors_for_batch], axis=0)
        else:
            all_errors = errors_for_batch
    print(f"90th percentile {np.percentile(all_errors.numpy(), 90)}")
    print(f"95th percentile {np.percentile(all_errors.numpy(), 95)}")
    print(f"99th percentile {np.percentile(all_errors.numpy(), 99)}")
    print(f"max {np.max(all_errors.numpy())}")
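# The loop above grows all_errors with a tf.concat per batch, which copies the
# accumulated tensor on every iteration. A sketch of the usual alternative: collect
# per-batch results in a Python list and concatenate once at the end. The random
# tensors stand in for per-batch error vectors.
import tensorflow as tf
import numpy as np

batches = [tf.random.uniform([8]) for _ in range(10)]  # stand-in for per-batch errors
errors = tf.concat(batches, axis=0)
print(np.percentile(errors.numpy(), [90, 95, 99]))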
def viz_main(args):
    dataset_dirs = args.dataset_dirs
    checkpoint = args.checkpoint

    trial_path, params = load_trial(checkpoint.parent.absolute())

    dataset = DynamicsDatasetLoader(dataset_dirs)
    scenario = dataset.scenario
    tf_dataset = dataset.get_datasets(mode='val')
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size=1, drop_remainder=True)

    model = CFM(hparams=params, batch_size=1, scenario=scenario)
    ckpt = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(ckpt, args.checkpoint, max_to_keep=1)
    status = ckpt.restore(manager.latest_checkpoint).expect_partial()
    if manager.latest_checkpoint:
        print(Fore.CYAN + "Restored from {}".format(manager.latest_checkpoint))
        status.assert_existing_objects_matched()
    else:
        raise RuntimeError("Failed to restore!!!")

    for example_idx, example in enumerate(tf_dataset):
        stepper = RvizAnimationController(n_time_steps=dataset.steps_per_traj)
        for t in range(dataset.steps_per_traj):
            output = model(model.preprocess_no_gradient(example, training=False))

            actual_t = numpify(remove_batch(scenario.index_time_batched_predicted(example, t)))
            action_t = numpify(remove_batch(scenario.index_time_batched_predicted(example, t)))
            scenario.plot_state_rviz(actual_t, label='actual', color='red')
            scenario.plot_action_rviz(actual_t, action_t, color='gray')

            prediction_t = remove_batch(scenario.index_time_batched_predicted(output, t))
            scenario.plot_state_rviz(prediction_t, label='predicted', color='blue')

            stepper.step()
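# A self-contained sketch of the restore pattern used above with tf.train.Checkpoint
# and tf.train.CheckpointManager: save a tiny tracked object, then restore it with
# expect_partial() and verify the match. The path is illustrative.
import tensorflow as tf

model = tf.Module()
model.w = tf.Variable(1.0)
ckpt = tf.train.Checkpoint(model=model)
manager = tf.train.CheckpointManager(ckpt, '/tmp/demo_ckpt', max_to_keep=1)
manager.save()

model.w.assign(0.0)
status = ckpt.restore(manager.latest_checkpoint).expect_partial()
status.assert_existing_objects_matched()
print(model.w.numpy())  # 1.0 again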
def compute_classifier_threshold(
        dataset_dirs: List[pathlib.Path],
        checkpoint: pathlib.Path,
        mode: str,
        batch_size: int,
        use_gt_rope: bool,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)

    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(trial_path=trial_path,
                                                    trials_directory=trials_directory)
    model = state_space_dynamics.get_model(params['model_class'])
    net = model(hparams=params, batch_size=batch_size, scenario=test_dataset.scenario)

    runner = ModelRunner(model=net,
                         training=False,
                         checkpoint=checkpoint,
                         batch_metadata=test_dataset.batch_metadata,
                         trial_path=trial_path,
                         params=params)

    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset, batch_size, drop_remainder=True)

    all_errors = None
    for batch in test_tf_dataset:
        outputs = runner.model(batch, training=False)
        errors_for_batch = test_dataset.scenario.classifier_distance(batch, outputs)
        if all_errors is not None:
            all_errors = tf.concat([all_errors, errors_for_batch], axis=0)
        else:
            all_errors = errors_for_batch

    classifier_threshold = np.percentile(all_errors.numpy(), 90)
    rospy.loginfo(f"90th percentile {classifier_threshold}")
    return classifier_threshold
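# Why the 90th percentile: the threshold is chosen so that roughly 90% of the dynamics
# model's errors on this dataset fall below it. A toy numpy illustration with a
# stand-in error distribution:
import numpy as np

errors = np.random.exponential(scale=0.05, size=10000)  # stand-in for model errors
threshold = np.percentile(errors, 90)
print(threshold, np.mean(errors < threshold))  # ~0.9 of errors fall under the threshold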
def viz_dataset(
        dataset_dirs: List[pathlib.Path],
        checkpoint: pathlib.Path,
        mode: str,
        viz_func: Callable,
        use_gt_rope: bool,
        **kwargs,
):
    test_dataset = DynamicsDatasetLoader(dataset_dirs, use_gt_rope=use_gt_rope)
    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset, 1, drop_remainder=True)

    model, _ = dynamics_utils.load_generic_model([checkpoint])

    for i, batch in enumerate(test_tf_dataset):
        batch.update(test_dataset.batch_metadata)
        outputs, _ = model.from_example(batch, training=False)
        viz_func(batch, outputs, test_dataset, model)
def eval_main(
        dataset_dirs: List[pathlib.Path],
        checkpoint: pathlib.Path,
        mode: str,
        batch_size: int,
        **kwargs,
):
    ###############
    # Model
    ###############
    trial_path = checkpoint.parent.absolute()
    trials_directory = pathlib.Path('recovery_trials').absolute()
    _, params = filepath_tools.create_or_load_trial(trial_path=trial_path,
                                                    trials_directory=trials_directory)
    scenario = get_scenario(params['scenario'])
    net = NNRecoveryModel(hparams=params, scenario=scenario, batch_size=1)

    ###############
    # Dataset
    ###############
    test_dataset = RecoveryDatasetLoader(dataset_dirs)
    test_tf_dataset = test_dataset.get_datasets(mode=mode)

    ###############
    # Evaluate
    ###############
    test_tf_dataset = batch_tf_dataset(test_tf_dataset, batch_size, drop_remainder=True)

    runner = ModelRunner(model=net,
                         training=False,
                         params=params,
                         checkpoint=checkpoint,
                         trial_path=trial_path,
                         batch_metadata=test_dataset.batch_metadata)
    validation_metrics = runner.val_epoch(test_tf_dataset)
    for name, value in validation_metrics.items():
        print(f"{name}: {value:.3f}")
def viz_ensemble_main(dataset_dir: pathlib.Path,
                      checkpoints: List[pathlib.Path],
                      mode: str,
                      batch_size: int,
                      only_errors: bool,
                      use_gt_rope: bool,
                      **kwargs):
    dynamics_stdev_pub_ = rospy.Publisher("dynamics_stdev", Float32, queue_size=10)
    classifier_stdev_pub_ = rospy.Publisher("classifier_stdev", Float32, queue_size=10)
    accept_probability_pub_ = rospy.Publisher("accept_probability_viz", Float32, queue_size=10)
    traj_idx_pub_ = rospy.Publisher("traj_idx_viz", Float32, queue_size=10)

    ###############
    # Model
    ###############
    model = load_generic_model(checkpoints)

    ###############
    # Dataset
    ###############
    test_dataset = ClassifierDatasetLoader([dataset_dir],
                                           load_true_states=True,
                                           use_gt_rope=use_gt_rope)
    test_tf_dataset = test_dataset.get_datasets(mode=mode)
    test_tf_dataset = batch_tf_dataset(test_tf_dataset, batch_size, drop_remainder=True)
    scenario = test_dataset.scenario

    ###############
    # Evaluate
    ###############
    # Iterate over the test set
    all_accuracies_over_time = []
    all_stdevs = []
    all_labels = []
    classifier_ensemble_stdevs = []
    for batch_idx, test_batch in enumerate(test_tf_dataset):
        test_batch.update(test_dataset.batch_metadata)

        mean_predictions, stdev_predictions = model.check_constraint_from_example(test_batch)
        mean_probabilities = mean_predictions['probabilities']
        stdev_probabilities = stdev_predictions['probabilities']

        labels = tf.expand_dims(test_batch['is_close'][:, 1:], axis=2)
        all_labels = tf.concat((all_labels, tf.reshape(test_batch['is_close'][:, 1:], [-1])), axis=0)
        all_stdevs = tf.concat((all_stdevs, tf.reshape(test_batch[add_predicted('stdev')], [-1])), axis=0)

        accuracy_over_time = tf.keras.metrics.binary_accuracy(y_true=labels, y_pred=mean_probabilities)
        all_accuracies_over_time.append(accuracy_over_time)

        # Visualization
        test_batch.pop("time")
        test_batch.pop("batch_size")
        decisions = mean_probabilities > 0.5
        classifier_is_correct = tf.squeeze(tf.equal(decisions, tf.cast(labels, tf.bool)), axis=-1)
        for b in range(batch_size):
            example = index_dict_of_batched_tensors_tf(test_batch, b)

            classifier_ensemble_stdev = stdev_probabilities[b].numpy().squeeze()
            classifier_ensemble_stdevs.append(classifier_ensemble_stdev)

            # if the classifier is correct at all time steps, ignore
            if only_errors and tf.reduce_all(classifier_is_correct[b]):
                continue

            # if only_collision
            predicted_rope_states = tf.reshape(example[add_predicted('link_bot')][1], [-1, 3])
            xs = predicted_rope_states[:, 0]
            ys = predicted_rope_states[:, 1]
            zs = predicted_rope_states[:, 2]
            in_collision = bool(batch_in_collision_tf_3d(environment=example,
                                                         xs=xs,
                                                         ys=ys,
                                                         zs=zs,
                                                         inflate_radius_m=0)[0].numpy())
            label = bool(example['is_close'][1].numpy())
            accept = decisions[b, 0, 0].numpy()
            # if not (in_collision and accept):
            #     continue

            scenario.plot_environment_rviz(example)

            classifier_stdev_msg = Float32()
            classifier_stdev_msg.data = stdev_probabilities[b].numpy().squeeze()
            classifier_stdev_pub_.publish(classifier_stdev_msg)

            actual_0 = scenario.index_state_time(example, 0)
            actual_1 = scenario.index_state_time(example, 1)
            pred_0 = scenario.index_predicted_state_time(example, 0)
            pred_1 = scenario.index_predicted_state_time(example, 1)
            action = scenario.index_action_time(example, 0)
            label = example['is_close'][1]
            scenario.plot_state_rviz(actual_0, label='actual', color='#FF0000AA', idx=0)
            scenario.plot_state_rviz(actual_1, label='actual', color='#E00016AA', idx=1)
            scenario.plot_state_rviz(pred_0, label='predicted', color='#0000FFAA', idx=0)
            scenario.plot_state_rviz(pred_1, label='predicted', color='#0553FAAA', idx=1)
            scenario.plot_action_rviz(pred_0, action)
            scenario.plot_is_close(label)

            dynamics_stdev_t = example[add_predicted('stdev')][1, 0].numpy()
            dynamics_stdev_msg = Float32()
            dynamics_stdev_msg.data = dynamics_stdev_t
            dynamics_stdev_pub_.publish(dynamics_stdev_msg)

            accept_probability_t = mean_probabilities[b, 0, 0].numpy()
            accept_probability_msg = Float32()
            accept_probability_msg.data = accept_probability_t
            accept_probability_pub_.publish(accept_probability_msg)

            traj_idx_msg = Float32()
            traj_idx_msg.data = batch_idx * batch_size + b
            traj_idx_pub_.publish(traj_idx_msg)

            # stepper = RvizSimpleStepper()
            # stepper.step()

        print(np.mean(classifier_ensemble_stdevs))

    all_accuracies_over_time = tf.concat(all_accuracies_over_time, axis=0)
    mean_accuracies_over_time = tf.reduce_mean(all_accuracies_over_time, axis=0)
    std_accuracies_over_time = tf.math.reduce_std(all_accuracies_over_time, axis=0)
    mean_classifier_ensemble_stdev = tf.reduce_mean(classifier_ensemble_stdevs)
    print(mean_classifier_ensemble_stdev)
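# A self-contained sketch of the per-time-step accuracy computed above with
# tf.keras.metrics.binary_accuracy: given labels and probabilities shaped
# [batch, time, 1], it thresholds predictions at 0.5 and reduces over the trailing
# axis, yielding a [batch, time] accuracy tensor.
import tensorflow as tf

labels = tf.constant([[[1.0], [0.0], [1.0]]])          # [1, 3, 1]
probabilities = tf.constant([[[0.9], [0.4], [0.2]]])   # [1, 3, 1]
accuracy_over_time = tf.keras.metrics.binary_accuracy(y_true=labels, y_pred=probabilities)
print(accuracy_over_time.numpy())  # [[1. 1. 0.]] -> correct, correct, wrong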
def viz_main(dataset_dirs: List[pathlib.Path],
             checkpoint: pathlib.Path,
             mode: str,
             batch_size: int,
             only_errors: bool,
             use_gt_rope: bool,
             old_compat: bool = False,
             **kwargs):
    stdev_pub_ = rospy.Publisher("stdev", Float32, queue_size=10)
    traj_idx_pub_ = rospy.Publisher("traj_idx_viz", Float32, queue_size=10)

    ###############
    # Model
    ###############
    trials_directory = pathlib.Path('trials').absolute()
    trial_path = checkpoint.parent.absolute()
    _, params = filepath_tools.create_or_load_trial(trial_path=trial_path,
                                                    trials_directory=trials_directory)
    model_class = link_bot_classifiers.get_model(params['model_class'])

    ###############
    # Dataset
    ###############
    dataset = ClassifierDatasetLoader(
        dataset_dirs,
        load_true_states=True,
        use_gt_rope=use_gt_rope,
        threshold=params['classifier_dataset_hparams']['labeling_params']['threshold'],
        old_compat=old_compat)
    model = model_class(hparams=params, batch_size=batch_size, scenario=dataset.scenario)
    tf_dataset = dataset.get_datasets(mode=mode)
    scenario = dataset.scenario

    ###############
    # Evaluate
    ###############
    tf_dataset = batch_tf_dataset(tf_dataset, batch_size, drop_remainder=True)

    model = classifier_utils.load_generic_model([checkpoint])

    for batch_idx, example in enumerate(progressbar(tf_dataset, widgets=base_dataset.widgets)):
        example.update(dataset.batch_metadata)
        predictions, _ = model.check_constraint_from_example(example, training=False)

        labels = tf.expand_dims(example['is_close'][:, 1:], axis=2)
        probabilities = predictions['probabilities']

        # Visualization
        example.pop("time")
        example.pop("batch_size")
        decisions = probabilities > 0.5
        classifier_is_correct = tf.squeeze(tf.equal(decisions, tf.cast(labels, tf.bool)), axis=-1)
        for b in range(batch_size):
            example_b = index_dict_of_batched_tensors_tf(example, b)

            # if the classifier is correct at all time steps, ignore
            if only_errors and tf.reduce_all(classifier_is_correct[b]):
                continue

            def _custom_viz_t(scenario: Base3DScenario, e: Dict, t: int):
                if t > 0:
                    accept_probability_t = predictions['probabilities'][b, t - 1, 0].numpy()
                else:
                    accept_probability_t = -999
                scenario.plot_accept_probability(accept_probability_t)

            traj_idx_msg = Float32()
            traj_idx_msg.data = batch_idx * batch_size + b
            traj_idx_pub_.publish(traj_idx_msg)

            anim = RvizAnimation(scenario=scenario,
                                 n_time_steps=dataset.horizon,
                                 init_funcs=[
                                     init_viz_env,
                                     dataset.init_viz_action(),
                                 ],
                                 t_funcs=[
                                     _custom_viz_t,
                                     dataset.classifier_transition_viz_t(),
                                     ExperimentScenario.plot_stdev_t,
                                 ])
            with open("debugging.hjson", 'w') as f:
                my_hdump(numpify(example_b), f)
            anim.play(example_b)
def train_main(
        dataset_dirs: List[pathlib.Path],
        model_hparams: pathlib.Path,
        classifier_checkpoint: pathlib.Path,
        log: str,
        batch_size: int,
        epochs: int,
        seed: int,
        checkpoint: Optional[pathlib.Path] = None,
        ensemble_idx: Optional[int] = None,
        trials_directory=pathlib.Path,
        **kwargs,
):
    ###############
    # Datasets
    ###############
    train_dataset = RecoveryDatasetLoader(dataset_dirs)
    val_dataset = RecoveryDatasetLoader(dataset_dirs)

    ###############
    # Model
    ###############
    model_hparams = json.load(model_hparams.open('r'))
    model_hparams['recovery_dataset_hparams'] = train_dataset.hparams
    model_hparams['batch_size'] = batch_size
    model_hparams['seed'] = seed
    model_hparams['datasets'] = paths_to_json(dataset_dirs)
    model_hparams['latest_training_time'] = int(time.time())
    scenario = get_scenario(model_hparams['scenario'])

    # Dataset preprocessing
    train_tf_dataset = train_dataset.get_datasets(mode='train')
    val_tf_dataset = val_dataset.get_datasets(mode='val')

    train_tf_dataset = batch_tf_dataset(train_tf_dataset, batch_size, drop_remainder=True)
    val_tf_dataset = batch_tf_dataset(val_tf_dataset, batch_size, drop_remainder=True)

    # note: this shuffle comes after batching, so it mixes whole batches rather than
    # individual examples
    train_tf_dataset = train_tf_dataset.shuffle(buffer_size=512, seed=seed)

    train_tf_dataset = train_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)
    val_tf_dataset = val_tf_dataset.prefetch(tf.data.experimental.AUTOTUNE)

    model = NNRecoveryModel(hparams=model_hparams, scenario=scenario, batch_size=batch_size)

    ############
    # Initialize weights from classifier model by "restoring" from checkpoint
    ############
    if not checkpoint:
        # load in the weights for the conv & dense layers of the classifier's encoder,
        # skipping the last few layers
        classifier_model = tf.train.Checkpoint(conv_layers=model.conv_layers)
        classifier_root = tf.train.Checkpoint(model=classifier_model)
        classifier_checkpoint_manager = tf.train.CheckpointManager(
            classifier_root, classifier_checkpoint.as_posix(), max_to_keep=1)

        status = classifier_root.restore(classifier_checkpoint_manager.latest_checkpoint)
        status.expect_partial()
        status.assert_existing_objects_matched()
        assert classifier_checkpoint_manager.latest_checkpoint is not None
        print(Fore.MAGENTA +
              "Restored {}".format(classifier_checkpoint_manager.latest_checkpoint) +
              Fore.RESET)
    ############

    trial_path = None
    checkpoint_name = None
    if checkpoint:
        trial_path = checkpoint.parent.absolute()
        checkpoint_name = checkpoint.name
    trials_directory = pathlib.Path('recovery_trials').absolute()
    group_name = log if trial_path is None else None
    trial_path, _ = filepath_tools.create_or_load_trial(group_name=group_name,
                                                        params=model_hparams,
                                                        trial_path=trial_path,
                                                        trials_directory=trials_directory,
                                                        write_summary=False)

    runner = ModelRunner(model=model,
                         training=True,
                         params=model_hparams,
                         trial_path=trial_path,
                         val_every_n_batches=1,
                         mid_epoch_val_batches=100,
                         validate_first=True,
                         checkpoint=checkpoint,
                         batch_metadata=train_dataset.batch_metadata)

    # Train
    runner.train(train_tf_dataset, val_tf_dataset, num_epochs=epochs)

    return trial_path
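# A self-contained sketch of the nested-Checkpoint trick used above to warm-start part
# of a model: only the objects named in the restoring Checkpoint's tree are matched,
# so layers absent from it keep their fresh initialization. The layer names here are
# illustrative, not the project's actual classifier architecture.
import tensorflow as tf

# "classifier": save a model with conv/encoder layers plus an output head
conv_layers = tf.keras.Sequential([tf.keras.layers.Dense(4)])
head = tf.keras.layers.Dense(1)
conv_layers.build([None, 2])
head.build([None, 4])
saved_model = tf.train.Checkpoint(conv_layers=conv_layers, head=head)
saved_root = tf.train.Checkpoint(model=saved_model)
manager = tf.train.CheckpointManager(saved_root, '/tmp/demo_warmstart', max_to_keep=1)
manager.save()

# "recovery model": restore only the conv layers, ignoring the head
new_conv_layers = tf.keras.Sequential([tf.keras.layers.Dense(4)])
new_conv_layers.build([None, 2])
partial_model = tf.train.Checkpoint(conv_layers=new_conv_layers)
partial_root = tf.train.Checkpoint(model=partial_model)
status = partial_root.restore(manager.latest_checkpoint)
status.expect_partial()                    # the head is intentionally left unmatched
status.assert_existing_objects_matched()   # everything we named was restored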