def pre_invariant_preprocessor(use_tsf: UseTsf) -> preprocess.Transformer:
    """Scaling to apply to the data before it is fed through the invariant transform."""
    if use_tsf is UseTsf.COORD:
        return preprocess.PytorchTransformer(preprocess.NullSingleTransformer())
    elif use_tsf is UseTsf.FEEDFORWARD_BASELINE:
        return util.no_tsf_preprocessor()
    else:
        return preprocess.PytorchTransformer(preprocess.NullSingleTransformer(),
                                             preprocess.RobustMinMaxScaler())
def pre_invariant_preprocessor(use_tsf: UseTsf) -> preprocess.Transformer:
    if use_tsf is UseTsf.COORD:
        return preprocess.PytorchTransformer(preprocess.NullSingleTransformer())
    elif use_tsf in [UseTsf.SKIP, UseTsf.REX_SKIP]:
        # normalize position and force dimensions separately using shared scales
        return preprocess.PytorchTransformer(
            preprocess.RobustMinMaxScaler(dims_share_scale=[[0, 1], [3, 4]]),
            preprocess.RobustMinMaxScaler(dims_share_scale=[[0, 1], [3, 4]]))
    else:
        return preprocess.PytorchTransformer(
            preprocess.NullSingleTransformer(),
            preprocess.RobustMinMaxScaler(dims_share_scale=[[0, 1], [3, 4]]))
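# A minimal numpy sketch of what "shared scales" means for dims [[0, 1], [3, 4]]
# (an assumption about RobustMinMaxScaler's behavior, inferred from the comment
# above, for illustration only): each group of dimensions is normalized by one
# common min/max instead of per-dimension extrema, preserving relative
# magnitudes within the group.
import numpy as np

X = np.random.randn(100, 5)
for group in [[0, 1], [3, 4]]:
    lo, hi = X[:, group].min(), X[:, group].max()  # one shared scale per group
    X[:, group] = (X[:, group] - lo) / (hi - lo)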
def update_ds_with_transform(env, ds, use_tsf, get_pre_invariant_preprocessor, evaluate_transform=True,
                             rep_name=None):
    invariant_tsf = get_transform(env, ds, use_tsf, override_name=rep_name)

    if invariant_tsf:
        # load transform (only 1 function for learning transform reduces potential for different learning params)
        if use_tsf is not UseTsf.COORD and not invariant_tsf.load(invariant_tsf.get_last_checkpoint()):
            raise RuntimeError("Transform {} should be learned before using".format(invariant_tsf.name))

        if evaluate_transform:
            losses = invariant_tsf.evaluate_validation(None)
            logger.info("tsf on validation %s", " ".join(
                ["{} {:.5f}".format(name, loss.mean().cpu().item()) if loss is not None else ""
                 for name, loss in zip(invariant_tsf.loss_names(), losses)]))

        components = [get_pre_invariant_preprocessor(use_tsf), invariant.InvariantTransformer(invariant_tsf)]
        if use_tsf not in [UseTsf.SKIP, UseTsf.REX_SKIP]:
            components.append(preprocess.PytorchTransformer(preprocess.RobustMinMaxScaler()))
        preprocessor = preprocess.Compose(components)
    else:
        preprocessor = no_tsf_preprocessor()

    # update the datasource to use transformed data
    untransformed_config = ds.update_preprocessor(preprocessor)
    tsf_name = use_tsf.name
    if rep_name is not None:
        tsf_name = "{}_{}".format(tsf_name, rep_name)
    return untransformed_config, tsf_name, preprocessor
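# Hypothetical usage sketch (env, ds, and logger setup are repo-specific and
# assumed here): switch the datasource to a learned SKIP transform without
# re-evaluating it, keeping the untransformed config around for later use.
untransformed_config, tsf_name, preprocessor = update_ds_with_transform(
    env, ds, UseTsf.SKIP, pre_invariant_preprocessor, evaluate_transform=False)
logger.info("datasource now preprocessed with %s", tsf_name)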
                BOOT_STRAP_ITER)
    new_data = np.zeros((BOOT_STRAP_ITER, nx + nu))
    for i in range(BOOT_STRAP_ITER):
        pre_action_state = env.state
        action = np.random.uniform(low=ACTION_LOW, high=ACTION_HIGH)
        env.step([action])
        # env.render()
        new_data[i, :nx] = pre_action_state
        new_data[i, nx:] = action

    fill_dataset(new_data)
    logger.info("bootstrapping finished")

    # TODO directly making the change in state into angular representation is wrong
    preprocessor = preprocess.PytorchTransformer(preprocess.AngleToCosSinRepresentation(0),
                                                 preprocess.AngleToCosSinRepresentation(0))
    untransformed_config = ds.update_preprocessor(preprocessor)

    # pm = prior.GMMPrior.from_data(ds)
    # pm = prior.LSQPrior.from_data(ds)
    mw = model.NetworkModelWrapper(
        model.DeterministicUser(
            make.make_sequential_network(config, activation_factory=torch.nn.Tanh,
                                         h_units=(16, 16)).to(device=d)),
        ds)
    pm = prior.NNPrior.from_data(mw, train_epochs=0)

    # linearizable_dynamics = online_model.OnlineDynamicsModel(0.1, pm, ds, sigreg=1e-10)
    online_dynamics = online_model.OnlineLinearizeMixing(
        0.1, pm,
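# A minimal, self-contained sketch of the random-action bootstrapping pattern
# used above (the toy environment and its dynamics here are illustrative
# assumptions, not from this repo): roll out uniform random actions and record
# (pre-action state, action) rows to seed the dataset for model fitting.
import numpy as np

class ToyPointMass:
    """Stand-in for env with state [position, velocity] and one control input."""
    def __init__(self):
        self.state = np.zeros(2)

    def step(self, action):
        # double integrator integrated with dt = 0.1
        pos, vel = self.state
        self.state = np.array([pos + 0.1 * vel, vel + 0.1 * action[0]])

nx, nu = 2, 1
BOOT_STRAP_ITER = 50
ACTION_LOW, ACTION_HIGH = -1.0, 1.0
env = ToyPointMass()
new_data = np.zeros((BOOT_STRAP_ITER, nx + nu))
for i in range(BOOT_STRAP_ITER):
    pre_action_state = env.state
    action = np.random.uniform(low=ACTION_LOW, high=ACTION_HIGH)
    env.step([action])
    new_data[i, :nx] = pre_action_state
    new_data[i, nx:] = action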
def pre_invariant_preprocessor(use_tsf: UseTsf) -> preprocess.Transformer:
    return preprocess.PytorchTransformer(preprocess.MinMaxScaler(), preprocess.NullSingleTransformer())
def no_tsf_preprocessor():
    return preprocess.PytorchTransformer(preprocess.RobustMinMaxScaler())
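# Hypothetical usage (mirroring the fallback branch of update_ds_with_transform
# above; `ds` is an assumed datasource instance): when no invariant transform is
# used, the datasource is still normalized with a robust min-max scale.
preprocessor = no_tsf_preprocessor()
untransformed_config = ds.update_preprocessor(preprocessor)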