def _load_dynamics(self, samples_names, device):
    """Build one ``Dynamics`` model per sample folder and restore it.

    The config files and environments for ``samples_names`` are obtained
    through ``self._get_configfiles`` and ``self._get_environments``. A
    ``Dynamics`` network is built from each (environment, config) pair and
    then restored from the ``params_high.pkl`` checkpoint found in the
    corresponding sample folder.

    Args:
        samples_names: Sequence of folder paths; each folder must contain a
            ``params_high.pkl`` checkpoint.
        device: Device the checkpoints are mapped to and the models are
            moved to.

    Returns:
        List of restored ``Dynamics`` models, in the order of
        ``samples_names``.
    """
    configfiles = self._get_configfiles(samples_names)
    envs = self._get_environments(configfiles)

    dynamics_list = [
        Dynamics(
            env._get_state_space().shape,
            env._get_action_space().shape,
            configfile['nstack'],
            configfile['sthocastic'],
            hlayers=configfile['hidden_layers'],
        )
        for env, configfile in zip(envs, configfiles)
    ]

    # Restore each model from its own folder. Checkpoints are loaded one at
    # a time so they do not all have to be held in memory together.
    for dynamics, sample_name in zip(dynamics_list, samples_names):
        # map_location makes a checkpoint written on another device (e.g. a
        # GPU) loadable on `device`, including the CPU-only case.
        checkpoint = torch.load(
            os.path.join(sample_name, 'params_high.pkl'),
            map_location=device,
        )
        dynamics.load_state_dict(checkpoint['model_state_dict'])
        # Normalisation statistics are stored next to the weights.
        dynamics.mean_input = checkpoint['mean_input']
        dynamics.std_input = checkpoint['std_input']
        dynamics.epsilon = checkpoint['epsilon']
        dynamics.to(device)

    return dynamics_list
def sanity_check_path(fold, id_ex, ipath):
    """Plot the dynamics-model error map of one stored rollout path.

    The paths file is located with ``compute_restore_file(fold, id_ex)``
    and loaded with ``joblib``. ``nstack`` and ``horizon`` are read from
    ``<fold>/rolls<id_ex>/experiment_config.json``. A ``Dynamics`` model is
    restored from ``<fold>/params_high.pkl`` and passed to
    ``SanityCheck.get_errors_matrixes_from_path``. The resulting matrix is
    drawn with ``plot_error_map`` in the lower subplot; the upper subplot
    shows ``x`` against ``t`` as returned by ``get_positions_otime``.

    Args:
        fold: Experiment folder holding ``params_high.pkl`` and the
            ``rolls<id_ex>`` sub-folder.
        id_ex: String identifier appended to ``'rolls'`` to form the
            sub-folder name.
        ipath: Index of the path to analyse inside the loaded paths.

    Raises:
        AssertionError: If ``compute_restore_file`` returns ``None``.
    """
    from mbrl.network import Dynamics
    from utils.sanity_check import SanityCheck
    import torch
    from utils.analize_dynamics import plot_error_map

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    path_name = compute_restore_file(fold, id_ex)
    # Kept as an assert so callers see the same exception type as before.
    assert path_name is not None, 'Not file of paths founded'
    paths = joblib.load(path_name)

    config_path = os.path.join(fold, 'rolls' + id_ex,
                               'experiment_config.json')
    with open(config_path, 'r') as fp:
        config_experiment = json.load(fp)
    nstack = config_experiment['nstack']
    horizon = config_experiment['horizon']

    path = paths[ipath]
    # Stored rows hold `nstack` stacked frames; recover the per-frame sizes.
    state_sz = path['observation'].shape[1] // nstack
    action_sz = path['actions'].shape[1] // nstack

    dynamics = Dynamics((state_sz,), (action_sz,), nstack, False)
    # map_location lets a GPU-saved checkpoint be restored on the CPU
    # fallback selected above.
    checkpoint = torch.load(os.path.join(fold, 'params_high.pkl'),
                            map_location=device)
    dynamics.load_state_dict(checkpoint['model_state_dict'])
    dynamics.mean_input = checkpoint['mean_input']
    dynamics.std_input = checkpoint['std_input']
    dynamics.epsilon = checkpoint['epsilon']
    dynamics.to(device)

    matrix = SanityCheck.get_errors_matrixes_from_path(
        path, action_sz, state_sz, horizon, nstack, dynamics, device)

    fig, axs = plt.subplots(2, 1, figsize=(12, 8))
    fig, axs[1] = plot_error_map(matrix, 1250, _vmax=20, fig=fig, ax=axs[1])

    pos_otime = get_positions_otime(fold, id_ex, list_paths=[ipath])
    x_time = pos_otime[0]['x']
    t_ = pos_otime[0]['t']
    axs[0].plot(t_, x_time)
    plt.show()
] self.optimizer_ML = optim.Adam(params=self.dynamics_ML.parameters(), lr=self.inner_lr) def _copy_from_ML(self): [ ft_net.load_state_dict(copy.deepcopy( self.dynamics_ML.state_dict())) for ft_net in self.ft_networks ] if __name__ == "__main__": from mbrl.wrapped_env import QuadrotorEnv env = QuadrotorEnv(port=28001, reward_type='type1') state_shape = env.observation_space.shape action_shape = env.action_space.shape stack_n = 2 sthocastic = False actfn = torch.tanh hlayers = [250, 250, 250] lr = 1e-4 """ Meta learning parameters """ M, K, N = 10, 10, 3 set_trace() dynamics_ml = Dynamics(state_shape, action_shape, stack_n, sthocastic, actfn, hlayers) mlearnerclass = MetaLearner(dynamics_ml, M, K, N, lr) mlearnerclass._copy_from_ML()
# Single environment given to the MPC planner below; note it is created
# with fault_rotor=None while `vecenv` receives config['crippled_rotor'].
env_ = env_class(
    port=32001,
    reward_type=config['reward_type'],
    fault_rotor=None,
)  # 28
#vecenv=ParallelVrepEnv(ports=[25001,28001], max_path_length=250, envClass=QuadrotorEnv)
vecenv = ParallelVrepEnv(
    ports=[30001, 31001],
    max_path_length=config['max_path_length'],
    envClass=env_class,
    reward_type=config['reward_type'],
    cripple_rotor_list=config['crippled_rotor'],
)

# NOTE(review): the space getters are called on the class itself, not on
# `env_` -- confirm they are class/static methods.
state_shape = env_class._get_state_space().shape
action_shape = env_class._get_action_space().shape

activation_function = DecodeActFunction(config['activation_function'])
dyn = Dynamics(
    state_shape,
    action_shape,
    stack_n=config['nstack'],
    sthocastic=config['sthocastic'],
    actfn=activation_function,
    hlayers=config['hidden_layers'],
).to(device)

metalearner = MetaLearner(
    dyn,
    config['M_points'],
    config['K_points'],
    config['N_class'],
    config['inner_lr'],
    config['outer_lr'],
)
#optimizer = optim.Adam(lr=config['learning_rate'], params=dyn.parameters())

# The planner class is chosen by name from the config.
mpc_class = DecodeMPC(config['mpc'])
mpc = mpc_class(
    config['horizon'],
    config['candidates'],
    env_,
    dyn,
    device,
    config['discount'],
)
#trainer = Trainer(dyn, config['batch_size'], config['n_epochs'], config['validation_percent'], config['learning_rate'], device, optimizer)
from wrapper_quad.wrapper_vrep import VREPQuad
from IPython.core.debugger import set_trace
import torch

# One stand-alone environment plus a set of parallel ones, each on its own
# port.
env_ = VREPQuad(port=27001)
vecenv = ParallelVrepEnv(
    ports=[25001, 26001],
    num_rollouts=10,
    max_path_length=250,
    envClass=VREPQuad,
)

state_shape = env_.observation_space.shape
action_shape = env_.action_space.shape
dyn = Dynamics(state_shape, action_shape, stack_n=4, sthocastic=True)

# Forward pass on a random vector of length 88 (assumed to match the
# model's input size -- TODO confirm); the result is printed at the end.
x = dyn(torch.randn(88))

print('--------- Creation of runner--------')
runner = Runner(vecenv, env_, dyn, None, 250, 100)

print('running...')
paths = runner.run(random=True)

# Breakpoint placed after the run and before the final print.
set_trace()
print(x)
"sthocastic": False, "hidden_layers": config_train['hidden_layers'], "crippled_rotor": config_train['crippled_rotor'] } env_class = DecodeEnvironment(config['env_name']) env_ = env_class(port=28001, reward_type=config['reward_type'], fault_rotor=config['crippled_rotor']) state_shape = env_.observation_space.shape action_shape = env_.action_space.shape device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') """ Load dynamics """ dynamics = Dynamics(state_shape, action_shape, stack_n=config['nstack'], sthocastic=config['sthocastic'], hlayers=config['hidden_layers']) rs = RandomShooter(config['horizon'], config['candidates'], env_, dynamics, device, config['discount']) checkpoint = torch.load(os.path.join(restore_folder, 'params_high.pkl')) dynamics.load_state_dict(checkpoint['model_state_dict']) dynamics.mean_input = checkpoint['mean_input'] dynamics.std_input = checkpoint['std_input'] dynamics.epsilon = checkpoint['epsilon'] dynamics.to(device) #set_trace() """ Send a Trajectory to follow""" trajectoryManager = Trajectory(config['max_path_length'], 2)
"""************************ Objects for training *************************** """ device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') env_class = DecodeEnvironment(config['env_name']) env_ = env_class(port=27001, reward_type=config['reward_type'], fault_rotor=config['crippled_rotor']) # 28 #vecenv=ParallelVrepEnv(ports=[25001,28001], max_path_length=250, envClass=QuadrotorEnv) vecenv=ParallelVrepEnv(ports=[19999, 20001,21001,22001], max_path_length=config['max_path_length'], envClass=env_class, reward_type=config['reward_type'], cripple_rotor=config['crippled_rotor']) state_shape = env_.observation_space.shape action_shape = env_.action_space.shape activation_function = DecodeActFunction(config['activation_function']) dyn = Dynamics(state_shape, action_shape, stack_n=config['nstack'], sthocastic=config['sthocastic'], actfn=activation_function, hlayers=config['hidden_layers']) dyn = dyn.to(device) optimizer = optim.Adam(lr=config['learning_rate'], params=dyn.parameters()) mpc_class = DecodeMPC(config['mpc']) mpc = mpc_class(config['horizon'], config['candidates'], env_, dyn, device, config['discount']) trainer = Trainer(dyn, config['batch_size'], config['n_epochs'], config['validation_percent'], config['learning_rate'], device, optimizer) print('--------- Creation of runner--------') runner = Runner(vecenv, env_, dyn, mpc, config['max_path_length'], config['total_tsteps_per_run']) assert not os.path.exists(save_path), 'Already this folder is busy, select other'
from mbrl.wrapped_env import QuadrotorEnv
from mbrl.mpc import RandomShooter
from IPython.core.debugger import set_trace
import torch

# One stand-alone environment plus a set of parallel ones, each on its own
# port.
env_ = QuadrotorEnv(port=27001)
vecenv = ParallelVrepEnv(
    ports=[25001, 28001],
    max_path_length=250,
    envClass=QuadrotorEnv,
)

state_shape = env_.observation_space.shape
action_shape = env_.action_space.shape

# Use the first CUDA device when available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
dyn = Dynamics(state_shape, action_shape, stack_n=4,
               sthocastic=False).to(device)

rs = RandomShooter(10, 1000, env_, dyn, device, 0.999)

print('--------- Creation of runner--------')
runner = Runner(vecenv, env_, dyn, rs, 250, 100)

print('running...')
paths = runner.run(random=False)