def _load_dynamics(self, samples_names, device):
        configfiles = self._get_configfiles(samples_names)
        envs = self._get_environments(configfiles)
        action_spaces = [env._get_action_space() for env in envs]
        observ_spaces = [env._get_state_space() for env in envs]
        nstacks = [configfile['nstack'] for configfile in configfiles]
        sthocastics = [configfile['sthocastic'] for configfile in configfiles]
        hidden_layers = [
            configfile['hidden_layers'] for configfile in configfiles
        ]
        dynamics_list = [
            Dynamics(state_space.shape,
                     action_space.shape,
                     nstack,
                     sthocastic,
                     hlayers=hlayers)
            for state_space, action_space, nstack, sthocastic, hlayers in zip(
                observ_spaces, action_spaces, nstacks, sthocastics,
                hidden_layers)
        ]
        #set_trace()
        """ Loading Dynamics """
        checkpoints = [
            torch.load(os.path.join(sample_name, 'params_high.pkl'))
            for sample_name in samples_names
        ]

        for dynamics, checkpoint in zip(dynamics_list, checkpoints):
            dynamics.load_state_dict(checkpoint['model_state_dict'])
            dynamics.mean_input = checkpoint['mean_input']
            dynamics.std_input = checkpoint['std_input']
            dynamics.epsilon = checkpoint['epsilon']
            dynamics.to(device)

        return dynamics_list
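The loader above expects each params_high.pkl checkpoint to hold the model weights together with the input-normalization statistics. A minimal sketch of how a compatible checkpoint could be written (the trained `dynamics` instance and the `sample_name` folder are placeholders, not part of the original example):

import os
import torch

# Hypothetical save-side counterpart of _load_dynamics: persist the weights plus
# the normalization statistics that the loader reads back.
checkpoint = {
    'model_state_dict': dynamics.state_dict(),
    'mean_input': dynamics.mean_input,
    'std_input': dynamics.std_input,
    'epsilon': dynamics.epsilon,
}
torch.save(checkpoint, os.path.join(sample_name, 'params_high.pkl'))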
Example #2
def sanity_check_path(fold, id_ex, ipath):
    import os, json, joblib
    import torch
    import matplotlib.pyplot as plt
    from mbrl.network import Dynamics
    from utils.sanity_check import SanityCheck
    from utils.analize_dynamics import plot_error_map
    from IPython.core.debugger import set_trace
    # compute_restore_file and get_positions_otime are project helpers assumed in scope
    set_trace()
    device      =   torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    path_name   =   compute_restore_file(fold, id_ex)
    
    assert path_name is not None, 'No paths file found'

    paths           =   joblib.load(path_name)

    #index_start_pos =   63
    #index_start_pos =   27
    with open(os.path.join(fold, 'rolls'+id_ex+'/experiment_config.json'), 'r') as fp:
        config_experiment   =   json.load(fp)
    
    nstack          =   config_experiment['nstack']
    dt              =   config_experiment['dt']
    horizon         =   config_experiment['horizon']
    #index_start_pos =   21*(nstack-1) + 9
    max_path_length =   config_experiment['max_path_length']

    env_class       =   config_experiment['env_name']
    
    path    =   paths[ipath]

    state_sz     =   path['observation'].shape[1]//nstack
    action_sz    =   path['actions'].shape[1]//nstack
    
    dynamics            =   Dynamics((state_sz, ), (action_sz,), nstack, False)
    checkpoint  =   torch.load(fold +'/params_high.pkl')
    dynamics.load_state_dict(checkpoint['model_state_dict'])
    dynamics.mean_input =   checkpoint['mean_input']
    dynamics.std_input  =   checkpoint['std_input']
    dynamics.epsilon    =   checkpoint['epsilon']
    dynamics.to(device)
    
    set_trace()
    matrix  =   SanityCheck.get_errors_matrixes_from_path(path, action_sz, state_sz, horizon, nstack, dynamics, device)
    
    fig, axs = plt.subplots(2,1, figsize=(12, 8))

    fig, axs[1]    =   plot_error_map(matrix, 1250, _vmax=20, fig=fig, ax=axs[1])
    
    pos_otime       =    get_positions_otime(fold, id_ex, list_paths=[ipath])
    x_time          =   pos_otime[0]['x']
    t_              =   pos_otime[0]['t']
    axs[0].plot(t_, x_time)

    plt.show()
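A possible invocation of the routine above; the experiment folder, experiment id, and path index below are hypothetical values chosen only for illustration:

# Hypothetical call: inspect rollout 0 of experiment id '01' stored under ./experiments/run_a
sanity_check_path('./experiments/run_a', '01', 0)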
Example #3
        ]
        self.optimizer_ML = optim.Adam(params=self.dynamics_ML.parameters(),
                                       lr=self.inner_lr)

    def _copy_from_ML(self):
        # Copy the meta-learned weights into every fine-tuning network
        for ft_net in self.ft_networks:
            ft_net.load_state_dict(copy.deepcopy(self.dynamics_ML.state_dict()))


if __name__ == "__main__":
    from mbrl.wrapped_env import QuadrotorEnv
    env = QuadrotorEnv(port=28001, reward_type='type1')
    state_shape = env.observation_space.shape
    action_shape = env.action_space.shape
    stack_n = 2
    sthocastic = False
    actfn = torch.tanh
    hlayers = [250, 250, 250]
    lr = 1e-4
    """ Meta learning parameters """
    M, K, N = 10, 10, 3
    set_trace()
    dynamics_ml = Dynamics(state_shape, action_shape, stack_n, sthocastic,
                           actfn, hlayers)

    mlearnerclass = MetaLearner(dynamics_ml, M, K, N, lr)

    mlearnerclass._copy_from_ML()
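    # After _copy_from_ML() every fine-tuning network should hold the same weights
    # as the meta-learned model. Quick sanity check, assuming MetaLearner stores the
    # model passed to it as dynamics_ML and exposes the ft_networks list shown above:
    for ft_net in mlearnerclass.ft_networks:
        for p_ft, p_ml in zip(ft_net.parameters(), dynamics_ml.parameters()):
            assert torch.allclose(p_ft, p_ml)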
Example #4
env_ = env_class(port=32001,
                 reward_type=config['reward_type'],
                 fault_rotor=None)  # 28
#vecenv=ParallelVrepEnv(ports=[25001,28001], max_path_length=250, envClass=QuadrotorEnv)
vecenv = ParallelVrepEnv(ports=[30001, 31001],
                         max_path_length=config['max_path_length'],
                         envClass=env_class,
                         reward_type=config['reward_type'],
                         cripple_rotor_list=config['crippled_rotor'])
state_shape = env_._get_state_space().shape
action_shape = env_._get_action_space().shape
activation_function = DecodeActFunction(config['activation_function'])

dyn = Dynamics(state_shape,
               action_shape,
               stack_n=config['nstack'],
               sthocastic=config['sthocastic'],
               actfn=activation_function,
               hlayers=config['hidden_layers'])
dyn = dyn.to(device)

metalearner = MetaLearner(dyn, config['M_points'], config['K_points'],
                          config['N_class'], config['inner_lr'],
                          config['outer_lr'])

#optimizer           =   optim.Adam(lr=config['learning_rate'], params=dyn.parameters())

mpc_class = DecodeMPC(config['mpc'])
mpc = mpc_class(config['horizon'], config['candidates'], env_, dyn, device,
                config['discount'])

#trainer =   Trainer(dyn, config['batch_size'], config['n_epochs'], config['validation_percent'], config['learning_rate'], device, optimizer)
Example #5
from wrapper_quad.wrapper_vrep import VREPQuad

from IPython.core.debugger import set_trace

import torch

env_ = VREPQuad(port=27001)
vecenv = ParallelVrepEnv(ports=[25001, 26001],
                         num_rollouts=10,
                         max_path_length=250,
                         envClass=VREPQuad)

state_shape = env_.observation_space.shape
action_shape = env_.action_space.shape

dyn = Dynamics(state_shape, action_shape, stack_n=4, sthocastic=True)

x = torch.randn(88)
x = dyn(x)

print('--------- Creation of runner--------')

runner = Runner(vecenv, env_, dyn, None, 250, 100)

print('running...')

paths = runner.run(random=True)

set_trace()
print(x)
        "sthocastic": False,
        "hidden_layers": config_train['hidden_layers'],
        "crippled_rotor": config_train['crippled_rotor']
    }
    env_class = DecodeEnvironment(config['env_name'])
    env_ = env_class(port=28001,
                     reward_type=config['reward_type'],
                     fault_rotor=config['crippled_rotor'])
    state_shape = env_.observation_space.shape
    action_shape = env_.action_space.shape

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    """ Load dynamics """
    dynamics = Dynamics(state_shape,
                        action_shape,
                        stack_n=config['nstack'],
                        sthocastic=config['sthocastic'],
                        hlayers=config['hidden_layers'])
    rs = RandomShooter(config['horizon'], config['candidates'], env_, dynamics,
                       device, config['discount'])
    checkpoint = torch.load(os.path.join(restore_folder, 'params_high.pkl'))
    dynamics.load_state_dict(checkpoint['model_state_dict'])

    dynamics.mean_input = checkpoint['mean_input']
    dynamics.std_input = checkpoint['std_input']
    dynamics.epsilon = checkpoint['epsilon']

    dynamics.to(device)
    #set_trace()
    """ Send a Trajectory to follow"""
    trajectoryManager = Trajectory(config['max_path_length'], 2)
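    # A closed-loop rollout sketch for following the trajectory: get_action is an
    # assumed name for the RandomShooter query method (not shown in this snippet),
    # and a gym-style reset/step interface is assumed for env_.
    obs = env_.reset()
    for _ in range(config['max_path_length']):
        action = rs.get_action(obs)
        obs, reward, done, _ = env_.step(action)
        if done:
            break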
"""************************
    Objects for training
***************************
"""

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

env_class   =   DecodeEnvironment(config['env_name'])
env_ = env_class(port=27001, reward_type=config['reward_type'], fault_rotor=config['crippled_rotor']) # 28
#vecenv=ParallelVrepEnv(ports=[25001,28001], max_path_length=250, envClass=QuadrotorEnv)
vecenv = ParallelVrepEnv(ports=[19999, 20001, 21001, 22001],
                         max_path_length=config['max_path_length'],
                         envClass=env_class,
                         reward_type=config['reward_type'],
                         cripple_rotor=config['crippled_rotor'])
state_shape         =   env_.observation_space.shape
action_shape        =   env_.action_space.shape
activation_function =   DecodeActFunction(config['activation_function'])

dyn = Dynamics(state_shape, action_shape, stack_n=config['nstack'], sthocastic=config['sthocastic'], actfn=activation_function, hlayers=config['hidden_layers'])
dyn = dyn.to(device)

optimizer           =   optim.Adam(lr=config['learning_rate'], params=dyn.parameters())

mpc_class           =   DecodeMPC(config['mpc'])
mpc                 =   mpc_class(config['horizon'], config['candidates'], env_, dyn, device, config['discount'])

trainer =   Trainer(dyn, config['batch_size'], config['n_epochs'], config['validation_percent'], config['learning_rate'], device, optimizer)

print('--------- Creation of runner--------')

runner = Runner(vecenv, env_, dyn, mpc, config['max_path_length'], config['total_tsteps_per_run'])


assert not os.path.exists(save_path), 'Save path already exists; choose another folder'
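The objects above are typically combined in an outer model-based RL loop: collect rollouts with the runner, then fit the dynamics with the trainer. A minimal sketch, assuming Trainer exposes a train(paths) method (the method name is not shown in these examples) and that the first iteration uses random exploration as in the snippets above:

# Hypothetical outer loop; Trainer.train(paths) is an assumed method name.
n_iterations = 10  # placeholder
all_paths = []
for it in range(n_iterations):
    paths = runner.run(random=(it == 0))  # random rollouts only on the first pass
    all_paths.extend(paths)
    trainer.train(all_paths)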
Example #8
from mbrl.wrapped_env import QuadrotorEnv
from mbrl.mpc import RandomShooter

from IPython.core.debugger import set_trace

import torch

env_ = QuadrotorEnv(port=27001)
vecenv = ParallelVrepEnv(ports=[25001, 28001],
                         max_path_length=250,
                         envClass=QuadrotorEnv)

state_shape = env_.observation_space.shape
action_shape = env_.action_space.shape

dyn = Dynamics(state_shape, action_shape, stack_n=4, sthocastic=False)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

dyn = dyn.to(device)

rs = RandomShooter(10, 1000, env_, dyn, device, 0.999)

print('--------- Creation of runner--------')

runner = Runner(vecenv, env_, dyn, rs, 250, 100)

print('running...')

paths = runner.run(random=False)