def main():
    """Train (or load) the MLP model and optionally run a rendered MPC test.

    Steps:

    1. Select the torch device (CUDA when available, otherwise CPU).
    2. Create the ``ENV_ID`` gym environment and size the model: the input
       width is observation size plus action size, the output width is the
       observation size.
    3. If ``./out/{EXP_NAME}_model.pkl`` exists and ``OVERWRITE_EXISTING`` is
       false, load it. Otherwise train on the episodes recorded in
       ``../data/push_sphere_v0_details.pkl`` and save the result.
    4. If ``VISUAL_TEST`` is set, render 2000 episodes of 100 steps driven by
       an ``MPC`` controller and print the seconds spent on each control step.
    """
    import time  # local import: only needed for the per-step timing below

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    # The environment's own RNG is shared with the model and the controller.
    np_random = env.unwrapped.np_random

    observation_space = get_observation_space(env)
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MlpModel(n_inputs,
                     n_outputs,
                     hidden_units=(64, 32),
                     np_random=np_random,
                     device=device)
    model_path = Path(f'./out/{EXP_NAME}_model.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MlpModel.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        # NOTE: unpickling runs arbitrary code; only load trusted data files.
        df = pd.read_pickle('../data/push_sphere_v0_details.pkl')
        episode_length = df['step'].max() + 1
        n_episodes = df['episode'].max() + 1

        episodes = []
        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            actions = np.array(ep_df['raw_action'].tolist())
            states = np.array(ep_df['raw_obs'].tolist())
            # The first recorded action of every episode is dropped.
            # NOTE(review): presumably row t stores the action that produced
            # observation t -- confirm against the data recorder.
            episodes.append((states, actions[1:]))

        def epoch_callback(epoch, loss):
            """Log the loss; checkpoint and evaluate when epoch % 10 == 0."""
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate(MlpModel,
                         path,
                         'FetchPushSphereDense-v1',
                         strategy=push_strategy,
                         strategy_period=episode_length)

        print('Training...')
        model.train(episodes,
                    epochs=TRAINING_EPOCHS,
                    batch_size=BATCH_SIZE,
                    epoch_callback=epoch_callback,
                    scale_data=True,
                    shuffle_data=True)
        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        reward_fn = RewardFunction.simplified_push_reward(env)
        controller = MPC(env,
                         model,
                         MPC_HORIZON,
                         MPC_SEQUENCES,
                         np_random,
                         reward_function=reward_fn)
        for _ in range(2000):
            env.reset()
            controller.forget_history()
            for _ in range(100):
                env.render()
                # perf_counter is monotonic, so the measurement is not
                # disturbed by system clock adjustments.
                tic = time.perf_counter()
                obs = get_observations(env)
                action = controller.get_action(obs)
                env.step(action)  # the step result is not used here
                print(time.perf_counter() - tic)
def main():
    """Train (or load) the MDN model and optionally run a rendered MPC test.

    Steps:

    1. Select the torch device (CUDA when available, otherwise CPU).
    2. Create the ``ENV_ID`` gym environment and size the model: the input
       width is observation size plus action size, the output width is the
       observation size.
    3. If ``./out/{EXP_NAME}_model.pkl`` exists and ``OVERWRITE_EXISTING`` is
       false, load it. Otherwise obtain a dataset (reusing
       ``./out/{EXP_NAME}_data.pkl`` when its episode count and episode
       length match ``N_EPISODES`` / ``EPISODE_LENGTH``, regenerating and
       saving it when they do not), train on it and save the model.
    4. If ``VISUAL_TEST`` is set, render 2000 episodes of 100 steps driven by
       an ``MPC`` controller and print the seconds spent on each control step.
    """
    import time  # local import: only needed for the per-step timing below

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    # The environment's own RNG is shared with the model and the controller.
    np_random = env.unwrapped.np_random

    observation_space = get_observation_space(env)
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MDN_Model(n_inputs,
                      n_outputs,
                      MDN_COMPONENTS,
                      hidden_units=(20, ),
                      np_random=np_random,
                      device=device)
    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    data_path = Path(f'./out/{EXP_NAME}_data.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MDN_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        dataset = EnvDataset(env)

        # Reuse the cached dataset only when it was produced with the
        # currently requested episode count and length.
        data_is_usable = False
        if data_path.exists():
            print('Loading data...')
            dataset.load(data_path)
            if (dataset.episodes != N_EPISODES
                    or dataset.episode_length != EPISODE_LENGTH):
                print(
                    'Existing data is not compatible with the desired parameters.'
                )
            else:
                data_is_usable = True

        if not data_is_usable:
            print('Generating data...')
            dataset.generate(N_EPISODES, EPISODE_LENGTH, strategy=STRATEGY)
            dataset.save(data_path)

        episodes = dataset.data

        def epoch_callback(epoch, loss):
            """Log the loss; checkpoint and evaluate when epoch % 10 == 0."""
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate_mdn(path, ENV_ID, strategy=STRATEGY)

        print('Training...')
        model.train(episodes,
                    TRAINING_EPOCHS,
                    batch_size=BATCH_SIZE,
                    epoch_callback=epoch_callback,
                    scale_data=True,
                    shuffle_data=True)
        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)
        for _ in range(2000):
            # NOTE(review): the controller is not reset between episodes
            # here (no ``controller.forget_history()`` after ``env.reset()``)
            # -- confirm that is intended for this model.
            env.reset()
            for _ in range(100):
                env.render()
                # perf_counter is monotonic, so the measurement is not
                # disturbed by system clock adjustments.
                tic = time.perf_counter()
                obs = get_observations(env)
                action = controller.get_action(obs)
                env.step(action)  # the step result is not used here
                print(time.perf_counter() - tic)
def main():
    """Train and save the two MLP models of the latent-space pipeline.

    ``mlp1`` takes ``Z_SIZE`` plus action-size inputs and produces ``Z_SIZE``
    outputs; ``mlp2`` takes ``Z_SIZE`` inputs and produces observation-size
    outputs.

    If ``./out/{EXP_NAME}1_model.pkl`` already exists and
    ``OVERWRITE_EXISTING`` is false, both model paths are printed and the
    process exits with status 0. Otherwise both models are trained from
    ``DATA_PATH`` (a pickled DataFrame) and ``Z_PATH`` (an ``.npz`` archive
    whose ``arr_0`` entry is indexed by episode) and then saved.

    Raises:
        SystemExit: with code 0 when an existing model must not be
            overwritten.
    """
    import sys  # local import: only needed for the early exit below

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    np_random = env.unwrapped.np_random
    observation_space = get_observation_space(env)

    mlp1_n_inputs = Z_SIZE + env.action_space.low.size
    mlp1_n_outputs = Z_SIZE
    mlp2_n_inputs = Z_SIZE
    mlp2_n_outputs = observation_space.low.size

    mlp1 = MlpModel(mlp1_n_inputs,
                    mlp1_n_outputs,
                    hidden_units=(128, 128, 128, 64),
                    np_random=np_random,
                    device=device)
    mlp2 = SimpleMlpModel(mlp2_n_inputs,
                          mlp2_n_outputs,
                          hidden_units=(128, 128, 32),
                          device=device)

    mlp1_model_path = Path(f'./out/{EXP_NAME}1_model.pkl')
    mlp2_model_path = Path(f'./out/{EXP_NAME}2_model.pkl')

    # Only the first model's file decides whether training is skipped.
    if mlp1_model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            print(mlp1_model_path.as_posix())
            print(mlp2_model_path.as_posix())
            # sys.exit rather than the ``exit`` builtin: the latter is
            # installed by ``site`` for interactive use and may be missing.
            sys.exit(0)
    else:
        print('Existing model not found.')

    # NOTE: unpickling runs arbitrary code; only load trusted data files.
    df = pd.read_pickle(DATA_PATH)
    n_episodes = df['episode'].max() + 1
    episode_length = df['step'].max() + 1

    # Close the archive once the array has been read so the file handle is
    # not leaked.
    with np.load(Z_PATH) as z_archive:
        all_z = z_archive['arr_0']

    episodes = []
    for i in range(n_episodes):
        ep_actions = np.array(
            df.loc[df['episode'] == i, 'raw_action'].tolist())
        # The first recorded action of every episode is dropped.
        # NOTE(review): presumably row t stores the action that produced
        # step t -- confirm against the data recorder.
        episodes.append((all_z[i], ep_actions[1:]))

    # mlp2 maps each latent vector to the raw observation in the same row.
    # NOTE(review): this pairing assumes the DataFrame rows are ordered by
    # episode and then step, matching the flattened ``all_z`` -- confirm.
    mlp2_x = all_z.reshape(n_episodes * episode_length, -1)
    mlp2_y = np.array(list(df['raw_obs']))

    def epoch_callback(epoch, loss):
        """Log the epoch number and its loss."""
        print(epoch, loss)

    print('Training...')
    mlp1.train(episodes,
               epochs=TRAINING_EPOCHS1,
               batch_size=BATCH_SIZE,
               epoch_callback=epoch_callback,
               scale_data=True,
               shuffle_data=True)
    mlp2.train(mlp2_x,
               mlp2_y,
               epochs=TRAINING_EPOCHS2,
               batch_size=BATCH_SIZE,
               epoch_callback=epoch_callback,
               scale_data=True,
               shuffle_data=True)

    print('Saving models...')
    mlp1.save(mlp1_model_path)
    mlp2.save(mlp2_model_path)
def main():
    """Train (or load) the LSTM model and optionally run a rendered MPC test.

    Steps:

    1. Select the torch device (CUDA when available, otherwise CPU).
    2. Create the ``ENV_ID`` gym environment and build the ``LSTM_Model``.
    3. If ``./out/{EXP_NAME}_model.pkl`` exists and ``OVERWRITE_EXISTING`` is
       false, load it. Otherwise train on the episodes in ``DATA_PATH`` and
       save the result. With ``Z_TO_OBS`` set, the model inputs come from the
       ``arr_0`` array of ``Z_PATH`` and the raw observations (minus the
       first row of each episode) are passed as training targets; without
       it, the raw observations are the inputs and no targets are passed.
    4. If ``VISUAL_TEST`` is set, render 2000 episodes of 100 steps driven by
       an ``MPC`` controller and print the seconds spent on each control step.
    """
    import time  # local import: only needed for the per-step timing below

    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)
    # The environment's own RNG is shared with the model and the controller.
    np_random = env.unwrapped.np_random

    observation_space = get_observation_space(env)
    # NOTE(review): 16 is presumably the latent (z) width; when Z_TO_OBS is
    # false the episodes carry raw observations instead -- confirm the input
    # width still matches in that mode.
    n_inputs = 16 + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = LSTM_Model(n_inputs,
                       32,
                       n_outputs,
                       n_layers=2,
                       np_random=np_random,
                       device=device,
                       window_size=5)
    model_path = Path(f'./out/{EXP_NAME}_model.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = LSTM_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        # NOTE: unpickling runs arbitrary code; only load trusted data files.
        df = pd.read_pickle(DATA_PATH)
        n_episodes = df['episode'].max() + 1

        episodes = []
        targets = None
        if Z_TO_OBS:
            # Close the archive once the array has been read so the file
            # handle is not leaked.
            with np.load(Z_PATH) as z_archive:
                all_z = z_archive['arr_0']
            targets = []

        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            # The first recorded action of every episode is dropped.
            ep_actions = np.array(ep_df['raw_action'].tolist())[1:]
            raw_obs = np.array(ep_df['raw_obs'].tolist())
            if Z_TO_OBS:
                targets.append(raw_obs[1:])
                episodes.append((all_z[i], ep_actions))
            else:
                episodes.append((raw_obs, ep_actions))

        if targets is not None:
            targets = np.array(targets)

        def epoch_callback(epoch, loss):
            """Log the loss; save a checkpoint when epoch % 10 == 0."""
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)

        print('Training...')
        model.train(episodes,
                    targets=targets,
                    epochs=TRAINING_EPOCHS,
                    batch_size=BATCH_SIZE,
                    epoch_callback=epoch_callback,
                    scale_data=True,
                    scale_targets=True)
        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)
        for _ in range(2000):
            env.reset()
            controller.forget_history()
            for _ in range(100):
                env.render()
                # perf_counter is monotonic, so the measurement is not
                # disturbed by system clock adjustments.
                tic = time.perf_counter()
                obs = get_observations(env)
                action = controller.get_action(obs)
                env.step(action)  # the step result is not used here
                print(time.perf_counter() - tic)