Example 1
import joblib

from sandbox.rocky.tf.envs.base import TfEnv

# GridBase and plot_env are assumed to come from the project's own
# grid-environment module; their import is not shown in the original snippet.

log_dir = "./MultiEnv/Data"

# Generate TrainEnv files (commented out below; run once to build the training set)
# TrainEnvNum = 500
# env = TfEnv(GridBase(params))
# env._wrapped_env.generate_grid=True
# env._wrapped_env.generate_b0_start_goal=True

# for i in range(TrainEnvNum):
#     env.reset()
#     params = dict(
#         env=env,
#     )
#     joblib.dump(params,log_dir+'/TrainEnv'+'/env_'+str(i)+'.pkl')
#     plot_env(env,save=True,path=log_dir+'/TrainEnv'+'/Map_'+str(i)+'.pdf')

# Generate TestEnv files
TestEnvNum = 50
# `params` (the grid configuration dict) is assumed to be defined earlier.
env = TfEnv(GridBase(params))
env._wrapped_env.generate_grid = True
env._wrapped_env.generate_b0_start_goal = True
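
# Small addition (assumption: the output directory may not exist yet);
# create it before the dump loop so joblib.dump does not fail.
import os
os.makedirs(log_dir + '/TestEnv2', exist_ok=True)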

for i in range(TestEnvNum):
    env.reset()
    params = dict(env=env)
    joblib.dump(params, log_dir + '/TestEnv2' + '/env_' + str(i) + '.pkl')
    plot_env(env,
             save=True,
             path=log_dir + '/TestEnv2' + '/Map_' + str(i) + '.pdf')
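
# Round-trip sanity check (sketch): reload one saved pickle to confirm the
# dumped dict is usable; the 'env' key matches the dump above.
env_check = joblib.load(log_dir + '/TestEnv2/env_0.pkl')['env']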
Example 2
import joblib
import os.path as osp

from rllab.misc import logger
from sandbox.rocky.tf.envs.base import TfEnv

# GridBase is assumed to come from the project's grid-environment module;
# `params` is assumed to already hold the base configuration (grid size,
# actions, rewards) before the derived entries below are filled in.

params['obs_len'] = len(params['observe_directions'])
params['num_state'] = params['grid_n'] * params['grid_m']
params['traj_limit'] = 4 * (params['grid_n'] * params['grid_m'])  # 4 * (params['grid_n'] + params['grid_m'])
params['R_step'] = [params['R_step']] * params['num_action']
params['R_step'][params['stayaction']] = params['R_stay']
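
# Worked illustration with hypothetical values: if num_action=5, stayaction=4,
# R_step=-0.1 and R_stay=0.0, the vector becomes [-0.1, -0.1, -0.1, -0.1, 0.0];
# every move pays the step penalty, while staying earns R_stay instead.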

# Rebuild the environment from a previously saved pickle so the same grid,
# initial belief, start and goal states are reused.
env_ref = joblib.load('./env.pkl')['env']
grid = env_ref._wrapped_env.grid
b0 = env_ref._wrapped_env.b0
start_state = env_ref._wrapped_env.start_state
goal_state = env_ref._wrapped_env.goal_state
env = TfEnv(
    GridBase(params,
             grid=grid,
             b0=b0,
             start_state=start_state,
             goal_state=goal_state))
env._wrapped_env.generate_grid = False  # keep the loaded grid across resets
env._wrapped_env.generate_b0_start_goal = False  # keep the loaded b0/start/goal
env.reset()

log_dir = "./Data/obs_1goal20step0stay_1_gru"

tabular_log_file = osp.join(log_dir, "progress.csv")
text_log_file = osp.join(log_dir, "debug.log")
params_log_file = osp.join(log_dir, "params.json")
pkl_file = osp.join(log_dir, "params.pkl")

logger.add_text_output(text_log_file)
logger.add_tabular_output(tabular_log_file)
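
# Usage sketch, assuming the standard rllab logging flow: once the outputs
# above are registered, each training iteration emits one CSV row.
logger.log("logger outputs registered")  # written to debug.log
logger.record_tabular("Iteration", 0)    # buffer a column value
logger.dump_tabular()                    # flush one row to progress.csv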
Example 3
from rllab.misc.instrument import stub, run_experiment_lite
from qmdp_policy import QMDPPolicy
from sandbox.rocky.tf.policies.categorical_gru_policy import CategoricalGRUPolicy

import lasagne.nonlinearities as NL
from sandbox.rocky.tf.envs.base import TfEnv
from rllab.misc import logger
import os.path as osp
import tensorflow as tf
from sandbox.rocky.tf.samplers.batch_sampler import BatchSampler
import joblib
import dill
import numpy as np
# GridBase is assumed to be importable from the project's grid-environment
# module; its import does not appear in the original snippet.
# stub(globals())

# Build one random environment: enable generation for a single reset(),
# then freeze the flags so later resets keep the same grid, belief, start
# and goal.
env = TfEnv(GridBase())
env._wrapped_env.generate_grid = True
env._wrapped_env.generate_b0_start_goal = True
env.reset()
env._wrapped_env.generate_grid = False
env._wrapped_env.generate_b0_start_goal = False

env_img = env._wrapped_env.env_img
goal_img = env._wrapped_env.goal_img
b0_img = env._wrapped_env.b0_img
start_state = env._wrapped_env.start_state
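
# env_img / goal_img / b0_img appear to be image-format views of the grid,
# the goal cell and the initial belief (inferred from the names; exact
# shapes depend on the project's GridBase implementation).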

params = dict(env=env)
# file = open('env.pkl', 'wb')
# dill.dump(params, file)
# with open('env.pkl', 'rb') as file:
#     params = dill.load(file)