Example #1
0
def main(args):
    """Run tabular value iteration on two seeded ``GridWorldEnv`` instances.

    For every environment this builds an experiment directory path of the
    form ``<cwd>/data/part1/<env_name>/policy_type<p>_temperature<t>/``,
    configures the logger with it, writes the run parameters to
    ``params.json`` under that path and trains a ``ValueIteration`` instance.

    Args:
        args: Namespace-like object accepted by ``vars()``. The attributes
            ``render``, ``policy_type`` and ``temperature`` are read. An
            ``env`` entry holding the environment name is written into it
            for each environment processed.
    """
    render = args.render
    if not render:
        # Select the Agg backend before pyplot is imported so no display is
        # needed when rendering is turned off.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401 (kept for its import-time effect)
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from envs import Grid1DEnv, GridWorldEnv
    envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)]

    for env in envs:
        # Prefer an instance-level ``__name__`` when the environment defines
        # one; otherwise fall back to the class name instead of raising
        # AttributeError.
        env_name = getattr(env, '__name__', type(env).__name__)
        exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (env_name, args.policy_type, args.temperature)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])

        # ``vars(args)`` is the namespace's own ``__dict__``, so this also
        # sets ``args.env``; the dumped parameters therefore include the
        # environment name.
        args_dict = vars(args)
        args_dict['env'] = env_name
        # Use a context manager so the file is flushed and closed before
        # training starts.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(args_dict, params_file, indent=2, sort_keys=True)

        policy = TabularPolicy(env)
        value_fun = TabularValueFun(env)
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              policy_type=args.policy_type,
                              render=render,
                              temperature=args.temperature)
        algo.train()
Example #2
0
def main(args):
    """Run value iteration on discretized versions of four control envs.

    The environments are ``DoubleIntegratorEnv``, ``MountainCarEnv``,
    ``CartPoleEnv`` and ``SwingUpEnv``. Each one is wrapped in
    ``Discretize`` (51 state bins for ``MountainCarEnv``, 21 otherwise) and
    solved with ``ValueIteration`` using either a tabular or a look-ahead
    policy. Before training, the logger is configured with
    ``<cwd>/data/part2_d/<env_name>/policy_type<p>_mode<m>_horizon<h>/`` and
    the run parameters are written to ``params.json`` under that path.

    Args:
        args: Namespace-like object accepted by ``vars()``. The attributes
            ``render``, ``policy_type``, ``mode``, ``horizon`` and
            ``max_iter`` are read. An ``env`` entry holding the environment
            name is written into it for each environment processed.

    Raises:
        NotImplementedError: If ``args.policy_type`` is neither
            ``'tabular'`` nor ``'look_ahead'``. This is raised after the
            logger has been configured and ``params.json`` written for the
            first environment.
    """
    render = args.render
    if not render:
        # Select the Agg backend before pyplot is imported so no display is
        # needed when rendering is turned off.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401 (kept for its import-time effect)
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part2.look_ahead_policy import LookAheadPolicy
    from part2.discretize import Discretize
    envs = [
        DoubleIntegratorEnv(),
        MountainCarEnv(),
        CartPoleEnv(),
        SwingUpEnv()
    ]

    for env in envs:
        env_name = env.__class__.__name__

        # MountainCarEnv gets a finer state grid than the other envs.
        if env_name == 'MountainCarEnv':
            state_discretization = 51
        else:
            state_discretization = 21
        exp_dir = os.getcwd(
        ) + '/data/part2_d/%s/policy_type%s_mode%s_horizon%s/' % (
            env_name, args.policy_type, args.mode, args.horizon)
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])

        # ``vars(args)`` is the namespace's own ``__dict__``, so this also
        # sets ``args.env``; the dumped parameters therefore include the
        # environment name.
        args_dict = vars(args)
        args_dict['env'] = env_name
        # Use a context manager so the file is flushed and closed before
        # training starts.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(args_dict, params_file, indent=2, sort_keys=True)

        # Keep the wrapper under its own name rather than rebinding the
        # loop variable.
        discrete_env = Discretize(env,
                                  state_discretization=state_discretization,
                                  mode=args.mode)
        value_fun = TabularValueFun(discrete_env)
        if args.policy_type == 'tabular':
            policy = TabularPolicy(discrete_env)
        elif args.policy_type == 'look_ahead':
            policy = LookAheadPolicy(discrete_env, value_fun, args.horizon)
        else:
            raise NotImplementedError
        algo = ValueIteration(discrete_env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()
Example #3
0
def main(args):
    """Run tabular value iteration on an ``ASRSEnv`` and save the values.

    Builds a ``StaticOrderProcess``, an ``ASRSEnv`` using it, and a
    ``TabularEnv`` wrapper around that environment. The logger is configured
    with
    ``<cwd>/data/version1/<env_name>/policy_type<p>_temperature<t>_envsize_<n>/``,
    the run parameters are written to ``params.json`` under that path,
    ``ValueIteration`` is trained, and the value function is saved to
    ``value_fun.npy`` in the same location.

    Args:
        args: Namespace-like object accepted by ``vars()``. The attributes
            ``render``, ``policy_type``, ``temperature`` and
            ``num_rollouts`` are read directly; ``storage_shape``,
            ``dist_param``, ``exit_coord`` and ``logger_level`` are strings
            that are passed through ``eval``. An ``env`` entry holding the
            environment name is written into it.

    Raises:
        AssertionError: If ``dist_param`` evaluates to something other than
            ``None`` whose length differs from the product of
            ``storage_shape``.
    """
    render = args.render
    if not render:
        # Select the Agg backend before pyplot is imported so no display is
        # needed when rendering is turned off.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401 (kept for its import-time effect)
    from utils.utils import TabularPolicy
    from utils.value_function import TabularValueFun
    from algos.tabular_value_iteration import ValueIteration
    from envs import ASRSEnv, TabularEnv, ProbDistEnv, DynamicProbEnv, StaticOrderProcess, SeasonalOrderProcess

    # NOTE(review): ``eval`` executes arbitrary code taken from the argument
    # strings. If these options are always plain Python literals,
    # ``ast.literal_eval`` would be a safer choice -- confirm with the CLI
    # definition before switching. Each string is evaluated once and reused.
    storage_shape = eval(args.storage_shape)
    num_products = np.array(storage_shape).prod()
    dist_param = eval(args.dist_param)
    assert (dist_param is None) or (num_products == len(
        dist_param
    )), 'storage_shape should be consistent with dist_param length'
    op = StaticOrderProcess(num_products=num_products,
                            dist_param=dist_param)

    exit_coord = eval(args.exit_coord)
    base_env = ASRSEnv(storage_shape,
                       order_process=op,
                       origin_coord=exit_coord)

    env = TabularEnv(base_env)

    # Prefer an instance-level ``__name__`` when the environment defines
    # one; otherwise fall back to the class name instead of raising
    # AttributeError.
    env_name = getattr(env, '__name__', type(env).__name__)
    exp_dir = os.getcwd(
    ) + '/data/version1/%s/policy_type%s_temperature%s_envsize_%s/' % (
        env_name, args.policy_type, args.temperature, num_products)
    # NOTE(review): ``logger_level`` is also evaluated with ``eval``; it may
    # be a name expression rather than a literal, so it is left untouched.
    logger.configure(dir=exp_dir,
                     format_strs=['stdout', 'log', 'csv'],
                     level=eval(args.logger_level))

    # ``vars(args)`` is the namespace's own ``__dict__``, so this also sets
    # ``args.env``; the dumped parameters therefore include the environment
    # name.
    args_dict = vars(args)
    args_dict['env'] = env_name
    # Use a context manager so the file is flushed and closed before
    # training starts.
    with open(exp_dir + '/params.json', 'w') as params_file:
        json.dump(args_dict, params_file, indent=2, sort_keys=True)

    policy = TabularPolicy(env)
    value_fun = TabularValueFun(env)
    algo = ValueIteration(env,
                          value_fun,
                          policy,
                          policy_type=args.policy_type,
                          render=render,
                          temperature=args.temperature,
                          num_rollouts=args.num_rollouts)
    algo.train()
    value_fun.save(f'{exp_dir}/value_fun.npy')
Example #4
0
def main(args):
    """Run value iteration on discretized ``CartPoleEnv`` and ``SwingUpEnv``.

    Each environment is wrapped in ``Discretize`` using
    ``args.state_discretization`` and ``args.mode`` and solved with
    ``ValueIteration`` and a ``TabularPolicy``. Before training, the logger
    is configured with
    ``<cwd>/data/part4/<env_name>/mode<m>_state_discretization<n>/`` and the
    run parameters are written to ``params.json`` under that path.

    Args:
        args: Namespace-like object accepted by ``vars()``. The attributes
            ``render``, ``mode``, ``state_discretization`` and ``max_iter``
            are read. An ``env`` entry holding the environment name is
            written into it for each environment processed.
    """
    render = args.render
    if not render:
        # Select the Agg backend before pyplot is imported so no display is
        # needed when rendering is turned off.
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt  # noqa: F401 (kept for its import-time effect)
    from envs import CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part4.discretize import Discretize
    envs = [CartPoleEnv(), SwingUpEnv()]

    for env in envs:
        env_name = env.__class__.__name__
        exp_dir = os.getcwd(
        ) + '/data/part4/%s/mode%s_state_discretization%s/' % (
            env_name, args.mode, str(args.state_discretization))
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])

        # ``vars(args)`` is the namespace's own ``__dict__``, so this also
        # sets ``args.env``; the dumped parameters therefore include the
        # environment name.
        args_dict = vars(args)
        args_dict['env'] = env_name
        # Use a context manager so the file is flushed and closed before
        # training starts.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(args_dict, params_file, indent=2, sort_keys=True)

        # Keep the wrapper under its own name rather than rebinding the
        # loop variable.
        discrete_env = Discretize(env,
                                  state_discretization=args.state_discretization,
                                  mode=args.mode)
        value_fun = TabularValueFun(discrete_env)
        policy = TabularPolicy(discrete_env)
        algo = ValueIteration(discrete_env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()