Example #1
from omegaconf import DictConfig

# init_flags, create_env, train, and test are assumed to be project-local helpers
# imported elsewhere in the original file.
def main(flags: DictConfig):
    flags = init_flags(flags)
    env = create_env(flags)
    flags.observation_shape = env.observation_space.shape

    # Select the algorithm implementation (learn, create_buffers, optimizer) and the
    # network class Net based on flags.model, flags.ObsType, and flags.ActionType.
    if flags.model == "Vanilla":
        from algos.vanilla.learn import learn, create_buffers, optimizer
        flags.action_shape = env.action_space.n
        if flags.ObsType == "State":
            from algos.vanilla.model import FCNet
            Net = FCNet
        elif flags.ObsType in ("Image", "Atari"):
            from algos.vanilla.model import AtariNet
            Net = AtariNet

    elif flags.model == "SAC":
        if flags.ActionType == "Continuous":
            from algos.sac.learn import learn, create_buffers, optimizer
            from algos.sac.model import SACNet
            flags.action_shape = env.action_space.shape[0]
            Net = SACNet
        elif flags.ActionType == "Discrete":
            from algos.sac_discrete.learn import learn, create_buffers, optimizer
            from algos.sac_discrete.model import SACNet
            flags.action_shape = env.action_space.n
            Net = SACNet

    elif flags.model == "SLAC":
        if flags.ActionType == "Continuous":
            from algos.slac.learn import learn, create_buffers, optimizer
            from algos.slac.model import SACNet
            flags.action_shape = env.action_space.shape[0]
            Net = SACNet
        elif flags.ActionType == "Discrete":
            from algos.slac_discrete.learn import learn, create_buffers, optimizer
            from algos.slac_discrete.model import SACNet
            flags.action_shape = env.action_space.n
            Net = SACNet

    elif flags.model == "Random":
        if flags.ActionType == "Continuous":
            # TODO: random agent for continuous action spaces is not implemented yet.
            raise NotImplementedError("Random agent for continuous action spaces")

        elif flags.ActionType == "Discrete":
            from algos.random_agant.model import RandomAgent_discrete
            from algos.random_agant.learn import learn, create_buffers, optimizer

            flags.action_shape = env.action_space.n
            Net = RandomAgent_discrete

    else:
        raise ValueError(f"Unknown model: {flags.model}")

    # Close the probe env; it was only needed to read the observation/action spaces.
    env.close()

    if flags.mode == "train":
        train(flags, Net, learn, create_buffers, optimizer)
    else:
        test(flags, Net)
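
For reference, a minimal, self-contained sketch of how a flags object with the keys read above could be built and passed to main(). It assumes only the omegaconf package; the key names mirror the ones main() reads, the values are illustrative, and the call to main() is left commented out because it depends on the project's algos.* modules and helpers.

from omegaconf import OmegaConf

flags = OmegaConf.create({
    "model": "SAC",            # "Vanilla", "SAC", "SLAC", or "Random"
    "ActionType": "Discrete",  # "Continuous" or "Discrete"
    "ObsType": "State",        # "State", "Image", or "Atari" (read by the Vanilla branch)
    "mode": "train",           # "train" runs train(); anything else runs test()
})

# main(flags)  # with these settings main() would dispatch to algos.sac_discrete.*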