# Example #1
def main(args):
    """Build pixel-based DeepMind Control environments and train a SAC agent.

    Args:
        args: parsed CLI namespace. Fields used here: cpu_only, random_seed,
            env_name (formatted "domain/task"), image_size, frame_skip,
            frame_stack, algorithm.
    """
    if args.cpu_only:
        # Restrict TensorFlow's visible devices to CPUs only.
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu,
                                                   device_type='CPU')

    # Random seed setting: a non-positive seed means "draw one at random".
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # env_name is expected as "domain/task", e.g. "cartpole/swingup".
    domain_name, task_name = args.env_name.split('/')[:2]
    env = dmc2gym.make(domain_name=domain_name,
                       task_name=task_name,
                       seed=random_seed,
                       visualize_reward=False,
                       from_pixels=True,
                       height=args.image_size,
                       width=args.image_size,
                       frame_skip=args.frame_skip
                       )  #Pre image size for curl, image size for dbc
    env = FrameStack(env, k=args.frame_stack)

    test_env = dmc2gym.make(domain_name=domain_name,
                            task_name=task_name,
                            seed=random_seed,
                            visualize_reward=False,
                            from_pixels=True,
                            height=args.image_size,
                            width=args.image_size,
                            frame_skip=args.frame_skip
                            )  #Pre image size for curl, image size for dbc
    test_env = FrameStack(test_env, k=args.frame_stack)

    # Stacked RGB frames: 3 channels per frame, frame_stack frames.
    state_dim = (3 * args.frame_stack, args.image_size, args.image_size)
    action_dim = env.action_space.shape[0]
    max_action = env.action_space.high[0]
    min_action = env.action_space.low[0]

    if args.algorithm == 'SACv1':
        # BUG FIX: the original assigned the placeholder 0 here, which only
        # crashed later at `algorithm.name` with an opaque AttributeError.
        # Fail fast with a clear message instead.
        raise NotImplementedError("SACv1 with autoencoder is not implemented")
    elif args.algorithm == 'SACv2':
        algorithm = SACv2_AE(state_dim, action_dim, args)
    else:
        raise ValueError("Unknown algorithm: {}".format(args.algorithm))

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action,
                            args)
    trainer.run()
# Example #2
def main(args):
    """Build state-based gym or DeepMind Control environments and train TRPO.

    Args:
        args: parsed CLI namespace. Fields used here: cpu_only, random_seed,
            domain_type ('gym' selects OpenAI Gym, anything else selects
            DeepMind Control), env_name ("domain/task" for DMC), discrete.
    """
    if args.cpu_only:
        # Restrict TensorFlow's visible devices to CPUs only.
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu,
                                                   device_type='CPU')

    # Random seed setting: a non-positive seed means "draw one at random".
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # Env setting
    if args.domain_type == 'gym':
        # OpenAI Gym
        env = gym.make(args.env_name)
        env.seed(random_seed)
        env.action_space.seed(random_seed)

        test_env = gym.make(args.env_name)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)
    else:
        # DeepMind Control Suite: env_name is "domain/task".
        domain_name, task_name = args.env_name.split('/')[:2]
        env = dmc2gym.make(domain_name=domain_name,
                           task_name=task_name,
                           seed=random_seed)
        test_env = dmc2gym.make(domain_name=domain_name,
                                task_name=task_name,
                                seed=random_seed)

    if args.discrete:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.n
        # Discrete actions have no magnitude; 1 is a placeholder for the
        # trainer's (max_action, min_action) interface.
        max_action = 1
        min_action = 1
    else:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = env.action_space.high[0]
        min_action = env.action_space.low[0]

    algorithm = TRPO(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)
    print("Discrete: ", args.discrete)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action,
                            args)
    trainer.run()
# Example #3
def main(args):
    """Build gym / DeepMind Control / Atari environments and train a DQN agent.

    Args:
        args: parsed CLI namespace. Fields used here: cpu_only, random_seed,
            domain_type ('gym', 'dmc', or 'atari'), env_name ("domain/task"
            for DMC), frame_skip, image_size, frame_stack.
    """
    if args.cpu_only:
        # Restrict TensorFlow's visible devices to CPUs only.
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu, device_type='CPU')

    # Random seed setting: a non-positive seed means "draw one at random".
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed

    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # Env setting
    if args.domain_type == 'gym':
        # OpenAI Gym
        env = gym.make(args.env_name)
        env.seed(random_seed)
        env.action_space.seed(random_seed)

        test_env = gym.make(args.env_name)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)

    elif args.domain_type == 'dmc':
        # DeepMind Control Suite: env_name is "domain/task".
        domain_name, task_name = args.env_name.split('/')[:2]
        env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed)
        test_env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed)

    elif args.domain_type == 'atari':
        # Atari via OpenAI Gym, with standard preprocessing + frame stacking.
        env = gym.make(args.env_name)
        env = AtariPreprocessing(env, frame_skip=args.frame_skip, screen_size=args.image_size, grayscale_newaxis=True)
        env = FrameStack(env, args.frame_stack)
        env._max_episode_steps = 10000
        env.seed(random_seed)
        env.action_space.seed(random_seed)

        # NOTE(review): env sets _max_episode_steps on the FrameStack wrapper,
        # while test_env sets it on the inner env before wrapping. Kept as in
        # the original — confirm which attribute the trainer actually reads.
        test_env = gym.make(args.env_name)
        test_env = AtariPreprocessing(test_env, frame_skip=args.frame_skip, screen_size=args.image_size, grayscale_newaxis=True)
        test_env._max_episode_steps = 10000
        test_env = FrameStack(test_env, args.frame_stack)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)

    # Flat state vector for gym/dmc; full image shape tuple for atari.
    if args.domain_type == 'atari':
        state_dim = env.observation_space.shape
    else:
        state_dim = env.observation_space.shape[0]

    action_dim = env.action_space.n
    # Discrete actions have no magnitude; 1 is a placeholder for the
    # trainer's (max_action, min_action) interface.
    max_action = 1
    min_action = 1

    # BUG FIX: the original compared strings with `is` (identity), which is
    # not guaranteed to be true for equal strings and triggers a
    # SyntaxWarning on modern CPython; compare with `==` instead.
    if args.domain_type in ('gym', 'dmc'):
        algorithm = DQN(state_dim, action_dim, args)
    elif args.domain_type == 'atari':
        algorithm = ImageDQN(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()