def main(args):
    """Entry point: build pixel-based DMC environments and train SACv2_AE.

    Args:
        args: parsed argparse namespace; must provide cpu_only, random_seed,
            env_name ('domain/task'), image_size, frame_skip, frame_stack,
            algorithm, plus whatever SACv2_AE and Basic_trainer consume.
    """
    # Optionally hide GPUs so TensorFlow runs on CPU only.
    if args.cpu_only:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu, device_type='CPU')

    # random seed setting: a non-positive seed means "pick one at random".
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed
    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # env_name is 'domain/task', e.g. 'cheetah/run'.
    domain_name = args.env_name.split('/')[0]
    task_name = args.env_name.split('/')[1]

    def _make_env():
        # Pre image size for curl, image size for dbc.
        e = dmc2gym.make(domain_name=domain_name,
                         task_name=task_name,
                         seed=random_seed,
                         visualize_reward=False,
                         from_pixels=True,
                         height=args.image_size,
                         width=args.image_size,
                         frame_skip=args.frame_skip)
        return FrameStack(e, k=args.frame_stack)

    env = _make_env()
    test_env = _make_env()

    # Observations are k stacked RGB frames: (3 * k, H, W).
    state_dim = (3 * args.frame_stack, args.image_size, args.image_size)
    action_dim = env.action_space.shape[0]
    max_action = env.action_space.high[0]
    min_action = env.action_space.low[0]

    if args.algorithm == 'SACv1':
        # BUG in original: 'algorithm = 0' was assigned here, which crashed
        # later on 'algorithm.name' with an opaque AttributeError.
        raise NotImplementedError("SACv1 is not supported by this script")
    elif args.algorithm == 'SACv2':
        algorithm = SACv2_AE(state_dim, action_dim, args)
    else:
        # Original left 'algorithm' undefined here (NameError); fail clearly.
        raise ValueError("Unknown algorithm: {}".format(args.algorithm))

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()
def main(args):
    """Entry point: build OpenAI Gym or DeepMind Control environments and train TRPO.

    Args:
        args: parsed argparse namespace; must provide cpu_only, random_seed,
            domain_type ('gym' or anything else for dmc), env_name
            ('EnvId' for gym, 'domain/task' for dmc), discrete, plus whatever
            TRPO and Basic_trainer consume.
    """
    # Optionally hide GPUs so TensorFlow runs on CPU only.
    if args.cpu_only:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu, device_type='CPU')

    # random seed setting: a non-positive seed means "pick one at random".
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed
    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # env setting
    if args.domain_type == 'gym':  # openai gym
        env = gym.make(args.env_name)
        env.seed(random_seed)
        env.action_space.seed(random_seed)
        test_env = gym.make(args.env_name)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)
    else:  # deepmind control suite; env_name is 'domain/task'
        domain_name = args.env_name.split('/')[0]
        task_name = args.env_name.split('/')[1]
        env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed)
        test_env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed)

    # state_dim was identical in both original branches — hoisted out.
    state_dim = env.observation_space.shape[0]
    if args.discrete:
        action_dim = env.action_space.n
        # NOTE(review): original sets both bounds to 1 for discrete actions;
        # min_action = 1 looks odd for a lower bound — confirm the trainer
        # ignores these bounds in the discrete case before changing them.
        max_action = 1
        min_action = 1
    else:
        action_dim = env.action_space.shape[0]
        max_action = env.action_space.high[0]
        min_action = env.action_space.low[0]

    algorithm = TRPO(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)
    print("Max action:", max_action)
    print("Min action:", min_action)
    print("Discrete: ", args.discrete)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()
def main(args):
    """Entry point: build Gym / DMC / Atari environments and train DQN or ImageDQN.

    Args:
        args: parsed argparse namespace; must provide cpu_only, random_seed,
            domain_type ('gym', 'dmc', or 'atari'), env_name, frame_skip,
            image_size, frame_stack, plus whatever DQN/ImageDQN and
            Basic_trainer consume.
    """
    # Optionally hide GPUs so TensorFlow runs on CPU only.
    if args.cpu_only:
        cpu = tf.config.experimental.list_physical_devices(device_type='CPU')
        tf.config.experimental.set_visible_devices(devices=cpu, device_type='CPU')

    # random seed setting: a non-positive seed means "pick one at random".
    if args.random_seed <= 0:
        random_seed = np.random.randint(1, 9999)
    else:
        random_seed = args.random_seed
    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    random.seed(random_seed)

    # env setting
    if args.domain_type == 'gym':  # openai gym
        env = gym.make(args.env_name)
        env.seed(random_seed)
        env.action_space.seed(random_seed)
        test_env = gym.make(args.env_name)
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)
    elif args.domain_type == 'dmc':  # deepmind control suite; env_name is 'domain/task'
        domain_name = args.env_name.split('/')[0]
        task_name = args.env_name.split('/')[1]
        env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed)
        test_env = dmc2gym.make(domain_name=domain_name, task_name=task_name, seed=random_seed)
    elif args.domain_type == 'atari':  # openai gym Atari with pixel preprocessing
        env = gym.make(args.env_name)
        env = AtariPreprocessing(env, frame_skip=args.frame_skip,
                                 screen_size=args.image_size, grayscale_newaxis=True)
        env = FrameStack(env, args.frame_stack)
        env._max_episode_steps = 10000
        env.seed(random_seed)
        env.action_space.seed(random_seed)
        test_env = gym.make(args.env_name)
        test_env = AtariPreprocessing(test_env, frame_skip=args.frame_skip,
                                      screen_size=args.image_size, grayscale_newaxis=True)
        test_env = FrameStack(test_env, args.frame_stack)
        # BUG in original: the step limit was set on test_env *before* the
        # FrameStack wrap, unlike env (set after). gym.Wrapper.__setattr__
        # writes to the wrapper itself, so the two envs ended up with the
        # attribute on different objects; set it after wrapping to match env.
        test_env._max_episode_steps = 10000
        test_env.seed(random_seed)
        test_env.action_space.seed(random_seed)

    # Atari observations keep the full image shape; other domains are flat vectors.
    if args.domain_type == 'atari':
        state_dim = env.observation_space.shape
    else:
        state_dim = env.observation_space.shape[0]
    # Discrete action space: bounds are dummy placeholders for the trainer.
    action_dim = env.action_space.n
    max_action = 1
    min_action = 1

    # BUG in original: domain_type was compared with 'is' (object identity)
    # instead of '==', which can be False for equal strings and left
    # 'algorithm' undefined (NameError). 'gym' and 'dmc' both used DQN.
    if args.domain_type == 'atari':
        algorithm = ImageDQN(state_dim, action_dim, args)
    else:
        algorithm = DQN(state_dim, action_dim, args)

    print("Training of", env.unwrapped.spec.id)
    print("Algorithm:", algorithm.name)
    print("State dim:", state_dim)
    print("Action dim:", action_dim)

    trainer = Basic_trainer(env, test_env, algorithm, max_action, min_action, args)
    trainer.run()