def main(args):
    """Fit AWAC on the dataset named by ``args.dataset``.

    The dataset and environment come from ``get_pybullet``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2`` before
    training for 1000 epochs.

    Args:
        args: object exposing ``dataset``, ``seed``, ``gpu`` and ``q_func``
            attributes (presumably the parsed command line -- confirm
            against the caller).
    """
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    # The same encoder factory is shared by the actor and the critic.
    shared_encoder = VectorEncoderFactory(hidden_units=[256] * 4)

    algo = AWAC(
        actor_encoder_factory=shared_encoder,
        critic_encoder_factory=shared_encoder,
        q_func_factory=args.q_func,
        use_gpu=device,
    )

    scorers = {
        'environment': evaluate_on_environment(env),
        'td_error': td_error_scorer,
        'discounted_advantage': discounted_sum_of_advantage_scorer,
        'value_scale': average_value_estimation_scorer,
        'value_std': value_estimation_std_scorer,
        'action_diff': continuous_action_diff_scorer,
    }
    algo.fit(
        train_split,
        eval_episodes=eval_split,
        n_epochs=1000,
        scorers=scorers,
    )
def test_device(mock):
    """Check how ``Device.get_id()`` reacts to ``copy.deepcopy``.

    Outside ``parallel()`` a deep copy leaves both ids at 0.  Inside
    ``parallel()`` each deep copy changes the id of the source device as
    well as the copy: first to 1, then back to 0.

    Args:
        mock: unused here; presumably injected by a patch decorator that is
            not visible in this view -- confirm.
    """
    original = Device()

    # Plain deep copy: neither object's id changes.
    plain_copy = copy.deepcopy(original)
    assert original.get_id() == 0
    assert plain_copy.get_id() == 0

    with parallel():
        # First copy inside the context: both ids become 1.
        first_copy = copy.deepcopy(original)
        assert original.get_id() == 1
        assert first_copy.get_id() == 1

        # Second copy: the id wraps around to 0 again.
        second_copy = copy.deepcopy(original)
        assert original.get_id() == 0
        assert second_copy.get_id() == 0
def main(args):
    """Fit a dynamics ensemble, then fit COMBO on top of it.

    The dataset and environment come from ``get_pybullet``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2``.  The
    dynamics model is fitted for 100000 steps and then handed to COMBO, which
    is fitted for 1000000 steps.

    Args:
        args: object exposing ``dataset``, ``seed``, ``gpu`` and ``q_func``
            attributes (presumably the parsed command line -- confirm
            against the caller).
    """
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    # Stage 1: the dynamics model, fitted before COMBO is constructed.
    dynamics = ProbabilisticEnsembleDynamics(use_gpu=device)
    dynamics_scorers = {
        "obs_error": dynamics_observation_prediction_error_scorer,
        "reward_error": dynamics_reward_prediction_error_scorer,
    }
    dynamics.fit(
        train_split,
        eval_episodes=eval_split,
        n_steps=100000,
        scorers=dynamics_scorers,
    )

    # Stage 2: COMBO, which receives the fitted dynamics model.
    combo = COMBO(
        q_func_factory=args.q_func,
        dynamics=dynamics,
        use_gpu=device,
    )
    combo_scorers = {
        'environment': evaluate_on_environment(env),
        'td_error': td_error_scorer,
        'discounted_advantage': discounted_sum_of_advantage_scorer,
        'value_scale': average_value_estimation_scorer,
        'value_std': value_estimation_std_scorer,
        'action_diff': continuous_action_diff_scorer,
    }
    combo.fit(
        train_split,
        eval_episodes=eval_split,
        n_steps=1000000,
        scorers=combo_scorers,
    )
def main(args):
    """Fit DiscreteBC on the dataset named by ``args.dataset``.

    The dataset and environment come from ``get_atari``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2``.  The
    only scorer is an environment evaluation created with ``epsilon=0.05``.

    Args:
        args: object exposing ``dataset``, ``seed`` and ``gpu`` attributes
            (presumably the parsed command line -- confirm against the
            caller).
    """
    dataset, env = get_atari(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    algo = DiscreteBC(
        n_epochs=100,
        scaler='pixel',
        use_batch_norm=False,
        use_gpu=device,
    )

    scorers = {'environment': evaluate_on_environment(env, epsilon=0.05)}
    algo.fit(train_split, eval_episodes=eval_split, scorers=scorers)
def main(args):
    """Fit BC on the dataset named by ``args.dataset``.

    The dataset and environment come from ``get_pybullet``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2``.  The
    algorithm is constructed with ``n_epochs=100``.

    Args:
        args: object exposing ``dataset``, ``seed`` and ``gpu`` attributes
            (presumably the parsed command line -- confirm against the
            caller).
    """
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    algo = BC(n_epochs=100, use_gpu=device)

    scorers = {
        'environment': evaluate_on_environment(env),
        'action_diff': continuous_action_diff_scorer,
    }
    algo.fit(train_split, eval_episodes=eval_split, scorers=scorers)
def main(args):
    """Fit AWR on the dataset named by ``args.dataset``.

    The dataset and environment come from ``get_pybullet``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2``.  The
    algorithm is constructed with ``n_epochs=100``.

    Args:
        args: object exposing ``dataset``, ``seed`` and ``gpu`` attributes
            (presumably the parsed command line -- confirm against the
            caller).
    """
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    algo = AWR(n_epochs=100, use_gpu=device)

    scorers = {
        'environment': evaluate_on_environment(env),
        'td_error': td_error_scorer,
        'value_scale': average_value_estimation_scorer,
        'action_diff': continuous_action_diff_scorer,
    }
    algo.fit(train_split, eval_episodes=eval_split, scorers=scorers)
def main(args):
    """Fit SAC on the dataset named by ``args.dataset``.

    The dataset and environment come from ``get_pybullet``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2``.  The
    algorithm is constructed with ``n_epochs=100``.

    Args:
        args: object exposing ``dataset``, ``seed``, ``gpu`` and
            ``q_func_type`` attributes (presumably the parsed command line
            -- confirm against the caller).
    """
    dataset, env = get_pybullet(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    algo = SAC(
        n_epochs=100,
        q_func_type=args.q_func_type,
        use_gpu=device,
    )

    scorers = {
        'environment': evaluate_on_environment(env),
        'td_error': td_error_scorer,
        'discounted_advantage': discounted_sum_of_advantage_scorer,
        'value_scale': average_value_estimation_scorer,
        'value_std': value_estimation_std_scorer,
        'action_diff': continuous_action_diff_scorer,
    }
    algo.fit(train_split, eval_episodes=eval_split, scorers=scorers)
def main(args):
    """Fit DQN on the dataset named by ``args.dataset``.

    The dataset and environment come from ``get_atari``; d3rlpy is seeded
    with ``args.seed`` and the dataset is split with ``test_size=0.2``.  The
    environment scorer is created with ``epsilon=0.05``.

    Args:
        args: object exposing ``dataset``, ``seed``, ``gpu`` and
            ``q_func_type`` attributes (presumably the parsed command line
            -- confirm against the caller).
    """
    dataset, env = get_atari(args.dataset)

    d3rlpy.seed(args.seed)

    train_split, eval_split = train_test_split(dataset, test_size=0.2)

    # A Device is only created when a GPU index was supplied.
    if args.gpu is None:
        device = None
    else:
        device = Device(args.gpu)

    algo = DQN(
        n_epochs=100,
        q_func_type=args.q_func_type,
        scaler='pixel',
        use_batch_norm=False,
        use_gpu=device,
    )

    scorers = {
        'environment': evaluate_on_environment(env, epsilon=0.05),
        'td_error': td_error_scorer,
        'discounted_advantage': discounted_sum_of_advantage_scorer,
        'value_scale': average_value_estimation_scorer,
    }
    algo.fit(train_split, eval_episodes=eval_split, scorers=scorers)
@pytest.mark.parametrize('value', ['min_max', MinMaxScaler(), None])
def test_check_scaler(value):
    """``check_scaler`` maps None to None and anything else to a MinMaxScaler."""
    resolved = check_scaler(value)
    if value is not None:
        assert isinstance(resolved, MinMaxScaler)
    else:
        assert resolved is None


@pytest.mark.parametrize('value', [['random_shift'], [RandomShift()], None])
def test_check_augmentation(value):
    """``check_augmentation`` always yields an AugmentationPipeline.

    For None the pipeline holds no augmentations; otherwise its first
    augmentation is a RandomShift.
    """
    pipeline = check_augmentation(value)
    assert isinstance(pipeline, AugmentationPipeline)
    if value is not None:
        first = pipeline.augmentations[0]
        assert isinstance(first, RandomShift)
    else:
        assert len(pipeline.augmentations) == 0


@pytest.mark.parametrize('value', [False, True, 0, Device(0)])
def test_check_use_gpu(value):
    """``check_use_gpu`` gives None for False and a device with id 0 otherwise."""
    device = check_use_gpu(value)
    if value is False:
        assert device is None
    elif value is True or type(value) is int or isinstance(value, Device):
        # True, a plain int index and a Device instance all expect id 0.
        assert device.get_id() == 0
def to_gpu(self, device=Device()):
    """Store a ``'cuda:<id>'`` string on ``self.device`` and pass it to ``to_cuda``.

    Args:
        device: object whose ``get_id()`` supplies the CUDA index.
            NOTE(review): the default ``Device()`` is evaluated once, when
            this function is defined, so every call that omits the argument
            shares that single instance.
    """
    gpu_id = device.get_id()
    self.device = 'cuda:%d' % gpu_id
    to_cuda(self, self.device)