def make_configs(self):
    configurator = Configurator('grid')

    configurator.fixed('log.dir', 'some path')

    configurator.grid('network.lr', [0.1, 0.01, 0.05])
    configurator.grid('network.layers', [16, 32])
    configurator.grid('env.id', ['CartPole-v1', 'Ant-v2', 'HalfCheetah-v2'])

    configs = configurator.make_configs()

    return configs
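# A minimal standalone sketch (assumed usage, not part of the original experiment files):
# every grid() call is crossed with the others while fixed() values are shared, so the
# snippet below yields 3 (lr) x 2 (layers) = 6 configs. The 'ID' key and the key ordering
# follow the behaviour exercised in test_configurator() further down.
configurator = Configurator('grid')
configurator.fixed('log.dir', 'some path')
configurator.grid('network.lr', [0.1, 0.01, 0.05])
configurator.grid('network.layers', [16, 32])
for config in configurator.make_configs():
    # each config is a dict with an auto-assigned 'ID' plus the declared keys
    print(config['ID'], config['log.dir'], config['network.lr'], config['network.layers'])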
def make_configs(self):
    configurator = Configurator('grid')

    configurator.fixed('cuda', True)

    configurator.grid('network.type', ['VAE', 'ConvVAE'])
    configurator.fixed('network.z_dim', 8)

    configurator.fixed('train.num_epoch', 100)
    configurator.fixed('train.batch_size', 128)
    configurator.fixed('eval.batch_size', 128)

    configurator.fixed('log.interval', 100)
    configurator.fixed('log.dir', 'logs')

    list_config = configurator.make_configs()

    return list_config
def make_configs(self):
    configurator = Configurator('grid')

    configurator.fixed('cuda', False)  # ES with a small network, no need for GPU

    configurator.fixed('env.id', 'Pendulum-v0')

    configurator.fixed('network.recurrent', True)
    configurator.fixed('network.hidden_size', [32])

    configurator.fixed('es.algo', 'CMAES')
    configurator.fixed('es.popsize', 16)
    configurator.fixed('es.mu0', 0.0)
    configurator.fixed('es.std0', 0.5)

    """Hyperparameter search (to enable later):
    configurator.grid('env.id', ['Pendulum-v0', 'Reacher-v2', 'InvertedPendulum-v2', 'HumanoidStandup-v2'])
    configurator.grid('network.hidden_size', [[32], [32, 32], [64, 64]])
    configurator.grid('es.popsize', [8, 16, 32, 64])
    configurator.grid('es.mu0', [0.0, 0.3, 0.5])
    configurator.grid('es.std0', [0.1, 0.5, 1.0])
    """

    configurator.fixed('train.num_iteration', 3000)
    configurator.fixed('train.N', 5)
    configurator.fixed('train.T', 300)

    configurator.fixed('log.interval', 100)
    configurator.fixed('log.dir', 'logs')

    list_config = configurator.make_configs()

    return list_config
def make_configs(self):
    configurator = Configurator('grid')

    configurator.fixed('cuda', True)  # whether to use GPU

    configurator.fixed('env.id', 'HalfCheetah-v2')
    configurator.fixed('env.standardize', True)  # whether to use VecStandardize
    configurator.fixed('env.time_aware_obs', False)  # whether to append time step to observation

    configurator.fixed('network.recurrent', False)
    configurator.fixed('network.hidden_sizes', [64, 64])
    configurator.fixed('network.independent_V', False)  # whether policy and value networks share parameters

    configurator.fixed('algo.lr', 3e-4)
    configurator.fixed('algo.lr_V', 1e-3)
    configurator.fixed('algo.use_lr_scheduler', True)
    configurator.fixed('algo.gamma', 0.99)
    configurator.fixed('algo.gae_lambda', 0.97)

    configurator.fixed('agent.standardize_Q', False)  # whether to standardize discounted returns
    configurator.fixed('agent.standardize_adv', True)  # whether to standardize advantage estimates
    configurator.fixed('agent.max_grad_norm', 0.5)  # gradient clipping; set None to turn off
    configurator.fixed('agent.entropy_coef', 0.0)
    configurator.fixed('agent.value_coef', 0.5)
    configurator.fixed('agent.fit_terminal_value', True)
    configurator.fixed('agent.terminal_value_coef', 0.1)
    configurator.fixed('agent.clip_range', 0.2)  # PPO epsilon for ratio clipping
    configurator.fixed('agent.target_kl', 0.015)  # target KL between new and old policies for early stopping (usually small, e.g. 0.01-0.05)

    # only for continuous control
    configurator.fixed('env.clip_action', True)  # clip sampled action to the valid bound before step()
    configurator.fixed('agent.min_std', 1e-6)  # min threshold for std, avoids numerical instability
    configurator.fixed('agent.std_style', 'exp')  # std parameterization, 'exp' or 'softplus'
    configurator.fixed('agent.constant_std', None)  # constant std; set None to learn it
    configurator.fixed('agent.std_state_dependent', False)  # whether to learn std with state dependency
    configurator.fixed('agent.init_std', 0.5)  # initial std for state-independent std

    configurator.fixed('train.timestep', 1e6)  # either 'train.iter' or 'train.timestep'
    configurator.fixed('train.N', 2)  # number of trajectories per training iteration
    configurator.fixed('train.ratio_T', 1.0)  # percentage of max allowed horizon

    configurator.fixed('eval.independent', False)
    configurator.fixed('eval.N', 10)  # number of episodes to evaluate; do not specify T, so episodes run to completion

    configurator.fixed('train.batch_size', 256)
    configurator.fixed('train.num_epochs', 80)

    configurator.fixed('log.interval', 10)  # logging interval
    configurator.fixed('log.dir', 'logs/default')  # logging directory

    list_config = configurator.make_configs()

    return list_config
def make_configs(self):
    configurator = Configurator('grid')

    configurator.fixed('cuda', False)  # whether to use GPU

    configurator.fixed('env.id', 'HalfCheetah-v2')
    configurator.fixed('env.standardize', True)  # whether to use VecStandardize

    configurator.fixed('network.recurrent', True)
    configurator.fixed('network.hidden_sizes', [32])  # TODO: [64, 64]

    configurator.fixed('algo.lr', 1e-3)
    configurator.fixed('algo.use_lr_scheduler', False)
    configurator.fixed('algo.gamma', 0.99)

    configurator.fixed('agent.standardize_Q', False)  # whether to standardize discounted returns
    configurator.fixed('agent.standardize_adv', False)  # whether to standardize advantage estimates
    configurator.fixed('agent.max_grad_norm', 0.5)  # gradient clipping; set None to turn off
    configurator.fixed('agent.entropy_coef', 0.01)
    configurator.fixed('agent.value_coef', 0.5)

    # only for continuous control
    configurator.fixed('agent.min_std', 1e-6)  # min threshold for std, avoids numerical instability
    configurator.fixed('agent.std_style', 'exp')  # std parameterization, 'exp' or 'softplus'
    configurator.fixed('agent.constant_std', None)  # constant std; set None to learn it
    configurator.fixed('agent.std_state_dependent', False)  # whether to learn std with state dependency
    configurator.fixed('agent.init_std', 0.5)  # initial std for state-independent std

    configurator.fixed('train.timestep', 1e7)  # either 'train.iter' or 'train.timestep'
    configurator.fixed('train.N', 10)  # number of segments per training iteration
    configurator.fixed('train.T', 5)  # fixed-length segment rolling

    configurator.fixed('eval.N', 100)  # number of episodes to evaluate; do not specify T, so episodes run to completion

    configurator.fixed('log.record_interval', 100)  # interval to record the logging
    configurator.fixed('log.print_interval', 500)  # interval to print the logging to screen
    configurator.fixed('log.dir', 'logs')  # logging directory

    list_config = configurator.make_configs()

    return list_config
def make_configs(self):
    configurator = Configurator('grid')

    configurator.fixed('cuda', False)

    configurator.fixed('algo.lr', 7e-4)
    configurator.fixed('algo.lr_V', 1e-3)
    configurator.fixed('algo.gamma', 0.99)
    configurator.fixed('algo.gae_lambda', 0.97)

    configurator.fixed('env.count', 20)
    configurator.fixed('agent.count', 20)

    configurator.fixed('agent.standardize_Q', False)
    configurator.fixed('agent.standardize_adv', False)
    configurator.fixed('agent.max_grad_norm', 0.5)
    configurator.fixed('agent.entropy_coef', 0.01)
    configurator.fixed('agent.value_coef', 0.5)
    configurator.fixed('agent.fit_terminal_value', False)
    configurator.fixed('agent.terminal_value_coef', 0.1)

    configurator.fixed('train.iter', 10000)

    configurator.fixed('log.interval', 10)
    configurator.fixed('log.dir', 'logs-3')

    return configurator.make_configs()
import typing

import pytest


def test_configurator():
    # Construction invalidity check
    with pytest.raises(AssertionError):
        Configurator(search_mode='n')
    with pytest.raises(AssertionError):
        Configurator(search_mode='random', num_sample=None)

    # Create a configurator
    # Grid search
    configurator = Configurator(search_mode='grid')
    assert len(configurator.items) == 0
    with pytest.raises(AssertionError):
        configurator.fixed('seeds', [1, 2, 3])
    configurator.fixed('log.dir', 'some path')
    assert len(configurator.items) == 1
    assert isinstance(configurator.items['log.dir'], list)
    assert configurator.items['log.dir'][0] == 'some path'
    with pytest.raises(AssertionError):
        configurator.fixed('log.dir', 'second')
    with pytest.raises(AssertionError):
        configurator.grid('log.T', 'must be list, not string')
    configurator.grid('network.lr', [1e-2, 5e-3, 1e-4, 5e-4])
    configurator.grid('network.layers', [1, 2, 3])
    configurator.grid('env.id', ['CartPole-v1', 'Ant-v2'])

    configs = configurator.make_configs()
    assert len(configs) == 24
    # order-preserving check
    assert all([list(c.keys()) == ['ID', 'log.dir', 'network.lr', 'network.layers', 'env.id'] for c in configs])
    assert all([c['log.dir'] == 'some path' for c in configs])
    assert all([c['network.lr'] in [1e-2, 5e-3, 1e-4, 5e-4] for c in configs])
    assert all([c['network.layers'] in [1, 2, 3] for c in configs])
    assert all([c['env.id'] in ['CartPole-v1', 'Ant-v2'] for c in configs])

    # Grid search does not allow methods for random search
    with pytest.raises(AssertionError):
        configurator.categorical('one', [1, 2])
    with pytest.raises(AssertionError):
        configurator.uniform('two', 1, 3)
    with pytest.raises(AssertionError):
        configurator.discrete_uniform('three', 5, 10)
    with pytest.raises(AssertionError):
        configurator.log_uniform('four', 0.0001, 0.1)

    Configurator.print_config(configs[20])
    config_dataframe = Configurator.to_dataframe(configs)
    config_dataframe = Configurator.dataframe_subset(config_dataframe, 'network.lr', [0.01, 0.005])
    config_dataframe = Configurator.dataframe_groupview(config_dataframe, ['env.id', 'network.lr'])

    del configurator
    del configs
    del config_dataframe

    # Random search
    configurator = Configurator('random', num_sample=20)
    assert len(configurator.items) == 0
    with pytest.raises(AssertionError):
        configurator.fixed('seeds', [1, 2, 3])
    with pytest.raises(AssertionError):
        configurator.categorical('seeds', [1, 2])
    with pytest.raises(AssertionError):
        configurator.uniform('seeds', 1, 3)
    with pytest.raises(AssertionError):
        configurator.discrete_uniform('seeds', 5, 10)
    with pytest.raises(AssertionError):
        configurator.log_uniform('seeds', 0.0001, 0.1)
    with pytest.raises(AssertionError):
        configurator.grid('network.layers', [1, 2, 3])
    configurator.fixed('log.dir', 'some path')
    assert len(configurator.items) == 1
    assert isinstance(configurator.items['log.dir'], typing.Generator)
    assert next(configurator.items['log.dir']) == 'some path'
    with pytest.raises(AssertionError):
        configurator.fixed('log.dir', 'second')
    configurator.categorical('network.layers', [1, 2, 3])
    with pytest.raises(AssertionError):
        configurator.categorical('network.layers2', 12)  # must be list
    assert isinstance(configurator.items['network.layers'], typing.Generator)
    configurator.uniform('entropy_coef', 0.1, 2.0)
    configurator.discrete_uniform('train.N', 1, 100)
    configurator.log_uniform('network.lr', 1e-7, 1e-1)
    assert len(configurator.items) == 5

    configs = configurator.make_configs()
    assert len(configs[0]) == 1 + 5
    assert len(configs) == 20
    # order-preserving check
    l = ['ID', 'log.dir', 'network.layers', 'entropy_coef', 'train.N', 'network.lr']
    assert all([list(c.keys()) == l for c in configs])
    assert all([c['log.dir'] == 'some path' for c in configs])
    assert all([c['network.layers'] in [1, 2, 3] for c in configs])
    assert all([c['entropy_coef'] >= 0.1 and c['entropy_coef'] <= 2.0 for c in configs])
    assert all([isinstance(c['train.N'], int) for c in configs])
    assert all([c['train.N'] >= 1 and c['train.N'] <= 100 for c in configs])
    assert all([isinstance(c['network.lr'], float) for c in configs])
    assert all([c['network.lr'] >= 1e-7 and c['network.lr'] <= 1e-1 for c in configs])

    Configurator.print_config(configs[11])
    config_dataframe = Configurator.to_dataframe(configs)
    config_dataframe = Configurator.dataframe_subset(config_dataframe, 'network.layers', [1, 2])
    config_dataframe = Configurator.dataframe_groupview(config_dataframe, ['network.layers', 'log.dir'])
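# A hedged sketch (assumed usage, mirroring only the calls exercised in the test above):
# random search over hyperparameters, followed by a quick tabular inspection of the
# sampled configs before launching any runs.
configurator = Configurator('random', num_sample=20)
configurator.fixed('log.dir', 'some path')
configurator.categorical('network.layers', [1, 2, 3])  # sample from a discrete set
configurator.uniform('entropy_coef', 0.1, 2.0)         # continuous uniform sample
configurator.discrete_uniform('train.N', 1, 100)       # integer uniform sample
configurator.log_uniform('network.lr', 1e-7, 1e-1)     # log-scale sample, suited to learning rates
configs = configurator.make_configs()                  # 20 sampled configs

df = Configurator.to_dataframe(configs)                # one row per config
df = Configurator.dataframe_subset(df, 'network.layers', [1, 2])
df = Configurator.dataframe_groupview(df, ['network.layers', 'log.dir'])
print(df)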