def _get_agent(self, action_spec):
  """Construct an MLPAgent with a fixed test configuration.

  Args:
    action_spec: spec describing the agent's action space, forwarded
      verbatim to the MLPAgent constructor.

  Returns:
    An MLPAgent built under the 'mlp' variable scope (reused if present).
  """
  cfg = ConfigDict()

  # Model: two-layer MLP.
  cfg.model = ConfigDict()
  cfg.model.class_path = "liaison.agents.models.mlp"
  cfg.model.hidden_layer_sizes = [64, 64]

  # Learning-rate decay schedule.
  cfg.lr_init = 1e-3
  cfg.lr_min = 1e-4
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1
  cfg.lr_dec_approach = 'linear'

  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1
  cfg.ent_dec_min = 0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  cfg.ent_dec_val = .1
  cfg.ent_dec_approach = 'linear'

  # Optimization / IMPALA-style loss settings.
  cfg.grad_clip = 1.0
  cfg.discount_factor = 0.99
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0
  cfg.loss = ConfigDict()
  cfg.loss.vf_loss_coeff = 1.0

  with tf.variable_scope('mlp', reuse=tf.AUTO_REUSE):
    return MLPAgent(action_spec=action_spec, name='test', seed=42, **cfg)
def get_config():
  """Return the default agent ConfigDict (schedules + optimizer settings)."""
  cfg = ConfigDict()
  cfg.model = ConfigDict()  # model sub-config left empty; filled in elsewhere

  # Learning-rate decay schedule.
  cfg.lr_init = 1e-4
  cfg.lr_min = 1e-7
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1  # unused when the decay approach is 'linear'
  cfg.lr_dec_approach = 'linear'

  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1e-2
  cfg.ent_dec_min = 0.0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  cfg.ent_dec_val = .1  # dec_val not used for the linear scheme
  cfg.ent_dec_approach = 'linear'

  # Set to a value <= 0 to disable gradient clipping.
  cfg.grad_clip = 1.0
  cfg.discount_factor = 1.0

  # Optimizer choice: 'Adam' or 'RMSProp'.
  cfg.optimizer = ConfigDict()
  cfg.optimizer.name = 'Adam'
  # The fields below are RMSProp hyperparameters (ignored by Adam except
  # epsilon, depending on how the optimizer is constructed downstream).
  cfg.optimizer.decay = .9
  cfg.optimizer.momentum = 0.0
  cfg.optimizer.epsilon = 1e-7

  return cfg
def _get_agent_instance(self):
  """Build a test MLPAgent over a (10, 20) integer action spec.

  Returns:
    An MLPAgent constructed under the 'gcn_rins' variable scope
    (reused if it already exists).
  """
  # Actions are int32 indices in [0, N_NODES - 1] with shape (10, 20).
  action_spec = BoundedArraySpec((10, 20),
                                 np.int32,
                                 0,
                                 N_NODES - 1,
                                 name='test_spec')

  cfg = ConfigDict()
  cfg.model = self._get_model_config()

  # Learning-rate decay schedule.
  cfg.lr_init = 1e-3
  cfg.lr_min = 1e-4
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1
  cfg.lr_dec_approach = 'linear'

  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1
  cfg.ent_dec_min = 0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  cfg.ent_dec_val = .1
  cfg.ent_dec_approach = 'linear'

  # Optimization / IMPALA-style loss settings.
  cfg.grad_clip = 1.0
  cfg.discount_factor = 0.99
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0
  cfg.loss = ConfigDict()
  cfg.loss.vf_loss_coeff = 1.0

  # NOTE(review): the scope is named 'gcn_rins' yet the agent built is
  # MLPAgent — possibly a copy-paste leftover; confirm intent.
  with tf.variable_scope('gcn_rins', reuse=tf.AUTO_REUSE):
    return MLPAgent(action_spec=action_spec, name='test', seed=42, **cfg)
def get_agent_config():
  """Return the full agent ConfigDict for the multi-action GCN agent.

  Covers learning-rate/entropy schedules, optimizer settings, the agent
  class to load, the transformer model and its bipartite-GCN sub-model
  kwargs, IMPALA clipping thresholds, and loss coefficients.
  """
  cfg = ConfigDict()

  # Learning-rate decay schedule.
  cfg.lr_init = 1e-4
  cfg.lr_min = 1e-7
  cfg.lr_start_dec_step = 1000
  cfg.lr_dec_steps = 1000
  cfg.lr_dec_val = .1  # unused when the decay approach is 'linear'
  cfg.lr_dec_approach = 'linear'

  # Entropy-coefficient decay schedule.
  cfg.ent_dec_init = 1e-2
  cfg.ent_dec_min = 0.0
  cfg.ent_dec_steps = 1000
  cfg.ent_start_dec_step = 1000
  cfg.ent_dec_val = .1  # dec_val not used for the linear scheme
  cfg.ent_dec_approach = 'linear'

  # Set to a value <= 0 to disable gradient clipping.
  cfg.grad_clip = 1.0
  cfg.discount_factor = 1.0

  # Optimizer choice: 'Adam' or 'RMSProp'; decay/momentum/epsilon are
  # RMSProp hyperparameters.
  cfg.optimizer = ConfigDict()
  cfg.optimizer.name = 'Adam'
  cfg.optimizer.decay = .9
  cfg.optimizer.momentum = 0.0
  cfg.optimizer.epsilon = 1e-7

  # Required fields: which agent class to instantiate.
  cfg.class_path = "liaison.agents.gcn_multi_actions"
  cfg.class_name = "Agent"

  # Auto-regressive transformer model.
  cfg.model = ConfigDict()
  cfg.model.class_path = 'liaison.agents.models.transformer_auto_regressive'
  cfg.model.num_blocks = 4
  cfg.model.d_ff = 32
  cfg.model.num_heads = 4
  cfg.model.d_model = 64
  cfg.model.dropout_rate = 0.
  cfg.model.use_mlp_value_func = False

  # NOTE: this model_kwargs section is duplicated in gcn_rins.py —
  # propagate any changes made here to that file as well.
  mk = ConfigDict()
  mk.class_path = "liaison.agents.models.bipartite_gcn_rins"
  mk.n_prop_layers = 4
  mk.edge_embed_dim = 16
  mk.node_embed_dim = 32
  mk.global_embed_dim = 32
  mk.node_hidden_layer_sizes = [16, 16]
  mk.edge_hidden_layer_sizes = [16, 16]
  mk.policy_torso_hidden_layer_sizes = [16, 16]
  mk.value_torso_hidden_layer_sizes = [16, 16]
  mk.policy_summarize_hidden_layer_sizes = [16]
  mk.value_summarize_hidden_layer_sizes = [16]
  mk.supervised_prediction_torso_hidden_layer_sizes = [16, 16]
  mk.sum_aggregation = False
  mk.use_layer_norm = True
  mk.apply_gradient_to_graphnet_every = 1
  mk.memory_hack = False
  cfg.model.model_kwargs = mk

  # IMPALA v-trace clipping thresholds.
  cfg.clip_rho_threshold = 1.0
  cfg.clip_pg_rho_threshold = 1.0

  # Loss coefficients, including the auxiliary-loss (al) coefficient
  # schedule (decay effectively disabled via a huge start step).
  cfg.loss = ConfigDict()
  cfg.loss.vf_loss_coeff = 1.0
  cfg.loss.al_coeff = ConfigDict()
  cfg.loss.al_coeff.init_val = 0.
  cfg.loss.al_coeff.min_val = 0.
  cfg.loss.al_coeff.start_decay_step = int(1e10)
  cfg.loss.al_coeff.decay_steps = 5000
  cfg.loss.al_coeff.dec_val = .1  # dec_val not used for linear scheme
  cfg.loss.al_coeff.dec_approach = 'linear'

  # Applicable for agent 'liaison.agents.gcn_large_batch'.
  cfg.apply_grads_every = 1
  cfg.log_features_every = 0
  cfg.freeze_graphnet_weights_step = 50 + 10

  return cfg