def make_config(): return config_lib.Config( data=config_lib.OneOf( [config_lib.Config(task=1, a='hello'), config_lib.Config( task=2, a=config_lib.OneOf( [config_lib.Config(x=1, y=2), config_lib.Config(x=-1, y=1000, z=4)], x=1)), config_lib.Config(task=3, c=1234)], task=2), model=config_lib.Config(stuff=1))
def default_config(): return config_lib.Config( agent=config_lib.OneOf( [ config_lib.Config( algorithm='pg', policy_lstm_sizes=[35, 35], # Set value_lstm_sizes to None to share weights with policy. value_lstm_sizes=[35, 35], obs_embedding_size=10, grad_clip_threshold=10.0, param_init_factor=1.0, lr=5e-5, pi_loss_hparam=1.0, vf_loss_hparam=0.5, entropy_beta=1e-2, regularizer=0.0, softmax_tr=1.0, # Reciprocal temperature. optimizer='rmsprop', # 'adam', 'sgd', 'rmsprop' topk=0, # Top-k unique codes will be stored. topk_loss_hparam=0.0, # off policy loss multiplier. # Uniformly sample this many episodes from topk buffer per batch. # If topk is 0, this has no effect. topk_batch_size=1, # Exponential moving average baseline for REINFORCE. # If zero, A2C is used. # If non-zero, should be close to 1, like .99, .999, etc. ema_baseline_decay=0.99, # Whether agent can emit EOS token. If true, agent can emit EOS # token which ends the episode early (ends the sequence). # If false, agent must emit tokens until the timestep limit is # reached. e.g. True means variable length code, False means fixed # length code. # WARNING: Making this false slows things down. eos_token=False, replay_temperature=1.0, # Replay probability. 1 = always replay, 0 = always on policy. alpha=0.0, # Whether to normalize importance weights in each minibatch. iw_normalize=True), config_lib.Config( algorithm='ga', crossover_rate=0.99, mutation_rate=0.086), config_lib.Config(algorithm='rand') ], algorithm='pg', ), env=config_lib.Config( # If True, task-specific settings are not needed. task='', # 'print', 'echo', 'reverse', 'remove', ... task_cycle=[], # If non-empty, reptitions will cycle through tasks. task_kwargs='{}', # Python dict literal. task_manager_config=config_lib.Config( # Reward recieved per test case. These bonuses will be scaled # based on how many test cases there are. correct_bonus=2.0, # Bonus for code getting correct answer. code_length_bonus=1.0), # Maximum bonus for short code. correct_syntax=False, ), batch_size=64, timestep_limit=32)
def make_config(): return config_lib.Config( data=config_lib.OneOf( [config_lib.Config(task=1, a='hello'), config_lib.Config(task=2, a='world', b='stuff'), config_lib.Config(task=3, c=1234)], task=2), model=config_lib.Config(stuff=1))