def __init__(self, env_spec, variance=1.0, **kwargs):
    '''Gaussian exploration policy: store the sampling variance.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(GaussianPolicy, self).__init__(env_spec)
    self.variance = variance
    log_self(self)
def __init__(self, **kwargs):
    '''Linear replay memory: one list per experience field.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(LinearMemory, self).__init__()
    self.exp_keys = [
        'states', 'actions', 'rewards', 'next_states', 'terminals']
    # one independent empty list per key
    self.exp = {}
    for key in self.exp_keys:
        self.exp[key] = []
    log_self(self)
def __init__(self, env_spec, train_per_n_new_exp=1,
             gamma=0.95, learning_rate=0.1,
             epi_change_learning_rate=None,
             batch_size=16, n_epoch=5,
             hidden_layers_shape=None,
             hidden_layers_activation='sigmoid',
             output_layer_activation='linear',
             **kwargs):
    '''DQN agent setup: hyperparameters, network shape, then model build.

    **kwargs absorbs generic params without breaking the constructor.
    Fix: hidden_layers_shape previously defaulted to the mutable [4],
    shared across every instance constructed with the default — a caller
    mutating self.hidden_layers would leak into later instances. A None
    sentinel now yields a fresh [4] per instance (same effective default).
    '''
    super(DQN, self).__init__(env_spec)
    self.train_per_n_new_exp = train_per_n_new_exp
    self.gamma = gamma
    self.learning_rate = learning_rate
    self.epi_change_learning_rate = epi_change_learning_rate
    self.batch_size = batch_size
    # training starts with 1 epoch; final_n_epoch keeps the target count
    self.n_epoch = 1
    self.final_n_epoch = n_epoch
    # fresh list per instance instead of a shared mutable default
    self.hidden_layers = [4] if hidden_layers_shape is None else hidden_layers_shape
    self.hidden_layers_activation = hidden_layers_activation
    self.output_layer_activation = output_layer_activation
    log_self(self)
    self.optimizer = None
    self.build_model()
def __init__(self, env_spec, **kwargs):
    '''Linear replay memory bound to an env spec; one list per field.'''
    super(LinearMemory, self).__init__(env_spec)
    self.exp_keys = [
        'states', 'actions', 'rewards', 'next_states', 'terminals']
    # independent empty buffer per experience key
    self.exp = {key: list() for key in self.exp_keys}
    log_self(self)
def __init__(self, max_queue_size=4, **kwargs):
    '''Construct externally, and set at Agent.compile().

    Holds a bounded queue of experiences; agent/state are wired later.
    '''
    self.agent = None
    self.state = None
    self.exp_queue = []
    self.MAX_QUEUE_SIZE = max_queue_size
    # flips to False after the first debug pass
    self.never_debugged = True
    log_self(self)
def __init__(self, env_spec, exploration_anneal_episodes=20, **kwargs):
    '''Linearly-annealed noise policy.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(LinearNoisePolicy, self).__init__(env_spec)
    self.exploration_anneal_episodes = exploration_anneal_episodes
    self.n_step = 0  # step counter, starts from zero
    log_self(self)
def __init__(self, **kwargs):
    '''Construct externally, and set at Agent.compile().

    Collects optimizer params from kwargs, then builds the keras optimizer.
    '''
    self.agent = None
    self.keras_optimizer = None
    self.optim_param = {}
    # fold kwargs into optim_param, then materialize the optimizer
    self.update_optim_param(**kwargs)
    self.init_optimizer()
    log_self(self)
def __init__(self, env_spec, init_e=1.0, final_e=0.1,
             exploration_anneal_episodes=30, **kwargs):
    '''Epsilon-greedy policy: e anneals from init_e toward final_e.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(EpsilonGreedyPolicy, self).__init__(env_spec)
    self.init_e, self.final_e = init_e, final_e
    self.e = init_e  # current epsilon begins at the initial value
    self.exploration_anneal_episodes = exploration_anneal_episodes
    log_self(self)
def __init__(self, **kwargs):
    '''Ranked memory: keeps episodes ranked, retaining the best ones.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(RankedMemory, self).__init__()
    # the inherited self.exp doubles as a per-episode buffer; remember to clear
    self.last_exp = self.exp
    self.epi_memory = []
    self.sorted_epi_exp = self.exp
    # keep the top-N episodes; tail selection / early forgetting happens elsewhere
    self.n_best_epi = 10
    log_self(self)
def __init__(self, env_spec, init_tau=5., final_tau=0.5,
             exploration_anneal_episodes=20, **kwargs):
    '''Boltzmann policy: temperature tau anneals from init_tau to final_tau.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(BoltzmannPolicy, self).__init__(env_spec)
    self.init_tau, self.final_tau = init_tau, final_tau
    self.tau = init_tau  # current temperature starts at the initial value
    self.exploration_anneal_episodes = exploration_anneal_episodes
    self.clip_val = 500  # clamp bound, presumably guards exp() overflow — confirm at use site
    log_self(self)
def __init__(self, env_spec, init_tau=5., final_tau=0.5,
             exploration_anneal_episodes=20, **kwargs):
    '''Boltzmann policy: temperature tau anneals from init_tau to final_tau.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(BoltzmannPolicy, self).__init__(env_spec)
    self.init_tau, self.final_tau = init_tau, final_tau
    self.tau = init_tau  # current temperature starts at the initial value
    self.exploration_anneal_episodes = exploration_anneal_episodes
    self.clip_val = 500.  # clamp bound, presumably guards exp() overflow — confirm at use site
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''High/low memory: episodes split by reward relative to a threshold.'''
    super(HighLowMemory, self).__init__(env_spec)
    # the inherited self.exp doubles as a per-episode buffer; remember to clear
    self.last_exp = self.exp
    self.epi_memory_high, self.epi_memory_low = [], []
    self.max_reward, self.min_reward = -math.inf, math.inf
    # threshold starts at +inf so early episodes land in the low half;
    # NOTE(review): recompute_freq is 10 — recomputation cadence presumably
    # handled elsewhere, confirm against the update method
    self.threshold = math.inf
    self.threshold_history = []
    self.epi_num = 0
    self.prob_high = 0.66          # sampling bias toward high episodes
    self.num_epis_to_sample = 3
    self.max_epis_in_mem = 15
    self.recompute_freq = 10
    log_self(self)
def __init__(self, **kwargs):
    '''High/low memory: episodes split by reward relative to a threshold.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(HighLowMemory, self).__init__()
    # the inherited self.exp doubles as a per-episode buffer; remember to clear
    self.last_exp = self.exp
    self.epi_memory_high, self.epi_memory_low = [], []
    self.max_reward, self.min_reward = -math.inf, math.inf
    # threshold starts at +inf so early episodes land in the low half;
    # NOTE(review): recompute_freq is 10 — recomputation cadence presumably
    # handled elsewhere, confirm against the update method
    self.threshold = math.inf
    self.threshold_history = []
    self.epi_num = 0
    self.prob_high = 0.66          # sampling bias toward high episodes
    self.num_epis_to_sample = 3
    self.max_epis_in_mem = 15
    self.recompute_freq = 10
    log_self(self)
def __init__(self, env_spec, train_per_n_new_exp=1,
             gamma=0.95, lr=0.1, epi_change_lr=None,
             batch_size=16, n_epoch=5, hidden_layers=None,
             hidden_layers_activation='sigmoid',
             output_layer_activation='linear',
             auto_architecture=False, num_hidden_layers=3,
             first_hidden_layer_size=256, num_initial_channels=16,
             **kwargs):
    '''DQN agent setup: hyperparameters, network shape, then model build.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    # import only when needed to contain side-effects
    from keras.layers.core import Dense
    from keras.models import Sequential, load_model
    self.Dense = Dense
    self.Sequential = Sequential
    self.load_model = load_model

    super(DQN, self).__init__(env_spec)
    self.train_per_n_new_exp = train_per_n_new_exp
    self.gamma = gamma
    self.lr = lr
    self.epi_change_lr = epi_change_lr
    self.batch_size = batch_size
    # training starts with 1 epoch; final_n_epoch keeps the target count
    self.n_epoch = 1
    self.final_n_epoch = n_epoch
    # NOTE(review): `or` also coerces an explicit empty list to [4]
    self.hidden_layers = hidden_layers or [4]
    self.hidden_layers_activation = hidden_layers_activation
    self.output_layer_activation = output_layer_activation
    self.clip_val = 10000
    self.auto_architecture = auto_architecture
    self.num_hidden_layers = num_hidden_layers
    self.first_hidden_layer_size = first_hidden_layer_size
    self.num_initial_channels = num_initial_channels
    log_self(self)
    self.build_model()
def __init__(self, **kwargs):
    '''High/low memory variant with a much larger retention cap.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(HighLowMemoryWithForgetting, self).__init__()
    self.max_epis_in_mem = 250  # overrides the parent's cap
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''Softmax policy over Q-values.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(SoftmaxPolicy, self).__init__(env_spec)
    self.clip_val = 500.  # clamp bound, presumably guards exp() overflow — confirm at use site
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''Purely greedy (argmax) policy; no exploration state to set up.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(ArgmaxPolicy, self).__init__(env_spec)
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''Policy whose actions are clipped to a symmetric bound.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(BoundedPolicy, self).__init__(env_spec)
    self.action_bound = env_spec['action_bound_high']
    # requires a symmetric action range; NOTE(review): assert is stripped
    # under python -O, so this check vanishes in optimized runs
    assert env_spec['action_bound_high'] == -env_spec['action_bound_low']
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''High/low memory variant with a much larger retention cap.'''
    super(HighLowMemoryWithForgetting, self).__init__(env_spec)
    self.max_epis_in_mem = 250  # overrides the parent's cap
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''Deterministic policy: no exploration noise, nothing extra to init.

    **kwargs absorbs generic params without breaking the constructor.
    '''
    super(NoNoisePolicy, self).__init__(env_spec)
    log_self(self)
def __init__(self, env_spec, **kwargs):
    '''Linear replay memory bound to an env spec; one list per field.'''
    super(LinearMemory, self).__init__(env_spec)
    self.exp_keys = [
        'states', 'actions', 'rewards', 'next_states', 'terminals']
    # independent empty buffer per experience key
    self.exp = {}
    for key in self.exp_keys:
        self.exp[key] = []
    log_self(self)
def change_optim_param(self, **new_param):
    '''Merge new optimizer params and rebuild the keras optimizer.'''
    self.update_optim_param(**new_param)
    self.init_optimizer()  # rebuild so the new params take effect
    logger.info("Optimizer param changed")
    log_self(self)