Example #1
 def __init__(self,
              env_spec,
              variance=1.0,
              **kwargs):  # absorb generic param without breaking
     super(GaussianPolicy, self).__init__(env_spec)
     self.variance = variance
     log_self(self)
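The constructors in these examples share two conventions: every __init__ ends with log_self(self), and **kwargs is accepted only so that a generic parameter dict can be passed in without raising a TypeError for keys the class does not use. Below is a minimal, hypothetical sketch of that pattern; the log_self implementation shown (logging the class name and instance attributes) is an assumption for illustration, not the source's helper.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def log_self(obj):
    # assumed behavior: dump the class name and current instance attributes
    logger.info('%s: %s', obj.__class__.__name__, vars(obj))

class GaussianPolicySketch(object):
    def __init__(self, env_spec, variance=1.0, **kwargs):
        # **kwargs absorbs keys meant for other components without breaking
        self.env_spec = env_spec
        self.variance = variance
        log_self(self)

# extra keys such as exploration_anneal_episodes are simply ignored
policy = GaussianPolicySketch({'action_dim': 1}, variance=0.5,
                              exploration_anneal_episodes=20)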
Example #2
 def __init__(self, **kwargs):  # absorb generic param without breaking
     super(LinearMemory, self).__init__()
     self.exp_keys = [
         'states', 'actions', 'rewards', 'next_states', 'terminals'
     ]
     self.exp = {k: [] for k in self.exp_keys}
     log_self(self)
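LinearMemory only initializes empty per-key lists here; how transitions are stored is not shown. The following is a hedged sketch of one plausible add method; the name add_exp and its signature are assumptions for illustration, not taken from the source.

class LinearMemorySketch(object):
    def __init__(self, **kwargs):
        self.exp_keys = [
            'states', 'actions', 'rewards', 'next_states', 'terminals']
        self.exp = {k: [] for k in self.exp_keys}

    def add_exp(self, state, action, reward, next_state, terminal):
        # append one transition column-wise so all lists stay index-aligned
        values = (state, action, reward, next_state, terminal)
        for key, val in zip(self.exp_keys, values):
            self.exp[key].append(val)

memory = LinearMemorySketch()
memory.add_exp([0.1, 0.2], 1, 1.0, [0.3, 0.4], False)
assert len(memory.exp['rewards']) == 1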
Example #3
    def __init__(self,
                 env_spec,
                 train_per_n_new_exp=1,
                 gamma=0.95,
                 learning_rate=0.1,
                 epi_change_learning_rate=None,
                 batch_size=16,
                 n_epoch=5,
                 hidden_layers_shape=[4],
                 hidden_layers_activation='sigmoid',
                 output_layer_activation='linear',
                 **kwargs):  # absorb generic param without breaking
        super(DQN, self).__init__(env_spec)

        self.train_per_n_new_exp = train_per_n_new_exp
        self.gamma = gamma
        self.learning_rate = learning_rate
        self.epi_change_learning_rate = epi_change_learning_rate
        self.batch_size = batch_size
        self.n_epoch = 1
        self.final_n_epoch = n_epoch
        self.hidden_layers = hidden_layers_shape
        self.hidden_layers_activation = hidden_layers_activation
        self.output_layer_activation = output_layer_activation
        log_self(self)
        self.optimizer = None
        self.build_model()
Example #4
 def __init__(self, env_spec, **kwargs):
     super(LinearMemory, self).__init__(env_spec)
     self.exp_keys = [
         'states', 'actions', 'rewards', 'next_states', 'terminals'
     ]
     self.exp = {k: [] for k in self.exp_keys}
     log_self(self)
Example #5
 def __init__(self, max_queue_size=4, **kwargs):
     '''Construct externally, and set at Agent.compile()'''
     self.agent = None
     self.state = None
     self.exp_queue = []
     self.MAX_QUEUE_SIZE = max_queue_size
     self.never_debugged = True
     log_self(self)
Example #6
 def __init__(self,
              env_spec,
              exploration_anneal_episodes=20,
              **kwargs):  # absorb generic param without breaking
     super(LinearNoisePolicy, self).__init__(env_spec)
     self.exploration_anneal_episodes = exploration_anneal_episodes
     self.n_step = 0  # init
     log_self(self)
Example #7
 def __init__(self, **kwargs):
     '''Construct externally, and set at Agent.compile()'''
     self.agent = None
     self.keras_optimizer = None
     self.optim_param = {}
     self.update_optim_param(**kwargs)
     self.init_optimizer()
     log_self(self)
Example #8
 def __init__(self, max_queue_size=4, **kwargs):
     '''Construct externally, and set at Agent.compile()'''
     self.agent = None
     self.state = None
     self.exp_queue = []
     self.MAX_QUEUE_SIZE = max_queue_size
     self.never_debugged = True
     log_self(self)
Example #9
 def __init__(self, env_spec,
              init_e=1.0, final_e=0.1, exploration_anneal_episodes=30,
              **kwargs):  # absorb generic param without breaking
     super(EpsilonGreedyPolicy, self).__init__(env_spec)
     self.init_e = init_e
     self.final_e = final_e
     self.e = self.init_e
     self.exploration_anneal_episodes = exploration_anneal_episodes
     log_self(self)
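EpsilonGreedyPolicy stores init_e, final_e and exploration_anneal_episodes, which suggests a linear anneal of epsilon over episodes. The helper below is a hedged sketch of such a schedule, not the source's update rule; the function name and the per-episode granularity are assumptions.

def linear_anneal(init_e, final_e, exploration_anneal_episodes, episode):
    # interpolate from init_e down to final_e, then hold at final_e
    slope = (final_e - init_e) / float(exploration_anneal_episodes)
    return max(final_e, init_e + slope * episode)

print(linear_anneal(1.0, 0.1, 30, 0))   # 1.0 at the start
print(linear_anneal(1.0, 0.1, 30, 15))  # roughly 0.55 halfway
print(linear_anneal(1.0, 0.1, 30, 60))  # clamped at 0.1 afterwards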
Example #10
 def __init__(self, **kwargs):  # absorb generic param without breaking
     super(RankedMemory, self).__init__()
     # use the old self.exp as buffer, remember to clear
     self.last_exp = self.exp
     self.epi_memory = []
     self.sorted_epi_exp = self.exp
     self.n_best_epi = 10
     # then do left tail selection or early forget, I dont care
     log_self(self)
Example #11
 def __init__(self, env_spec,
              init_tau=5., final_tau=0.5, exploration_anneal_episodes=20,
              **kwargs):  # absorb generic param without breaking
     super(BoltzmannPolicy, self).__init__(env_spec)
     self.init_tau = init_tau
     self.final_tau = final_tau
     self.tau = self.init_tau
     self.exploration_anneal_episodes = exploration_anneal_episodes
     self.clip_val = 500
     log_self(self)
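BoltzmannPolicy keeps a temperature tau and a clip_val, which points at softmax-with-temperature action selection where clipping guards against overflow in the exponential. The following is a hedged sketch of that computation; the exact place where clip_val is applied is an assumption.

import numpy as np

def boltzmann_probs(q_values, tau, clip_val=500.):
    scaled = np.asarray(q_values, dtype=float) / tau
    scaled = np.clip(scaled, -clip_val, clip_val)  # guard exp() overflow
    scaled = scaled - scaled.max()                 # extra numerical stability
    exp_q = np.exp(scaled)
    return exp_q / exp_q.sum()

probs = boltzmann_probs([1.0, 2.0, 0.5], tau=5.0)
action = np.random.choice(len(probs), p=probs)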
Example #12
 def __init__(self, env_spec,
              init_tau=5., final_tau=0.5, exploration_anneal_episodes=20,
              **kwargs):  # absorb generic param without breaking
     super(BoltzmannPolicy, self).__init__(env_spec)
     self.init_tau = init_tau
     self.final_tau = final_tau
     self.tau = self.init_tau
     self.exploration_anneal_episodes = exploration_anneal_episodes
     self.clip_val = 500.
     log_self(self)
Example #13
 def __init__(self, env_spec, **kwargs):
     super(HighLowMemory, self).__init__(env_spec)
     # use the old self.exp as buffer, remember to clear
     self.last_exp = self.exp
     self.epi_memory_high = []
     self.epi_memory_low = []
     self.max_reward = -math.inf
     self.min_reward = math.inf
     # 1st  5 epis goes into bad half, recompute every 5 epis
     self.threshold = math.inf
     self.threshold_history = []
     self.epi_num = 0
     self.prob_high = 0.66
     self.num_epis_to_sample = 3
     self.max_epis_in_mem = 15
     self.recompute_freq = 10
     log_self(self)
Example #14
 def __init__(self, **kwargs):  # absorb generic param without breaking
     super(HighLowMemory, self).__init__()
     # use the old self.exp as buffer, remember to clear
     self.last_exp = self.exp
     self.epi_memory_high = []
     self.epi_memory_low = []
     self.max_reward = -math.inf
     self.min_reward = math.inf
     # 1st  5 epis goes into bad half, recompute every 5 epis
     self.threshold = math.inf
     self.threshold_history = []
     self.epi_num = 0
     self.prob_high = 0.66
     self.num_epis_to_sample = 3
     self.max_epis_in_mem = 15
     self.recompute_freq = 10
     log_self(self)
Example #15
    def __init__(self,
                 env_spec,
                 train_per_n_new_exp=1,
                 gamma=0.95,
                 lr=0.1,
                 epi_change_lr=None,
                 batch_size=16,
                 n_epoch=5,
                 hidden_layers=None,
                 hidden_layers_activation='sigmoid',
                 output_layer_activation='linear',
                 auto_architecture=False,
                 num_hidden_layers=3,
                 first_hidden_layer_size=256,
                 num_initial_channels=16,
                 **kwargs):  # absorb generic param without breaking
        # import only when needed to contain side-effects
        from keras.layers.core import Dense
        from keras.models import Sequential, load_model
        self.Dense = Dense
        self.Sequential = Sequential
        self.load_model = load_model

        super(DQN, self).__init__(env_spec)

        self.train_per_n_new_exp = train_per_n_new_exp
        self.gamma = gamma
        self.lr = lr
        self.epi_change_lr = epi_change_lr
        self.batch_size = batch_size
        self.n_epoch = 1
        self.final_n_epoch = n_epoch
        self.hidden_layers = hidden_layers or [4]
        self.hidden_layers_activation = hidden_layers_activation
        self.output_layer_activation = output_layer_activation
        self.clip_val = 10000
        self.auto_architecture = auto_architecture
        self.num_hidden_layers = num_hidden_layers
        self.first_hidden_layer_size = first_hidden_layer_size
        self.num_initial_channels = num_initial_channels
        log_self(self)
        self.build_model()
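The DQN constructor above defers network construction to build_model(). As a rough illustration of what the stored hyperparameters could translate into, here is a hedged, standalone sketch of a Keras Sequential MLP; the function name, the mse loss and the sgd optimizer are placeholder assumptions rather than the source's build_model.

from keras.models import Sequential
from keras.layers import Dense

def build_mlp(state_dim, action_dim, hidden_layers=(4,),
              hidden_layers_activation='sigmoid',
              output_layer_activation='linear'):
    model = Sequential()
    model.add(Dense(hidden_layers[0], input_dim=state_dim,
                    activation=hidden_layers_activation))
    for size in hidden_layers[1:]:
        model.add(Dense(size, activation=hidden_layers_activation))
    model.add(Dense(action_dim, activation=output_layer_activation))
    model.compile(loss='mse', optimizer='sgd')
    return model

model = build_mlp(state_dim=4, action_dim=2, hidden_layers=[16, 16])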
Example #16
    def __init__(self, env_spec,
                 train_per_n_new_exp=1,
                 gamma=0.95, lr=0.1,
                 epi_change_lr=None,
                 batch_size=16, n_epoch=5, hidden_layers=None,
                 hidden_layers_activation='sigmoid',
                 output_layer_activation='linear',
                 auto_architecture=False,
                 num_hidden_layers=3,
                 first_hidden_layer_size=256,
                 num_initial_channels=16,
                 **kwargs):  # absorb generic param without breaking
        # import only when needed to contain side-effects
        from keras.layers.core import Dense
        from keras.models import Sequential, load_model
        self.Dense = Dense
        self.Sequential = Sequential
        self.load_model = load_model

        super(DQN, self).__init__(env_spec)

        self.train_per_n_new_exp = train_per_n_new_exp
        self.gamma = gamma
        self.lr = lr
        self.epi_change_lr = epi_change_lr
        self.batch_size = batch_size
        self.n_epoch = 1
        self.final_n_epoch = n_epoch
        self.hidden_layers = hidden_layers or [4]
        self.hidden_layers_activation = hidden_layers_activation
        self.output_layer_activation = output_layer_activation
        self.clip_val = 10000
        self.auto_architecture = auto_architecture
        self.num_hidden_layers = num_hidden_layers
        self.first_hidden_layer_size = first_hidden_layer_size
        self.num_initial_channels = num_initial_channels
        log_self(self)
        self.build_model()
Example #17
 def __init__(self, **kwargs):  # absorb generic param without breaking
     super(HighLowMemoryWithForgetting, self).__init__()
     self.max_epis_in_mem = 250
     log_self(self)
Example #18
 def __init__(self, env_spec,
              **kwargs):  # absorb generic param without breaking
     super(SoftmaxPolicy, self).__init__(env_spec)
     self.clip_val = 500.
     log_self(self)
Example #19
 def __init__(self, env_spec,
              **kwargs):  # absorb generic param without breaking
     super(ArgmaxPolicy, self).__init__(env_spec)
     log_self(self)
Example #20
 def __init__(self, env_spec,
              **kwargs):  # absorb generic param without breaking
     super(BoundedPolicy, self).__init__(env_spec)
     self.action_bound = env_spec['action_bound_high']
     assert env_spec['action_bound_high'] == -env_spec['action_bound_low']
     log_self(self)
Example #21
 def __init__(self, env_spec, **kwargs):
     super(HighLowMemoryWithForgetting, self).__init__(env_spec)
     self.max_epis_in_mem = 250
     log_self(self)
Example #22
 def __init__(self, env_spec, exploration_anneal_episodes=20,
              **kwargs):  # absorb generic param without breaking
     super(LinearNoisePolicy, self).__init__(env_spec)
     self.exploration_anneal_episodes = exploration_anneal_episodes
     self.n_step = 0  # init
     log_self(self)
Example #23
 def __init__(self, env_spec,
              **kwargs):  # absorb generic param without breaking
     super(NoNoisePolicy, self).__init__(env_spec)
     log_self(self)
Example #24
 def __init__(self, env_spec, **kwargs):
     super(HighLowMemoryWithForgetting, self).__init__(env_spec)
     self.max_epis_in_mem = 250
     log_self(self)
Example #25
 def __init__(self, env_spec, **kwargs):
     super(LinearMemory, self).__init__(env_spec)
     self.exp_keys = [
         'states', 'actions', 'rewards', 'next_states', 'terminals']
     self.exp = {k: [] for k in self.exp_keys}
     log_self(self)
 def change_optim_param(self, **new_param):
     self.update_optim_param(**new_param)
     self.init_optimizer()
     logger.info("Optimizer param changed")
     log_self(self)