def __init__(self):
    """Set up the agent: build the actor-critic model and the game adapter.

    Side effect: creates the on-disk save directory via create_save_dir().
    """
    config_obj = get_config()
    create_save_dir()
    state_size = config_obj["state_size"]
    self.model = build_ac_model()
    # Run one forward pass on dummy input so Keras materializes the layer
    # weights before any training/checkpoint code touches them.
    model_input = tf.convert_to_tensor(
        np.random.random((1, state_size)), dtype=tf.float32
    )
    self.model(model_input)
    self.total_steps = 0
    # Reuse the config fetched above instead of calling get_config() a
    # second time (the original re-fetched it for no reason).
    self.game_adapter = GameAdapter.create(config_obj['name'])
def build_ac_model():
    """Construct a two-headed actor-critic Keras model.

    Returns:
        A Model mapping a state vector of length ``state_size`` to the pair
        (actor_logits, critic_values): unscaled per-action preferences and a
        scalar state-value estimate, both fed from the same input layer.
    """
    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import Dense, Input

    # Fetch config once; the original called get_config() twice.
    config_obj = get_config()
    action_size = config_obj['action_size']
    state_size = config_obj['state_size']

    # BUG FIX: shape must be a tuple — (state_size) is just an int in
    # parentheses; (state_size,) is the intended 1-D input shape.
    input_layer = Input(shape=(state_size,))

    actor_dense = Dense(128, activation='relu')(input_layer)
    actor_logits = Dense(action_size, name='actor_logits')(actor_dense)

    critic_dense = Dense(128, activation='relu')(input_layer)
    critic_values = Dense(1, name='critic_values')(critic_dense)

    return Model(inputs=[input_layer], outputs=[actor_logits, critic_values])
def __init__(self):
    """Initialize the master agent's shared global model and run statistics."""
    config_obj = get_config()
    create_save_dir()
    state_size = config_obj["state_size"]
    # BUG FIX: the original referenced an undefined name `global_model` and
    # called `ac_model(global_model, model_input)` — a NameError at runtime.
    # The intent is: build the model, warm it up with one dummy forward pass
    # so its weights exist, then store it on the instance.
    global_model = build_ac_model()
    model_input = tf.convert_to_tensor(
        np.random.random((1, state_size)), dtype=tf.float32
    )
    global_model(model_input)
    self.global_model = global_model
    self.global_episode = 0
    self.global_moving_average_reward = 0
    self.best_score = 0
def train_worker(lock, result_queue, worker_idx):
    """Worker loop: run episodes on a local model until the master says stop.

    Args:
        lock: shared lock guarding the result queue and the done-check.
        result_queue: queue the worker signals after every episode.
        worker_idx: integer index used to name this worker's game adapter.

    NOTE(review): reads a free name `master_agent` — presumably a
    module-level global set by the coordinator; confirm it exists at
    module scope before this runs.
    """
    total_step = 1
    mem = Memory()
    name = "worker-{}-{}".format(get_config()['name'], worker_idx)
    local_model = get_ac_model()
    game_adapter = GameAdapter.create(name)
    done = False
    while not done:
        # (removed leftover debug print from the original)
        current_state = game_adapter.reset()
        mem.clear()
        ep_steps = train_ep(local_model, game_adapter, current_state)
        total_step += ep_steps
        # `with lock:` releases the lock even if put() raises; the original
        # acquire()/release() pair would deadlock other workers on error.
        with lock:
            result_queue.put(None)
        # Only poll the master's stop flag every WORKER_CHECK_STEPS_INTERVAL
        # steps to limit lock contention.
        if total_step > WORKER_CHECK_STEPS_INTERVAL:
            with lock:
                done = worker_done(master_agent)
def worker_done(master_agent):
    """Report whether the master agent has exceeded the episode budget."""
    episode_cap = get_config()['max_eps']
    return master_agent.global_episode > episode_cap
def is_done(self):
    """True once this agent's step count has passed the configured cap."""
    max_episodes = get_config()["max_eps"]
    return self.total_steps > max_episodes
def train_random():
    """Build and run the random-baseline agent for the configured episodes."""
    max_episodes = get_config()['max_eps']
    run_random_model(build_random_model(max_episodes))
def get_save_path(file_name):
    """Join the save dir with `file_name` after filling its '{}' placeholder.

    The placeholder is replaced with the literal text '{}-<config name>' —
    str.format does not recurse, so the inserted '{}' survives, presumably
    for a later format pass by the caller.
    """
    name_tag = '{}-' + get_config()['name']
    return os.path.join(get_save_dir(), file_name.format(name_tag))
def get_save_dir():
    """Return the per-config data directory under chase_trainer/data/."""
    return 'chase_trainer/data/' + get_config()['name']