def simulate(self, name=None, resource_limit=None):
    """Build the models, run the event queue up to self.end, and write the log."""
    # Build the models; resource_limit is forwarded to the builder.
    model = ModelBuilder()
    self.models, self.rm, self.dm = model.build_all(resource_limit=resource_limit)
    self._initialize_queue()
    simulation = time.time()
    # Discrete-event loop: process queued items in start-time order until one starts past self.end.
    while not self.execution_queue.is_empty():
        current = self.execution_queue.pop()
        if current.start > self.end:
            break
        self._simulate(current)
    # Report wall-clock duration and flush the accumulated event log.
    print('Simulation time: ' + str(time.time() - simulation))
    LogWriter.write(self.log_queue, name=name)
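
# --- Illustration (not part of the original source) ---------------------------
# simulate() above follows a standard discrete-event pattern: pop the pending
# event with the earliest start time, stop once an event starts beyond the
# simulated horizon, then report timing. A minimal self-contained sketch of the
# same pattern using heapq; the event tuples, run_event_loop name, and horizon
# value are assumptions for illustration, not the project's execution_queue or
# trace types.
import heapq
import time

def run_event_loop(events, end):
    # events: iterable of (start_time, label) pairs; end: simulation horizon.
    queue = list(events)
    heapq.heapify(queue)                     # earliest start time first
    started = time.time()
    handled = []
    while queue:
        start, label = heapq.heappop(queue)
        if start > end:                      # same cutoff as simulate()
            break
        handled.append(label)                # stand-in for self._simulate(current)
    print('Simulation time: ' + str(time.time() - started))
    return handled

if __name__ == "__main__":
    # Only the first two events start before the horizon of 5.0.
    print(run_event_loop([(0.0, "start"), (1.5, "task"), (9.9, "late")], end = 5.0))
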
def learn_episode(self, verbose = True):
    """Run units of episodes, store transitions, and update the agent from the replay buffer."""
    episode_num_per_unit = 1
    learn_num_per_unit = 1
    dst_base_dir = Path(self.__cfg.dst_dir_path)
    log_writer = None
    all_log_writer = LogWriter(dst_base_dir.joinpath("learn.csv"))
    for trial in range(self.__cfg.episode_unit_num):
        # Periodically dump the shared network parameters.
        if (trial + 1) % 64 == 0:
            weight_path = dst_base_dir.joinpath("weight", "param{}.bin".format(trial))
            if not weight_path.parent.exists():
                weight_path.parent.mkdir(parents = True)
            self.__agent.shared_nn.save(weight_path)
        explore = True
        for episode_cnt in range(episode_num_per_unit):
            log_path = dst_base_dir.joinpath("play", "{}_{}.csv".format(trial, episode_cnt))
            if log_writer is not None:
                # Release the previous episode's writer before opening a new one.
                del log_writer
            log_writer = LogWriter(log_path)
            # Show a progress bar only in verbose mode.
            if verbose:
                loop_fun = tqdm
            else:
                loop_fun = lambda x : x
            # Roll the environment forward; collect new transitions while exploring.
            for step, (info, new_es) in loop_fun(enumerate(self.__evolve(explore))):
                if explore:
                    for new_e in new_es:
                        self.__experiences.append(new_e)
                out_infos = {}
                out_infos["episode"] = episode_cnt
                out_infos["step"] = step
                out_infos["t"] = step * self.__env.dt
                out_infos.update(info)
                out_infos["explore"] = explore
                log_writer.write(out_infos)

        # after episode
        # Skip learning until the replay buffer holds at least one batch.
        if len(self.__experiences) < self.__batch_size:
            continue

        # after episode unit
        learn_cnt_per_unit = 0
        # Preallocate one batch of (state, action, reward, next state, next done) rows.
        state_shape = self.__env.state_shape
        s = jnp.zeros(state_shape, dtype = jnp.float32)
        a = jnp.zeros((state_shape[0], EnAction.num), dtype = jnp.float32)
        r = jnp.zeros((state_shape[0], 1), dtype = jnp.float32)
        n_s = jnp.zeros(state_shape, dtype = jnp.float32)
        n_fin = jnp.zeros((state_shape[0], 1), dtype = jnp.float32)
        gamma = self.__env.gamma
        val = 0
        total_loss_q = []
        total_loss_pi = []
        while 1:
            # Sample the replay buffer uniformly (with replacement).
            self.__rng, rng = jrandom.split(self.__rng)
            e_i = int(jrandom.randint(rng, (1,), 0, len(self.__experiences)))
            e = self.__experiences[e_i]
            if not e.finished:
                s = s.at[val,:].set(e.observation[0])
                a = a.at[val,:].set(e.action)
                r = r.at[val].set(float(e.reward))
                n_s = n_s.at[val,:].set(e.next_state[0])
                n_fin = n_fin.at[val,:].set(float(e.next_finished))
                val += 1
                if val >= state_shape[0]:
                    # Batch is full: run one update on the shared network and log diagnostics.
                    q_learn_cnt, p_learn_cnt, temperature, loss_val_qs, loss_val_pi, loss_balances = self.__agent.shared_nn.update(gamma, s, a, r, n_s, n_fin)
                    all_info = {}
                    all_info["trial"] = int(trial)
                    all_info["episode_num_per_unit"] = int(episode_num_per_unit)
                    all_info["episode"] = int(episode_cnt)
                    all_info["q_learn_cnt"] = int(q_learn_cnt)
                    #all_info["p_learn_cnt"] = int(p_learn_cnt)
                    all_info["temperature"] = float(temperature)
                    for _i, loss_val_q in enumerate(loss_val_qs):
                        all_info["loss_val_q{}".format(_i)] = float(loss_val_q)
                    all_info["loss_val_pi"] = float(loss_val_pi)
                    #for _i, loss_balance in enumerate(loss_balances):
                    #    all_info["loss_balance{}".format(_i)] = float(loss_balance)
                    all_log_writer.write(all_info)
                    if verbose:
                        for value in all_info.values():
                            if isinstance(value, float):
                                print("{:.3f}".format(value), end = ",")
                            else:
                                print(value, end = ",")
                        print()
                    total_loss_q.append(loss_val_q)  # last critic's loss from the loop above
                    total_loss_pi.append(loss_val_pi)
                    val = 0
                    learn_cnt_per_unit += 1
                    # Stop after the scheduled number of updates for this unit.
                    if (learn_cnt_per_unit >= min(learn_num_per_unit, len(self.__experiences) // self.__batch_size)):
                        break
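
# --- Illustration (not part of the original source) ---------------------------
# learn_episode() assembles each training batch by sampling the replay buffer
# uniformly with jax.random and writing rows into preallocated arrays through
# the functional .at[...].set(...) updates. The sketch below shows that sampling
# pattern in isolation; the Transition tuple, sample_batch name, buffer contents
# and batch size are made up for illustration and do not come from this project.
import collections

import jax.numpy as jnp
import jax.random as jrandom

Transition = collections.namedtuple("Transition", ["s", "a", "r", "n_s", "done"])

def sample_batch(rng, buffer, batch_size, state_dim, action_dim):
    # Preallocate one batch of (state, action, reward, next state, done) rows.
    s = jnp.zeros((batch_size, state_dim), dtype = jnp.float32)
    a = jnp.zeros((batch_size, action_dim), dtype = jnp.float32)
    r = jnp.zeros((batch_size, 1), dtype = jnp.float32)
    n_s = jnp.zeros((batch_size, state_dim), dtype = jnp.float32)
    n_fin = jnp.zeros((batch_size, 1), dtype = jnp.float32)
    for row in range(batch_size):
        # Uniform sampling with replacement, as in the while loop above.
        rng, key = jrandom.split(rng)
        idx = int(jrandom.randint(key, (1,), 0, len(buffer)))
        e = buffer[idx]
        s = s.at[row, :].set(e.s)
        a = a.at[row, :].set(e.a)
        r = r.at[row].set(float(e.r))
        n_s = n_s.at[row, :].set(e.n_s)
        n_fin = n_fin.at[row].set(float(e.done))
    return rng, (s, a, r, n_s, n_fin)

if __name__ == "__main__":
    # Tiny demo buffer of 8 transitions with 4-dimensional states and 2-dimensional actions.
    demo_buffer = [Transition(s = [float(i)] * 4, a = [0.0, 1.0], r = 1.0,
                              n_s = [float(i + 1)] * 4, done = 0.0) for i in range(8)]
    _, batch = sample_batch(jrandom.PRNGKey(0), demo_buffer, batch_size = 4, state_dim = 4, action_dim = 2)
    print([x.shape for x in batch])
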