Example #1
    def simulate(self, name=None, resource_limit=None):
        # Assumes `import time` plus the project's ModelBuilder and LogWriter.
        # Build the models and supporting managers for this run.
        model = ModelBuilder()
        self.models, self.rm, self.dm = model.build_all(
            resource_limit=resource_limit)

        self._initialize_queue()

        # Pop queued items in order until the simulation horizon is reached.
        sim_start = time.time()
        while not self.execution_queue.is_empty():
            current = self.execution_queue.pop()
            if current.start > self.end:
                break
            self._simulate(current)
        print('Simulation time: {}'.format(time.time() - sim_start))

        # Flush the accumulated log entries in a single call.
        LogWriter.write(self.log_queue, name=name)
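
Example #1 treats LogWriter.write as a class-level call that flushes a whole queue at once. The project's own implementation is not shown on this page; the following is only a minimal sketch of a compatible interface, assuming log_queue is a standard queue.Queue of dict rows and that name selects the output CSV file name (both assumptions, since the original class is not shown):

import csv

class LogWriter:
    # Sketch only; the real project class may differ.

    @classmethod
    def write(cls, log_queue, name=None):
        # Drain the queue into a list of dict rows.
        rows = []
        while not log_queue.empty():
            rows.append(log_queue.get())
        if not rows:
            return
        path = "{}.csv".format(name or "simulation_log")
        with open(path, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
            writer.writeheader()
            writer.writerows(rows)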
Example #2
File: env.py Project: rp523/rl
    def learn_episode(self, verbose=True):
        # Assumes the usual aliases: jnp = jax.numpy, jrandom = jax.random,
        # plus pathlib.Path, tqdm, and the project's LogWriter.
        episode_num_per_unit = 1
        learn_num_per_unit = 1

        dst_base_dir = Path(self.__cfg.dst_dir_path)
        log_writer = None
        # One run-level writer; per-episode writers are opened below.
        all_log_writer = LogWriter(dst_base_dir.joinpath("learn.csv"))
        for trial in range(self.__cfg.episode_unit_num):
            # Checkpoint the shared network weights every 64 trials.
            if (trial + 1) % 64 == 0:
                weight_path = dst_base_dir.joinpath("weight", "param{}.bin".format(trial))
                if not weight_path.parent.exists():
                    weight_path.parent.mkdir(parents=True)
                self.__agent.shared_nn.save(weight_path)
            explore = True
            for episode_cnt in range(episode_num_per_unit):
                log_path = dst_base_dir.joinpath("play", "{}_{}.csv".format(trial, episode_cnt))
                # Release the previous episode's writer before opening a new one.
                if log_writer is not None:
                    del log_writer
                log_writer = LogWriter(log_path)

                # Wrap the step loop in a tqdm progress bar only when verbose.
                loop_fun = tqdm if verbose else (lambda x: x)

                for step, (info, new_es) in loop_fun(enumerate(self.__evolve(explore))):
                    if explore:
                        # Store newly collected transitions in the experience buffer.
                        for new_e in new_es:
                            self.__experiences.append(new_e)

                    # Assemble one CSV row for this step.
                    out_infos = {}
                    out_infos["episode"] = episode_cnt
                    out_infos["step"] = step
                    out_infos["t"] = step * self.__env.dt
                    out_infos.update(info)
                    out_infos["explore"] = explore
                    log_writer.write(out_infos)
                    
                # after episode

            if len(self.__experiences) < self.__batch_size:
                continue
            # after episode unit: train on batches sampled from the buffer
            learn_cnt_per_unit = 0
            state_shape = self.__env.state_shape
            # Preallocated batch buffers; state_shape[0] is the batch size.
            s = jnp.zeros(state_shape, dtype=jnp.float32)
            a = jnp.zeros((state_shape[0], EnAction.num), dtype=jnp.float32)
            r = jnp.zeros((state_shape[0], 1), dtype=jnp.float32)
            n_s = jnp.zeros(state_shape, dtype=jnp.float32)
            n_fin = jnp.zeros((state_shape[0], 1), dtype=jnp.float32)
            gamma = self.__env.gamma
            val = 0
            total_loss_q = []
            total_loss_pi = []
            while True:
                # Sample a uniformly random experience from the buffer.
                self.__rng, rng = jrandom.split(self.__rng)
                e_i = int(jrandom.randint(rng, (1,), 0, len(self.__experiences)))
                e = self.__experiences[e_i]
                if not e.finished:
                    s = s.at[val, :].set(e.observation[0])
                    a = a.at[val, :].set(e.action)
                    r = r.at[val].set(float(e.reward))
                    n_s = n_s.at[val, :].set(e.next_state[0])
                    n_fin = n_fin.at[val, :].set(float(e.next_finished))
                    val += 1
                    if val >= state_shape[0]:
                        # Batch is full: run one shared-network update.
                        q_learn_cnt, p_learn_cnt, temperature, loss_val_qs, loss_val_pi, loss_balances = self.__agent.shared_nn.update(gamma, s, a, r, n_s, n_fin)
                        all_info = {}
                        all_info["trial"] = int(trial)
                        all_info["episode_num_per_unit"] = int(episode_num_per_unit)
                        all_info["episode"] = int(episode_cnt)
                        all_info["q_learn_cnt"] = int(q_learn_cnt)
                        #all_info["p_learn_cnt"] = int(p_learn_cnt)
                        all_info["temperature"] = float(temperature)
                        for _i, loss_val_q in enumerate(loss_val_qs):
                            all_info["loss_val_q{}".format(_i)] = float(loss_val_q)
                        all_info["loss_val_pi"] = float(loss_val_pi)
                        #for _i, loss_balance in enumerate(loss_balances):
                        #    all_info["loss_balance{}".format(_i)] = float(loss_balance)
                        all_log_writer.write(all_info)
                        if verbose:
                            # Echo the row as comma-separated values.
                            for value in all_info.values():
                                if isinstance(value, float):
                                    print("{:.3f}".format(value), end=",")
                                else:
                                    print(value, end=",")
                            print()
                        # Note: loss_val_q still holds the last Q-head's loss
                        # from the enumeration above.
                        total_loss_q.append(loss_val_q)
                        total_loss_pi.append(loss_val_pi)
                        val = 0
                        learn_cnt_per_unit += 1
                        # Stop after the allotted number of updates for this unit.
                        if learn_cnt_per_unit >= min(learn_num_per_unit, len(self.__experiences) // self.__batch_size):
                            break
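
Example #2 opens one LogWriter per CSV path and calls write(info) with a plain dict once per step. Again, rp523/rl's actual class is not shown here; a minimal compatible sketch, assuming every row shares the first row's keys and that the writer creates its own parent directory, could look like:

import csv
from pathlib import Path

class LogWriter:
    # Sketch only; the real class in rp523/rl may differ.

    def __init__(self, path):
        self.__path = Path(path)
        self.__path.parent.mkdir(parents=True, exist_ok=True)
        self.__fieldnames = None

    def write(self, info):
        # Write the header with the first row, then append one CSV row per call.
        first = self.__fieldnames is None
        if first:
            self.__fieldnames = list(info.keys())
        with open(self.__path, "a", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=self.__fieldnames)
            if first:
                writer.writeheader()
            writer.writerow(info)

Reopening the file in append mode keeps the sketch short; a real implementation would more likely hold the file open and close it on deletion, which would fit the explicit del log_writer in the example.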