Esempio n. 1
0
    def observation(self, observation: State) -> np.ndarray:
        """
        Flatten a structured observation into one float32 vector.

        Every field of the observation except ``station_idx`` is zero-padded
        to the shape declared in ``self.env.observation_space``, flattened and
        concatenated in field order.

        :param observation: as defined in ContinuousSimulation
        :return: 1-D np.ndarray, dtype float32. The fields are expected in
            this order:
            station_locations, station_occs, station_maxes, car_locs
            car_dest_idx, car_dest_loc, t, query_loc
            size: (n_stations * 4) + (max_cars * 5) + 3
            NOTE(review): any additional field of State is appended as well;
            confirm the size formula against the State definition.
        """
        fields: Dict = observation._asdict()
        arrs: List[np.ndarray] = []
        for key, value in fields.items():
            if key == 'station_idx':
                # station indices are constant - ignore them
                continue
            correct_shape: Tuple[int, ...] = self.env.observation_space[key].shape
            # the shape specified in observation_space isn't accurate, because
            # the dimensions are variable. Here, we're fixing it to max size
            padded: np.ndarray = self._pad_zeros_(value, correct_shape)
            # np.ndarray[Any] : correct_shape
            arrs.append(padded.flatten().astype(np.float32))
        return np.concatenate(arrs, axis=0)
Esempio n. 2
0
 def len_to_finish(self, state):
     """Return the distance still to cover from ``state`` to ``self.goal``.

     Inside ``self.area`` this is the direct distance to the goal; outside
     it is the distance to the area's center plus the distance from that
     center to the goal.
     """
     if not self.area.state_in(state):
         # Outside the area: route through the area's center.
         to_center = State.getDist(state, self.area.center)
         center_to_goal = State.getDist(self.area.center, self.goal)
         return to_center + center_to_goal
     # Inside the area: measure straight to the goal.
     return State.getDist(state, self.goal)
Esempio n. 3
0
def get_random_state():
    """Build a randomly filled board state.

    Starting from an empty ``State`` and a randomly chosen first symbol
    (-1 or 1), every cell is visited in row-major order and receives either
    0 or the current symbol. The symbol is negated each time it is placed.
    """
    board = State()
    mark = np.random.choice([-1, 1])

    for row in range(BOARD_ROWS):
        for col in range(BOARD_COLS):
            placed = np.random.choice([0, mark])
            board = board.next_state(row, col, placed)
            # Switch to the other symbol only after this one was placed.
            mark = -mark if placed == mark else mark

    return board
Esempio n. 4
0
def state_from_ctype(ctype_state):
    """Copy the fields of ``ctype_state`` into a fresh ``State``.

    ``state`` is decoded from a hexadecimal byte string into an int; all
    other fields are copied as-is.
    """
    result = State()
    result.pos = ctype_state.pos
    # The state arrives as hex-encoded bytes.
    result.state = int(ctype_state.state.decode(), 16)
    for field in ('history', 'move', 'no_change_count', 'depth'):
        setattr(result, field, getattr(ctype_state, field))
    return result
Esempio n. 5
0
    def __init__(self):
        """Set up the environment, the agents, the social network and metrics.

        Side effects visible here: NK models are optionally loaded and always
        saved, a header row plus one row per agent is appended to the agent
        CSV file, and one log line is emitted per agent.
        """
        # Environment initialization
        self.global_arg = arg.init_global_arg()
        env_arg = arg.init_env_arg(self.global_arg)
        # Added a load step for the NK model
        self.main_env = Env(env_arg)
        for model_type in ['st', 'ed']:
            if all_config['checkpoint']['env'][model_type]['enable']:
                self.main_env.nkmodel_load(all_config['checkpoint']['env']['path'], model_type)
            # The model is saved whether or not a checkpoint was loaded.
            self.main_env.nkmodel_save(all_config["nkmodel_path"][model_type], model_type)
        # Agent initialization
        self.agents = []
        csv_head_agent = ['agent_no'] + ['st_state'] + ['st_value'] + ['insight'] + ['xplr'] + ['xplt'] + ['enable']
        moniter.AppendToCsv(csv_head_agent, all_config['agent_csv_path'])
        for i in range(self.global_arg["Nagent"]):
            # Random initial position for each agent
            start_st_label = [randint(0, self.main_env.P - 1) for j in range(self.main_env.N)]
            state_start = State(start_st_label)
            self.agents.append(Agent(arg.init_agent_arg(self.global_arg,
                                                        self.main_env.arg),
                                     self.main_env))
            self.agents[i].state_now = deepcopy(state_start)
            self.agents[i].agent_id = i

            # No longer start from one global area; instead add a point area
            # that contains the starting state
            start_area = Area(self.agents[i].state_now, [False] * self.main_env.N, 0)
            start_area.info = get_area_sample_distr(env=self.main_env, area=start_area, state=self.agents[i].state_now,
                                                    T_stmp=0, sample_num=1, dfs_r=1)
            start_area.sign = Sign(i, 0, 'start')
            self.agents[i].renew_m_info(start_area, 0)
            # NOTE(review): this resets an attribute on self (not on the
            # agent) on every iteration - confirm that is intended.
            self.a_plan = None
            logging.info("state:%s, st_value:%s,insight:%.5s ,xplr:%.5s, xplt:%.5s, enable:%.5s" % (
                str(self.agents[i].state_now),
                self.main_env.getValue(self.agents[i].state_now, 0),
                self.agents[i].agent_arg['a']['insight'],
                self.agents[i].agent_arg['a']['xplr'],
                self.agents[i].agent_arg['a']['xplt'],
                self.agents[i].agent_arg['a']['enable']))
            # Record the agent's info (same columns as csv_head_agent)
            csv_info_agent = ['agent%d' % i] \
                             + [self.agents[i].state_now] \
                             + [self.main_env.getValue(self.agents[i].state_now, 0)] \
                             + [self.agents[i].agent_arg['a']['insight']] \
                             + [self.agents[i].agent_arg['a']['xplr']] \
                             + [self.agents[i].agent_arg['a']['xplt']] \
                             + [self.agents[i].agent_arg['a']['enable']]
            moniter.AppendToCsv(csv_info_agent, all_config['agent_csv_path'])

        # Social network initialization
        soclnet_arg = arg.init_soclnet_arg(self.global_arg, env_arg)
        self.socl_net = SoclNet(soclnet_arg)
        self.socl_net.new_flat_init()  # changed initialization method
        # self.socl_net.flat_init()
        if all_config['checkpoint']['socl_network']['enable']:
            # Overwrite the fresh network with the checkpointed power/relat data.
            self.socl_net.power_load(all_config['checkpoint']['socl_network']['power'])
            self.socl_net.relat_load(all_config['checkpoint']['socl_network']['relat'])
        self.record = Record()

        self.metric = metrics.register_all_metrics(metrics.Metrics())
Esempio n. 6
0
 def observation(self, observation: State):
     """Return a new ``State`` whose ``t`` is ``self.encoder.encode(t)``.

     All other fields are passed through from ``observation`` unchanged.
     """
     fields = {
         'station_idx': observation.station_idx,
         'station_locations': observation.station_locations,
         'station_occs': observation.station_occs,
         'station_maxes': observation.station_maxes,
         'car_locs': observation.car_locs,
         'car_dest_idx': observation.car_dest_idx,
         'car_dest_loc': observation.car_dest_loc,
         # the only transformed field
         't': self.encoder.encode(observation.t),
         'query_loc': observation.query_loc,
         'remaining_queries': observation.remaining_queries,
     }
     return State(**fields)
Esempio n. 7
0
 def astar_h2_solution(self):
     """Run A* with ``self.heuristic2``, print the solution and the elapsed time."""
     banner = '[===============================A* Heuristic2 Solution===============================]'
     print(banner)
     started = time.time()
     # NOTE(review): the start board is bfs_board while the goal and the peg
     # count come from dfs_goal / dfs_board - confirm this mix is intended.
     game = State(bfs_board, dfs_goal, 7, 7, count_pegs(dfs_board))
     print(game)
     solution = self.aStar(game, self.heuristic2)
     # Stop the clock before printing so output time is not measured.
     elapsed = time.time() - started
     self.print_solution(solution)
     print('Total Time: approx. %s sec \n' % str(elapsed))
Esempio n. 8
0
 def bfs_solution(self):
     """Run ``self.bfs`` on the BFS board, print the solution and the elapsed time."""
     banner = '[===============================BFS Solution===============================]'
     print(banner)
     started = time.time()
     game = State(bfs_board, bfs_goal, 7, 7, count_pegs(bfs_board))
     print(game)
     solution = self.bfs(game)
     # Stop the clock before printing so output time is not measured.
     elapsed = time.time() - started
     self.print_solution(solution)
     print('Total Time: approx. %s sec \n' % str(elapsed))
 def observation(self, observation: State) -> State:
     """Return a new ``State`` whose ``t`` is divided by ``self._max_t``.

     The scaled value is cast to float32; every other field is passed
     through from ``observation`` unchanged.
     """
     scaled_t = (observation.t / self._max_t).astype(np.float32)
     fields = dict(
         station_idx=observation.station_idx,
         station_locations=observation.station_locations,
         station_occs=observation.station_occs,
         station_maxes=observation.station_maxes,
         car_locs=observation.car_locs,
         car_dest_idx=observation.car_dest_idx,
         car_dest_loc=observation.car_dest_loc,
         t=scaled_t,
         query_loc=observation.query_loc,
         remaining_queries=observation.remaining_queries,
     )
     return State(**fields)
Esempio n. 10
0
 def observation(self, observation: State):
     """Return a new ``State`` with every location field passed through ``self._encode_loc``.

     Encoded fields: station_locations, car_locs, car_dest_loc, query_loc.
     The remaining fields are copied from ``observation`` unchanged.
     """
     fields = {
         'station_idx': observation.station_idx,
         'station_locations': self._encode_loc(observation.station_locations),
         'station_occs': observation.station_occs,
         'station_maxes': observation.station_maxes,
         'car_locs': self._encode_loc(observation.car_locs),
         'car_dest_idx': observation.car_dest_idx,
         'car_dest_loc': self._encode_loc(observation.car_dest_loc),
         't': observation.t,
         'query_loc': self._encode_loc(observation.query_loc),
         'remaining_queries': observation.remaining_queries,
     }
     return State(**fields)
Esempio n. 11
0
    def __init__(self, step_size=0.1, epsilon=0.1, symbol=0):
        """Create a player with its own estimator, policy and action list.

        :param step_size: stored as ``self.step_size``
        :param epsilon: stored as ``self.epsilon``
        :param symbol: stored as ``self.symbol``
        """
        self.step_size = step_size
        self.epsilon = epsilon
        self.previous_state = State()
        self.state = None
        self.symbol = symbol
        self.td_errors = []

        self.estimator = Estimator()
        self.policy = make_epsilon_greedy_policy(self.estimator)
        self.action = (0, 0)

        # Every board cell, in row-major order.
        self.actions = [(row, col)
                        for row in range(BOARD_ROWS)
                        for col in range(BOARD_COLS)]
Esempio n. 12
0
 def observation(self, observation: State) -> State:
     """Return a new ``State`` with the index fields one-hot encoded.

     ``car_dest_idx`` and ``station_idx`` are passed through ``np_onehot``
     with ``max=self.n_stations - 1`` and their middle axis is dropped.
     Every other field is copied from ``observation`` unchanged.
     """
     top_index = self.n_stations - 1

     # np.ndarray[int32] : [cur_n_cars, 1, self.n_stations]
     car_onehot: np.ndarray = np_onehot(observation.car_dest_idx, max=top_index)
     # -> [cur_n_cars, self.n_stations]
     car_onehot = car_onehot[:, 0, :]

     station_onehot: np.ndarray = np_onehot(observation.station_idx, max=top_index)
     # -> [n_stations, n_stations] should be eye(n_stations)
     station_onehot = station_onehot[:, 0, :]

     fields = dict(
         station_idx=station_onehot,
         station_locations=observation.station_locations,
         station_maxes=observation.station_maxes,
         station_occs=observation.station_occs,
         car_dest_loc=observation.car_dest_loc,
         car_dest_idx=car_onehot,
         car_locs=observation.car_locs,
         query_loc=observation.query_loc,
         t=observation.t,
         remaining_queries=observation.remaining_queries,
     )
     return State(**fields)
Esempio n. 13
0
  def __init__(self):
    """Fit the scaler and RBF featurizer, then create one regressor per cell.

    10000 random states are sampled and flattened to fit a StandardScaler;
    the scaled samples fit a union of four RBF samplers. One SGDRegressor
    per board position is then primed with a single ``partial_fit`` call on
    the features of an empty ``State`` with target 0.
    """
    samples = [get_random_state().data.reshape(-1) for _ in range(10000)]
    self.scaler = sklearn.preprocessing.StandardScaler()
    self.scaler.fit(samples)

    rbf_gammas = (("rbf1", 5.0), ("rbf2", 2.0), ("rbf3", 1.0), ("rbf4", 0.5))
    self.featurizer = sklearn.pipeline.FeatureUnion(
        [(label, RBFSampler(gamma=gamma, n_components=150))
         for label, gamma in rbf_gammas],
        n_jobs=1)
    # The featurizer is fitted on the scaled samples, not the raw ones.
    self.featurizer.fit(self.scaler.transform(samples))

    self.models = dict()
    for row in range(BOARD_ROWS):
        for col in range(BOARD_COLS):
            regressor = SGDRegressor(learning_rate="constant")
            regressor.partial_fit([self.featurize_state(State())], [0])
            self.models[(row, col)] = regressor
Esempio n. 14
0
    def run_exp(self):
        """Create the agents, write the CSV headers and run every stage.

        One agent per ``global_arg["Nagent"]`` is appended to ``self.agents``
        and started from the all-zero state. A header row is appended to each
        per-agent result file and to the summary file (last path). Then
        ``run_stage`` is called once per stage with the stage's first frame.
        """
        up_info = {}

        for idx in range(self.global_arg["Nagent"]):
            self.agents.append(
                Agent(arg.init_agent_arg(self.global_arg, self.main_env.arg)))
            agent = self.agents[idx]
            agent.state_now = State([0 for _ in range(self.main_env.N)])
            hqxx_arg = self.main_env.arg['ACT']['hqxx']
            agent.inter_area.info = get_area_sample_distr(
                env=self.main_env,
                T=0,
                area=agent.inter_area,
                state=agent.state_now,
                sample_num=hqxx_arg['sample_n'],
                dfs_r=hqxx_arg['dfs_p'])
            agent.inter_area.info['start_t'] = 0

        stage_num = self.global_arg['T'] // self.global_arg['Ts']

        # Header for each agent's own result file.
        for agent_no in range(self.global_arg["Nagent"]):
            agent_head = [
                'frame', 'SSMfi', 'SSM_f-req', 'proc_action', 'SSM_f_need',
                'nkmax', 'nkmin', 'nkmid', 'nkavg', 'nk0.75', "nk0.25"
            ]
            moniter.AppendToCsv(agent_head,
                                all_config['result_csv_path'][agent_no])

        # Header for the summary file: one column per agent, then aggregates.
        # NOTE(review): `func_name` is the Python 2 function attribute -
        # confirm what using_brain holds before running on Python 3.
        brain_columns = ["%s%d" % (using_brain[k].func_name, k)
                         for k in range(self.global_arg['Nagent'])]
        nk_columns = ['nkmax', 'nkmin', 'nkmid', 'nkavg', 'nk0.75', "nk0.25"]
        summary_head = ['frame'] + brain_columns + ["agent_avg"] + nk_columns
        moniter.AppendToCsv(summary_head, all_config['result_csv_path'][-1])

        for stage in range(stage_num):
            Ti = stage * self.global_arg['Ts'] + 1
            logging.info("stage %3d , Ti:%3d" % (stage, Ti))
            up_info['nkinfo'] = self.main_env.getModelDistri(Ti)

            # Run one stage; Ti is the first frame of that stage.
            self.run_stage(Ti, up_info)
Esempio n. 15
0
    def play(self, train=False, print_state=False):
        """Play one game between ``self.p1`` and ``self.p2``.

        :param train: when true, the player who did not make the final move
            gets a ``backup`` call with the final state
        :param print_state: when true, print the state before the first move
            and after every move
        :return: ``winner`` of the final state
        """
        turns = self.alternate()
        board = State()
        self.p1.set_state(board)
        self.p2.set_state(board)
        if print_state:
            board.print_state()

        while True:
            mover = next(turns)
            board, finished = mover.act()
            if print_state:
                board.print_state()

            if not finished:
                # Game continues: both players observe the new state.
                self.p1.set_state(board)
                self.p2.set_state(board)
                continue

            if train:
                # Let the player who did not move last see the final state.
                next(turns).backup(board, True)
            return board.winner
Esempio n. 16
0
def bfs_multiproccess(file, env, no_change_cnt, truncate):
    """Breadth-first search over ``env`` states, expanding each level in worker processes.

    The current frontier is copied into shared ``CState`` arrays in blocks of
    at most ``max_shared_block`` entries. For each block, ``act_proc``
    processes running ``bfs_job`` are started and their results are read
    back from one queue per process. Returned states are dropped when they
    are deeper than the best goal depth found so far or were already seen at
    an equal or smaller depth. The search continues until a level yields no
    new states.

    :param file: label used only in the final status messages
    :param env: must provide ``state_len``, ``goal`` and ``get_init_state()``
    :param no_change_cnt: passed through unchanged to ``bfs_job``
    :param truncate: when truthy, ``truncate_hash(hashes, count)`` is called
        after every level
    :return: ``None`` if the state-length guard trips or no goal state was
        found; otherwise a ``DummyState`` built from the found history
    """
    print_time = time.time()
    # NOTE(review): the guard divides by 8 but the message suggests a value
    # computed with /4 - confirm which one is right.
    if math.ceil(env.state_len / 8) >= STATE_LEN:
        print(f"Increase state len to {math.ceil(env.state_len/4)}")
        return None
    # One result queue per worker slot; each Manager() call creates its own manager.
    queues = []
    for i in range(num_proc):
        queues.append(Manager().Queue())

    hashes = dict()  # state hash -> smallest depth at which it was seen
    lis = []  # frontier for the level currently being built
    lock = Lock()
    init_state = env.get_init_state()
    init_state.history = b''
    init_state.move = b'N'
    lis.append(ctype_state(init_state))  # push the initial state

    found_state = None
    count = 0
    max_shared_block = 100000
    best_depth = 200
    while len(lis) > 0:
        act_proc = min(max(1, len(lis)), num_proc)
        # Swap: all_list is the level to expand, lis collects the next level.
        all_list = lis
        lis = []
        chunk_count = 0
        for pos_in_all_lis in range(0, len(all_list), max_shared_block):

            # NOTE(review): chunk_size is derived from len(all_list), not from
            # the length of this block's slice, and divides by num_proc rather
            # than act_proc - confirm bfs_job copes with a short last block.
            chunk_size = math.ceil(
                min(max_shared_block, len(all_list)) / num_proc)
            chunk_count += 1
            #print_lock(lock, f"Makeing shared memory at {pos_in_all_lis}-{pos_in_all_lis+max_shared_block} ({chunk_count}/{math.ceil(len(all_list)/max_shared_block)})")
            #    print_tuple_state("copy to shared mem:",x,lock)
            shared_array = Array(CState,
                                 all_list[pos_in_all_lis:pos_in_all_lis +
                                          max_shared_block],
                                 lock=False)
            #print_lock(lock, f"making input for processes. all_size:{len(all_list)} chunk_size:{chunk_size}")
            # One argument tuple per worker: (worker index, its queue, env,
            # shared array, start offset, chunk size, lock, no_change_cnt).
            input = [(i, queues[i], env, shared_array, i * chunk_size,
                      chunk_size, lock, no_change_cnt)
                     for i in list(range(act_proc))]
            #print_lock(lock, f"Starting {len(input)} jobs")
            pool = []
            for i in input:
                proc = Process(target=bfs_job, args=(i, ))
                pool.append(proc)
            for p in pool:
                p.start()
            # Finished slots are set to None below, so any(pool) is true while
            # at least one worker is still unprocessed.
            while any(pool):
                proc_loc = -1
                # Poll until some worker has exited; the scan keeps the
                # highest index among the finished ones.
                while True:
                    for p in range(len(pool)):
                        if pool[p] and not pool[p].is_alive():
                            proc_loc = p
                    if proc_loc != -1:
                        break
                    time.sleep(0.01)
                pool[proc_loc].join()
                #print_lock(lock, f'Pulling from process {proc_loc}')
                # state is indexed like a tuple: [0] and [1] form the hash
                # ([1] is a hex string), [2] is the history; [5] is
                # presumably the depth - TODO confirm against bfs_job.
                for state in queues[proc_loc].get():
                    if state[5] > best_depth:
                        continue
                    s = int(state[1].decode(), 16)
                    state_hash = (state[0] << env.state_len) + s
                    #if state_hash in hashes and hashes[state_hash] <= state[5]:
                    if hashes.get(state_hash, 200) <= state[5]:
                        continue
                    hashes[state_hash] = state[5]
                    lis.append(state)
                    if s == env.goal:
                        # Remember the goal state and tighten the depth bound.
                        found_state = State()
                        found_state.history = state[2]
                        best_depth = state[5]
                        #print_lock(lock, f'{s} {found_state.history}')
                        #return DummyState(list(found_state.history))
                #print_lock(lock, f'Completed pulling from process {proc_loc}')
                pool[proc_loc].terminate()
                pool[proc_loc] = None
        count += 1
        #print(f'truncating hash {len(hashes)}')
        if truncate:
            truncate_hash(hashes, count)
        t = "{0:.3f}".format(time.time() - print_time)
        # Progress line, rewritten in place via the leading carriage return.
        print(
            f'\r{t} :=======> count:{count} tree:{len(lis)} hash:{len(hashes)} found:{"true" if found_state else "false"}',
            end='')
        #print(f'{t} :=======> count:{count} tree:{len(lis)} hash:{len(hashes)}')

    print("\r", end='')
    if not found_state:
        # NOTE(review): the message below starts with a stray double quote.
        print(f'"{file} solution not found !!!')
        return None
    else:
        print(f'{file} found solution at level {best_depth}')
    # Iterating the bytes history yields ints; each becomes a one-character
    # encoded move.
    return DummyState([chr(x).encode() for x in found_state.history])
    def grow(self) -> bool:
        """Extend every unfinished beam by one token.

        The model proposes ``beam_size`` actions for each partial tree; the
        accumulated log-probabilities are combined and the best ``beam_size``
        candidates per unfinished sentence are kept. Sentences whose last
        token was just consumed are marked finished and their trees stored in
        ``self.pred_trees``.

        :return: True when no partial tree is left to expand, False otherwise
        """
        beam = self.beam_size
        actions, log_probs = self.model(self.state, topk=beam)

        # batch_size x beam_size x beam_size
        scores = log_probs.view(-1, beam, beam)
        scores = self.log_probs[~self.finished].unsqueeze(-1) + scores
        # batch_size x (beam_size * beam_size)
        flat_scores = scores.view(-1, beam * beam)
        values, indices = flat_scores.topk(beam, dim=-1)
        self.log_probs[~self.finished] = values

        next_trees = []
        next_words = []
        next_embs = []
        next_batch_idx = []
        # Row in `indices`, which only has rows for unfinished sentences.
        active_row = 0
        step = self.n_step

        for i in range(self.batch_size):
            if self.finished[i]:
                continue

            tag = self.tags[i][step]
            word = self.tokens_word[i][step]
            is_last_token = step >= len(self.tokens_word[i]) - 1

            for j in range(beam):
                flat_idx = indices[active_row, j].item()
                # Split the flat index into (parent tree, action) coordinates.
                parent = active_row * beam + flat_idx // beam
                choice = flat_idx % beam
                tree = AttachJuxtapose.execute(
                    self.state.partial_trees[parent],
                    actions[parent][choice],
                    step,
                    tag,
                    word,
                    immutable=True,
                )
                assert isinstance(tree, InternalParseNode)
                if is_last_token:
                    self.finished[i] = True
                    self.pred_trees[i][j] = tree
                    continue
                next_trees.append(tree)
                next_words.append(self.tokens_word[i])
                next_embs.append(self.tokens_emb[i])
                next_batch_idx.append(self.state.batch_idx[parent])

            active_row += 1

        if not next_embs:
            assert self.done()
            return True

        stacked_embs = torch.stack(next_embs)

        self.n_step += 1
        next_pos = self.state.next_token_pos.new_full((len(next_trees), ),
                                                      fill_value=self.n_step)
        self.state = State(
            next_trees,  # type: ignore
            next_words,
            stacked_embs,
            next_pos,
            n_step=self.n_step,
            batch_idx=next_batch_idx,
        )
        return False
    def __init__(
        self,
        tokens_word: List[List[str]],
        tags: List[List[str]],
        tokens_emb: torch.Tensor,
        model: Parser,
        cfg: DictConfig,
    ) -> None:
        """Run the first parsing step and set up the beam for the following ones.

        The model is queried once on an initial state with no partial trees;
        its top ``cfg.beam_size`` actions per sentence create the first
        trees. Single-token sentences are marked finished immediately and
        their trees stored in ``self.pred_trees``; all others are expanded
        into ``self.state`` for step 1.

        :param tokens_word: words of each sentence in the batch
        :param tags: tags of each sentence, indexed like ``tokens_word``
        :param tokens_emb: per-sentence embeddings; also supplies the device
        :param model: called as ``model(state, topk=beam_size)``
        :param cfg: configuration; only ``beam_size`` is read here
        """
        self.batch_size = len(tokens_word)
        self.beam_size = cfg.beam_size
        self.tokens_word = tokens_word
        self.tokens_emb = tokens_emb
        self.tags = tags
        self.model = model
        device = tokens_emb.device

        start_pos = torch.zeros(self.batch_size,
                                dtype=torch.int64,
                                device=device)
        init_state = State(
            [None for _ in range(self.batch_size)],
            tokens_word,
            tokens_emb,
            next_token_pos=start_pos,
            n_step=0,
            batch_idx=list(range(self.batch_size)),
        )

        actions, log_probs = self.model(init_state, topk=self.beam_size)
        self.log_probs = log_probs

        first_trees = []
        words_per_beam = []
        embs_per_beam = []
        beam_batch_idx = []
        self.finished = torch.zeros(self.batch_size,
                                    dtype=torch.bool,
                                    device=device)
        self.pred_trees = [[None for _ in range(self.beam_size)]
                           for _ in range(self.batch_size)]

        for i in range(self.batch_size):
            has_more_tokens = len(self.tokens_word[i]) > 1
            for j in range(self.beam_size):
                tree = AttachJuxtapose.execute(None,
                                               actions[i][j],
                                               0,
                                               self.tags[i][0],
                                               self.tokens_word[i][0],
                                               immutable=False)
                assert isinstance(tree, InternalParseNode)
                if not has_more_tokens:
                    # Single-token sentence: the first tree is already final.
                    self.finished[i] = True
                    self.pred_trees[i][j] = tree
                    continue
                first_trees.append(tree)
                words_per_beam.append(tokens_word[i])
                embs_per_beam.append(tokens_emb[i])
                beam_batch_idx.append(i)

        # NOTE(review): if every sentence has a single token this list is
        # empty and torch.stack raises - confirm callers never hit that case.
        stacked_embs = torch.stack(embs_per_beam)

        next_pos = torch.ones(len(first_trees),
                              dtype=torch.int64,
                              device=device)
        self.state = State(
            first_trees,  # type: ignore
            words_per_beam,
            stacked_embs,
            next_token_pos=next_pos,
            n_step=1,
            batch_idx=beam_batch_idx,
        )
        self.n_step = 1