def observation(self, observation: State) -> np.ndarray:
    """Flatten a structured observation into one 1-D float32 vector.

    Every field except ``station_idx`` is zero-padded to the shape declared
    for it in ``self.env.observation_space``, flattened, cast to float32 and
    concatenated in field order.

    :param observation: as defined in ContinuousSimulation; must provide
        ``_asdict()``.
    :return: np.ndarray of dtype float32. Per the original documentation the
        layout is station_locations, station_occs, station_maxes, car_locs,
        car_dest_idx, car_dest_loc, t, query_loc, with size
        (n_stations * 4) + (max_cars * 5) + 3.
    """
    fields: Dict = observation._asdict()
    arrs: List[np.ndarray] = []
    for key, value in fields.items():
        if key == 'station_idx':
            # Station indices are constant, so they are left out of the
            # flattened vector. (The original used `pass` here, which let
            # them through.)
            continue
        # The shape in observation_space is the maximum size; the actual
        # arrays are variable-sized, so pad them up to it.
        correct_shape: Tuple[int] = self.env.observation_space[key].shape
        padded: np.ndarray = self._pad_zeros_(value, correct_shape)
        arrs.append(padded.flatten().astype(np.float32))
    return np.concatenate(arrs, axis=0)
def len_to_finish(self, state):
    """Return the distance from ``state`` to ``self.goal``.

    When ``state`` lies inside ``self.area`` the direct distance to the goal
    is returned; otherwise the distance is routed through the area center.
    """
    if not self.area.state_in(state):
        # Outside the area: state -> area center, then area center -> goal.
        to_center = State.getDist(state, self.area.center)
        center_to_goal = State.getDist(self.area.center, self.goal)
        return to_center + center_to_goal
    # Inside the area: distance from the point straight to the goal.
    return State.getDist(state, self.goal)
def get_random_state():
    """Build a board state by walking every cell and randomly marking it.

    A starting symbol (-1 or 1) is drawn at random. For each cell, in
    row-major order, either 0 or the current symbol is drawn and passed to
    ``next_state``; the symbol flips sign each time it is actually drawn.

    :return: the state reached after every cell has been visited.
    """
    board = State()
    mark = np.random.choice([-1, 1])
    for row in range(BOARD_ROWS):
        for col in range(BOARD_COLS):
            drawn = np.random.choice([0, mark])
            board = board.next_state(row, col, drawn)
            # Alternate the symbol only when it was the one drawn.
            mark = -mark if drawn == mark else mark
    return board
def state_from_ctype(ctype_state):
    """Copy the fields of a ctypes-side state record into a fresh ``State``.

    ``ctype_state.state`` is bytes holding a hexadecimal number and is
    converted to an int; every other field is copied as-is.
    """
    result = State()
    result.pos = ctype_state.pos
    # The packed state travels as hex text; decode it and parse base 16.
    result.state = int(ctype_state.state.decode(), 16)
    for field in ("history", "move", "no_change_count", "depth"):
        setattr(result, field, getattr(ctype_state, field))
    return result
def __init__(self):
    """Set up the environment, the agents, the social network and recorders.

    Reads its configuration from the module-level ``all_config`` and ``arg``
    helpers, writes one header row plus one row per agent to the agent CSV,
    and logs each agent's starting state.
    """
    # --- Environment ---
    self.global_arg = arg.init_global_arg()
    env_arg = arg.init_env_arg(self.global_arg)
    self.main_env = Env(env_arg)
    # Optionally read each NK model back from its checkpoint, then save it.
    env_checkpoint = all_config['checkpoint']['env']
    for model_type in ('st', 'ed'):
        if env_checkpoint[model_type]['enable']:
            self.main_env.nkmodel_load(env_checkpoint['path'], model_type)
        self.main_env.nkmodel_save(all_config["nkmodel_path"][model_type],
                                   model_type)

    # --- Agents ---
    self.agents = []
    agent_csv_path = all_config['agent_csv_path']
    moniter.AppendToCsv(
        ['agent_no', 'st_state', 'st_value', 'insight', 'xplr', 'xplt',
         'enable'],
        agent_csv_path)
    for i in range(self.global_arg["Nagent"]):
        # Each agent starts from a random position.
        start_st_label = [randint(0, self.main_env.P - 1)
                          for _ in range(self.main_env.N)]
        state_start = State(start_st_label)
        agent = Agent(arg.init_agent_arg(self.global_arg, self.main_env.arg),
                      self.main_env)
        self.agents.append(agent)
        agent.state_now = deepcopy(state_start)
        agent.agent_id = i

        # Instead of one global area up front, give the agent a point area
        # that contains its starting state.
        start_area = Area(agent.state_now, [False] * self.main_env.N, 0)
        start_area.info = get_area_sample_distr(env=self.main_env,
                                                area=start_area,
                                                state=agent.state_now,
                                                T_stmp=0,
                                                sample_num=1,
                                                dfs_r=1)
        start_area.sign = Sign(i, 0, 'start')
        agent.renew_m_info(start_area, 0)
        # NOTE(review): this resets an attribute on the controller itself on
        # every iteration, not on the agent -- confirm that is intended.
        self.a_plan = None

        traits = agent.agent_arg['a']
        logging.info("state:%s, st_value:%s,insight:%.5s ,xplr:%.5s, xplt:%.5s, enable:%.5s" % (
            str(agent.state_now),
            self.main_env.getValue(agent.state_now, 0),
            traits['insight'],
            traits['xplr'],
            traits['xplt'],
            traits['enable']))

        # Record the agent's information in the CSV.
        moniter.AppendToCsv(
            ['agent%d' % i,
             agent.state_now,
             self.main_env.getValue(agent.state_now, 0),
             traits['insight'],
             traits['xplr'],
             traits['xplt'],
             traits['enable']],
            agent_csv_path)

    # --- Social network ---
    soclnet_arg = arg.init_soclnet_arg(self.global_arg, env_arg)
    self.socl_net = SoclNet(soclnet_arg)
    # Uses the newer initialisation method (previously flat_init()).
    self.socl_net.new_flat_init()
    socl_checkpoint = all_config['checkpoint']['socl_network']
    if socl_checkpoint['enable']:
        self.socl_net.power_load(socl_checkpoint['power'])
        self.socl_net.relat_load(socl_checkpoint['relat'])

    self.record = Record()
    self.metric = metrics.register_all_metrics(metrics.Metrics())
def observation(self, observation: State):
    """Return a new ``State`` whose ``t`` has been passed through the encoder.

    Every field other than ``t`` is copied from ``observation`` unchanged;
    ``t`` is replaced by ``self.encoder.encode(observation.t)``.
    """
    passthrough = (
        "station_idx",
        "station_locations",
        "station_occs",
        "station_maxes",
        "car_locs",
        "car_dest_idx",
        "car_dest_loc",
        "query_loc",
        "remaining_queries",
    )
    fields = {name: getattr(observation, name) for name in passthrough}
    fields["t"] = self.encoder.encode(observation.t)
    return State(**fields)
def astar_h2_solution(self, ):
    """Run A* with ``self.heuristic2`` and print the solution and wall time."""
    print(
        '[===============================A* Heuristic2 Solution===============================]'
    )
    started = time.time()
    # NOTE(review): the start board is `bfs_board`, while the goal and the peg
    # count come from `dfs_goal` / `dfs_board`. Confirm this mix is intended.
    game = State(bfs_board, dfs_goal, 7, 7, count_pegs(dfs_board))
    print(game)
    solution = self.aStar(game, self.heuristic2)
    finished = time.time()
    self.print_solution(solution)
    elapsed = finished - started
    print('Total Time: approx. %s sec \n' % str(elapsed))
def bfs_solution(self, ):
    """Run breadth-first search and print the solution and wall time."""
    print(
        '[===============================BFS Solution===============================]'
    )
    started = time.time()
    game = State(bfs_board, bfs_goal, 7, 7, count_pegs(bfs_board))
    print(game)
    solution = self.bfs(game)
    finished = time.time()
    self.print_solution(solution)
    elapsed = finished - started
    print('Total Time: approx. %s sec \n' % str(elapsed))
def observation(self, observation: State) -> State:
    """Return a new ``State`` whose ``t`` is divided by ``self._max_t``.

    The scaled time is cast to float32 via ``astype``; every other field is
    copied from ``observation`` unchanged.
    """
    src = observation
    scaled_t = src.t / self._max_t
    return State(
        station_idx=src.station_idx,
        station_locations=src.station_locations,
        station_occs=src.station_occs,
        station_maxes=src.station_maxes,
        car_locs=src.car_locs,
        car_dest_idx=src.car_dest_idx,
        car_dest_loc=src.car_dest_loc,
        t=scaled_t.astype(np.float32),
        query_loc=src.query_loc,
        remaining_queries=src.remaining_queries,
    )
def observation(self, observation: State):
    """Return a new ``State`` with every location field re-encoded.

    ``station_locations``, ``car_locs``, ``car_dest_loc`` and ``query_loc``
    are passed through ``self._encode_loc``; all other fields are copied
    unchanged.
    """
    encode = self._encode_loc
    station_locations = encode(observation.station_locations)
    car_locs = encode(observation.car_locs)
    car_dest_loc = encode(observation.car_dest_loc)
    query_loc = encode(observation.query_loc)
    return State(
        station_idx=observation.station_idx,
        station_locations=station_locations,
        station_occs=observation.station_occs,
        station_maxes=observation.station_maxes,
        car_locs=car_locs,
        car_dest_idx=observation.car_dest_idx,
        car_dest_loc=car_dest_loc,
        t=observation.t,
        query_loc=query_loc,
        remaining_queries=observation.remaining_queries,
    )
def __init__(self, step_size=0.1, epsilon=0.1, symbol=0):
    """Initialise the player's hyper-parameters, estimator and action list.

    :param step_size: stored as ``self.step_size``.
    :param epsilon: stored as ``self.epsilon``.
    :param symbol: stored as ``self.symbol``.
    """
    self.step_size = step_size
    self.epsilon = epsilon
    self.symbol = symbol

    self.previous_state = State()
    self.state = None
    self.td_errors = []

    self.estimator = Estimator()
    self.policy = make_epsilon_greedy_policy(self.estimator)

    self.action = (0, 0)
    # Every board coordinate, in row-major order.
    self.actions = [(row, col)
                    for row in range(BOARD_ROWS)
                    for col in range(BOARD_COLS)]
def observation(self, observation: State) -> State:
    """Return a new ``State`` with the index fields one-hot encoded.

    ``car_dest_idx`` and ``station_idx`` are passed through ``np_onehot``
    (with ``max=self.n_stations - 1``) and the middle axis is dropped; all
    other fields are copied unchanged.
    """
    top_index = self.n_stations - 1

    def _onehot_rows(indices) -> np.ndarray:
        # np_onehot yields [rows, 1, n_stations]; keep [rows, n_stations].
        encoded: np.ndarray = np_onehot(indices, max=top_index)
        return encoded[:, 0, :]

    # [cur_n_cars, n_stations]
    car_onehot = _onehot_rows(observation.car_dest_idx)
    # [n_stations, n_stations]; should be eye(n_stations)
    station_onehot = _onehot_rows(observation.station_idx)

    return State(
        station_idx=station_onehot,
        station_locations=observation.station_locations,
        station_maxes=observation.station_maxes,
        station_occs=observation.station_occs,
        car_dest_loc=observation.car_dest_loc,
        car_dest_idx=car_onehot,
        car_locs=observation.car_locs,
        query_loc=observation.query_loc,
        t=observation.t,
        remaining_queries=observation.remaining_queries,
    )
def __init__(self):
    """Fit the scaler and RBF featurizer, and create one regressor per cell.

    10000 random states are sampled and flattened to fit a
    ``StandardScaler``; four ``RBFSampler`` kernels are then fitted on the
    standardised samples. One ``SGDRegressor`` is created for every board
    coordinate and given a single ``partial_fit`` call on the featurised
    default ``State`` with target 0.
    """
    samples = [get_random_state().data.reshape(-1) for _ in range(10000)]

    self.scaler = sklearn.preprocessing.StandardScaler()
    self.scaler.fit(samples)

    kernel_gammas = (("rbf1", 5.0), ("rbf2", 2.0), ("rbf3", 1.0), ("rbf4", 0.5))
    kernels = [(label, RBFSampler(gamma=gamma, n_components=150))
               for label, gamma in kernel_gammas]
    self.featurizer = sklearn.pipeline.FeatureUnion(kernels, n_jobs=1)
    # Fit on the standardised samples rather than the raw ones.
    self.featurizer.fit(self.scaler.transform(samples))

    self.models = dict()
    for row in range(BOARD_ROWS):
        for col in range(BOARD_COLS):
            regressor = SGDRegressor(learning_rate="constant")
            # Presumably primes the model so it can predict before any real
            # update has been made -- TODO confirm.
            regressor.partial_fit([self.featurize_state(State())], [0])
            self.models[(row, col)] = regressor
def run_exp(self):
    """Create the agents, write the CSV headers and run every stage.

    The number of stages is ``global_arg['T'] // global_arg['Ts']``; stage
    ``i`` starts at frame ``i * Ts + 1``.
    """
    up_info = {}
    n_agents = self.global_arg["Nagent"]
    sampling_cfg = self.main_env.arg['ACT']['hqxx']

    for i in range(n_agents):
        self.agents.append(
            Agent(arg.init_agent_arg(self.global_arg, self.main_env.arg)))
        # Index rather than reuse the new object: the list may not have been
        # empty on entry.
        agent = self.agents[i]
        agent.state_now = State([0 for _ in range(self.main_env.N)])
        agent.inter_area.info = get_area_sample_distr(
            env=self.main_env,
            T=0,
            area=agent.inter_area,
            state=agent.state_now,
            sample_num=sampling_cfg['sample_n'],
            dfs_r=sampling_cfg['dfs_p'])
        agent.inter_area.info['start_t'] = 0

    stage_num = self.global_arg['T'] // self.global_arg['Ts']
    result_paths = all_config['result_csv_path']

    # One result file per agent, each with the same header.
    # (Peak statistics columns are currently disabled.)
    for k in range(n_agents):
        csv_head = [
            'frame', 'SSMfi', 'SSM_f-req', 'proc_action', 'SSM_f_need',
            'nkmax', 'nkmin', 'nkmid', 'nkavg', 'nk0.75', "nk0.25"
        ]
        moniter.AppendToCsv(csv_head, result_paths[k])

    # The last result file holds one column per agent plus aggregates.
    # NOTE(review): `func_name` is the Python 2 spelling of a function's name
    # attribute; confirm what `using_brain` holds if this runs on Python 3.
    brain_columns = ["%s%d" % (using_brain[k].func_name, k)
                     for k in range(self.global_arg['Nagent'])]
    csv_head = (['frame']
                + brain_columns
                + ["agent_avg"]
                + ['nkmax', 'nkmin', 'nkmid', 'nkavg', 'nk0.75', "nk0.25"])
    moniter.AppendToCsv(csv_head, result_paths[-1])

    for i in range(stage_num):
        Ti = i * self.global_arg['Ts'] + 1
        logging.info("stage %3d , Ti:%3d" % (i, Ti))
        up_info['nkinfo'] = self.main_env.getModelDistri(Ti)
        # Run one stage; Ti is the first frame of that stage.
        self.run_stage(Ti, up_info)
def play(self, train=False, print_state=False):
    """Play one game between ``self.p1`` and ``self.p2``.

    :param train: when true, the player who did not make the final move is
        given a ``backup`` call with the terminal state.
    :param print_state: when true, print the state at the start and after
        every move.
    :return: ``winner`` of the terminal state.
    """
    turns = self.alternate()
    current_state = State()
    for participant in (self.p1, self.p2):
        participant.set_state(current_state)
    if print_state:
        current_state.print_state()

    while True:
        player = next(turns)
        current_state, is_end = player.act()
        if print_state:
            current_state.print_state()
        if not is_end:
            # Game continues: show both players the new state.
            for participant in (self.p1, self.p2):
                participant.set_state(current_state)
            continue
        if train:
            # Advance to the other player and let it learn from the result.
            player = next(turns)
            player.backup(current_state, True)
        return current_state.winner
def bfs_multiproccess(file, env, no_change_cnt, truncate):
    """Search level by level for ``env.goal``, expanding states in worker processes.

    Each pass of the outer loop takes the current frontier ``lis``, copies it
    in slices of at most ``max_shared_block`` entries into an ``Array`` of
    ``CState``, starts up to ``num_proc`` ``bfs_job`` processes on that array
    and gathers what every job puts on its queue into the next frontier.
    States are de-duplicated through ``hashes``, which maps a combined
    position/state key to the smallest ``state[5]`` seen for it.

    :param file: label used only in the final status messages.
    :param env: provides ``state_len``, ``goal`` and ``get_init_state()``.
    :param no_change_cnt: forwarded unchanged to every ``bfs_job``.
    :param truncate: when truthy, ``truncate_hash(hashes, count)`` is called
        after every pass.
    :return: ``None`` if the state-length check fails or no goal state was
        found; otherwise a ``DummyState`` built from the history bytes of the
        best goal state found.
    """
    print_time = time.time()
    # Refuse to run when the state does not fit the fixed-size buffer.
    if math.ceil(env.state_len / 8) >= STATE_LEN:
        print(f"Increase state len to {math.ceil(env.state_len/4)}")
        return None
    # One result queue per worker slot.
    # NOTE(review): a fresh Manager() is created for every queue.
    queues = []
    for i in range(num_proc):
        queues.append(Manager().Queue())
    hashes = dict()
    lis = []
    lock = Lock()
    init_state = env.get_init_state()
    init_state.history = b''
    init_state.move = b'N'
    lis.append(ctype_state(init_state))  # push the initial state
    found_state = None
    count = 0
    max_shared_block = 100000
    # Upper bound on accepted state[5]; lowered whenever a goal state is found.
    best_depth = 200
    while len(lis) > 0:
        act_proc = min(max(1, len(lis)), num_proc)
        # Swap frontiers: expand all_list, collect the next level in lis.
        all_list = lis
        lis = []
        chunk_count = 0
        for pos_in_all_lis in range(0, len(all_list), max_shared_block):
            # NOTE(review): chunk_size is derived from the whole frontier
            # length, not from the length of this slice -- presumably bfs_job
            # bounds-checks its range; verify.
            chunk_size = math.ceil(
                min(max_shared_block, len(all_list)) / num_proc)
            chunk_count += 1
            #print_lock(lock, f"Makeing shared memory at {pos_in_all_lis}-{pos_in_all_lis+max_shared_block} ({chunk_count}/{math.ceil(len(all_list)/max_shared_block)})")
            # print_tuple_state("copy to shared mem:",x,lock)
            shared_array = Array(CState,
                                 all_list[pos_in_all_lis:pos_in_all_lis +
                                          max_shared_block],
                                 lock=False)
            #print_lock(lock, f"making input for processes. all_size:{len(all_list)} chunk_size:{chunk_size}")
            # One argument tuple per worker: (index, queue, env, array,
            # start offset, chunk size, lock, no_change_cnt).
            input = [(i, queues[i], env, shared_array, i * chunk_size,
                      chunk_size, lock, no_change_cnt)
                     for i in list(range(act_proc))]
            #print_lock(lock, f"Starting {len(input)} jobs")
            pool = []
            for i in input:
                proc = Process(target=bfs_job, args=(i, ))
                pool.append(proc)
            for p in pool:
                p.start()
            # Slots are set to None once drained, so this ends when every
            # worker has been collected.
            while any(pool):
                proc_loc = -1
                # Poll until some worker that has not been collected yet has
                # exited.
                while True:
                    for p in range(len(pool)):
                        if pool[p] and not pool[p].is_alive():
                            proc_loc = p
                    if proc_loc != -1:
                        break
                    time.sleep(0.01)
                pool[proc_loc].join()
                #print_lock(lock, f'Pulling from process {proc_loc}')
                # A single get() is expected to yield an iterable of states.
                for state in queues[proc_loc].get():
                    # state[5] is compared against and stored as the depth.
                    if state[5] > best_depth:
                        continue
                    # state[1] holds the state as hexadecimal text.
                    s = int(state[1].decode(), 16)
                    # Key: state[0] in the high bits, the state value below.
                    state_hash = (state[0] << env.state_len) + s
                    #if state_hash in hashes and hashes[state_hash] <= state[5]:
                    # Skip states already reached at the same or lower depth.
                    if hashes.get(state_hash, 200) <= state[5]:
                        continue
                    hashes[state_hash] = state[5]
                    lis.append(state)
                    if s == env.goal:
                        # Remember the goal and tighten the depth bound; the
                        # search goes on in case a shallower goal turns up.
                        found_state = State()
                        found_state.history = state[2]
                        best_depth = state[5]
                        #print_lock(lock, f'{s} {found_state.history}')
                        #return DummyState(list(found_state.history))
                #print_lock(lock, f'Completed pulling from process {proc_loc}')
                pool[proc_loc].terminate()
                pool[proc_loc] = None
        count += 1
        #print(f'truncating hash {len(hashes)}')
        if truncate:
            truncate_hash(hashes, count)
        # Single-line progress report, rewritten in place via '\r'.
        t = "{0:.3f}".format(time.time() - print_time)
        print(
            f'\r{t} :=======> count:{count} tree:{len(lis)} hash:{len(hashes)} found:{"true" if found_state else "false"}',
            end='')
        #print(f'{t} :=======> count:{count} tree:{len(lis)} hash:{len(hashes)}')
    print("\r", end='')
    if not found_state:
        print(f'"{file} solution not found !!!')
        return None
    else:
        print(f'{file} found solution at level {best_depth}')
        # Iterating the history bytes yields ints; turn each back into a
        # one-character bytes move.
        return DummyState([chr(x).encode() for x in found_state.history])
def grow(self) -> bool:
    """Advance every unfinished beam by one token.

    The model scores ``beam_size`` actions for each live hypothesis; for each
    unfinished sentence the best ``beam_size`` of the
    ``beam_size * beam_size`` continuations are kept. Hypotheses that
    consumed their sentence's last token are stored in ``self.pred_trees``
    and the sentence is marked finished; the rest form the next
    ``self.state``.

    :return: True when no hypothesis is left to extend, False otherwise.
    """
    beam = self.beam_size
    step = self.n_step
    actions, log_probs = self.model(self.state, topk=beam)

    active = ~self.finished
    # unfinished x beam x beam: parent score plus each continuation's score
    scores = log_probs.view(-1, beam, beam)
    scores = self.log_probs[active].unsqueeze(-1) + scores
    # unfinished x (beam * beam), then keep the best `beam` per sentence
    values, indices = scores.view(-1, beam * beam).topk(beam, dim=-1)
    self.log_probs[active] = values

    partial_trees = []
    tokens_word = []
    tokens_emb = []
    batch_idx = []

    row = 0  # index among the unfinished sentences only
    for i in range(self.batch_size):
        if self.finished[i]:
            continue
        for j in range(beam):
            flat = indices[row, j].item()
            # Split the flat index into (parent hypothesis, chosen action).
            parent, choice = divmod(flat, beam)
            source = row * beam + parent
            tree = AttachJuxtapose.execute(
                self.state.partial_trees[source],
                actions[source][choice],
                step,
                self.tags[i][step],
                self.tokens_word[i][step],
                immutable=True,
            )
            assert isinstance(tree, InternalParseNode)
            if step < len(self.tokens_word[i]) - 1:
                partial_trees.append(tree)
                tokens_word.append(self.tokens_word[i])
                tokens_emb.append(self.tokens_emb[i])
                batch_idx.append(self.state.batch_idx[source])
            else:
                # Last token of this sentence: the tree is complete.
                self.finished[i] = True
                self.pred_trees[i][j] = tree
        row += 1

    if not tokens_emb:
        assert self.done()
        return True

    tokens_emb_t = torch.stack(tokens_emb)
    self.n_step = step + 1
    next_pos = self.state.next_token_pos.new_full((len(partial_trees), ),
                                                  fill_value=self.n_step)
    self.state = State(
        partial_trees,  # type: ignore
        tokens_word,
        tokens_emb_t,
        next_pos,
        n_step=self.n_step,
        batch_idx=batch_idx,
    )
    return False
def __init__(
    self,
    tokens_word: List[List[str]],
    tags: List[List[str]],
    tokens_emb: torch.Tensor,
    model: Parser,
    cfg: DictConfig,
) -> None:
    """Start a beam search by applying the first action to every sentence.

    The model is run once on an initial state with no partial trees; its
    top ``cfg.beam_size`` actions per sentence seed the beams. Sentences with
    a single token are finished immediately and their trees are stored in
    ``self.pred_trees``; all other hypotheses form ``self.state`` at step 1.

    :param tokens_word: tokens of every sentence in the batch.
    :param tags: tags of every sentence, indexed like ``tokens_word``.
    :param tokens_emb: token embeddings; indexed by sentence along dim 0.
    :param model: parser called as ``model(state, topk=beam_size)``,
        returning ``(actions, log_probs)``.
    :param cfg: configuration providing ``beam_size``.
    """
    self.batch_size = len(tokens_word)
    self.beam_size = cfg.beam_size
    self.tokens_word = tokens_word
    self.tokens_emb = tokens_emb
    self.tags = tags
    self.model = model

    device = tokens_emb.device
    init_state = State(
        [None for _ in range(self.batch_size)],
        tokens_word,
        tokens_emb,
        next_token_pos=torch.zeros(self.batch_size,
                                   dtype=torch.int64,
                                   device=device),
        n_step=0,
        batch_idx=list(range(self.batch_size)),
    )
    actions, log_probs = self.model(init_state, topk=self.beam_size)
    self.log_probs = log_probs

    partial_trees = []
    tokens_word_expanded = []
    tokens_emb_expanded = []
    batch_idx = []
    self.finished = torch.zeros(self.batch_size,
                                dtype=torch.bool,
                                device=device)
    self.pred_trees = [[None for _ in range(self.beam_size)]
                       for _ in range(self.batch_size)]

    for i in range(self.batch_size):
        for j in range(self.beam_size):
            tag = self.tags[i][0]
            word = self.tokens_word[i][0]
            tree = AttachJuxtapose.execute(None,
                                           actions[i][j],
                                           0,
                                           tag,
                                           word,
                                           immutable=False)
            assert isinstance(tree, InternalParseNode)
            if len(self.tokens_word[i]) > 1:
                partial_trees.append(tree)
                tokens_word_expanded.append(tokens_word[i])
                tokens_emb_expanded.append(tokens_emb[i])
                batch_idx.append(i)
            else:
                # One-token sentence: the first action already completes it.
                self.finished[i] = True
                self.pred_trees[i][j] = tree

    if tokens_emb_expanded:
        tokens_emb_expanded_t = torch.stack(tokens_emb_expanded)
    else:
        # Every sentence had a single token. torch.stack rejects an empty
        # list, so use an empty slice with the same trailing dims, dtype
        # and device instead of crashing during construction.
        tokens_emb_expanded_t = tokens_emb[:0]

    self.state = State(
        partial_trees,  # type: ignore
        tokens_word_expanded,
        tokens_emb_expanded_t,
        next_token_pos=torch.ones(len(partial_trees),
                                  dtype=torch.int64,
                                  device=device),
        n_step=1,
        batch_idx=batch_idx,
    )
    self.n_step = 1