def logging(logger: Stack, pos, state, action, timestep, reward, next_pos):
    x, y, z = pos[0], pos[1], pos[2]
    nx, ny, nz = next_pos[0], next_pos[1], next_pos[2]
    logger.push([
        state.id,
        state.no,
        action.input_key,
        timestep,
        reward,
        str((x, y, z)) + '->' + str((nx, ny, nz))
    ])
    return
def change_base(number, base):
    digits = "0123456789ABCDEF"
    remStack = Stack()
    b = ''
    if number == 0:
        return '0'  # the remainder loop below never runs for 0
    while number != 0:
        rem = number % base
        remStack.push(rem)
        number = number // base
    while not remStack.is_empty():
        b = b + digits[remStack.pop()]
    return b
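# A quick usage sketch for change_base, assuming the Stack it uses exposes
# push/pop/is_empty as above. Purely illustrative; the digits come out
# most-significant first because the remainders are popped in reverse order.
print(change_base(10, 2))    # '1010'
print(change_base(233, 16))  # 'E9'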
def evaluate(eq):
    # Step 1: Convert to postfix notation
    postfix_eq = convert_to_postfix(eq)
    # print("Postfix Notation:", postfix_eq)

    # Step 2: Evaluate the postfix expression
    equation_list = postfix_eq.split()
    operand_stack = Stack.Stack()
    for token in equation_list:
        if is_operand(token):
            operand_stack.push(token)
        else:
            operand2 = operand_stack.pop()
            operand1 = operand_stack.pop()
            result = calculate(operand1, operand2, token)
            if result is None:
                sys.exit(1)
            else:
                operand_stack.push(result)
    return postfix_eq, operand_stack.pop()
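# evaluate() depends on helpers that are not shown in this section. The two
# definitions below are a hedged sketch of what is_operand and calculate might
# look like (operands arrive as strings; calculate returns None on failure).
# They are assumptions, not the original implementations.
def is_operand(token):
    # Hypothetical helper: anything that parses as a float counts as an operand.
    try:
        float(token)
        return True
    except ValueError:
        return False


def calculate(operand1, operand2, operator):
    # Hypothetical helper: apply a binary operator, returning None on error.
    a, b = float(operand1), float(operand2)
    try:
        if operator == '+':
            return a + b
        if operator == '-':
            return a - b
        if operator == '*':
            return a * b
        if operator == '/':
            return a / b
        if operator == '^':
            return a ** b
        return None
    except ZeroDivisionError:
        return None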
def print_log(logger: Stack, n=20):
    print('')
    print('logs>')
    len_log = len(logger)
    if len_log < 2 * n:
        for log in logger.getTotal():
            log_msg = gen_log_msg(log)
            print(log_msg)
    elif len_log >= 2 * n:
        for log in logger.getTotal(n):
            log_msg = gen_log_msg(log)
            print(log_msg)
        print('\n\n')
        print('A Few Moments Later...')
        print('\n\n')
        for log in logger.getTotal(-n):
            log_msg = gen_log_msg(log)
            print(log_msg)
    return
def test_stack_isEmpty():
    test = Stack()
    assert test.isEmpty() == True
    test.push(1)
    assert test.isEmpty() == False
    test.pop()
    assert test.isEmpty() == True
def check_balance(string):
    '''
    Check to see if we have balanced parentheses.

    Parameters:
    -----------
    string: str. The string containing the text with parentheses that needs to
        be checked for balance.

    Return: Boolean. Tells whether the parentheses are matched or not.
    '''
    parethesis = Stack()
    par_dict = {')': '(', '}': '{', ']': '['}
    for s in string.strip():
        if s in par_dict.values():
            parethesis.push(s)
        elif s in par_dict.keys():
            try:
                t = parethesis.pop()
                if t != par_dict[s]:
                    return False
            except TypeError:
                # pop() on an empty stack raises TypeError in this Stack implementation
                return False
    if parethesis.size() != 0:
        return False
    return True
def save_log(logger: Stack, id, goal_position, state_id, cnt):
    t = time.strftime('%Y%m%d_%H-%M-%S', time.localtime(time.time()))
    gx = int(goal_position[0])
    gz = int(goal_position[2])
    g_pos = f'x{gx}z{gz}'
    path = f'logs/env{id}_gpos{g_pos}/'
    if not os.path.exists(path):
        os.makedirs(path)
    filename = path + f'{state_id}{cnt}_{t}.log'
    with open(filename, 'w') as f:
        for log in logger.getTotal():
            log_msg = gen_log_msg(log)
            f.write(log_msg)
    return
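# logging(), print_log(), and save_log() all format entries through a
# gen_log_msg() helper that is not included here. The sketch below assumes each
# entry is the list pushed by logging() above (state id, state no, input key,
# timestep, reward, position transition); it is an illustration, not the
# original formatter.
def gen_log_msg(log):
    state_id, state_no, input_key, timestep, reward, move = log
    return f't={timestep:4d} | state={state_id}({state_no}) | key={input_key} | r={reward} | {move}\n'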
def balancedparantheses(parantheses_string):
    s = Stack()
    balanced = True
    index = 0
    while index < len(parantheses_string) and balanced:
        symbol = parantheses_string[index]
        if symbol == "(":
            s.push(symbol)
        elif s.isEmpty():
            balanced = False
        else:
            s.pop()
        index += 1
    if balanced and s.isEmpty():
        return True
    else:
        return False
def balancedsymbols(symbol_string):
    s = Stack()
    balanced = True
    index = 0
    while index < len(symbol_string) and balanced:
        symbol = symbol_string[index]
        if symbol in "({[":
            s.push(symbol)
        elif s.isEmpty():
            balanced = False
        else:
            # Only peek once we know the stack is non-empty.
            lastopensymbol = s.peek()
            balanced = symbolchecker(lastopensymbol, symbol)
            if balanced:
                s.pop()
        index += 1
    if balanced and s.isEmpty():
        return True
    else:
        return False
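# balancedsymbols() calls a symbolchecker() helper that is not part of this
# section. One plausible sketch, assuming an opener matches a closer when both
# sit at the same index of their respective strings:
def symbolchecker(open_symbol, close_symbol):
    openers = "({["
    closers = ")}]"
    return openers.index(open_symbol) == closers.index(close_symbol)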
def baseconverter(number, base):
    s = Stack()
    number_string = '0123456789ABCDEF'
    while number > 0:
        remainder = number % base
        s.push(remainder)
        number = number // base
    base_builder = ''
    while not s.isEmpty():
        base_builder = base_builder + number_string[s.pop()]
    return base_builder
def convert_to_postfix(eq):
    # Precedence ('p') and associativity ('a') for each operator.
    paMap = {
        '^': {'p': 4, 'a': 'right'},
        '*': {'p': 3, 'a': 'left'},
        '/': {'p': 3, 'a': 'left'},
        '+': {'p': 2, 'a': 'left'},
        '-': {'p': 2, 'a': 'left'},
        '(': {'p': 1, 'a': 'left'}
    }
    postfix = ''
    i = 0
    op_stack = Stack.Stack()
    while i < len(eq):
        token, i = get_next_token(eq, i)
        if token == ' ':
            continue
        elif is_operand(token):
            postfix += token + ' '
            continue
        else:
            if token == '(':
                op_stack.push(token)
            elif token == ')':
                while op_stack.peek() != '(':
                    postfix += op_stack.pop() + ' '
                op_stack.pop()
            else:
                # print("Stack Empty:", op_stack.isEmpty())
                # print("Stack Top:", op_stack.peek())
                while (not op_stack.isEmpty() and op_stack.peek() != '(' and
                       (paMap[op_stack.peek()]['p'] > paMap[token]['p'] or
                        (paMap[op_stack.peek()]['p'] == paMap[token]['p'] and
                         paMap[token]['a'] == 'left'))):
                    postfix += op_stack.pop() + ' '
                op_stack.push(token)
    # Expression parsing is complete; dump whatever is left on the stack
    # into the postfix expression.
    while not op_stack.isEmpty():
        postfix += op_stack.pop() + ' '
    return postfix
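# Illustrative expectations only, assuming get_next_token() walks the input one
# token at a time and is_operand() accepts single-letter operands:
#
#   convert_to_postfix("A+B*C")   -> "A B C * + "
#   convert_to_postfix("(A+B)*C") -> "A B + C * "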
def test_stack_push():
    test = Stack()
    test.push(1)
    test.push(2)
    assert len(test._contents) == 2
    assert test._contents[0] == 1
def test_stack_empty_pop():
    test = Stack()
    with pytest.raises(TypeError) as empty_pop:
        test.pop()
def test_stack_peek():
    test = Stack()
    test.push(1)
    test.push(2)
    test.push(3)
    assert test.peek() == 3
def test_stack_pop():
    test = Stack()
    test.push('a')
    test.push('b')
    assert test.pop() == 'b'
    assert len(test._contents) == 1
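# The pytest tests above (and the bracket-matching and base-conversion snippets
# earlier) assume a small list-backed Stack whose source is not shown. The
# class below is a minimal sketch consistent with the observed usage, including
# pop() raising TypeError on an empty stack; method naming varies between
# snippets (isEmpty vs. is_empty), so treat this as an assumption rather than
# the original implementation.
class Stack:

    def __init__(self):
        self._contents = []

    def isEmpty(self):
        return len(self._contents) == 0

    def push(self, item):
        self._contents.append(item)

    def pop(self):
        if self.isEmpty():
            # test_stack_empty_pop expects TypeError rather than IndexError.
            raise TypeError('pop from an empty stack')
        return self._contents.pop()

    def peek(self):
        return None if self.isEmpty() else self._contents[-1]

    def size(self):
        return len(self._contents)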
class Environment:
    # agent                : agent object
    # map_info             : 2d array representing the current map's heights
    # goal_position        : position object
    # num_states           : number of states
    # num_actions          : number of actions
    # state_ids            : {'state_kind': id}
    # action_ids           : {'key_input': id}
    # consume_stamina_info : stamina consumption amount / fps with respect to action id (integer)
    # fall_damage          : damage that reduces the agent's HP when it falls
    # fall_min_height      : the minimum fall height that causes damage
    # MAX_timestep         : cutting size
    # MAX_stamina          : maximum stamina value the agent can have
    # unit_time            : time until the next action is gained (seconds)
    # parachute_height     : minimum height at which the agent can unfold its parachute
    def __init__(self, id, agent, map_info, goal_position,
                 num_states, num_actions, state_ids, action_ids,
                 fall_damage, fall_min_height,
                 MAX_timestep=256, MAX_stamina=200, unit_time=1.3,
                 parachute_height=5, gravitial_acc=9.8,
                 climb_angle=math.pi/3, gliding_down=10.0):
        self.id = id
        self.initial_agent = agent
        self.agent = Agent.from_agent(agent)
        # State(remained_distance, state_id, state_no, spend_time=0)
        self.initial_state = State(EuclideanDistance(self.initial_agent.get_current_position(), goal_position),
                                   state_id='field', state_no=len_stamina_area - 1)
        self.state = State.from_state(self.initial_state)
        self.map_info = map_info
        self.goal_position = goal_position
        self.num_states = num_states
        self.num_actions = num_actions
        self.state_ids = state_ids
        self.action_ids = action_ids
        self._keys_ = list(self.action_ids.keys())
        self.action_probs = softmax(np.ones(len(action_ids)))
        self.action_probs_vWall = softmax(np.ones(3))
        #self.consume_stamina_info = consume_stamina_info
        self.fall_damage = fall_damage
        self.fall_min_height = fall_min_height
        self.MAX_timestep = MAX_timestep
        self.MAX_stamina = MAX_stamina
        self.unit_time = unit_time
        self.parachute_height = parachute_height
        self.g = np.array([0., -gravitial_acc, 0.])
        self.climb_angle = climb_angle
        self.gliding_down = np.array([0., gliding_down, 0.])
        self.dataset = []
        self.death_cnt = 0
        self.goal_cnt = 0
        self.TL_cnt = 0
        self.logs = Stack()

    def convert_agent(self, agent):
        self.initial_agent = agent
        self.agent = Agent.from_agent(agent)

    def convert_map_info(self, map_info, goal_position):
        self.map_info = map_info
        self.goal_position = goal_position

    def isGoal(self, pos):
        d = EuclideanDistance(self.goal_position, pos)
        return d <= math.sqrt(3) / 2  # sphere inscribed in a cube with side=1

    def inBound(self, x, z):
        return not (x < 0 or x >= len(self.map_info) or z < 0 or z >= len(self.map_info[0]))

    def isWall(self, x1, z1, x2, z2):
        x1 = int(x1)
        z1 = int(z1)
        x2 = int(x2)
        z2 = int(z2)
        tangent = (self.map_info[x1, z1] - self.map_info[x2, z2])
        e = EuclideanDistance(np.array([x1, 0, z1]), np.array([x2, 0, z2]))
        if e == 0 and self.map_info[x1, z1] != self.map_info[x2, z2]:
            return True
        angle = np.arctan(abs(tangent))
        return angle >= self.climb_angle

    def canParachute(self, pos):
        x, y, z = pos[0], pos[1], pos[2]
        return y - self.map_info[int(x), int(z)] >= self.parachute_height

    def calc_fall_damage(self, y, ny):
        fall_height = y - ny - self.fall_min_height
        return 0 if fall_height <= 0 else fall_height * self.fall_damage

    def cal_next_pos(self, state, action):
        agent = Agent.from_agent(self.agent)  # copy the current agent object
        next_pos = agent.get_current_position()
        next_state_id = state.id
        if state.id == 'death' or state.id == 'goal' or ((state.id == 'field' or state.id == 'wall') and action.input_key == 'Wait'):
            return state.id, agent
        elif state.id == 'air' and 'j' in action.input_key:
            # currently in the 'air' state and the incoming action includes the jump key
            next_state_id = 'parachute'
            return next_state_id, agent  # parachute mode on
        elif state.id == 'parachute' and 'j' in action.input_key:
            next_state_id = 'air'
            return next_state_id, agent
        elif state.id == 'field' and 'j' in action.input_key:
            next_state_id = 'air'

        # split the motion into 60 frames for collision testing
        t = action.acting_time / 60
        st = action.stamina_consume / 60
        next_stamina = agent.stamina
        prev_pos = agent.pos  # DO NOT let this be updated
        v_xz = np.array([action.velocity[0], 0., action.velocity[2]])  # x-z component
        v_y = np.array([0., action.velocity[1], 0.])  # y component

        def goFront(pos, dir, cof):
            yangshim_cnt = 0
            nx, nz = pos[0], pos[2]
            while yangshim_cnt < 1000:
                nx += 0.05 * dir[0] * cof
                nz += 0.05 * dir[2] * cof
                yangshim_cnt += 1
                if int(pos[0]) != int(nx) or int(pos[2]) != int(nz):
                    break
            return np.array([nx, pos[1], nz])

        def isNextField(pos: np.ndarray, dir: np.ndarray, key_input='W') -> bool:
            cof = 1 if 'W' in key_input else (-1 if 'S' in key_input else 0)
            next_pos = goFront(pos, dir, cof)
            if not self.inBound(next_pos[0], next_pos[2]):
                return False
            ny = self.map_info[int(pos[0]), int(pos[2])]
            c = EuclideanDistance(np.array([int(next_pos[0]), ny, int(next_pos[2])]), pos)
            a = EuclideanDistance(np.array([int(next_pos[0]), 0, int(next_pos[2])]), np.array([pos[0], 0, pos[2]]))
            angle = math.pi / 2 if a == 0 else math.acos(a / c)
            return angle < self.climb_angle

        if next_state_id == 'wall':
            # Rather than working in the y-z plane of the agent's basis-vector space,
            # handle this as usual but assume the direction has been rotated toward the y-axis.
            # Sideways movement is not possible.
            for _ in range(60):
                next_pos += v_y * t
                if next_stamina > 0 and st >= 0:
                    next_stamina -= st
                if next_stamina <= 0:
                    # stamina ran out while climbing; switch to the 'air' state
                    next_stamina = 0
                    next_state_id = 'air'
                    agent.pos = next_pos
                    agent.stamina = next_stamina
                    return next_state_id, agent
            y_hat = next_pos[1]
            if 'W' in action.input_key and y_hat > self.map_info[int(next_pos[0]), int(next_pos[2])]:
                a = abs(y_hat - prev_pos[1])
                _next_pos_ = goFront(next_pos, agent.dir, 1)
                b = abs(_next_pos_[1] - y_hat)
                next_pos = interpolate(next_pos, _next_pos_, a, b)
            elif 'S' in action.input_key:
                _next_pos_ = goFront(next_pos, agent.dir, -1)
                if _next_pos_[1] < self.map_info[int(_next_pos_[0]), int(_next_pos_[2])]:
                    a = abs(y_hat - prev_pos[1])
                    b = abs(_next_pos_[1] - y_hat)
                    next_pos = interpolate(next_pos, _next_pos_, a, b)
            if isNextField(next_pos, agent.dir, action.input_key) == True:
                next_state_id = 'field'
        # if wall end
        else:
            for _ in range(60):
                next_pos = next_pos + (v_xz + v_y) * t
                if next_state_id == 'air':
                    v_y += self.g * t
                x, y, z = next_pos[0], next_pos[1], next_pos[2]
                if not self.inBound(x, z):
                    next_state_id = 'death'
                    break
                next_stamina -= st
                if next_stamina < 1:
                    next_stamina = 0
                elif next_stamina >= self.MAX_stamina:
                    next_stamina = self.MAX_stamina
                if self.isWall(prev_pos[0], prev_pos[2], x, z) == True:
                    next_state_id = 'wall'
                    break
                if y <= self.map_info[int(x), int(z)]:
                    next_pos[1] = self.map_info[int(x), int(z)]
                    if next_state_id == 'air':
                        damage = self.calc_fall_damage(y=prev_pos[1], ny=next_pos[1])
                        agent.HP -= damage
                        if agent.HP < 1:
                            agent.HP = 0
                            next_state_id = 'death'
                        else:
                            next_state_id = 'field'
                        break
                    elif next_state_id == 'parachute':
                        next_state_id = 'field'
                        break
                    elif next_state_id == 'field':
                        # walking down a slope
                        """if isNextField(next_pos, agent.dir, action.input_key) == True:
                            next_state_id = 'field'
                        else:"""
                        next_state_id = 'air'
                        #break
                #prev_pos = np.copy(next_pos)
        agent.stamina = int(next_stamina)
        agent.pos = next_pos
        agent.action = action  # ref
        return next_state_id, agent

    def state_transition(self, state, action):
        if state.id == 'death' or state.id == 'goal':
            return state, Agent.from_agent(self.agent)
        next_state_id, agent = self.cal_next_pos(state, action)
        next_pos = agent.pos
        remained_distance = EuclideanDistance(next_pos, self.goal_position)
        next_state_no = state.no
        if next_state_id != 'death' and next_state_id != 'goal':
            for i in range(len_stamina_area):
                if agent.stamina / self.MAX_stamina * 100.0 <= stamina_area[i]:
                    next_state_no = state_maps[next_state_id] + i
                    break
        if self.isGoal(next_pos) == True:
            next_state_id = 'goal'
        next_state = State(remained_distance, next_state_id, next_state_no,
                           spend_time=state.spend_time + action.acting_time)
        return next_state, agent

    def get_random_action(self):
        key = np.random.randint(self.num_actions)
        return self._keys_[key], self.action_ids[self._keys_[key]]

    def update_softmax_prob(self, idx, kind='general'):
        if kind == 'general':
            self.action_probs[idx] *= 1.2
            self.action_probs = softmax(self.action_probs)
        else:
            self.action_probs_vWall[idx] *= 1.2
            self.action_probs_vWall = softmax(self.action_probs_vWall)

    def get_softmax_action_vWall(self, before_key_input='W'):
        _keys = ['W', 'S', 'Wj']
        _idx = {'W': 0, 'S': 1, 'Wj': 2}
        r = np.random.random()
        k = 0
        for i in range(len(self.action_probs_vWall)):
            key_input = _keys[i]
            key_id = self.action_ids[key_input]
            if self.action_probs_vWall[i] + k > r and r >= k:
                self.update_softmax_prob(idx=_idx[key_input], kind='wall')
                return key_input, key_id
            k += self.action_probs_vWall[i]  # accumulate the probability mass (mirrors get_softmax_action)
        self.update_softmax_prob(idx=_idx[before_key_input], kind='wall')
        return before_key_input, self.action_ids[before_key_input]

    def get_softmax_action(self, before_key_input, excepts=[], only=[]):
        if len(only) > 0:
            p = softmax(np.ones(len(only)))
            r = np.random.random()
            k = 0
            for i in range(len(only)):
                key_input = only[i]
                key_id = self.action_ids[key_input]
                if p[i] + k > r and r >= k:
                    self.update_softmax_prob(idx=key_id)
                    return key_input, key_id
                k += p[i]
            key_input = only[-1]
            key_id = self.action_ids[key_input]
            self.update_softmax_prob(idx=key_id)
            return key_input, key_id
        only = list(set(self._keys_) - set(excepts))
        key_input, key_id = before_key_input, self.action_ids[before_key_input]
        r = np.random.random()
        k = 0
        for i in range(len(only)):
            key_input = only[i]
            key_id = self.action_ids[key_input]
            if self.action_probs[key_id] + k > r and r >= k:
                self.update_softmax_prob(idx=key_id)
                return key_input, key_id
            k += self.action_probs[key_id]
        key_input = only[-1]
        key_id = self.action_ids[key_input]
        self.update_softmax_prob(idx=key_id)
        return key_input, key_id

    def reward(self, state, action):
        next_state, agent = self.state_transition(state, action)
        deltaDistance = next_state.remained_distance - state.remained_distance
        return -deltaDistance, next_state, agent

    # action is an ndarray vector
    def step(self, action):
        action = cnv_action_vec2obj(action)
        reward, state, agent = self.reward(self.state, action)
        done = (state.id == 'goal')
        return state.get_state_vector(), reward, done, agent

    def get_valid_action_list(self, state_id, stamina):
        if state_id == 'field':
            if stamina <= 0:
                return list(set(self.action_ids)
                            - set([key for key in self.action_ids if 's' in key])
                            - set([key for key in self.action_ids if 'j' in key]))
            elif stamina >= self.MAX_stamina:
                return list(set(list(self.action_ids.keys())) - set(['Wait']))
            else:
                return list(self.action_ids.keys())
        elif state_id == 'air' and stamina > 0:
            return ['Wait', 'j']
        elif state_id == 'wall' and stamina > 0:
            return ['Wait', 'W', 'S', 'Wj']
        elif state_id == 'parachute' and stamina > 0:
            return list(set(self.action_ids) - set([key for key in self.action_ids if 's' in key]))
        return ['Wait']

    def make_scenarios(self, n: int = 10, threshold=MINUS_INF, log_printing=False):
        #tle_cnt = 0
        #scenario = []
        #task_no = 1
        #death_cnt = 0
        print(f'{self.id} - initialized to make scenarios')
        print(f'Max time step={self.MAX_timestep}')
        print(f'objective: find {n} paths')
        # while complete < n:
        # initialize
        scene = dict()
        self.reset()
        # state: the local object currently being examined
        # self.state: the persistent state used to calculate several things
        state = State.from_state(self.initial_state)
        action = Action(action_id=self.action_ids['Wait'], velocity=np.array([0., 0., 0.]))
        self.agent.action.Update(action)
        scene['observations'] = Stack()
        scene['actions'] = Stack()
        scene['rewards'] = Stack()
        scene['timesteps'] = Stack()
        #time_out = False
        #next_key_input, next_action_id = 'Wait', 0

        def _save_scene_(scene, state_id, postfix):
            time_t = time.strftime('%Y%m%d_%H-%M-%S', time.localtime(time.time()))
            path = f'pkl/scenario/{state_id}/env_{self.id}/'
            if not os.path.exists(path):
                os.makedirs(path)
            scene_filename = path + f'{time_t}_{postfix}.scn'
            save_scene = {}
            for K, V in scene.items():  # K: observations, actions, rewards, timesteps
                save_scene[K] = np.array(V.getTotal())  # list of numpy arrays
                scene[K].pop()  # roll back the last step
            with open(scene_filename, 'wb') as f:
                pickle.dump(save_scene, f)
            return

        visit = np.zeros((25, 101, 101), dtype=bool)

        def isVisited(pos) -> bool:
            x, y, z = int(pos[0]), int(pos[1]), int(pos[2])
            if y > 20:
                return True
            return visit[y, x, z]

        def check_visit(pos, check=True) -> None:
            x, y, z = int(pos[0]), int(pos[1]), int(pos[2])
            visit[y, x, z] = check
            return

        def stepDFS(timestep, state: State, action: Action):
            # assume the start position is never already the goal before generating data
            if self.goal_cnt >= n:
                return
            if timestep > self.MAX_timestep:  # time over
                # It works well even when the scene is empty
                print('Time over')
                scene['observations'].pop()
                state.id = 'death'  # fixed-step timeout
                scene['observations'].push(state.get_state_vector())
                scene['rewards'].push(MINUS_INF)
                scene['timesteps'].push(timestep)
                if 'terminals' not in scene:
                    scene['terminals'] = Stack()
                scene['terminals'].push(MINUS_INF)
                # save point
                if self.TL_cnt < 10:
                    self.TL_cnt += 1
                    _save_scene_(scene, state.id, self.TL_cnt)
                    logging(self.logs, self.agent.pos, state, action, timestep, MINUS_INF, self.agent.pos)
                    save_log(logger=self.logs, id=self.id, goal_position=self.goal_position,
                             state_id='TL', cnt=self.TL_cnt)
                    delogging(self.logs)
                for K in scene.keys():
                    scene[K].pop()
                check_visit(self.agent.pos, check=False)
                return
            if timestep > 1 and action.input_key == 'Wait' and (self.agent.stamina >= self.MAX_stamina or state.id == 'wall'):
                # an action that gains nothing
                return
            scene['observations'].push(state.get_state_vector())
            scene['actions'].push(action.get_action_vector())
            action = action.get_action_vector()
            # step
            ns, r, d, agent = self.step(action)
            # npos is used to update the agent and to decide whether it can unfold the parachute
            ns = cnv_state_vec2obj(ns)
            action = cnv_action_vec2obj(action)
            scene['rewards'].push(r)
            scene['timesteps'].push(timestep)
            if r < threshold:
                for K in scene.keys():
                    scene[K].pop()
                return
            else:
                check_visit(agent.pos)
                logging(self.logs, self.agent.pos, state, action, timestep, r, agent.pos)
            if d == True:  # same as reaching the goal
                # save point
                self.goal_cnt += 1
                if 'dones' not in scene:
                    scene['dones'] = Stack()
                scene['dones'].push(r)
                _save_scene_(scene, 'goal', self.goal_cnt)
                print(f'env{self.id} found out {self.goal_cnt} path(s)!')
                save_log(logger=self.logs, id=self.id, goal_position=self.goal_position,
                         state_id='G', cnt=self.goal_cnt)
                delogging(self.logs)
                for K in scene.keys():
                    scene[K].pop()
                check_visit(agent.pos, check=False)
                return
            elif ns.id == 'death':
                # save point
                if 'terminals' not in scene:
                    scene['terminals'] = Stack()
                scene['terminals'].push(MINUS_INF)
                if self.death_cnt < 50:
                    self.death_cnt += 1
                    print(f'You Died - {self.id}')
                    _save_scene_(scene, 'death', self.death_cnt)
                    save_log(logger=self.logs, id=self.id, goal_position=self.goal_position,
                             state_id='D', cnt=self.death_cnt)
                    delogging(self.logs)
                for K in scene.keys():
                    scene[K].pop()
                check_visit(agent.pos, check=False)
                return
            action_list = self.get_valid_action_list(ns.id, agent.stamina)
            np.random.shuffle(action_list)
            # PRINT_DEBUG
            if log_printing == True:
                print(f'state: {state.id}->{ns.id}')
                print(f'action: {action.input_key}')
                print(f'agent: {self.agent.pos}->{agent.pos}')
                print(f'valid key list: {action_list}')
            for next_action_key_input in action_list:
                passing_agent = Agent.from_agent(agent)
                if ns.id == 'air' and 'j' in next_action_key_input:
                    if passing_agent.stamina <= 0 or self.canParachute(passing_agent.pos) == False:
                        continue
                elif ns.id == 'wall' and state.id != 'wall':
                    passing_agent.update_direction(action.velocity)
                velocity, stamina_consume, acting_time, given = get_next_action(
                    ns.id, next_action_key_input, self.action_ids[next_action_key_input],
                    prev_velocity=action.velocity)
                next_action = Action.from_action(action)
                next_action.action_update(self.action_ids[next_action_key_input], next_action_key_input,
                                          stamina_consume, acting_time, passing_agent.dir, velocity, given)
                self.agent.Update(passing_agent)
                self.agent.update_action(next_action)
                self.state.Update(ns)
                stepDFS(timestep + 1, state=ns, action=next_action)
                self.agent.Update(agent)
                self.state.Update(state)
                if self.goal_cnt >= n:
                    break
            for K in scene.keys():
                scene[K].pop()
            delogging(self.logs)
            check_visit(self.agent.pos, check=False)
            return

        stepDFS(timestep=1, state=state, action=action)

        """
        for t in range(self.MAX_timestep):
            # PRINT_DEBUG
            if log_printing == True:
                print(f'before: s_id={self.state.id}, pos={self.agent.pos}')
            # step
            action = action.get_action_vector()
            ns, r, done, next_agent = self.step(action)
            action = cnv_action_vec2obj(action)
            next_pos = next_agent.get_current_pos()
            logging(self.logs, self.agent.pos, self.state, action, timestep=t+1, reward=r, next_pos=next_pos)
            # PRINT_DEBUG
            if log_printing == True:
                print(f'after : s_id={ns.id}, pos={next_pos}, action={action.input_key}')
                print('='*50)
            scene['rewards'].push(r)
            scene['timesteps'].push(ns.spend_time)
            if done == True:
                if 'dones' not in scene:
                    scene['dones'] = Stack()
                scene['dones'].push(r)
            elif t == self.MAX_timestep - 1:
                tle_cnt += 1
                time_out = True
                print(f'Time over. - {task_no}')
                #print('failed:agent({}) / goal({})'.format(self.agent.get_current_position(), self.goal_position))
                '''if 'terminals' not in scene:
                    scene['terminals'] = []
                scene['terminals'].append(1)'''
                break
            # calculate the next situation
            state = ns  # ok
            if ns.id == 'death' or ns.id == 'goal':
                #scenario.append(scene)
                if ns.id == 'death':
                    death_cnt += 1
                    #print(f'You Died. - {task_no}')
                    scene['terminals'] = [1]
                    scene['rewards'][-1] = r = -999999
                break
            #scenario.append(scene)
            # Randomly generate the next action and initialize the basic parameters.
            next_key_input, next_action_id = self.get_softmax_action(before_key_input=next_key_input)
            stamina_consume = base_stamina_consume  # recovery amount, -4.8
            acting_time = base_acting_time  # 1.3 sec
            # Adjust the parameter values case by case.
            velocity = None
            given = 'None'
            if ns.id == 'air':
                stamina_consume = 0  # no recovery, no consumption
                if self.canParachute(next_pos) == True:
                    next_key_input, next_action_id = self.get_softmax_action(before_key_input=next_key_input, only=['Wait', 'j'])
                else:
                    next_key_input, next_action_id = 'Wait', self.action_ids['Wait']
                    self.update_softmax_prob(idx=next_action_id)
            elif ns.id == 'field':
                if 's' in next_key_input:  # sprint
                    stamina_consume = 20
                    acting_time = 1
                if 'j' in next_key_input:
                    stamina_consume = 1 if stamina_consume == base_stamina_consume else stamina_consume + 1
            elif ns.id == 'wall':
                stamina_consume = 10
                if self.state.id != 'wall':
                    self.agent.update_direction(action.velocity)  # change direction in the x-z plane
                # Can only be W, S, and Wj
                next_key_input, next_action_id = self.get_softmax_action_vWall()
                given = 'wall'
                if 'W' in next_key_input:
                    velocity = np.array([0., 1., 0.])
                else:  # 'S'
                    velocity = np.array([0., -1., 0.])
                if 'j' in next_key_input:
                    stamina_consume = 25
                    velocity *= 2
            elif ns.id == 'parachute':
                next_key_input, next_action_id = self.get_softmax_action(next_key_input, only=['W', 'A', 'S', 'D', 'WA', 'WD', 'SA', 'SD', 'j'])
                stamina_consume = 2
                given = 'parachute'
            # Note: see the parameter table for each concrete value
            self.state.Update(ns)
            self.agent.update_position(next_pos)
            # The return value of action_update is newly constructed, so this is okay.
            action.action_update(next_action_id, next_key_input, stamina_consume, acting_time,
                                 self.agent.dir, velocity=velocity, given=given)
            self.agent.action.Update(action)
            scene['observations'].append(self.state.get_state_vector())
            scene['actions'].append(action.get_action_vector())
        # steps ended.
        if log_printing == True:
            print_log()
        for key in scene.keys():
            if key != 'observations' and key != 'actions':
                scene[key] = np.array(scene[key])  # make {key: np.array(), ...}
        #scenario.append(scene)
        # save each scene to its own file instead of memorizing scenes in a scenario array
        if not time_out and (ns.id == 'goal' or death_cnt <= 95):
            time_t = time.strftime('%Y%m%d_%H-%M-%S', time.localtime(time.time()))
            path = f'pkl/scenario/{ns.id}/env_{self.id}/'
            if not os.path.exists(path):
                os.makedirs(path)
            scene_filename = path + f'{time_t}.scn'
            for scene_key in scene:
                scene[scene_key] = np.array(scene[scene_key])
            with open(scene_filename, 'wb') as f:
                pickle.dump(scene, f)
            if ns.id == 'goal':
                complete += 1
                print(f'complete - {complete} / {n}')
            save_log(self.logs, self.id, self.goal_position, task_no)
        if log_printing == True:
            print_log()
        """
        # self.dataset.append(scenario)  # Probably unused
        print(f'env{self.id} succeeded with {self.goal_cnt}.')
        self.logs.clear()
        for K in scene.keys():
            scene[K].clear()
        return self.goal_cnt
        # function make_scenarios end.

    def get_dataset(self):
        return self.dataset

    def reset(self, dataset_initialize=False):
        # print('action_id["Wait"] =', self.action_ids['Wait'])
        self.action_probs = softmax(np.ones(len(self.action_ids)))
        self.action_probs_vWall = softmax(np.ones(3))
        self.agent.Update(self.initial_agent)
        self.state.Update(self.initial_state)
        self.logs.clear()
        self.goal_cnt = self.death_cnt = self.TL_cnt = 0
        if dataset_initialize == True:
            self.dataset = []
        return self.state.get_state_vector()
class StackTest(unittest.TestCase):

    def setUp(self):
        self.s = Stack()

    def tearDown(self):
        while not self.s.isEmpty():
            self.s.pop()

    def test_push(self):
        value = 'A'
        self.s.push(value)
        self.assertEqual(1, self.s.size())
        print(self.s.get())
        x = self.s.pop()
        print(self.s.get())
        self.assertEqual(value, x)
        self.assertEqual(0, self.s.size())

    def test_peek(self):
        self.s.push('A')
        self.s.push('B')
        x = self.s.peek()
        self.assertEqual('B', x)
        self.assertEqual(2, self.s.size())

    def test_empty(self):
        self.assertEqual(self.s.peek(), None)
def delogging(logger: Stack):
    # Drop the most recent log entry (the inverse of logging()).
    logger.pop()
    return
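# The logging helpers and the Environment class rely on a richer Stack than the
# exercise snippets: it supports len(), getTotal() (all entries, the first n,
# or the last n), clear(), push(), and pop(). Its source is not included here;
# the class below is a hedged sketch inferred from how those methods are
# called, not the original implementation.
class Stack:

    def __init__(self):
        self._items = []

    def __len__(self):
        return len(self._items)

    def push(self, item):
        self._items.append(item)

    def pop(self):
        return self._items.pop() if self._items else None

    def getTotal(self, n=None):
        # getTotal()   -> every entry, oldest first
        # getTotal(n)  -> the first n entries
        # getTotal(-n) -> the last n entries
        if n is None:
            return list(self._items)
        return self._items[:n] if n >= 0 else self._items[n:]

    def clear(self):
        self._items = []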