import argparse

import one_to_one

# `is_adjacent`, `sfmt`, and `afmt` are defined elsewhere in this file; a
# hypothetical sketch of them follows this function.


def main():
    parser = argparse.ArgumentParser(description='FloatReset')
    parser.add_argument('--gamma', type=float, default=0.99)
    config = parser.parse_args()

    pos_space = one_to_one.RangeSpace(5)
    state_space = one_to_one.NamedTupleSpace(old=pos_space, new=pos_space)
    state_space = one_to_one.SubSpace(state_space, is_adjacent)

    actions = 'float', 'reset_'
    action_space = one_to_one.DomainSpace(actions)

    obs_space = one_to_one.RangeSpace(2)

    print(
        """# Float/Reset Environment;
# @inproceedings{littman_predictive_2002,
#   title = {Predictive representations of state},
#   booktitle = {Advances in neural information processing systems},
#   author = {Littman, Michael L. and Sutton, Richard S.},
#   year = {2002},
#   pages = {1555--1561},
# }
# State-space (5) : current position.
# Action-space (2) : `float` and `reset_`.
# Observation-space (2) : 0 and 1."""
    )
    print()
    print('# This specific file was generated with parameters:')
    print(f'# {config}')
    print()

    print(f'discount: {config.gamma}')
    print('values: reward')
    print(f'states: {" ".join(sfmt(s) for s in state_space.elems())}')
    print(f'actions: {" ".join(afmt(a) for a in action_space.elems())}')
    print(f'observations: {len(obs_space)}')

    # START
    print()
    s = state_space.elem(0)
    s.value = s.value._replace(old=0, new=0)
    print(f'start: {sfmt(s)}')

    # TRANSITIONS
    print()
    # `reset_` moves deterministically to position 0
    a = action_space.elem(value='reset_')
    for s in state_space.elems():
        s1 = state_space.elem(value=s.value._replace(old=s.value.new, new=0))
        print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

    # `float` moves one step left or right with equal probability; at the
    # boundaries, the invalid move keeps the current position instead
    a = action_space.elem(value='float')
    s1 = state_space.elem(0)  # scratch element, mutated below
    for s in state_space.elems():
        try:
            s1.value = s.value._replace(old=s.value.new, new=s.value.new - 1)
        except ValueError:
            s1 = state_space.elem(value=s.value._replace(old=s.value.new))
        print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 0.5')

        try:
            s1.value = s.value._replace(old=s.value.new, new=s.value.new + 1)
        except ValueError:
            s1 = state_space.elem(value=s.value._replace(old=s.value.new))
        print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 0.5')

    # OBSERVATIONS
    print()
    # default: observe 0; observe 1 only after a `reset_` from position 0
    print('O: *: * 1.0 0.0')
    a = action_space.elem(value='reset_')
    s1 = state_space.elem(0)
    s1.value = s1.value._replace(old=0, new=0)
    print(f'O: {afmt(a)}: {sfmt(s1)} 0.0 1.0')

    # REWARDS
    print()
    a = action_space.elem(value='reset_')
    for s in state_space.elems():
        print(f'R: {afmt(a)}: {sfmt(s)}: *: * {s.value.new:.1f}')
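# A minimal hypothetical sketch of the helpers assumed above.  The real
# definitions live elsewhere in this file; the predicate's argument and the
# formatters' output tokens are assumptions inferred from how they are used.


def is_adjacent(value):
    # (old, new) is reachable in one step: `float` moves at most one
    # position, `reset_` jumps to 0
    return abs(value.new - value.old) <= 1 or value.new == 0


def sfmt(s):
    # unique token per state, e.g. 's30' for (old=3, new=0)
    return f's{s.value.old}{s.value.new}'


def afmt(a):
    return a.value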
def new_space():
    return one_to_one.DomainSpace('abc')
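# Quick check of what this fixture enumerates (hypothetical: assumes
# `DomainSpace` preserves iteration order and `elems()` yields wrapper
# elements with a `.value` attribute, as used throughout the generators):
space = new_space()
assert len(space) == 3
assert [e.value for e in space.elems()] == ['a', 'b', 'c']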
import argparse
import math
from copy import copy

import one_to_one

# `sfmt`, `afmt`, and `ofmt` are formatting helpers defined elsewhere in this
# file, analogous to the FloatReset sketch above.


def main():
    parser = argparse.ArgumentParser(description='RockSample')
    parser.add_argument('n', type=int)
    parser.add_argument('k', type=int)
    parser.add_argument('--gamma', type=float, default=0.95)
    config = parser.parse_args()

    assert config.n > 1
    assert config.k > 0

    if config.n == 5 and config.k == 6:
        # #######
        # #  R  #
        # #R  R #
        # #A    #
        # # RR  #
        # #    R#
        # #######
        base, d0 = 2, 20
        rock_positions = [(0, 1), (1, 3), (2, 0), (2, 3), (3, 1), (4, 4)]
    elif config.n == 7 and config.k == 8:
        # #########
        # #  R    #
        # #R  R   #
        # #       #
        # #A     R#
        # #  RR   #
        # #     R #
        # # R     #
        # #########
        base, d0 = 2, 20
        rock_positions = [
            (0, 1),
            (1, 6),
            (2, 0),
            (2, 4),
            (3, 1),
            (3, 4),
            (5, 5),
            (6, 3),
        ]
    elif config.n == 11 and config.k == 11:
        # #############
        # #           #
        # #      R    #
        # #           #
        # #R  RR    R #
        # #  R        #
        # #A          #
        # #           #
        # #R          #
        # # R R R     #
        # #         R #
        # #           #
        # #############
        base, d0 = 8, 20
        rock_positions = [
            (0, 3),
            (0, 7),
            (1, 8),
            (2, 4),
            (3, 3),
            (3, 8),
            (4, 3),
            (5, 8),
            (6, 1),
            (9, 3),
            (9, 9),
        ]
    else:
        raise ValueError(f'Invalid sizes (n={config.n}, k={config.k})')

    pos_space = one_to_one.NamedTupleSpace(
        x=one_to_one.RangeSpace(config.n), y=one_to_one.RangeSpace(config.n)
    )
    rock_space = one_to_one.BoolSpace()
    rocks_space = one_to_one.TupleSpace(*[rock_space] * config.k)
    state_space = one_to_one.NamedTupleSpace(pos=pos_space, rocks=rocks_space)

    actions = ['N', 'S', 'E', 'W', 'sample'] + [
        f'check_{i}' for i in range(config.k)
    ]
    action_space = one_to_one.DomainSpace(actions)

    obs = ['none', 'good', 'bad']
    obs_space = one_to_one.DomainSpace(obs)

    print('# This specific file was generated with parameters:')
    print(f'# {config}')
    print()

    print(f'discount: {config.gamma}')
    print('values: reward')
    print(f'states: {" ".join(sfmt(s) for s in state_space.elems())}')
    print(f'actions: {" ".join(afmt(a) for a in action_space.elems())}')
    print(f'observations: {" ".join(ofmt(o) for o in obs_space.elems())}')

    # the agent starts on the west edge, halfway up the grid
    start_states = [
        s
        for s in state_space.elems()
        if s.pos.x.value == 0 and s.pos.y.value == config.n // 2
    ]

    # START
    print()
    print(f'start include: {" ".join(sfmt(s) for s in start_states)}')

    # TRANSITIONS
    print()
    for a in action_space.elems():
        # default to the identity transition, then override the affected
        # states: zero out the self-transition and add the actual move
        print(f'T: {afmt(a)} identity')

        if a.value == 'N':
            for s in state_space.elems():
                if s.pos.y.value < config.n - 1:
                    s1 = copy(s)
                    s1.pos.y.value += 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')
        elif a.value == 'S':
            for s in state_space.elems():
                if s.pos.y.value > 0:
                    s1 = copy(s)
                    s1.pos.y.value -= 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')
        elif a.value == 'E':
            for s in state_space.elems():
                # moving east off the grid ends the episode
                if s.pos.x.value == config.n - 1:
                    print(f'T: {afmt(a)}: {sfmt(s)} reset')
                else:
                    s1 = copy(s)
                    s1.pos.x.value += 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')
        elif a.value == 'W':
            for s in state_space.elems():
                if s.pos.x.value > 0:
                    s1 = copy(s)
                    s1.pos.x.value -= 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')
        elif a.value == 'sample':
            for s in state_space.elems():
                try:
                    rock_i = rock_positions.index(
                        (s.pos.x.value, s.pos.y.value)
                    )
                except ValueError:
                    pass
                else:
                    # sampling a good rock turns it bad
                    if s.rocks[rock_i].value:
                        s1 = copy(s)
                        s1.rocks[rock_i].value = False
                        print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                        print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')
        elif a.value.startswith('check_'):
            pass  # no state-transition

    # OBSERVATIONS
    print()
    print('O: *: *: none 1.0')

    for a in action_space.elems():
        if a.value.startswith('check_'):
            print(f'O: {afmt(a)}: *: none 0.0')

            rock_i = int(a.value[len('check_'):])
            rock_pos = rock_positions[rock_i]
            for s1 in state_space.elems():
                rock_good = bool(s1.rocks[rock_i].value)

                pos = s1.pos.x.value, s1.pos.y.value
                dist = math.sqrt(
                    (pos[0] - rock_pos[0]) ** 2 + (pos[1] - rock_pos[1]) ** 2
                )
                # sensor accuracy decays from 1.0 towards 0.5 with distance
                efficiency = base ** (-dist / d0)
                pcorrect = 0.5 * (1 + efficiency)
                pgood = pcorrect if rock_good else 1 - pcorrect
                print(f'O: {afmt(a)}: {sfmt(s1)}: good {pgood:.6f}')
                print(f'O: {afmt(a)}: {sfmt(s1)}: bad {1 - pgood:.6f}')

    # REWARDS
    print()
    for a in action_space.elems():
        if a.value == 'E':
            # exiting east is rewarded
            for s in state_space.elems():
                if s.pos.x.value == config.n - 1:
                    print(f'R: {afmt(a)}: {sfmt(s)}: *: * 10.0')
        elif a.value == 'sample':
            # TODO how to handle -100.0 actions, like bumping into a wall?
            # sampling costs -10.0 by default; a good rock overrides it
            print(f'R: {afmt(a)}: *: *: * -10.0')
            for s in state_space.elems():
                try:
                    rock_i = rock_positions.index(
                        (s.pos.x.value, s.pos.y.value)
                    )
                except ValueError:
                    pass
                else:
                    if s.rocks[rock_i].value:
                        print(f'R: {afmt(a)}: {sfmt(s)}: *: * 10.0')
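# To make the check-sensor model above concrete: efficiency = base**(-dist/d0)
# is 1 at distance 0 and decays toward 0, so pcorrect interpolates between a
# perfect sensor (1.0) and a coin flip (0.5), with d0 acting as the halving
# distance of the excess accuracy.  A small check under the 5x6 parameters:
def _sensor_check(base=2, d0=20):
    for dist in (0.0, 20.0, 100.0):
        pcorrect = 0.5 * (1 + base ** (-dist / d0))
        print(f'{dist:5.1f} -> {pcorrect}')
    # 0.0   -> 1.0        adjacent checks are fully reliable
    # 20.0  -> 0.75       halfway between perfect and random
    # 100.0 -> 0.515625   distant rocks are nearly uninformative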
import argparse
import copy

import one_to_one

# `sfmt`, `afmt`, `ofmt`, and `get_tile` are defined elsewhere in this file;
# a hypothetical sketch of `get_tile` follows this function.


def main():
    parser = argparse.ArgumentParser(description='ArrowTrail')
    parser.add_argument('--gamma', type=float, default=0.99)
    config = parser.parse_args()

    pos_space = one_to_one.NamedTupleSpace(
        x=one_to_one.RangeSpace(10), y=one_to_one.RangeSpace(10)
    )
    state_space = one_to_one.NamedTupleSpace(
        reflect_h=one_to_one.BoolSpace(),
        reflect_v=one_to_one.BoolSpace(),
        reverse=one_to_one.BoolSpace(),
        pos=pos_space,
    )

    actions = 'up', 'down', 'left', 'right'
    action_space = one_to_one.DomainSpace(actions)

    observations = 'up', 'down', 'left', 'right'
    obs_space = one_to_one.DomainSpace(observations)

    print(
        """# ArrowTrail Environment;
# The agent navigates a 10x10 grid-world.  Each tile is associated with an
# arrow indicating one of the four cardinal directions; the arrows form a path
# which covers all the tiles in a single loop, and the task is to follow the
# trail of arrows.  The agent does not observe its own position, only the
# direction indicated by the current tile.
# This environment was designed to have an easy control task and a difficult
# prediction task.
# State-space (800) : position of the agent (10x10 grid) times 8 possible
# paths, obtained from a base path through horizontal reflection, vertical
# reflection, and/or path reversal.
# Action-space (4) : directional movements {`up`, `down`, `left`, `right`}.
# Observation-space (4) : direction of the tile arrow {`up`, `down`, `left`,
# `right`}."""
    )
    print()
    print('# This specific file was generated with parameters:')
    print(f'# {config}')
    print()

    print(f'discount: {config.gamma}')
    print('values: reward')
    print(f'states: {" ".join(sfmt(s) for s in state_space.elems())}')
    print(f'actions: {" ".join(afmt(a) for a in action_space.elems())}')
    print(f'observations: {" ".join(ofmt(o) for o in obs_space.elems())}')

    # # START
    # print()
    # print(f'start include: uniform')

    # TRANSITIONS
    print()
    for s in state_space.elems():
        for a in action_space.elems():
            # deterministic movement, clipped at the grid boundaries
            s1 = copy.copy(s)
            if a.value == 'up':
                s1.pos.y.value = max(s1.pos.y.value - 1, 0)
            elif a.value == 'down':
                s1.pos.y.value = min(s1.pos.y.value + 1, 9)
            elif a.value == 'right':
                s1.pos.x.value = min(s1.pos.x.value + 1, 9)
            elif a.value == 'left':
                s1.pos.x.value = max(s1.pos.x.value - 1, 0)
            print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

    # OBSERVATIONS
    translation = {'U': 'up', 'D': 'down', 'L': 'left', 'R': 'right'}
    print()
    for s1 in state_space.elems():
        tile = get_tile(s1)
        direction = translation[tile]
        o = obs_space.elem(value=direction)
        print(f'O: *: {sfmt(s1)}: {ofmt(o)} 1.0')

    # REWARDS
    print()
    # -1.0 everywhere, except 0.0 for moving in the direction of the arrow
    print('R: *: *: *: * -1.0')
    for s in state_space.elems():
        tile = get_tile(s)
        direction = translation[tile]
        a = action_space.elem(value=direction)
        print(f'R: {afmt(a)}: {sfmt(s)}: *: * 0.0')
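# Hypothetical sketch of the `get_tile` helper assumed above.  BASE_PATH is a
# placeholder for the base loop defined in the real script: a dict mapping
# (x, y) -> 'U'/'D'/'L'/'R' describing a single directed loop over all 100
# tiles ('U' decreases y, matching the transition convention above).

STEP = {'U': (0, -1), 'D': (0, 1), 'L': (-1, 0), 'R': (1, 0)}
OPPOSITE = {'U': 'D', 'D': 'U', 'L': 'R', 'R': 'L'}


def reversed_path(path):
    # in the reversed loop, the arrow at each tile's successor points back
    # along the traversed edge
    rev = {}
    for (x, y), d in path.items():
        dx, dy = STEP[d]
        rev[x + dx, y + dy] = OPPOSITE[d]
    return rev


def get_tile(s):
    x, y = s.pos.x.value, s.pos.y.value
    # map the agent's coordinates back onto the base path...
    if s.reflect_h.value:
        x = 9 - x
    if s.reflect_v.value:
        y = 9 - y
    path = reversed_path(BASE_PATH) if s.reverse.value else BASE_PATH
    d = path[x, y]
    # ...then push the base arrow through the same reflections
    if s.reflect_h.value:
        d = {'L': 'R', 'R': 'L'}.get(d, d)
    if s.reflect_v.value:
        d = {'U': 'D', 'D': 'U'}.get(d, d)
    return d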
parser.add_argument('n', type=int, default=None)
# parser.add_argument('--episodic', action='store_true')
parser.add_argument('--gamma', type=float, default=0.99)
config = parser.parse_args()

# TODO change size to width and height
assert config.n > 1
assert 0 < config.gamma <= 1

pos_space = one_to_one.NamedTupleSpace(
    x=one_to_one.RangeSpace(config.n), y=one_to_one.RangeSpace(config.n)
)
state_space = one_to_one.NamedTupleSpace(agent=pos_space, item=pos_space)

actions = 'query', 'left', 'right', 'up', 'down', 'buy'
action_space = one_to_one.DomainSpace(actions)

postypes = 'agent', 'item'
postype_space = one_to_one.DomainSpace(postypes)
obs_space = one_to_one.NamedTupleSpace(postype=postype_space, pos=pos_space)

# print('states')
# for s in state_space.elems():
#     print(sfmt(s))
# print('actions')
# for a in action_space.elems():
#     print(afmt(a))
# print('observations')
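# Size sanity check (hypothetical; relies only on len() being defined for
# composed spaces, as it is for the plain spaces used elsewhere):
assert len(state_space) == config.n ** 4  # agent position x item position
assert len(obs_space) == 2 * config.n ** 2  # postype x position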
def new_space():
    return one_to_one.UnionSpace(
        one_to_one.BoolSpace(),
        one_to_one.DomainSpace('abc'),
        one_to_one.RangeSpace(10, 14),
    )
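# Enumeration check for this fixture (hypothetical: assumes UnionSpace
# concatenates its members in order, BoolSpace yields False before True, and
# RangeSpace(10, 14) spans 10..13 -- all inferred from the naming alone):
space = new_space()
assert len(space) == 2 + 3 + 4
assert [e.value for e in space.elems()] == [
    False, True, 'a', 'b', 'c', 10, 11, 12, 13,
]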
    return o.value


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='HeavenHell')
    parser.add_argument('n', type=int, default=None)
    # parser.add_argument('--episodic', action='store_true')
    parser.add_argument('--gamma', type=float, default=0.99)
    config = parser.parse_args()

    assert config.n >= 1
    assert 0 < config.gamma <= 1

    ncells = 2 + 4 * config.n
    cell_space = one_to_one.RangeSpace(ncells)
    heaven_space = one_to_one.DomainSpace(['left', 'right'])
    state_space = one_to_one.NamedTupleSpace(
        heaven=heaven_space, cell=cell_space
    )

    actions = ['N', 'S', 'E', 'W']
    action_space = one_to_one.DomainSpace(actions)

    obs = [f'o{i}' for i in range(len(cell_space) - 1)] + ['left', 'right']
    obs_space = one_to_one.DomainSpace(obs)

    print("""# A robot will be rewarded +1 for attaining heaven;
# if it accidentally reaches hell it will get -1.
# The problem is attributed to Sebastian Thrun, but first appeared in Geffner
# & Bonet: Solving Large POMDPs using Real Time DP, 1998.
# A priest is available to tell it where heaven is (left or right).""")
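    # Worked example of the observation list above for n=1, where
    # ncells = 2 + 4*1 = 6: the first ncells-1 cells emit anonymous
    # observations, and the final two labels reveal where heaven is
    # (interpretation inferred from the header comment; sketch only):
    assert [f'o{i}' for i in range(6 - 1)] + ['left', 'right'] == [
        'o0', 'o1', 'o2', 'o3', 'o4', 'left', 'right',
    ]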
pos_space = one_to_one.NamedTupleSpace(
    x=one_to_one.RangeSpace(config.n), y=one_to_one.RangeSpace(config.n)
)
state_space = one_to_one.NamedTupleSpace(agent=pos_space, item=pos_space)


def pstr(p):
    return f'{p.x}_{p.y}'


def sstr(s):
    return f'{pstr(s.value.agent)}_{pstr(s.value.item)}'


def ostr(o):
    return pstr(o)


actions = 'query', 'left', 'right', 'up', 'down', 'buy'
action_space = one_to_one.DomainSpace(actions)

# TODO different observation space for queries and for positions..?
# NO!  with this version, the observation makes sense in the context of the
# action!
obs_space = pos_space

print("""# Shopping Environment;
# The agent is in a store and needs to remember which item to purchase
# (preselected at the beginning of the environment).  A reactive policy with
# insufficient memory will need to periodically query which item needs to be
# purchased.
# State-space (n ** 4) : position of the agent in the store (n ** 2 grid), and
# position of the target item in the store (n ** 2 grid).""")
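# Hypothetical sketch of the action-contextual observation described in the
# comment above (illustration only, not part of the generator): the same
# position-valued observation refers to the item after a `query`, and to the
# agent's own position otherwise.
def observe(s, a):
    pos = s.value.item if a.value == 'query' else s.value.agent
    return obs_space.elem(value=pos)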