def main():
    """Print a Float/Reset POMDP specification to standard output.

    The only command-line option is ``--gamma`` (discount factor, default
    0.99).  The output is a commented header, the preamble (``discount``,
    ``values``, ``states``, ``actions``, ``observations``), the start state,
    and then the ``T:``, ``O:`` and ``R:`` entries, in that order.
    """
    parser = argparse.ArgumentParser(description='FloatReset')
    parser.add_argument('--gamma', type=float, default=0.99)
    config = parser.parse_args()

    pos_space = one_to_one.RangeSpace(5)

    # A state is an (old, new) pair of positions; the sub-space keeps only
    # the pairs accepted by `is_adjacent`.
    state_space = one_to_one.NamedTupleSpace(old=pos_space, new=pos_space)
    state_space = one_to_one.SubSpace(state_space, is_adjacent)

    actions = 'float', 'reset_'
    action_space = one_to_one.DomainSpace(actions)

    obs_space = one_to_one.RangeSpace(2)

    print(
        """# Float/Reset Environment;

# @inproceedings{littman_predictive_2002,
#     title = {Predictive representations of state},
#     booktitle = {Advances in neural information processing systems},
#     author = {Littman, Michael L. and Sutton, Richard S.},
#     year = {2002},
#     pages = {1555--1561},
# }

# State-space (5) : current position.

# Action-space (2) : `float` and `reset_`.

# Observation-space (2) : 0 and 1."""
    )

    print()
    print('# This specific file was generated with parameters:')
    print(f'# {config}')
    print()
    print(f'discount: {config.gamma}')
    print('values: reward')

    print(f'states: {" ".join(sfmt(s) for s in state_space.elems())}')

    print(f'actions: {" ".join(afmt(a) for a in action_space.elems())}')

    print(f'observations: {len(obs_space)}')

    # START
    print()
    s = state_space.elem(0)
    s.value = s.value._replace(old=0, new=0)
    print(f'start: {sfmt(s)}')

    # TRANSITIONS
    print()

    # `reset_` always moves to position 0, remembering where it came from.
    a = action_space.elem(value='reset_')
    for s in state_space.elems():
        s1 = state_space.elem(value=s.value._replace(old=s.value.new, new=0))
        print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

    # `float` tries one step down and one step up, each with probability 0.5.
    a = action_space.elem(value='float')
    for s in state_space.elems():
        pos = s.value.new
        for step in (-1, 1):
            # Work on a scratch element of our own rather than on whatever
            # `s1` happened to be left over from the reset loop above.
            s1 = state_space.elem(0)
            try:
                s1.value = s.value._replace(old=pos, new=pos + step)
            except ValueError:
                # The moved state was rejected (presumably it falls off the
                # end of the position range -- confirm): stay in place.
                s1 = state_space.elem(value=s.value._replace(old=pos))

            print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 0.5')

    # OBSERVATIONS
    print()

    # Default observation distribution for every action / next state.
    print('O: *: * 1.0 0.0')

    # Override for `reset_` landing in the (old=0, new=0) state.
    a = action_space.elem(value='reset_')
    s1 = state_space.elem(0)
    s1.value = s1.value._replace(old=0, new=0)
    print(f'O: {afmt(a)}: {sfmt(s1)} 0.0 1.0')

    # REWARDS
    print()
    # `reset_` is rewarded with the position it is taken from.
    a = action_space.elem(value='reset_')
    for s in state_space.elems():
        print(f'R: {afmt(a)}: {sfmt(s)}: *: * {s.value.new:.1f}')
def main():
    """Print a RockSample(n, k) POMDP specification to standard output.

    Command-line arguments are the grid size ``n``, the number of rocks ``k``
    and an optional ``--gamma`` discount factor (default 0.95).  Only the
    hard-coded layouts (n, k) = (5, 6), (7, 8) and (11, 11) are available.

    Raises:
        ValueError: if (n, k) is not one of the available layouts.
    """
    parser = argparse.ArgumentParser(description='RockSample')
    parser.add_argument('n', type=int)
    parser.add_argument('k', type=int)
    parser.add_argument('--gamma', type=float, default=0.95)
    config = parser.parse_args()

    assert config.n > 1
    assert config.k > 0

    if config.n == 5 and config.k == 6:

        # #######
        # #  R  #
        # #R  R #
        # #A    #
        # # RR  #
        # #    R#
        # #######

        base, d0 = 2, 20
        rock_positions = [(0, 1), (1, 3), (2, 0), (2, 3), (3, 1), (4, 4)]
    elif config.n == 7 and config.k == 8:

        # #########
        # #  R    #
        # #R  R   #
        # #       #
        # #A     R#
        # #  RR   #
        # #     R #
        # # R     #
        # #########

        base, d0 = 2, 20
        rock_positions = [
            (0, 1),
            (1, 6),
            (2, 0),
            (2, 4),
            (3, 1),
            (3, 4),
            (5, 5),
            (6, 3),
        ]
    elif config.n == 11 and config.k == 11:

        # #############
        # #           #
        # #      R    #
        # #           #
        # #R  RR    R #
        # #  R        #
        # #A          #
        # #           #
        # #R          #
        # # R R R     #
        # #         R #
        # #           #
        # #############

        base, d0 = 8, 20
        rock_positions = [
            (0, 3),
            (0, 7),
            (1, 8),
            (2, 4),
            (3, 3),
            (3, 8),
            (4, 3),
            (5, 8),
            (6, 1),
            (9, 3),
            (9, 9),
        ]
    else:
        raise ValueError(f'Invalid sizes (n={config.n}, k={config.k})')

    # (x, y) -> rock index; every layout above lists each position once, so
    # this is equivalent to `rock_positions.index` without the linear scan.
    rock_index = {pos: i for i, pos in enumerate(rock_positions)}

    pos_space = one_to_one.NamedTupleSpace(
        x=one_to_one.RangeSpace(config.n), y=one_to_one.RangeSpace(config.n)
    )

    rock_space = one_to_one.BoolSpace()
    rocks_space = one_to_one.TupleSpace(*[rock_space] * config.k)

    state_space = one_to_one.NamedTupleSpace(pos=pos_space, rocks=rocks_space)

    actions = ['N', 'S', 'E', 'W', 'sample'] + [
        f'check_{i}' for i in range(config.k)
    ]
    action_space = one_to_one.DomainSpace(actions)

    obs = ['none', 'good', 'bad']
    obs_space = one_to_one.DomainSpace(obs)

    print('# This specific file was generated with parameters:')
    print(f'# {config}')
    print()
    print(f'discount: {config.gamma}')
    print('values: reward')

    print(f'states: {" ".join(sfmt(s) for s in state_space.elems())}')
    print(f'actions: {" ".join(afmt(a) for a in action_space.elems())}')
    print(f'observations: {" ".join(ofmt(o) for o in obs_space.elems())}')

    # The agent starts at x == 0, y == n // 2, with any rock configuration.
    start_states = [
        s
        for s in state_space.elems()
        if s.pos.x.value == 0 and s.pos.y.value == config.n // 2
    ]

    # START
    print()
    print(f'start include: {" ".join(sfmt(s) for s in start_states)}')

    # TRANSITIONS
    print()
    for a in action_space.elems():
        # Every action defaults to the identity; only the state/action pairs
        # that actually change the state are overridden below.
        print(f'T: {afmt(a)} identity')

        if a.value == 'N':
            for s in state_space.elems():
                if s.pos.y.value < config.n - 1:
                    s1 = copy(s)
                    s1.pos.y.value += 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

        elif a.value == 'S':
            for s in state_space.elems():
                if s.pos.y.value > 0:
                    s1 = copy(s)
                    s1.pos.y.value -= 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

        elif a.value == 'E':
            for s in state_space.elems():
                if s.pos.x.value == config.n - 1:
                    # Moving east from the last column resets the problem.
                    print(f'T: {afmt(a)}: {sfmt(s)} reset')
                else:
                    s1 = copy(s)
                    s1.pos.x.value += 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

        elif a.value == 'W':
            for s in state_space.elems():
                if s.pos.x.value > 0:
                    s1 = copy(s)
                    s1.pos.x.value -= 1
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

        elif a.value == 'sample':
            for s in state_space.elems():
                rock_i = rock_index.get((s.pos.x.value, s.pos.y.value))
                # Test the rock's `.value`, as the observation and reward
                # sections do, rather than the element object itself: only
                # sampling a still-good rock changes the state.
                if rock_i is not None and s.rocks[rock_i].value:
                    s1 = copy(s)
                    s1.rocks[rock_i].value = False
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s)} 0.0')
                    print(f'T: {afmt(a)}: {sfmt(s)}: {sfmt(s1)} 1.0')

        elif a.value.startswith('check_'):
            pass  # no state-transition

    # OBSERVATIONS
    print()
    # Default: every action yields `none`; the check actions override it.
    print('O: *: *: none 1.0')
    for a in action_space.elems():
        if a.value.startswith('check_'):
            print(f'O: {afmt(a)}: *: none 0.0')

            # The checked rock depends only on the action, not on the state.
            rock_i = int(a.value[len('check_') :])
            rock_pos = rock_positions[rock_i]

            for s1 in state_space.elems():
                rock_good = bool(s1.rocks[rock_i].value)
                pos = s1.pos.x.value, s1.pos.y.value

                # Sensor accuracy decays exponentially with the Euclidean
                # distance to the rock: 1.0 on top of it, towards 0.5 far away.
                dist = math.sqrt(
                    (pos[0] - rock_pos[0]) ** 2 + (pos[1] - rock_pos[1]) ** 2
                )
                efficiency = base ** (-dist / d0)
                pcorrect = 0.5 * (1 + efficiency)
                pgood = pcorrect if rock_good else 1 - pcorrect

                print(f'O: {afmt(a)}: {sfmt(s1)}: good {pgood:.6f}')
                print(f'O: {afmt(a)}: {sfmt(s1)}: bad {1 - pgood:.6f}')

    # REWARDS
    print()
    for a in action_space.elems():

        if a.value == 'E':
            # Leaving through the last column pays 10.
            for s in state_space.elems():
                if s.pos.x.value == config.n - 1:
                    print(f'R: {afmt(a)}: {sfmt(s)}: *: * 10.0')

        elif a.value == 'sample':
            # TODO how to handle -100.0 actions, like bumping into a wall?
            # Sampling costs 10 by default; it pays 10 only on a good rock.
            print(f'R: {afmt(a)}: *: *: * -10.0')
            for s in state_space.elems():
                rock_i = rock_index.get((s.pos.x.value, s.pos.y.value))
                if rock_i is not None and s.rocks[rock_i].value:
                    print(f'R: {afmt(a)}: {sfmt(s)}: *: * 10.0')
Exemple #3
0
def ofmt(o):
    """Return the label of observation `o` as ``<postype>_<formatted pos>``."""
    kind = o.postype
    where = pfmt(o.pos)
    return f'{kind}_{where}'


if __name__ == '__main__':
    # Script entry point: parse the command line, then build the position,
    # state, action and observation spaces of the shopping domain.
    parser = argparse.ArgumentParser(description='Shopping')
    parser.add_argument('n', type=int)
    # parser.add_argument('--episodic', action='store_true')
    parser.add_argument('--gamma', type=float, default=0.99)
    config = parser.parse_args()

    # TODO change size to width and height
    # Validate explicitly instead of with `assert`, which is skipped when
    # Python runs with -O; `parser.error` prints the usage and exits.
    if config.n <= 1:
        parser.error('n must be greater than 1')
    if not 0 < config.gamma <= 1:
        parser.error('--gamma must satisfy 0 < gamma <= 1')

    pos_space = one_to_one.NamedTupleSpace(x=one_to_one.RangeSpace(config.n),
                                           y=one_to_one.RangeSpace(config.n))

    # A state pairs the agent position with the item position.
    state_space = one_to_one.NamedTupleSpace(agent=pos_space, item=pos_space)

    actions = 'query', 'left', 'right', 'up', 'down', 'buy'
    action_space = one_to_one.DomainSpace(actions)

    # An observation is a position tagged with whose position it is.
    postypes = 'agent', 'item'
    postype_space = one_to_one.DomainSpace(postypes)
    obs_space = one_to_one.NamedTupleSpace(postype=postype_space,
                                           pos=pos_space)

    # print('states')
    # for s in state_space.elems():
    #     print(sfmt(s))
def main():
    """Write the ArrowMaze POMDP specification to standard output.

    Accepts a single ``--gamma`` option (discount factor, default 0.99) and
    prints, in order: a commented description, the preamble, the transition
    entries, the observation entries and the reward entries.
    """
    arg_parser = argparse.ArgumentParser(description='ArrowMaze')
    arg_parser.add_argument('--gamma', type=float, default=0.99)
    config = arg_parser.parse_args()

    x_space = one_to_one.RangeSpace(10)
    y_space = one_to_one.RangeSpace(10)
    pos_space = one_to_one.NamedTupleSpace(x=x_space, y=y_space)

    state_space = one_to_one.NamedTupleSpace(
        reflect_h=one_to_one.BoolSpace(),
        reflect_v=one_to_one.BoolSpace(),
        reverse=one_to_one.BoolSpace(),
        pos=pos_space,
    )

    action_space = one_to_one.DomainSpace(('up', 'down', 'left', 'right'))
    obs_space = one_to_one.DomainSpace(('up', 'down', 'left', 'right'))

    header = """# ArrowTrail Environment;

# The agent navigates a 10x10 grid-world.  Each tile is associated with an
# arrow indicating one of the four cardinal directions;  the arrows form a path
# which covers all the tiles in a single loop, and the task is to follow the
# trail of arrows.  The agent does not observe its own position, only the
# direction indicated by the current tile.

# This environment was designed to have an easy control task and a difficult
# prediction task.

# State-space (800) : position of the agent (10x10 grid) times 8 possible paths,
# obtained from a base path through horizontal reflection, vertical reflection,
# and/or path reversal.

# Action-space (4) : directional movements {`up`, `down`, `left`, `right`}.

# Observation-space (4) : direction of the tile arrow {`up`, `down`, `left`,
# `right`}."""
    print(header)

    print()
    print('# This specific file was generated with parameters:')
    print(f'# {config}')
    print()
    print(f'discount: {config.gamma}')
    print('values: reward')

    print('states:', ' '.join(map(sfmt, state_space.elems())))

    print('actions:', ' '.join(map(afmt, action_space.elems())))

    print('observations:', ' '.join(map(ofmt, obs_space.elems())))

    # No start section is emitted.

    # TRANSITIONS
    # Each action shifts one coordinate by one cell, clamped to the 0..9 grid.
    moves = {
        'up': ('y', -1),
        'down': ('y', 1),
        'right': ('x', 1),
        'left': ('x', -1),
    }

    print()
    for state in state_space.elems():
        for action in action_space.elems():
            successor = copy.copy(state)

            move = moves.get(action.value)
            if move is not None:
                axis, delta = move
                coord = getattr(successor.pos, axis)
                coord.value = min(max(coord.value + delta, 0), 9)

            print(f'T: {afmt(action)}: {sfmt(state)}: {sfmt(successor)} 1.0')

    # OBSERVATIONS
    # Tile letters as returned by `get_tile`, mapped to direction names.
    translation = {'U': 'up', 'D': 'down', 'L': 'left', 'R': 'right'}

    print()
    for state in state_space.elems():
        arrow = translation[get_tile(state)]
        observation = obs_space.elem(value=arrow)
        print(f'O: *: {sfmt(state)}: {ofmt(observation)} 1.0')

    # REWARDS
    # Everything costs 1, except taking the action the current tile points to.
    print()
    print('R: *: *: *: * -1.0')
    for state in state_space.elems():
        arrow = translation[get_tile(state)]
        action = action_space.elem(value=arrow)
        print(f'R: {afmt(action)}: {sfmt(state)}: *: * 0.0')
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Shopping')
    parser.add_argument('n', type=int, default=None)
    # parser.add_argument('--episodic', action='store_true')
    parser.add_argument('--gamma', type=float, default=0.99)
    config = parser.parse_args()

    assert config.n >= 1
    assert 0 < config.gamma <= 1

    ncells = 2 + 4 * config.n
    cell_space = one_to_one.RangeSpace(ncells)
    heaven_space = one_to_one.DomainSpace(['left', 'right'])

    state_space = one_to_one.NamedTupleSpace(heaven=heaven_space,
                                             cell=cell_space)

    actions = ['N', 'S', 'E', 'W']
    action_space = one_to_one.DomainSpace(actions)

    obs = [f'o{i}' for i in range(len(cell_space) - 1)] + ['left', 'right']
    obs_space = one_to_one.DomainSpace(obs)

    print("""# A robot will be rewarded +1 for attaining heaven in one
# if it accidently reaches hell it will get -1
# Problem is attributed to Sebastian Thrun but first appeared in Geffner
# & Bonet: Solving Large POMDPs using Real Time DP 1998.
# A priest is available to tell it where heaven is (left or right)
#
#        Heaven  4  3  2  5  6  Hell
#                      1