コード例 #1
0
        positions[found] = None
    return positions


if __name__ == '__main__':
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour ==
                   'blue'))

    dqn_blue = load('../../models/blue/model.dqn', env)
    dqn_crate = load('../../models/crate/model.dqn', env)
    dqn = ComposedDQN([dqn_blue, dqn_crate], [1, 1], or_compose=False)

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
コード例 #2
0
    def step(self, action):
        """Forward *action* to the wrapped env, forcing termination once
        exactly ``max_length`` steps have been taken this episode."""
        observation, reward, finished, info = self.env.step(action)
        self.steps += 1
        if self.steps == self.max_length:
            finished = True
        return observation, reward, finished, info


if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 200

    # Every (colour, shape) pair that has to be collected for the episode
    # to terminate.
    targets = {('purple', 'square'), ('purple', 'circle'), ('blue', 'square'), ('blue', 'circle'), ('beige', 'square'),
               ('beige', 'circle')}
    # First positional arg: termination condition (all targets collected);
    # second: per-item reward condition.  Episodes capped at max_trajectory.
    task = MaxLength(WarpFrame(
        MultiCollectEnv(lambda collected: targets.issubset({(c.colour, c.shape) for c in collected}),
                        lambda x: (x.colour, x.shape) in targets)), max_trajectory)

    #agent = train('./models/temporal3/results', task) # 1 million
    #save('./models/temporal3/model.dqn', agent)

    dqn = load('./models/temporal3/model.dqn', task)  # dqn trained on full task

    # NOTE(review): max_episodes is re-assigned to the same value here and the
    # trajectory cap is shrunk from 200 to 50 — presumably the evaluation of
    # the composed policies uses shorter episodes; confirm this is intended.
    max_episodes = 50000
    max_trajectory = 50

    # Single-colour policies combined with the default composition.
    dqn1 = load('./models/purple/model.dqn', task)
    dqn2 = load('./models/blue/model.dqn', task)
    dqn3 = load('./models/beige/model.dqn', task)
    dqn_composed = ComposedDQN([dqn1, dqn2, dqn3])
コード例 #3
0
def evaluate(name='or', save_trajectories=True, max_trajectory=20):
    """Greedily roll out one episode of the task named *name* and return
    its total (undiscounted) reward.

    *name* encodes a boolean task over the base tasks B (blue), S (square)
    and P (purple): '.' = AND, '+' = OR, '-' = NOT; e.g. 'B.-S' means
    "blue and not square".  Each branch picks a composed value function
    and the matching ground-truth goal condition used to build the env.

    Relies on module-level globals: the dqn_* networks, `goals`,
    `max_evf`, `FloatTensor` and the environment classes.

    NOTE(review): `save_trajectories` is never used in this function —
    confirm whether frame saving was meant to happen here.
    """

    if name == 'B.S':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'B.-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="and")
        goal_condition = lambda x: x.colour == 'blue' and not x.shape == 'square'
    elif name == 'S.-B':
        dqn = ComposedDQN([dqn_square, dqn_not_blue], compose="and")
        goal_condition = lambda x: x.shape == 'square' and not x.colour == 'blue'
    elif name == '-(B+S)':
        dqn = dqn_not_or
        goal_condition = lambda x: not (x.colour == 'blue' or x.shape ==
                                        'square')
    elif name == 'P':
        dqn = dqn_purple
        goal_condition = lambda x: x.colour == 'purple'
    elif name == 'B':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == '-B':
        dqn = dqn_not_blue
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'S':
        dqn = dqn_square
        goal_condition = lambda x: x.shape == 'square'
    elif name == '-S':
        dqn = dqn_not_square
        goal_condition = lambda x: not x.shape == 'square'
    elif name == 'P+B':
        dqn = dqn_or_purple
        goal_condition = lambda x: x.colour == 'purple' or x.colour == 'blue'
    elif name == 'B+S':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'B+-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="or")
        goal_condition = lambda x: x.colour == 'blue' or not x.shape == 'square'
    elif name == 'S+-B':
        dqn = ComposedDQN([dqn_square, dqn_not_blue], compose="or")
        goal_condition = lambda x: x.shape == 'square' or not x.colour == 'blue'
    elif name == '-(B.S)':
        dqn = dqn_not_and
        goal_condition = lambda x: not (x.colour == 'blue' and x.shape ==
                                        'square')
    elif name == '-BxorS':
        # Negation is taken relative to `max_evf` — presumably the max over
        # all base value functions; TODO confirm.
        dqn = ComposedDQN([dqn_xor], dqn_max=max_evf, compose="not")
        goal_condition = lambda x: not (
            (x.colour == 'blue' or x.shape == 'square') and not (
                x.colour == 'blue' and x.shape == 'square'))
    elif name == 'BxorS':
        dqn = dqn_xor
        goal_condition = lambda x: (x.colour == 'blue' or x.shape == 'square'
                                    ) and not (x.colour == 'blue' and x.shape
                                               == 'square')
    else:
        print("Invalid name")
        return

    # Environment whose reward/termination matches the selected condition,
    # with episodes capped at max_trajectory steps.
    env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)),
                    max_trajectory)

    G = 0  # episode return (plain sum of rewards, no discounting)
    with torch.no_grad():
        obs = env.reset()
        for _ in range(max_trajectory):
            obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
            values = []
            # Goal-conditioned evaluation: concatenate each candidate goal
            # image to the observation along dim=3 and query the network.
            for goal in goals:
                goal = torch.from_numpy(
                    np.array(goal)).type(FloatTensor).unsqueeze(0)
                x = torch.cat((obs, goal), dim=3)
                values.append(dqn(x).squeeze(0))
            values = torch.stack(values, 1).t()
            # Max over goals per action, then greedy argmax over actions.
            action = values.data.max(0)[0].max(0)[1].item()
            obs, reward, done, _ = env.step(action)
            G += reward

            if done:
                break
    return G
コード例 #4
0
Experiment for approximating blue AND square
"""

from gym.wrappers import Monitor

from dqn import ComposedDQN, get_action
from gym_repoman.envs import CollectEnv
from trainer import load
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':

    max_episodes = 50000
    max_trajectory = 50

    task = MaxLength(WarpFrame(CollectEnv(goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square')),
                     max_trajectory)

    dqn_blue_crate = load('./models/blue_crate/model.dqn', task)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_crate = load('./models/crate/model.dqn', task)

    dqn_composed_or = ComposedDQN([dqn_blue, dqn_crate])
    dqn_composed_and = ComposedDQN([dqn_blue, dqn_crate], or_compose=False)

    for dqn, name in [(dqn_blue_crate, 'blue_crate'), (dqn_composed_or, 'blue_or_crate'),
                      (dqn_composed_and, 'blue_and_crate')]:

        env = Monitor(task, './experiment_approx_and/' + name + '/', video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
コード例 #5
0
def learn(colour, shape, condition):
    """Train a DQN on a CollectEnv whose goal is *condition* and persist it.

    Artefacts are written under ./models/<colour><shape>/: training results
    in 'results' and the final network in 'model.dqn'.
    """
    base_path = './models/{}/'.format(colour + shape)
    env = WarpFrame(CollectEnv(goal_condition=condition))
    save(base_path + 'model.dqn', train(base_path + 'results', env))
コード例 #6
0
# Mixing weights — presumably paired with the composed policies built below;
# TODO confirm intended use.
WEIGHTS = [0, 1]

# Fixed board layout used for every evaluation run.
start_positions = {
    'crate_beige': (3, 4),
    'player': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_blue': (1, 1),
    'crate_purple': (8, 1),
    'circle_blue': (1, 8)
}

# Goal condition that is never satisfied: this env only serves as the
# template needed to load the networks below.
goal_condition = lambda x: False
env = (WarpFrame(
    CollectEnv(start_positions=start_positions,
               goal_condition=goal_condition)))

# Cached goal images produced elsewhere (deepdish HDF5); empty if absent.
goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

dqn_blue = load('./models/blue/model.dqn', env)
dqn_purple = load('./models/purple/model.dqn', env)
dqn_crate = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_purple.cuda()
    dqn_crate.cuda()

# "or"-composition of the blue and crate policies.
dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
コード例 #7
0
    def exp(name='or',
            save_trajectories=True,
            max_episodes=4,
            max_trajectory=7):
        """Roll out up to *max_episodes* greedy episodes (each capped at
        *max_trajectory* steps) and optionally save the rendered frames as
        an animated GIF under ./trajectories/<name>.gif.

        NOTE(review): the branches below select `dqn` and `goal_condition`,
        but neither is used by the rollout: the environment is built with
        `goal_condition=lambda x: True`, and action values are always the
        element-wise min of dqn_blue and dqn_crate (a hard-coded AND
        composition).  Confirm this is intentional for this experiment.
        """

        env = CollectEnv()
        dqn_blue = load('./models/blue/model.dqn', env)
        dqn_crate = load('./models/crate/model.dqn', env)
        if torch.cuda.is_available():
            dqn_blue.cuda()
            dqn_crate.cuda()

        # Composed value functions for the selectable task names.
        dqn_not = ComposedDQN([dqn_blue], compose="not")
        dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
        dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
        dqn_not_and = ComposedDQN([dqn_and], compose="not")
        dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

        # Cached goal images produced elsewhere (deepdish HDF5); empty if absent.
        goals = []
        if os.path.exists('./goals.h5'):
            goals = dd.io.load('goals.h5')

        if name == 'blue':
            dqn = dqn_blue
            goal_condition = lambda x: x.colour == 'blue'
        elif name == 'square':
            dqn = dqn_crate
            goal_condition = lambda x: x.shape == 'square'
        elif name == 'not':
            dqn = dqn_not
            goal_condition = lambda x: not x.colour == 'blue'
        elif name == 'or':
            dqn = dqn_or
            goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
        elif name == 'and':
            dqn = dqn_and
            goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
        elif name == 'xor':
            dqn = dqn_xor
            goal_condition = lambda x: (
                x.colour == 'blue' or x.shape == 'square') and not (
                    x.colour == 'blue' and x.shape == 'square')
        # else:
        #     print("Invalid name")
        #     return

        # env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,goal_condition=lambda x: True)), max_trajectory)

        # One fixed beige-crate position per episode; requires
        # max_episodes <= len(poss) or poss[episode] raises IndexError.
        poss = [(3, 4), (1, 2), (5, 7), (5, 2)]
        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                # NOTE(review): mutates the module-level start_positions dict.
                start_positions['crate_beige'] = poss[episode]
                env = (WarpFrame(
                    CollectEnv(start_positions=start_positions,
                               changePlayerPos=False,
                               goal_condition=lambda x: True)))
                obs = env.reset()

                trajectory = []
                for _ in range(max_trajectory):
                    # Capture the frame before acting.
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    # Per-goal values from the blue network, maxed over goals.
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn_blue(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    valuesb = values.data.max(0)[0]
                    # Same for the crate network.
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn_crate(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    valuess = values.data.max(0)[0]
                    # Element-wise min of the two = AND composition; greedy argmax.
                    values = torch.stack((valuesb, valuess), 0).min(0)[0]
                    action = values.max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        break
                # Drop the final frame of each episode.
                trajectories += trajectory[:-1]
                episode += 1

        if save_trajectories:
            trajectories[0].save('./trajectories/' + name + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)
コード例 #8
0
    def step(self, action):
        """Delegate to the wrapped environment and cap the episode length:
        `done` is forced when the step counter reaches ``max_length``."""
        ob, reward, done, info = self.env.step(action)
        self.steps += 1
        hit_cap = self.steps == self.max_length
        if hit_cap:
            done = True
        return ob, reward, done, info


if __name__ == '__main__':

    max_iterations = 80
    max_episodes = 100
    max_trajectory = 50

    # Task: collect either the beige square or the purple circle.
    task = MaxLength(WarpFrame(CollectEnv(goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                                   or (x.colour == 'purple' and x.shape == 'circle'))),
                     max_trajectory)
    env = Monitor(task, './experiment_weighted_or/', video_callable=False, force=True)

    dqn_purple_circle = load('./models/purple_circle/model.dqn', task)  # entropy regularised functions
    dqn_beige_crate = load('./models/beige_crate/model.dqn', task)  # entropy regularised functions
    # Sweep of relative weights for the weighted composition.
    weights = np.arange(1/3, 3.01, 0.05)

    # Per-weight-index result buckets.
    tally = {i: [] for i in range(len(weights))}

    for iter in range(max_iterations):
        for i, weight in enumerate(weights):
            collected_count = [0, 0]
            # NOTE(review): this overrides the swept `weight` from the loop
            # above, so every iteration uses weight 1 and the sweep is a
            # no-op — confirm whether this debug line should be removed.
            weight = 1
            dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle], [weight, 1])
            for episode in range(max_episodes):
コード例 #9
0
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame
import imageio


if __name__ == '__main__':
    # Render one frame of a fixed board layout and save it as map.png.
    start_positions = {
        'crate_beige': (3, 4),
        'player': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_blue': (1, 1),
        'crate_purple': (8, 1),
        'circle_blue': (1, 8),
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   changePlayerPos=False,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))

    env.reset()
    image = env.render()
    imageio.imwrite('map.png', image)
コード例 #10
0
    if found is not None:
        positions[found] = None
    return positions


start_positions = {
    'player': (3, 4),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
env = WarpFrame(
    CollectEnv(
        start_positions=start_positions,
        goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))

dqn_blue = load('../../models/blue/model.dqn', env)
dqn_purple = load('../../models/purple/model.dqn', env)
dqn = ComposedDQN([dqn_blue, dqn_purple], [1, 1])

values = np.zeros_like(env.env.board, dtype=float)
for pos in env.env.free_spaces:
    positions = copy.deepcopy(start_positions)

    positions = remove(positions, pos)

    positions['player'] = pos
    env = WarpFrame(
        CollectEnv(start_positions=positions,
コード例 #11
0
from wrappers import WarpFrame
import numpy as np
import torch

if __name__ == '__main__':
    start_positions = {
        'player': (2, 1),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour ==
                   'blue'))

    dqn1 = load('../../models/purple/model.dqn', env)
    dqn2 = load('../../models/blue/model.dqn', env)
    dqn = ComposedDQN([dqn1, dqn2], [1, 1.1])
    obs = env.reset()
    positions = list()
    positions.append(env.env.player.position)
    env.render()

    for _ in range(100):
        obs = np.array(obs)
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)

        action = get_action(dqn, obs)
コード例 #12
0
# Mixing weights — presumably paired with the composed policies built below;
# TODO confirm intended use.
WEIGHTS = [0, 1]

# Fixed board layout used for every evaluation run.
start_positions = {
    'crate_beige': (3, 4),
    'player': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_blue': (1, 1),
    'crate_purple': (8, 1),
    'circle_blue': (1, 8)
}

# Goal condition that is never satisfied: this env only serves as the
# template needed to load the networks below.
goal_condition = lambda x: False
env = (WarpFrame(
    CollectEnv(start_positions=start_positions,
               goal_condition=goal_condition)))

# Cached goal images produced elsewhere (deepdish HDF5); empty if absent.
goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

dqn_blue = load('./models/blue/model.dqn', env)
dqn_crate = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_crate.cuda()

# Boolean compositions of the base policies.
dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
コード例 #13
0
        'player': (5, 5),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }

    targets = {('purple', 'square'), ('blue', 'circle'), ('blue', 'square'),
               ('beige', 'square')}

    env = WarpFrame(
        MultiCollectEnv(termination_condition=lambda collected: targets.
                        issubset({(c.colour, c.shape)
                                  for c in collected}),
                        reward_condition=lambda x:
                        (x.colour, x.shape) in targets,
                        start_positions=start_positions))

    dqn1 = load('../../models/purple/model.dqn', env)
    # dqn2 = load('../../models/purple_circle/model.dqn', env)
    dqn3 = load('../../models/blue/model.dqn', env)
    dqn4 = load('../../models/beige/model.dqn', env)
    # dqn = ComposedDQN([dqn1, dqn2, dqn3, dqn4], [1,1,1,1])
    #dqn1 = load('../../models/crate/model.dqn', env)
    #dqn2 = load('../../models/blue/model.dqn', env)
    dqn = ComposedDQN([dqn1, dqn3, dqn4])

    obs = env.reset()
    positions = list()
コード例 #14
0
        positions[found] = None
    return positions


if __name__ == '__main__':

    start_positions = {'player': (3, 4),
                       'crate_purple': (6, 3),
                       'circle_purple': (7, 7),
                       'circle_beige': (1, 7),
                       'crate_beige': (2, 2),
                       'crate_blue': (8, 1),
                       'circle_blue': (2, 8)}

    env = WarpFrame(CollectEnv(start_positions=start_positions,
                               goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                        or (x.colour == 'purple' and x.shape == 'circle')))

    dqn_purple_circle = load('../../models/purple_circle/model.dqn', env)
    dqn_beige_crate = load('../../models/beige_crate/model.dqn', env)
    dqn = ComposedDQN([dqn_purple_circle, dqn_beige_crate], [3, 2])  # TODO put weights here!

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(CollectEnv(start_positions=positions,
                                   goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
コード例 #15
0
import torch
from gym.wrappers import Monitor

from dqn import ComposedDQN, FloatTensor, get_action
from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':

    max_episodes = 50000
    max_trajectory = 50

    task = MaxLength(
        WarpFrame(
            CollectEnv(goal_condition=lambda x: x.colour == 'blue' or x.colour
                       == 'purple')), max_trajectory)

    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_purple = load('./models/purple/model.dqn', task)
    dqn_composed = ComposedDQN([dqn_blue, dqn_purple], [1, 1])

    for dqn, name in [(dqn_blue, 'blue'), (dqn_purple, 'purple'),
                      (dqn_composed, 'composed')]:
        env = Monitor(task,
                      './experiment_or/' + name + '/',
                      video_callable=False,
                      force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
コード例 #16
0
    def exp(name='or',
            save_trajectories=True,
            max_episodes=4,
            max_trajectory=20):
        """Roll out greedy episodes for the composed policy named *name*
        and optionally save the rendered frames as an animated GIF under
        ./trajectories/<name>.gif.

        *name* is one of 'blue', 'purple', 'square', 'not', 'or', 'and',
        'xor'; anything else prints "Invalid name" and returns.  Episodes
        are replayed until max_episodes of them terminate; only terminated
        episodes contribute frames (the final frame of each is dropped).
        """

        env = CollectEnv()
        dqn_purple = load('./models/purple/model.dqn', env)
        dqn_blue = load('./models/blue/model.dqn', env)
        dqn_crate = load('./models/crate/model.dqn', env)
        if torch.cuda.is_available():
            dqn_purple.cuda()
            dqn_blue.cuda()
            dqn_crate.cuda()

        # Composed value functions; "not" is taken relative to dqn_max, the
        # "or"-composition over all base policies.
        dqn_max = ComposedDQN([dqn_purple, dqn_blue, dqn_crate], compose="or")
        dqn_not = ComposedDQN([dqn_blue], dqn_max=dqn_max, compose="not")
        dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
        dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
        dqn_not_and = ComposedDQN([dqn_and], dqn_max=dqn_max, compose="not")
        dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

        # Cached goal images produced elsewhere (deepdish HDF5); empty if absent.
        goals = []
        if os.path.exists('./goals.h5'):
            goals = dd.io.load('goals.h5')

        if name == 'blue':
            dqn = dqn_blue
            goal_condition = lambda x: x.colour == 'blue'
        elif name == 'purple':
            dqn = dqn_purple
            goal_condition = lambda x: x.colour == 'purple'
        elif name == 'square':
            dqn = dqn_crate
            goal_condition = lambda x: x.shape == 'square'
        # BUG FIX: this was `if`, which restarted the chain — for
        # 'blue'/'purple'/'square' the second chain then fell through to the
        # `else` branch, printed "Invalid name" and returned early.
        elif name == 'not':
            dqn = dqn_not
            goal_condition = lambda x: not x.colour == 'blue'
        elif name == 'or':
            dqn = dqn_or
            goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
        elif name == 'and':
            dqn = dqn_and
            goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
        elif name == 'xor':
            dqn = dqn_xor
            goal_condition = lambda x: (
                x.colour == 'blue' or x.shape == 'square') and not (
                    x.colour == 'blue' and x.shape == 'square')
        else:
            print("Invalid name")
            return

        # Environment whose reward/termination matches the selected task,
        # with episodes capped at max_trajectory steps.
        env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)),
                        max_trajectory)

        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                obs = env.reset()
                trajectory = []
                for _ in range(max_trajectory):
                    # Capture the frame before acting.
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    # Goal-conditioned evaluation: concatenate each candidate
                    # goal image to the observation (dim=3) and query the net.
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    # Max over goals per action, then greedy argmax.
                    action = values.data.max(0)[0].max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        episode += 1
                        trajectories += trajectory[:-1]
                        break

        if save_trajectories:
            trajectories[0].save('./trajectories/' + name + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)