Beispiel #1
0
def learn(colour, shape, condition):
    """Train a DQN for the goal described by (colour, shape) and persist it.

    The model is written to ./models/<colour><shape>/model.dqn.
    NOTE(review): `start_positions`, `WarpFrame`, `CollectEnv`, `train` and
    `save` are module-level names defined elsewhere in the project; they are
    not visible in this fragment.
    """
    name = colour + shape
    base_path = './models/{}/'.format(name)
    env = WarpFrame(
        CollectEnv(start_positions=start_positions, goal_condition=condition))
    agent = train(base_path, env)
    save(base_path + 'model.dqn', agent)
    # NOTE(review): `experiment` is defined but never called or returned by
    # `learn`; this looks like two snippets fused together during extraction.
    def experiment(task=None,
                   save_trajectories=True,
                   max_episodes=4,
                   max_trajectory=20):
        # Build the boolean task expression and the corresponding composed
        # extended value function (EVF) over the learned per-goal models.
        exp = task_exp(base_tasks, task, len(all_goals), list(models.keys()))
        dqn = exp_EVF(exp, models)

        # Select the goals belonging to this task.  NOTE(review): the
        # indexing `all_goals[task == 1]` implies both are numpy arrays
        # (boolean-mask indexing) -- confirm against the caller.
        goals = all_goals[task == 1]
        goal_condition = lambda x: ((x.colour, x.shape) in goals)
        env = WarpFrame(CollectEnv(goal_condition=goal_condition))

        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                obs = env.reset()
                trajectory = []
                for _ in range(max_trajectory):
                    # Capture the rendered frame before acting, so the GIF
                    # shows the state in which each action was chosen.
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    # Evaluate the EVF once per goal; observation and goal
                    # image are concatenated along dim=3 (channel-last axis).
                    values = []
                    for goal in mgoals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    # Greedy action: maximise over goals, then take the
                    # argmax over actions.
                    action = values.data.max(0)[0].max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        episode += 1
                        # Drop the terminal frame so the GIF does not linger
                        # on the post-collection state.
                        trajectories += trajectory[:-1]
                        break

        if save_trajectories:
            # Pretty-print the simplified boolean expression for the filename.
            exp = str(exp.simplify()).replace('~', 'NOT ')
            exp = exp.replace('|', ' OR ')
            exp = exp.replace('&', ' AND ')
            trajectories[0].save('./trajectories/all_/' + exp + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)
Beispiel #3
0
def evaluate(name='or', max_trajectory = 20):
    """Run one greedy episode for task `name` and return its return G.

    `name` encodes a boolean goal expression over colour Blue (B) and shape
    Square (S): '.' = AND, '+' = OR, '-' prefix = NOT, 'xor' = exclusive or.
    Each branch picks the matching (pre-)composed DQN and the ground-truth
    goal predicate used to decide episode termination.

    NOTE(review): the `dqn_*` networks, `goals`, `start_positions`,
    `FloatTensor`, `ComposedDQN` and the env classes are module-level names
    defined elsewhere in the file/project.
    """

    if name == 'B.S':
        dqn = dqn_and
        goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'B.-S':
        dqn = ComposedDQN([dqn_blue,dqn_not_square], compose="and")
        goal_condition=lambda x: x.colour == 'blue' and not x.shape == 'square'
    elif name == 'S.-B':
        dqn = ComposedDQN([dqn_square,dqn_not_blue], compose="and")
        goal_condition=lambda x: x.shape == 'square' and not x.colour == 'blue'
    elif name == '-(B+S)':
        dqn = dqn_not_or
        goal_condition=lambda x: not (x.colour == 'blue' or x.shape == 'square')
    elif name == 'B':
        dqn = dqn_blue
        goal_condition=lambda x: x.colour == 'blue'
    elif name == '-B':
        dqn = dqn_not_blue
        goal_condition=lambda x: not x.colour == 'blue'
    elif name == 'S':
        dqn = dqn_square
        goal_condition=lambda x: x.shape == 'square'
    elif name == '-S':
        dqn = dqn_not_square
        goal_condition=lambda x: not x.shape == 'square'
    elif name == 'B+S':
        dqn = dqn_or
        goal_condition=lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'B+-S':
        dqn = ComposedDQN([dqn_blue,dqn_not_square], compose="or")
        goal_condition=lambda x: x.colour == 'blue' or not x.shape == 'square'
    elif name == 'S+-B':
        dqn = ComposedDQN([dqn_square,dqn_not_blue], compose="or")
        goal_condition=lambda x: x.shape == 'square' or not x.colour == 'blue'
    elif name == '-(B.S)':
        dqn = dqn_not_and
        goal_condition=lambda x: not (x.colour == 'blue' and x.shape == 'square')
    elif name == '-BxorS':
        dqn = ComposedDQN([dqn_xor], compose="not")
        goal_condition=lambda x: not((x.colour == 'blue' or x.shape == 'square') and not (x.colour == 'blue' and x.shape == 'square'))
    elif name == 'BxorS':
        dqn = dqn_xor
        goal_condition=lambda x: (x.colour == 'blue' or x.shape == 'square') and not (x.colour == 'blue' and x.shape == 'square')
    else:
        print("Invalid name")
        return

    # Episode is capped at max_trajectory steps by the MaxLength wrapper.
    env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,goal_condition=goal_condition)), max_trajectory)

    # G accumulates the (undiscounted) sum of rewards over the episode.
    G = 0
    with torch.no_grad():
        obs = env.reset()
        for _ in range(max_trajectory):
            obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
            # Evaluate the value function once per candidate goal; the
            # observation and goal image are concatenated along dim=3.
            values = []
            for goal in goals:
                goal = torch.from_numpy(np.array(goal)).type(FloatTensor).unsqueeze(0)
                x = torch.cat((obs,goal),dim=3)
                values.append(dqn(x).squeeze(0))
            values = torch.stack(values,1).t()
            # Greedy action: max over goals, then argmax over actions.
            action = values.data.max(0)[0].max(0)[1].item()
            obs, reward, done, _ = env.step(action)
            G += reward

            if done:
                break
    return G
Beispiel #4
0
                   'crate_blue': (1, 1),
                   'crate_purple': (8, 1),
                   'circle_blue': (1, 8)}
all_goals = ['BC','BS','bS','PS','bC','PC']
all_goals_P = [(1,8),(8,1),(1,1),(6,3),(1,7),(7,7)]

# Tasks = ["B.S", "B.-S", "S.-B", "-(B+S)", "B", "-B", "S", "-S", "B+S", "B+-S", "S+-B", "-(B.S)", "-BxorS", "BxorS"]
# Tasks_N = [1, 1, 2, 2, 2, 4, 3, 3, 4, 4, 5, 5, 3, 3]
Tasks = ["B", "S", "B+S", "B.S", "BxorS"]
Tasks_P = [[(1,8),(8,1)], [(8,1),(1,1),(6,3)], [(1,8),(8,1),(1,1),(6,3)], [(8,1)], [(1,8),(1,1),(6,3)]]

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

env = CollectEnv()
dqn_blue = load('./models/blue/model.dqn', env)
dqn_square = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_square.cuda()
dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
dqn_not_square = ComposedDQN([dqn_square], compose="not")
dqn_or = ComposedDQN([dqn_blue,dqn_square], compose="or")
dqn_not_or = ComposedDQN([dqn_or], compose="not")
dqn_and = ComposedDQN([dqn_blue,dqn_square], compose="and")
dqn_not_and = ComposedDQN([dqn_and], compose="not")
dqn_xor = ComposedDQN([dqn_or,dqn_not_and], compose="and")
    
def evaluate(name='or', max_trajectory = 20):    
            
Beispiel #5
0
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame
import scipy.misc

if __name__ == '__main__':

    # Fixed board layout (grid coordinates) used to render the map image.
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    # The goal condition is only needed to construct the env; nothing is
    # collected here -- we just render the initial state.
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour ==
                   'blue'))

    env.reset()
    image = env.render()

    # NOTE(review): scipy.misc.imsave was deprecated and removed in
    # SciPy >= 1.2 -- this call fails on modern SciPy.  Consider
    # imageio.imwrite (used elsewhere in this codebase) or PIL.Image.save.
    scipy.misc.imsave('map.png', image)
    return positions


if __name__ == '__main__':
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour ==
                   'blue'))

    dqn_blue = load('../../models/blue/model.dqn', env)
    dqn_crate = load('../../models/crate/model.dqn', env)
    dqn = ComposedDQN([dqn_blue, dqn_crate], [1, 1], or_compose=False)

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
Experiment for approximating blue AND square
"""

from gym.wrappers import Monitor

from dqn import ComposedDQN, get_action
from gym_repoman.envs import CollectEnv
from trainer import load
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':

    max_episodes = 50000
    max_trajectory = 50

    task = MaxLength(WarpFrame(CollectEnv(goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square')),
                     max_trajectory)

    dqn_blue_crate = load('./models/blue_crate/model.dqn', task)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_crate = load('./models/crate/model.dqn', task)

    dqn_composed_or = ComposedDQN([dqn_blue, dqn_crate])
    dqn_composed_and = ComposedDQN([dqn_blue, dqn_crate], or_compose=False)

    for dqn, name in [(dqn_blue_crate, 'blue_crate'), (dqn_composed_or, 'blue_or_crate'),
                      (dqn_composed_and, 'blue_and_crate')]:

        env = Monitor(task, './experiment_approx_and/' + name + '/', video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
    def exp(name='or',
            save_trajectories=True,
            max_episodes=4,
            max_trajectory=20):
        """Roll out greedy episodes over varied board layouts; save as GIF.

        `name` selects a composed DQN and matching goal predicate, but note
        the quirk below: the rollout loop does NOT use the selected `dqn` --
        it recomputes the AND-composition of dqn_blue and dqn_crate by hand
        (element-wise min of their per-goal maxima).

        NOTE(review): `load`, `ComposedDQN`, `CollectEnv`, `WarpFrame`,
        `FloatTensor`, `dd` (deepdish) and `start_positions` come from the
        surrounding file/project.
        """
        env = CollectEnv()
        dqn_blue = load('./models/blue/model.dqn', env)
        dqn_crate = load('./models/crate/model.dqn', env)
        if torch.cuda.is_available():
            dqn_blue.cuda()
            dqn_crate.cuda()

        # Zero-shot logical compositions of the two base policies.
        dqn_not = ComposedDQN([dqn_blue], compose="not")
        dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
        dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
        dqn_not_and = ComposedDQN([dqn_and], compose="not")
        # XOR = (A OR B) AND NOT(A AND B).
        dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

        # Cached goal images (deepdish/HDF5); empty if the cache is absent.
        goals = []
        if os.path.exists('./goals.h5'):
            goals = dd.io.load('goals.h5')

        if name == 'blue':
            dqn = dqn_blue
            goal_condition = lambda x: x.colour == 'blue'
        elif name == 'square':
            dqn = dqn_crate
            goal_condition = lambda x: x.shape == 'square'
        elif name == 'not':
            dqn = dqn_not
            goal_condition = lambda x: not x.colour == 'blue'
        elif name == 'or':
            dqn = dqn_or
            goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
        elif name == 'and':
            dqn = dqn_and
            goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
        elif name == 'xor':
            dqn = dqn_xor
            goal_condition = lambda x: (
                x.colour == 'blue' or x.shape == 'square') and not (
                    x.colour == 'blue' and x.shape == 'square')
        # NOTE(review): with the else-guard commented out, an unknown `name`
        # leaves `dqn`/`goal_condition` unbound (NameError downstream only if
        # they were used; the rollout below happens not to use them).
        # else:
        #     print("Invalid name")
        #     return

        # env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,goal_condition=lambda x: True)), max_trajectory)

        # One alternative crate_beige position per episode (max_episodes=4).
        poss = [(3, 4), (1, 2), (5, 7), (5, 2)]
        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                # NOTE(review): mutates the module-level `start_positions`
                # dict in place -- side effect persists across calls.
                start_positions['crate_beige'] = poss[episode]
                env = (WarpFrame(
                    CollectEnv(start_positions=start_positions,
                               changePlayerPos=False,
                               goal_condition=lambda x: True)))
                obs = env.reset()

                trajectory = []
                for _ in range(max_trajectory):
                    # Capture the frame before acting.
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    # Per-goal values under the "blue" policy ...
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn_blue(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    valuesb = values.data.max(0)[0]
                    # ... and under the "crate" (square) policy.
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn_crate(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    valuess = values.data.max(0)[0]
                    # Hand-rolled AND-composition: element-wise min of the
                    # two policies' max-over-goals values, then greedy argmax.
                    values = torch.stack((valuesb, valuess), 0).min(0)[0]
                    action = values.max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        break
                # Drop the terminal frame; advance even if the episode timed out.
                trajectories += trajectory[:-1]
                episode += 1

        if save_trajectories:
            trajectories[0].save('./trajectories/' + name + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame
import imageio


if __name__ == '__main__':
    # Render the initial board for a fixed layout and write it to map.png.
    layout = {
        'crate_beige': (3, 4),
        'player': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_blue': (1, 1),
        'crate_purple': (8, 1),
        'circle_blue': (1, 8),
    }

    # Goal predicate: any purple or blue object counts (only needed to
    # construct the env; nothing is collected here).
    is_target = lambda obj: obj.colour in ('purple', 'blue')
    environment = WarpFrame(
        CollectEnv(start_positions=layout,
                   changePlayerPos=False,
                   goal_condition=is_target))

    environment.reset()
    frame = environment.render()
    imageio.imwrite('map.png', frame)

Beispiel #10
0
    def step(self, action):
        """Forward `action` to the wrapped env, forcing `done` once the
        step budget (`self.max_length`) is exhausted."""
        observation, reward, done, info = self.env.step(action)
        self.steps = self.steps + 1
        # Truncate the episode exactly when the counter hits the cap.
        if self.steps == self.max_length:
            done = True
        return observation, reward, done, info


if __name__ == '__main__':

    max_iterations = 80
    max_episodes = 100
    max_trajectory = 50

    task = MaxLength(WarpFrame(CollectEnv(goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                                   or (x.colour == 'purple' and x.shape == 'circle'))),
                     max_trajectory)
    env = Monitor(task, './experiment_weighted_or/', video_callable=False, force=True)

    dqn_purple_circle = load('./models/purple_circle/model.dqn', task)  # entropy regularised functions
    dqn_beige_crate = load('./models/beige_crate/model.dqn', task)  # entropy regularised functions
    weights = np.arange(1/3, 3.01, 0.05)

    tally = {i: [] for i in range(len(weights))}

    for iter in range(max_iterations):
        for i, weight in enumerate(weights):
            collected_count = [0, 0]
            weight = 1
            dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle], [weight, 1])
            for episode in range(max_episodes):
Beispiel #11
0
        positions[found] = None
    return positions


if __name__ == '__main__':

    start_positions = {'player': (3, 4),
                       'crate_purple': (6, 3),
                       'circle_purple': (7, 7),
                       'circle_beige': (1, 7),
                       'crate_beige': (2, 2),
                       'crate_blue': (8, 1),
                       'circle_blue': (2, 8)}

    env = WarpFrame(CollectEnv(start_positions=start_positions,
                               goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                        or (x.colour == 'purple' and x.shape == 'circle')))

    dqn_purple_circle = load('../../models/purple_circle/model.dqn', env)
    dqn_beige_crate = load('../../models/beige_crate/model.dqn', env)
    dqn = ComposedDQN([dqn_purple_circle, dqn_beige_crate], [3, 2])  # TODO put weights here!

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(CollectEnv(start_positions=positions,
                                   goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
Beispiel #12
0
import torch
from gym.wrappers import Monitor

from dqn import ComposedDQN, FloatTensor, get_action
from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':

    max_episodes = 50000
    max_trajectory = 50

    task = MaxLength(
        WarpFrame(
            CollectEnv(goal_condition=lambda x: x.colour == 'blue' or x.colour
                       == 'purple')), max_trajectory)

    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_purple = load('./models/purple/model.dqn', task)
    dqn_composed = ComposedDQN([dqn_blue, dqn_purple], [1, 1])

    for dqn, name in [(dqn_blue, 'blue'), (dqn_purple, 'purple'),
                      (dqn_composed, 'composed')]:
        env = Monitor(task,
                      './experiment_or/' + name + '/',
                      video_callable=False,
                      force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
Beispiel #13
0
    def exp(name='or',
            save_trajectories=True,
            max_episodes=4,
            max_trajectory=20):
        """Run greedy rollouts for the task `name` and optionally save a GIF.

        `name` selects a (composed) DQN and a matching ground-truth goal
        predicate; episodes run until the predicate is satisfied or
        `max_trajectory` steps elapse.  Frames are written to
        ./trajectories/<name>.gif when `save_trajectories` is True.

        NOTE(review): `load`, `ComposedDQN`, `CollectEnv`, `WarpFrame`,
        `MaxLength`, `FloatTensor` and `dd` (deepdish) come from the
        surrounding file/project.
        """
        env = CollectEnv()
        dqn_purple = load('./models/purple/model.dqn', env)
        dqn_blue = load('./models/blue/model.dqn', env)
        dqn_crate = load('./models/crate/model.dqn', env)
        if torch.cuda.is_available():
            dqn_purple.cuda()
            dqn_blue.cuda()
            dqn_crate.cuda()

        # Zero-shot logical compositions of the base policies; `dqn_max`
        # serves as the reference maximum for the NOT compositions.
        dqn_max = ComposedDQN([dqn_purple, dqn_blue, dqn_crate], compose="or")
        dqn_not = ComposedDQN([dqn_blue], dqn_max=dqn_max, compose="not")
        dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
        dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
        dqn_not_and = ComposedDQN([dqn_and], dqn_max=dqn_max, compose="not")
        # XOR = (A OR B) AND NOT(A AND B).
        dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

        # Cached goal images (deepdish/HDF5); empty if the cache is absent.
        goals = []
        if os.path.exists('./goals.h5'):
            goals = dd.io.load('goals.h5')

        if name == 'blue':
            dqn = dqn_blue
            goal_condition = lambda x: x.colour == 'blue'
        elif name == 'purple':
            dqn = dqn_purple
            goal_condition = lambda x: x.colour == 'purple'
        elif name == 'square':
            dqn = dqn_crate
            goal_condition = lambda x: x.shape == 'square'
        # BUG FIX: this branch previously restarted the chain with `if`,
        # so 'blue'/'purple'/'square' fell through to the final `else`,
        # printed "Invalid name" and returned without running.
        elif name == 'not':
            dqn = dqn_not
            goal_condition = lambda x: not x.colour == 'blue'
        elif name == 'or':
            dqn = dqn_or
            goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
        elif name == 'and':
            dqn = dqn_and
            goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
        elif name == 'xor':
            dqn = dqn_xor
            goal_condition = lambda x: (
                x.colour == 'blue' or x.shape == 'square') and not (
                    x.colour == 'blue' and x.shape == 'square')
        else:
            print("Invalid name")
            return

        env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)),
                        max_trajectory)

        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                obs = env.reset()
                trajectory = []
                for _ in range(max_trajectory):
                    # Capture the frame before acting so the GIF shows the
                    # state each action was chosen in.
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    # Evaluate the selected DQN once per goal; observation
                    # and goal image are concatenated along dim=3.
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    # Greedy action: max over goals, then argmax over actions.
                    action = values.data.max(0)[0].max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        episode += 1
                        # Drop the terminal frame from the saved trajectory.
                        trajectories += trajectory[:-1]
                        break

        if save_trajectories:
            trajectories[0].save('./trajectories/' + name + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)