def learn(colour, shape, condition):
    """Train a DQN for a single base task and save it under ./models/."""
    name = colour + shape
    base_path = './models/{}/'.format(name)
    env = WarpFrame(
        CollectEnv(start_positions=start_positions, goal_condition=condition))
    agent = train(base_path, env)
    save(base_path + 'model.dqn', agent)
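# Usage sketch (assumption): training the two base tasks that the rest of the
# repo composes. The argument strings are illustrative; learn() only needs
# colour + shape to spell the model directory, so these calls would produce
# ./models/blue/ and ./models/crate/, matching the load() paths used below.
if __name__ == '__main__':
    learn('blue', '', lambda x: x.colour == 'blue')
    learn('', 'crate', lambda x: x.shape == 'square')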
def experiment(task=None, save_trajectories=True, max_episodes=4,
               max_trajectory=20):
    # Build the boolean expression for this task and compose its EVF.
    exp = task_exp(base_tasks, task, len(all_goals), list(models.keys()))
    dqn = exp_EVF(exp, models)
    # Goals this task accepts (assumes all_goals is a numpy array and task a boolean mask).
    goals = all_goals[task == 1]
    goal_condition = lambda x: (x.colour, x.shape) in goals
    env = WarpFrame(CollectEnv(goal_condition=goal_condition))

    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                trajectory.append(
                    Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                values = []
                for goal in mgoals:  # mgoals: presumably the full set of goal images the EVF is conditioned on
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn(x).squeeze(0))
                values = torch.stack(values, 1).t()
                # Max over goals per action, then argmax over actions.
                action = values.data.max(0)[0].max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    episode += 1
                    trajectories += trajectory[:-1]
                    break

    if save_trajectories:
        # Turn the simplified boolean expression into a readable filename.
        exp = str(exp.simplify()).replace('~', 'NOT ')
        exp = exp.replace('|', ' OR ')
        exp = exp.replace('&', ' AND ')
        trajectories[0].save('./trajectories/all_/' + exp + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
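# Worked example of the action rule used above. Stacking the per-goal
# Q-vectors and transposing yields a [goals x actions] matrix; max over
# goals (dim 0) scores each action by its best achievable goal, and the
# final argmax picks the action. A minimal self-contained check:
import torch

v = torch.tensor([[1.0, 0.2, 0.5],   # Q(s, ., goal 1)
                  [0.3, 0.9, 0.1]])  # Q(s, ., goal 2)
assert v.max(0)[0].tolist() == [1.0, 0.9, 0.5]  # best value per action
assert v.max(0)[0].max(0)[1].item() == 0        # greedy action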
def evaluate(name='or', max_trajectory=20):
    if name == 'B.S':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'B.-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="and")
        goal_condition = lambda x: x.colour == 'blue' and not x.shape == 'square'
    elif name == 'S.-B':
        dqn = ComposedDQN([dqn_square, dqn_not_blue], compose="and")
        goal_condition = lambda x: x.shape == 'square' and not x.colour == 'blue'
    elif name == '-(B+S)':
        dqn = dqn_not_or
        goal_condition = lambda x: not (x.colour == 'blue' or x.shape == 'square')
    elif name == 'B':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == '-B':
        dqn = dqn_not_blue
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'S':
        dqn = dqn_square
        goal_condition = lambda x: x.shape == 'square'
    elif name == '-S':
        dqn = dqn_not_square
        goal_condition = lambda x: not x.shape == 'square'
    elif name == 'B+S':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'B+-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="or")
        goal_condition = lambda x: x.colour == 'blue' or not x.shape == 'square'
    elif name == 'S+-B':
        dqn = ComposedDQN([dqn_square, dqn_not_blue], compose="or")
        goal_condition = lambda x: x.shape == 'square' or not x.colour == 'blue'
    elif name == '-(B.S)':
        dqn = dqn_not_and
        goal_condition = lambda x: not (x.colour == 'blue' and x.shape == 'square')
    elif name == '-BxorS':
        dqn = ComposedDQN([dqn_xor], compose="not")
        goal_condition = lambda x: not ((x.colour == 'blue' or x.shape == 'square')
                                        and not (x.colour == 'blue' and x.shape == 'square'))
    elif name == 'BxorS':
        dqn = dqn_xor
        goal_condition = lambda x: ((x.colour == 'blue' or x.shape == 'square')
                                    and not (x.colour == 'blue' and x.shape == 'square'))
    else:
        print("Invalid name")
        return

    env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,
                                         goal_condition=goal_condition)),
                    max_trajectory)
    G = 0
    with torch.no_grad():
        obs = env.reset()
        for _ in range(max_trajectory):
            obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
            values = []
            for goal in goals:
                goal = torch.from_numpy(np.array(goal)).type(FloatTensor).unsqueeze(0)
                x = torch.cat((obs, goal), dim=3)
                values.append(dqn(x).squeeze(0))
            values = torch.stack(values, 1).t()
            action = values.data.max(0)[0].max(0)[1].item()
            obs, reward, done, _ = env.step(action)
            G += reward
            if done:
                break
    return G
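# Usage sketch (assumption): averaging evaluate() returns over repeated
# rollouts for each composed task in the Tasks list defined below; the run
# count of 10 is arbitrary.
if __name__ == '__main__':
    for task_name in Tasks:
        returns = [evaluate(task_name) for _ in range(10)]
        print(task_name, sum(returns) / len(returns))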
start_positions = {'crate_beige': (3, 4),
                   'player': (6, 3),
                   'circle_purple': (7, 7),
                   'circle_beige': (1, 7),
                   'crate_blue': (1, 1),
                   'crate_purple': (8, 1),
                   'circle_blue': (1, 8)}

all_goals = ['BC', 'BS', 'bS', 'PS', 'bC', 'PC']
all_goals_P = [(1, 8), (8, 1), (1, 1), (6, 3), (1, 7), (7, 7)]
# Tasks = ["B.S", "B.-S", "S.-B", "-(B+S)", "B", "-B", "S", "-S",
#          "B+S", "B+-S", "S+-B", "-(B.S)", "-BxorS", "BxorS"]
# Tasks_N = [1, 1, 2, 2, 2, 4, 3, 3, 4, 4, 5, 5, 3, 3]
Tasks = ["B", "S", "B+S", "B.S", "BxorS"]
Tasks_P = [[(1, 8), (8, 1)],
           [(8, 1), (1, 1), (6, 3)],
           [(1, 8), (8, 1), (1, 1), (6, 3)],
           [(8, 1)],
           [(1, 8), (1, 1), (6, 3)]]

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

env = CollectEnv()
dqn_blue = load('./models/blue/model.dqn', env)
dqn_square = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_square.cuda()

# Base-task compositions used by evaluate().
dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
dqn_not_square = ComposedDQN([dqn_square], compose="not")
dqn_or = ComposedDQN([dqn_blue, dqn_square], compose="or")
dqn_not_or = ComposedDQN([dqn_or], compose="not")
dqn_and = ComposedDQN([dqn_blue, dqn_square], compose="and")
dqn_not_and = ComposedDQN([dqn_and], compose="not")
dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")
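# Sketch of the composition semantics ComposedDQN is assumed to implement,
# inferred from how it is used in this file (see dqn.py for the actual
# definition): OR takes an elementwise max over component Q-values, AND an
# elementwise min, and NOT reflects a Q-function through upper/lower bounding
# value functions (hence the dqn_max argument passed elsewhere). q_max and
# q_min are hypothetical stand-ins for those bounds.
def compose_q(qs, how, q_max=None, q_min=None):
    if how == 'or':
        return torch.stack(qs).max(0)[0]
    if how == 'and':
        return torch.stack(qs).min(0)[0]
    if how == 'not':
        return (q_max + q_min) - qs[0]
    raise ValueError(how)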
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame
import imageio  # scipy.misc.imsave was removed in SciPy >= 1.2; imageio replaces it

if __name__ == '__main__':
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
    env.reset()
    image = env.render()
    imageio.imwrite('map.png', image)
    return positions


if __name__ == '__main__':
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
    dqn_blue = load('../../models/blue/model.dqn', env)
    dqn_crate = load('../../models/crate/model.dqn', env)
    dqn = ComposedDQN([dqn_blue, dqn_crate], [1, 1], or_compose=False)

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)
        positions = remove(positions, pos)
        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
Experiment for approximating blue AND square
"""
from gym.wrappers import Monitor
from dqn import ComposedDQN, get_action
from gym_repoman.envs import CollectEnv
from trainer import load
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 50
    task = MaxLength(WarpFrame(CollectEnv(
        goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square')),
        max_trajectory)
    dqn_blue_crate = load('./models/blue_crate/model.dqn', task)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_crate = load('./models/crate/model.dqn', task)
    dqn_composed_or = ComposedDQN([dqn_blue, dqn_crate])
    dqn_composed_and = ComposedDQN([dqn_blue, dqn_crate], or_compose=False)
    for dqn, name in [(dqn_blue_crate, 'blue_crate'),
                      (dqn_composed_or, 'blue_or_crate'),
                      (dqn_composed_and, 'blue_and_crate')]:
        env = Monitor(task, './experiment_approx_and/' + name + '/',
                      video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
def exp(name='or', save_trajectories=True, max_episodes=4, max_trajectory=7):
    env = CollectEnv()
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_blue.cuda()
        dqn_crate.cuda()

    dqn_not = ComposedDQN([dqn_blue], compose="not")
    dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
    dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
    dqn_not_and = ComposedDQN([dqn_and], compose="not")
    dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

    goals = []
    if os.path.exists('./goals.h5'):
        goals = dd.io.load('goals.h5')

    if name == 'blue':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == 'square':
        dqn = dqn_crate
        goal_condition = lambda x: x.shape == 'square'
    elif name == 'not':
        dqn = dqn_not
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'or':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'and':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'xor':
        dqn = dqn_xor
        goal_condition = lambda x: (
            x.colour == 'blue' or x.shape == 'square') and not (
            x.colour == 'blue' and x.shape == 'square')
    # else:
    #     print("Invalid name")
    #     return

    # env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,
    #                                      goal_condition=lambda x: True)), max_trajectory)

    # Note: the dqn/goal_condition selected above are not used below; this
    # experiment always runs the hand-rolled blue AND crate composition while
    # moving the beige crate between episodes.
    poss = [(3, 4), (1, 2), (5, 7), (5, 2)]
    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            start_positions['crate_beige'] = poss[episode]
            env = WarpFrame(
                CollectEnv(start_positions=start_positions,
                           changePlayerPos=False,
                           goal_condition=lambda x: True))
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                trajectory.append(
                    Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)

                # Max over goals for the blue task ...
                values = []
                for goal in goals:
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn_blue(x).squeeze(0))
                values = torch.stack(values, 1).t()
                valuesb = values.data.max(0)[0]

                # ... max over goals for the crate task ...
                values = []
                for goal in goals:
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn_crate(x).squeeze(0))
                values = torch.stack(values, 1).t()
                valuess = values.data.max(0)[0]

                # ... then AND the two tasks with an elementwise min.
                values = torch.stack((valuesb, valuess), 0).min(0)[0]
                action = values.max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    break
            trajectories += trajectory[:-1]
            episode += 1

    if save_trajectories:
        trajectories[0].save('./trajectories/' + name + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
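# Refactoring sketch (hypothetical helper): the inline composition above can
# be factored out. Each DQN's Q-values are maxed over goals first, then the
# per-task results are combined with an elementwise min (AND), exactly as the
# valuesb/valuess code does. goal_tensors is assumed to hold the goals
# already converted to batched FloatTensors.
def greedy_and_action(dqns, obs, goal_tensors):
    maxed = []
    for dqn in dqns:
        qs = torch.stack([dqn(torch.cat((obs, g), dim=3)).squeeze(0)
                          for g in goal_tensors], 1)  # [actions, goals]
        maxed.append(qs.max(1)[0])                    # max over goals
    values = torch.stack(maxed, 0).min(0)[0]          # AND: elementwise min
    return values.max(0)[1].item()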
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame
import imageio

if __name__ == '__main__':
    start_positions = {'crate_beige': (3, 4),
                       'player': (6, 3),
                       'circle_purple': (7, 7),
                       'circle_beige': (1, 7),
                       'crate_blue': (1, 1),
                       'crate_purple': (8, 1),
                       'circle_blue': (1, 8)}
    env = WarpFrame(CollectEnv(start_positions=start_positions,
                               changePlayerPos=False,
                               goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
    env.reset()
    image = env.render()
    imageio.imwrite('map.png', image)
    def step(self, action):
        ob, reward, done, info = self.env.step(action)
        self.steps += 1
        if self.steps == self.max_length:
            done = True
        return ob, reward, done, info


if __name__ == '__main__':
    max_iterations = 80
    max_episodes = 100
    max_trajectory = 50
    task = MaxLength(WarpFrame(CollectEnv(
        goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                 or (x.colour == 'purple' and x.shape == 'circle'))),
        max_trajectory)
    env = Monitor(task, './experiment_weighted_or/', video_callable=False, force=True)

    dqn_purple_circle = load('./models/purple_circle/model.dqn', task)  # entropy regularised functions
    dqn_beige_crate = load('./models/beige_crate/model.dqn', task)      # entropy regularised functions

    weights = np.arange(1 / 3, 3.01, 0.05)
    tally = {i: [] for i in range(len(weights))}
    for iter in range(max_iterations):
        for i, weight in enumerate(weights):
            collected_count = [0, 0]
            dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle],
                                       [weight, 1])
            for episode in range(max_episodes):
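# Sketch of the weighted-OR semantics assumed above (see dqn.py for the real
# definition): the weight vector appears to scale each component's Q-values
# before the max, so sweeping `weights` biases which of the two goal types
# the composed agent prefers. A toy two-action illustration:
import torch

q_beige_crate = torch.tensor([0.5, 1.0])    # hypothetical Q-values
q_purple_circle = torch.tensor([0.8, 0.3])
weight = 2.0
q_or = torch.max(weight * q_beige_crate, q_purple_circle)  # tensor([1.0, 2.0])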
    positions[found] = None
    return positions


if __name__ == '__main__':
    start_positions = {'player': (3, 4),
                       'crate_purple': (6, 3),
                       'circle_purple': (7, 7),
                       'circle_beige': (1, 7),
                       'crate_beige': (2, 2),
                       'crate_blue': (8, 1),
                       'circle_blue': (2, 8)}
    env = WarpFrame(CollectEnv(start_positions=start_positions,
                               goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                        or (x.colour == 'purple' and x.shape == 'circle')))
    dqn_purple_circle = load('../../models/purple_circle/model.dqn', env)
    dqn_beige_crate = load('../../models/beige_crate/model.dqn', env)
    dqn = ComposedDQN([dqn_purple_circle, dqn_beige_crate], [3, 2])  # TODO put weights here!

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)
        positions = remove(positions, pos)
        positions['player'] = pos
        env = WarpFrame(CollectEnv(start_positions=positions,
                                   goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
import torch
from gym.wrappers import Monitor
from dqn import ComposedDQN, FloatTensor, get_action
from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 50
    task = MaxLength(
        WarpFrame(
            CollectEnv(goal_condition=lambda x: x.colour == 'blue' or x.colour == 'purple')),
        max_trajectory)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_purple = load('./models/purple/model.dqn', task)
    dqn_composed = ComposedDQN([dqn_blue, dqn_purple], [1, 1])
    for dqn, name in [(dqn_blue, 'blue'),
                      (dqn_purple, 'purple'),
                      (dqn_composed, 'composed')]:
        env = Monitor(task, './experiment_or/' + name + '/',
                      video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
def exp(name='or', save_trajectories=True, max_episodes=4, max_trajectory=20):
    env = CollectEnv()
    dqn_purple = load('./models/purple/model.dqn', env)
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_purple.cuda()
        dqn_blue.cuda()
        dqn_crate.cuda()

    dqn_max = ComposedDQN([dqn_purple, dqn_blue, dqn_crate], compose="or")
    dqn_not = ComposedDQN([dqn_blue], dqn_max=dqn_max, compose="not")
    dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
    dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
    dqn_not_and = ComposedDQN([dqn_and], dqn_max=dqn_max, compose="not")
    dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

    goals = []
    if os.path.exists('./goals.h5'):
        goals = dd.io.load('goals.h5')

    if name == 'blue':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == 'purple':
        dqn = dqn_purple
        goal_condition = lambda x: x.colour == 'purple'
    elif name == 'square':
        dqn = dqn_crate
        goal_condition = lambda x: x.shape == 'square'
    elif name == 'not':
        dqn = dqn_not
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'or':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'and':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'xor':
        dqn = dqn_xor
        goal_condition = lambda x: (
            x.colour == 'blue' or x.shape == 'square') and not (
            x.colour == 'blue' and x.shape == 'square')
    else:
        print("Invalid name")
        return

    env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)),
                    max_trajectory)
    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                trajectory.append(
                    Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                values = []
                for goal in goals:
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn(x).squeeze(0))
                values = torch.stack(values, 1).t()
                action = values.data.max(0)[0].max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    episode += 1
                    trajectories += trajectory[:-1]
                    break

    if save_trajectories:
        trajectories[0].save('./trajectories/' + name + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
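# Usage sketch (assumption): render one trajectory gif per composed task.
if __name__ == '__main__':
    for name in ['blue', 'purple', 'square', 'not', 'or', 'and', 'xor']:
        exp(name=name)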