def evaluate(name='or', max_trajectory=20):
    # Select the composed value function and the matching goal condition for the task.
    if name == 'B.S':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'B.-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="and")
        goal_condition = lambda x: x.colour == 'blue' and not x.shape == 'square'
    elif name == 'S.-B':
        dqn = ComposedDQN([dqn_square, dqn_not_blue], compose="and")
        goal_condition = lambda x: x.shape == 'square' and not x.colour == 'blue'
    elif name == '-(B+S)':
        dqn = dqn_not_or
        goal_condition = lambda x: not (x.colour == 'blue' or x.shape == 'square')
    elif name == 'B':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == '-B':
        dqn = dqn_not_blue
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'S':
        dqn = dqn_square
        goal_condition = lambda x: x.shape == 'square'
    elif name == '-S':
        dqn = dqn_not_square
        goal_condition = lambda x: not x.shape == 'square'
    elif name == 'B+S':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'B+-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="or")
        goal_condition = lambda x: x.colour == 'blue' or not x.shape == 'square'
    elif name == 'S+-B':
        dqn = ComposedDQN([dqn_square, dqn_not_blue], compose="or")
        goal_condition = lambda x: x.shape == 'square' or not x.colour == 'blue'
    elif name == '-(B.S)':
        dqn = dqn_not_and
        goal_condition = lambda x: not (x.colour == 'blue' and x.shape == 'square')
    elif name == '-BxorS':
        dqn = ComposedDQN([dqn_xor], compose="not")
        goal_condition = lambda x: not ((x.colour == 'blue' or x.shape == 'square')
                                        and not (x.colour == 'blue' and x.shape == 'square'))
    elif name == 'BxorS':
        dqn = dqn_xor
        goal_condition = lambda x: (x.colour == 'blue' or x.shape == 'square') \
                                   and not (x.colour == 'blue' and x.shape == 'square')
    else:
        print("Invalid name")
        return

    env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,
                                         goal_condition=goal_condition)),
                    max_trajectory)
    G = 0  # undiscounted return of a single episode
    with torch.no_grad():
        obs = env.reset()
        for _ in range(max_trajectory):
            obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
            # Evaluate the composed Q-values for every candidate goal, then act
            # greedily with respect to the maximum over goals.
            values = []
            for goal in goals:
                goal = torch.from_numpy(np.array(goal)).type(FloatTensor).unsqueeze(0)
                x = torch.cat((obs, goal), dim=3)
                values.append(dqn(x).squeeze(0))
            values = torch.stack(values, 1).t()
            action = values.data.max(0)[0].max(0)[1].item()
            obs, reward, done, _ = env.step(action)
            G += reward
            if done:
                break
    return G
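# Usage sketch (not part of the original script): estimate the average return per
# task by repeated evaluation. `num_runs` and `returns` are illustrative names.
num_runs = 100
returns = {}
for task_name in ["B", "S", "B+S", "B.S", "BxorS"]:
    returns[task_name] = sum(evaluate(task_name) for _ in range(num_runs)) / num_runs
print(returns)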
    'player': (3, 4),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
env = WarpFrame(
    CollectEnv(start_positions=start_positions,
               goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
dqn_blue = load('../../models/blue/model.dqn', env)
dqn_crate = load('../../models/crate/model.dqn', env)
dqn = ComposedDQN([dqn_blue, dqn_crate], [1, 1], or_compose=False)  # conjunction of blue and crate

# Evaluate the composed value function with the player placed on every free cell.
values = np.zeros_like(env.env.board, dtype=float)
for pos in env.env.free_spaces:
    positions = copy.deepcopy(start_positions)
    positions = remove(positions, pos)
    positions['player'] = pos
    env = WarpFrame(
        CollectEnv(start_positions=positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
    obs = env.reset()
    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
    v = get_value(dqn, obs)
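# Plotting sketch, assuming matplotlib is available and that each computed v is
# written back into values at the corresponding board position (the snippet above
# is truncated before that step).
import matplotlib.pyplot as plt

plt.imshow(values, cmap='viridis')
plt.colorbar(label='V(s)')
plt.title("Value map for the composed 'blue AND square' task")
plt.show()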
# Tasks = ["B.S", "B.-S", "S.-B", "-(B+S)", "B", "-B", "S", "-S", "B+S", "B+-S", "S+-B", "-(B.S)", "-BxorS", "BxorS"]
# Tasks_N = [1, 1, 2, 2, 2, 4, 3, 3, 4, 4, 5, 5, 3, 3]
Tasks = ["B", "S", "B+S", "B.S", "BxorS"]
Tasks_P = [[(1, 8), (8, 1)],
           [(8, 1), (1, 1), (6, 3)],
           [(1, 8), (8, 1), (1, 1), (6, 3)],
           [(8, 1)],
           [(1, 8), (1, 1), (6, 3)]]

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

env = CollectEnv()
dqn_blue = load('./models/blue/model.dqn', env)
dqn_square = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_square.cuda()

dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
dqn_not_square = ComposedDQN([dqn_square], compose="not")
dqn_or = ComposedDQN([dqn_blue, dqn_square], compose="or")
dqn_not_or = ComposedDQN([dqn_or], compose="not")
dqn_and = ComposedDQN([dqn_blue, dqn_square], compose="and")
dqn_not_and = ComposedDQN([dqn_and], compose="not")
dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")


def evaluate(name='or', max_trajectory=20):
    if name == 'B.S':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'B.-S':
        dqn = ComposedDQN([dqn_blue, dqn_not_square], compose="and")
        goal_condition = lambda x: x.colour == 'blue' and not x.shape == 'square'
           ('beige', 'circle')}
task = MaxLength(WarpFrame(
    MultiCollectEnv(lambda collected: targets.issubset({(c.colour, c.shape) for c in collected}),
                    lambda x: (x.colour, x.shape) in targets)),
    max_trajectory)

# agent = train('./models/temporal3/results', task)  # 1 million training steps
# save('./models/temporal3/model.dqn', agent)
dqn = load('./models/temporal3/model.dqn', task)  # DQN trained on the full task

max_episodes = 50000
max_trajectory = 50
dqn1 = load('./models/purple/model.dqn', task)
dqn2 = load('./models/blue/model.dqn', task)
dqn3 = load('./models/beige/model.dqn', task)
dqn_composed = ComposedDQN([dqn1, dqn2, dqn3])

for dqn, name in [(dqn, 'full_task'), (dqn_composed, 'composed')]:
    env = Monitor(task, './experiment_temporal/' + name + '/', video_callable=False, force=True)
    for episode in range(max_episodes):
        if episode % 1000 == 0:
            print(episode)
        obs = env.reset()
        for _ in range(max_trajectory):
            obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
            action = get_action(dqn, obs)
            obs, reward, done, _ = env.step(action)
            env.render()
            if done:
                break
Tasks_N = [2, 2, 3, 4, 1, 3]

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

env = CollectEnv()
dqn_purple = load('./models/purple/model.dqn', env)
dqn_blue = load('./models/blue/model.dqn', env)
dqn_square = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_purple.cuda()
    dqn_blue.cuda()
    dqn_square.cuda()

# Maximal composed value function, used as the reference point for negation.
max_evf = ComposedDQN([dqn_purple, dqn_blue, dqn_square])
dqn_not_blue = ComposedDQN([dqn_blue], dqn_max=max_evf, compose="not")
dqn_not_square = ComposedDQN([dqn_square], dqn_max=max_evf, compose="not")
dqn_or_purple = ComposedDQN([dqn_purple, dqn_blue], compose="or")
dqn_or = ComposedDQN([dqn_blue, dqn_square], compose="or")
dqn_not_or = ComposedDQN([dqn_or], dqn_max=max_evf, compose="not")
dqn_and = ComposedDQN([dqn_blue, dqn_square], compose="and")
dqn_not_and = ComposedDQN([dqn_and], dqn_max=max_evf, compose="not")
dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")


def evaluate(name='or', save_trajectories=True, max_trajectory=20):
    if name == 'B.S':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
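# A plain-NumPy sketch of the composition semantics these wrappers appear to
# implement (an assumption inferred from the min/max usage in these scripts and
# the dqn_max argument above, not a verbatim copy of ComposedDQN):
import numpy as np

def q_or(*qs):
    return np.maximum.reduce(qs)   # disjunction: best Q-value across tasks

def q_and(*qs):
    return np.minimum.reduce(qs)   # conjunction: worst-case Q-value across tasks

def q_not(q, q_max, q_min):
    # Negation relative to the extreme value functions; one published formulation
    # reflects Q between the maximal and minimal attainable values.
    return (q_max + q_min) - q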
from trainer import load
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 50
    task = MaxLength(WarpFrame(CollectEnv(
        goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square')),
        max_trajectory)
    dqn_blue_crate = load('./models/blue_crate/model.dqn', task)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_crate = load('./models/crate/model.dqn', task)
    dqn_composed_or = ComposedDQN([dqn_blue, dqn_crate])
    dqn_composed_and = ComposedDQN([dqn_blue, dqn_crate], or_compose=False)
    for dqn, name in [(dqn_blue_crate, 'blue_crate'),
                      (dqn_composed_or, 'blue_or_crate'),
                      (dqn_composed_and, 'blue_and_crate')]:
        env = Monitor(task, './experiment_approx_and/' + name + '/', video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            for _ in range(max_trajectory):
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                action = get_action(dqn, obs)
                obs, reward, done, _ = env.step(action)
                if done:
                    break
def compose(dqns, weights):
    """Convenience wrapper: build a weighted composition of the given DQNs."""
    return ComposedDQN(dqns, weights)
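# Hypothetical usage of compose(): a weighted disjunction of two pre-trained value
# functions (the model paths and the env variable are assumed to be in scope).
dqn_a = load('./models/blue/model.dqn', env)
dqn_b = load('./models/crate/model.dqn', env)
dqn_weighted = compose([dqn_a, dqn_b], [1.0, 0.5])  # bias behaviour towards the first task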
    CollectEnv(start_positions=start_positions, goal_condition=goal_condition)))

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

dqn_blue = load('./models/blue/model.dqn', env)
dqn_purple = load('./models/purple/model.dqn', env)
dqn_crate = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_purple.cuda()
    dqn_crate.cuda()

dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
dqn_not_crate = ComposedDQN([dqn_crate], compose="not")
dqn_blue_not_crate = ComposedDQN([dqn_blue, dqn_not_crate], compose="and")
dqn_crate_not_blue = ComposedDQN([dqn_not_blue, dqn_crate], compose="and")
# Exclusive-or built from the two one-sided conjunctions.
dqn_xor = ComposedDQN([dqn_blue_not_crate, dqn_crate_not_blue], compose="or")

dqn = dqn_and
name = 'and'
if name == 'blue':
    goal_condition = lambda x: x.colour == 'blue'
elif name == 'purple':
    goal_condition = lambda x: x.colour == 'purple'
elif name == 'square':
def exp(name='or', save_trajectories=True, max_episodes=4, max_trajectory=7):
    env = CollectEnv()
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_blue.cuda()
        dqn_crate.cuda()
    dqn_not = ComposedDQN([dqn_blue], compose="not")
    dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
    dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
    dqn_not_and = ComposedDQN([dqn_and], compose="not")
    dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

    goals = []
    if os.path.exists('./goals.h5'):
        goals = dd.io.load('goals.h5')

    if name == 'blue':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == 'square':
        dqn = dqn_crate
        goal_condition = lambda x: x.shape == 'square'
    elif name == 'not':
        dqn = dqn_not
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'or':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'and':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'xor':
        dqn = dqn_xor
        goal_condition = lambda x: (x.colour == 'blue' or x.shape == 'square') \
                                   and not (x.colour == 'blue' and x.shape == 'square')
    # else:
    #     print("Invalid name")
    #     return
    # env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions, goal_condition=lambda x: True)), max_trajectory)

    poss = [(3, 4), (1, 2), (5, 7), (5, 2)]
    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            # Move the beige crate to a new position each episode.
            start_positions['crate_beige'] = poss[episode]
            env = WarpFrame(CollectEnv(start_positions=start_positions,
                                       changePlayerPos=False,
                                       goal_condition=lambda x: True))
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                trajectory.append(Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)

                # Q-values of the blue task, maximised over candidate goals.
                values = []
                for goal in goals:
                    goal = torch.from_numpy(np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn_blue(x).squeeze(0))
                values = torch.stack(values, 1).t()
                valuesb = values.data.max(0)[0]

                # Q-values of the crate task, maximised over candidate goals.
                values = []
                for goal in goals:
                    goal = torch.from_numpy(np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn_crate(x).squeeze(0))
                values = torch.stack(values, 1).t()
                valuess = values.data.max(0)[0]

                # Conjunction: act greedily on the element-wise minimum of the two.
                values = torch.stack((valuesb, valuess), 0).min(0)[0]
                action = values.max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    break
            trajectories += trajectory[:-1]
            episode += 1

    if save_trajectories:
        trajectories[0].save('./trajectories/' + name + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
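# Illustrative invocation: four episodes of the conjunctive task, saved as a GIF
# under ./trajectories/ (directory assumed to exist).
exp(name='and', save_trajectories=True, max_episodes=4, max_trajectory=7)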
    'player': (2, 1),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
env = WarpFrame(
    CollectEnv(start_positions=start_positions,
               goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
dqn1 = load('../../models/purple/model.dqn', env)
dqn2 = load('../../models/blue/model.dqn', env)
dqn = ComposedDQN([dqn1, dqn2], [1, 1.1])  # disjunction, slightly biased towards blue

obs = env.reset()
positions = list()
positions.append(env.env.player.position)
env.render()
for _ in range(100):
    obs = np.array(obs)
    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
    action = get_action(dqn, obs)
    obs, reward, done, _ = env.step(action)
    env.render()
    positions.append(env.env.player.position)
    if done:
        obs = env.reset()
task = MaxLength(WarpFrame(CollectEnv(
    goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                             or (x.colour == 'purple' and x.shape == 'circle'))),
    max_trajectory)
env = Monitor(task, './experiment_weighted_or/', video_callable=False, force=True)
dqn_purple_circle = load('./models/purple_circle/model.dqn', task)  # entropy-regularised value function
dqn_beige_crate = load('./models/beige_crate/model.dqn', task)  # entropy-regularised value function

# Sweep the weight on the beige-crate value function in the weighted disjunction.
weights = np.arange(1/3, 3.01, 0.05)
tally = {i: [] for i in range(len(weights))}
for iter in range(max_iterations):
    for i, weight in enumerate(weights):
        collected_count = [0, 0]
        dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle], [weight, 1])
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            for _ in range(max_trajectory):
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                # action = dqn_composed(Variable(obs, volatile=True)).data.max(1)[1].view(1, 1)[0][0]
                action = get_action(dqn_composed, obs)
                obs, reward, done, info = env.step(action)
                if done:
                    collected = info['collected']
                    if len([c for c in collected if c.colour == 'beige' and c.shape == 'square']) > 0:
                        collected_count[0] += 1
                    elif len([c for c in collected if c.colour == 'purple' and c.shape == 'circle']) > 0:
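# Plotting sketch for the weight sweep, assuming matplotlib is available and that
# each tally[i] ends up holding per-iteration fractions of episodes in which the
# beige crate was collected (the snippet above is truncated before tally is filled).
import matplotlib.pyplot as plt

fractions = [np.mean(tally[i]) for i in range(len(weights))]
plt.plot(weights, fractions)
plt.xlabel('weight on the beige-crate value function')
plt.ylabel('fraction of episodes collecting the beige crate')
plt.show()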
    'player': (3, 4),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
env = WarpFrame(
    CollectEnv(start_positions=start_positions,
               goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
dqn_blue = load('../../models/blue/model.dqn', env)
dqn_purple = load('../../models/purple/model.dqn', env)
dqn = ComposedDQN([dqn_blue, dqn_purple], [1, 1])

values = np.zeros_like(env.env.board, dtype=float)
for pos in env.env.free_spaces:
    positions = copy.deepcopy(start_positions)
    positions = remove(positions, pos)
    positions['player'] = pos
    env = WarpFrame(
        CollectEnv(start_positions=positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
    obs = env.reset()
    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
    v = get_value(dqn, obs)
    'player': (5, 5),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
env = WarpFrame(
    CollectEnv(start_positions=start_positions,
               goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
dqn1 = load('../../models/crate/model.dqn', env)
dqn2 = load('../../models/blue/model.dqn', env)
dqn = ComposedDQN([dqn1, dqn2], [1, 1], or_compose=False)

obs = env.reset()
positions = list()
positions.append(env.env.player.position)
env.render()
for _ in range(100):
    obs = np.array(obs)
    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
    action = get_action(dqn, obs)
    obs, reward, done, _ = env.step(action)
    env.render()
    positions.append(env.env.player.position)
    if done:
        obs = env.reset()
env = WarpFrame(
    MultiCollectEnv(termination_condition=lambda collected: targets.issubset(
        {(c.colour, c.shape) for c in collected}),
        reward_condition=lambda x: (x.colour, x.shape) in targets,
        start_positions=start_positions))
dqn1 = load('../../models/purple/model.dqn', env)
# dqn2 = load('../../models/purple_circle/model.dqn', env)
dqn3 = load('../../models/blue/model.dqn', env)
dqn4 = load('../../models/beige/model.dqn', env)
# dqn = ComposedDQN([dqn1, dqn2, dqn3, dqn4], [1, 1, 1, 1])
# dqn1 = load('../../models/crate/model.dqn', env)
# dqn2 = load('../../models/blue/model.dqn', env)
dqn = ComposedDQN([dqn1, dqn3, dqn4])

obs = env.reset()
positions = list()
positions.append(env.env.player.position)
env.render()
for _ in range(100):
    obs = np.array(obs)
    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
    action = get_action(dqn, obs)
    obs, reward, done, _ = env.step(action)
    env.render()
    positions.append(env.env.player.position)
    if done:
        obs = env.reset()
start_positions = {'player': (3, 4),
                   'crate_purple': (6, 3),
                   'circle_purple': (7, 7),
                   'circle_beige': (1, 7),
                   'crate_beige': (2, 2),
                   'crate_blue': (8, 1),
                   'circle_blue': (2, 8)}
env = WarpFrame(CollectEnv(start_positions=start_positions,
                           goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                    or (x.colour == 'purple' and x.shape == 'circle')))
dqn_purple_circle = load('../../models/purple_circle/model.dqn', env)
dqn_beige_crate = load('../../models/beige_crate/model.dqn', env)
dqn = ComposedDQN([dqn_purple_circle, dqn_beige_crate], [3, 2])  # TODO put weights here!

values = np.zeros_like(env.env.board, dtype=float)
for pos in env.env.free_spaces:
    positions = copy.deepcopy(start_positions)
    positions = remove(positions, pos)
    positions['player'] = pos
    env = WarpFrame(CollectEnv(start_positions=positions,
                               goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                        or (x.colour == 'purple' and x.shape == 'circle')))
    obs = env.reset()
    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
    v = get_value(dqn, obs)
def exp(name='or', save_trajectories=True, max_episodes=4, max_trajectory=20):
    env = CollectEnv()
    dqn_purple = load('./models/purple/model.dqn', env)
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_purple.cuda()
        dqn_blue.cuda()
        dqn_crate.cuda()
    dqn_max = ComposedDQN([dqn_purple, dqn_blue, dqn_crate], compose="or")
    dqn_not = ComposedDQN([dqn_blue], dqn_max=dqn_max, compose="not")
    dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
    dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
    dqn_not_and = ComposedDQN([dqn_and], dqn_max=dqn_max, compose="not")
    dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

    goals = []
    if os.path.exists('./goals.h5'):
        goals = dd.io.load('goals.h5')

    if name == 'blue':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == 'purple':
        dqn = dqn_purple
        goal_condition = lambda x: x.colour == 'purple'
    elif name == 'square':
        dqn = dqn_crate
        goal_condition = lambda x: x.shape == 'square'
    elif name == 'not':
        dqn = dqn_not
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'or':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'and':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'xor':
        dqn = dqn_xor
        goal_condition = lambda x: (x.colour == 'blue' or x.shape == 'square') \
                                   and not (x.colour == 'blue' and x.shape == 'square')
    else:
        print("Invalid name")
        return

    env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)), max_trajectory)
    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                trajectory.append(Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                # Greedy action with respect to the goal-maximised composed Q-values.
                values = []
                for goal in goals:
                    goal = torch.from_numpy(np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn(x).squeeze(0))
                values = torch.stack(values, 1).t()
                action = values.data.max(0)[0].max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    episode += 1
                    trajectories += trajectory[:-1]
                    break

    if save_trajectories:
        trajectories[0].save('./trajectories/' + name + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
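# Illustrative driver: generate one trajectory GIF per composed task.
for task_name in ['blue', 'purple', 'square', 'not', 'or', 'and', 'xor']:
    exp(name=task_name, save_trajectories=True)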