def __init__(self, make_env_f, *args, gpus=get_available_gpus() * 4, input_queue=None, done_queue=None, **kwargs):
    """Build per-device evaluation workers, or attach to external queues.

    When neither ``input_queue`` nor ``done_queue`` is supplied, one
    ``RLEvalutionWorker`` is created per entry of ``gpus`` and the workers
    are connected to a fresh ``AsyncTaskHub`` through a ``WorkerHub``.
    Otherwise a single worker is created only so that its ``model`` can be
    exposed, no hub is started, and the supplied queues are handed to
    ``AsyncTaskHub``.

    Args:
        make_env_f: Environment factory given to
            ``gym_tensorflow.get_ref_batch`` and to every worker.
        *args: Positional arguments forwarded to ``RLEvalutionWorker``.
        gpus: Device names, one worker per entry. The default expression is
            evaluated once, when the function is defined. An empty value
            falls back to ``['/cpu:0']``.
        input_queue: Optional queue forwarded to ``AsyncTaskHub``.
        done_queue: Optional queue forwarded to ``AsyncTaskHub``.
        **kwargs: Keyword arguments forwarded to ``RLEvalutionWorker``;
            ``device`` is set per worker and overrides any caller value.
    """
    self.sess = None
    devices = gpus if gpus else ['/cpu:0']

    # The reference batch is computed in a session that is closed again
    # before any worker is constructed.
    with tf.Session() as ref_session:
        import gym_tensorflow
        ref_batch = gym_tensorflow.get_ref_batch(make_env_f, ref_session, 128)
        ref_batch = ref_batch[:, ...]

    queues_supplied = not (input_queue is None and done_queue is None)
    if queues_supplied:
        # NOTE(review): unlike the worker construction below, this call
        # passes neither make_env_f nor ref_batch -- confirm against the
        # RLEvalutionWorker signature that this is intended.
        model_source = RLEvalutionWorker(*args, **dict(kwargs, device=devices[0]))
        self.model = model_source.model
        self.workers = []
        self.hub = None
        self.steps_counter = tf.constant(0)
        self.async_hub = AsyncTaskHub(input_queue, done_queue)
        return

    built_workers = []
    for device in devices:
        worker_kwargs = dict(kwargs, device=device)
        built_workers.append(
            RLEvalutionWorker(make_env_f, *args, ref_batch=ref_batch, **worker_kwargs))
    self.workers = built_workers

    self.model = self.workers[0].model
    # Aggregate of every worker's own step counter.
    self.steps_counter = sum(worker.steps_counter for worker in self.workers)
    self.async_hub = AsyncTaskHub()
    self.hub = WorkerHub(self.workers, self.async_hub.input_queue, self.async_hub)
def __init__(self, make_env_fs, *args, gpus=get_available_gpus() * 4, **kwargs):
    """Build one capped-action-space worker per device, alternating games.

    Workers at even positions in ``gpus`` use ``make_env_fs[0]`` and workers
    at odd positions use ``make_env_fs[1]``. Each worker receives a
    reference batch computed for its own game.

    Args:
        make_env_fs: Indexable collection of environment factories; indices
            0 and 1 are used.
        *args: Positional arguments forwarded to
            ``RLEvalutionWorkerCappedActionSpace`` after the game index and
            the environment factory.
        gpus: Device names, one worker per entry. The default expression is
            evaluated once, when the function is defined. An empty value
            falls back to ``['/cpu:0']``.
        **kwargs: Keyword arguments forwarded to the worker; ``device`` is
            set per worker and overrides any caller value.
    """
    tlogger.info("=== Calling MTConcurrentWorkers()")
    self.sess = None
    devices = gpus if gpus else ['/cpu:0']
    print("GPUS: {}".format(devices))

    with tf.Session() as ref_session:
        import gym_tensorflow
        self.workers = []
        for slot, device in enumerate(devices):
            # Alternate between games for multi-task learning:
            # even slot -> first game (0), odd slot -> second game (1).
            game_index = slot % 2
            game_make_env = make_env_fs[game_index]

            ref_batch = gym_tensorflow.get_ref_batch(
                game_make_env, ref_session, 128, game_max_action_space=4)
            ref_batch = ref_batch[:, ...]

            worker_kwargs = dict(kwargs, device=device)
            self.workers.append(
                RLEvalutionWorkerCappedActionSpace(
                    game_index, game_make_env, *args,
                    ref_batch=ref_batch, **worker_kwargs))

    self.model = self.workers[0].model
    # Aggregate of every worker's own step counter.
    self.steps_counter = sum(worker.steps_counter for worker in self.workers)
    self.async_hub = AsyncTaskHub()
    self.hub = WorkerHub(self.workers, self.async_hub.input_queue, self.async_hub)
def main(game, filename=None, outfile=None, model_name="LargeModel", no_video=False, add_text=False, num_runs=RUNS, graph=None):
    """Play ``game`` with a policy and show and/or record every frame.

    Args:
        game: Game identifier passed to ``gym_tensorflow.make``.
        filename: Optional path of a pickled snapshot. A snapshot that has
            an ``elite`` attribute supplies ``elite.seeds``; any other
            snapshot supplies ``theta``. Without a file, ``default_seeds``
            is used.
        outfile: Optional path of a video file to write (MJPG fourcc).
        model_name: Name passed to ``get_model``.
        no_video: When true, no on-screen image viewer is created.
        add_text: Forwarded unchanged to ``handle_frame``.
        num_runs: Number of episodes to play.
        graph: Optional path; when set, it is passed to ``saver.save``
            after the weights are loaded.

    The per-episode rewards, their mean and their standard deviation are
    printed; nothing is returned.
    """
    seeds = default_seeds
    outvid = None
    viewer = None
    iteration = None
    state = None

    if filename:
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file -- only load snapshots from a trusted source.
        # The snapshot is only read, so plain read-only mode is used; this
        # also works for files without write permission.
        with open(filename, 'rb') as file:
            state = pickle.load(file)

        if hasattr(state, 'elite'):
            seeds = state.elite.seeds
            iteration = state.it
            print("Loading GA snapshot from elite, iteration: {} / {}".format(len(seeds), iteration))
        else:
            seeds = None
            iteration = state.it
            print("Loading ES snapshot, iteration: {}".format(state.it))

    env = gym_tensorflow.make(game, 1)
    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:
        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)

        # The reference batch is computed in its own short-lived session.
        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    action_op = model.make_net(tf.expand_dims(obs_op, axis=1), env.action_space, batch_size=1, ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    if not no_video:
        from gym.envs.classic_control import rendering
        viewer = rendering.SimpleImageViewer()

    if hasattr(env.unwrapped, 'render'):
        # Prefer the environment's own rendering op over the raw observation.
        obs_op = env.unwrapped.render()

        def display_obs(im):
            # With more than one entry along axis 1, the first two are
            # merged with a bitwise OR; otherwise the single entry is used.
            if im.shape[1] > 1:
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            handle_frame(im, outvid, viewer, game, iteration, add_text)
    else:
        def display_obs(im):
            # Take the last channel of the first batch entry, replicate it
            # into three channels and scale it to uint8.
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            handle_frame(im, outvid, viewer, game, iteration, add_text)

    rewards = []

    if outfile:
        fourcc = cv.VideoWriter_fourcc(*'MJPG')
        outvid = cv.VideoWriter(outfile, fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

    try:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            model.initialize()
            tlogger.info(model.description)

            if seeds:
                noise = SharedNoiseTable()
                weights = model.compute_weights_from_seeds(noise, seeds)
                model.load(sess, 0, weights, seeds)
            else:
                weights = state.theta
                model.load(sess, 0, weights, (weights, 0))

            if graph:
                # NOTE(review): `saver` is not defined inside this function;
                # confirm that a module-level saver exists.
                saver.save(sess, graph)

            for _ in range(num_runs):
                sess.run(reset_op)
                # NOTE(review): obs_op is evaluated once and discarded before
                # the displayed fetch -- confirm the extra run is required.
                sess.run(obs_op)
                display_obs(sess.run(obs_op))

                total_rew = 0
                num_frames = 0
                while True:
                    rew, done = sess.run([rew_op, done_op])
                    num_frames += 1
                    total_rew += rew[0]
                    display_obs(sess.run(obs_op))
                    time.sleep(4 / 60)
                    if done[0]:
                        break

                rewards += [total_rew]
                print('Final reward: ', total_rew, 'after', num_frames, 'steps')

            print(rewards)
            print("Mean: ", np.mean(rewards))
            print("Std: ", np.std(rewards))
    finally:
        # Release the writer even when playback fails part-way through.
        if outvid:
            outvid.release()
def main(game, filename=None, out_dir=None, model_name='LargeModel', add_text=False, num_runs=RUNS, layer=None):
    """Play ``game`` with a policy and write one video per visualisation key.

    An ``all.mp4`` video is always written to ``out_dir``; one additional
    ``<key>.mp4`` video is written for every key returned by
    ``get_nn_images`` (``/`` in a key is replaced by ``-`` in the file name).

    Args:
        game: Game identifier passed to ``gym_tensorflow.make``.
        filename: Optional path of a pickled snapshot. A snapshot that has
            an ``elite`` attribute supplies ``elite.seeds``; any other
            snapshot supplies ``theta``. Without a file, ``default_seeds``
            is used.
        out_dir: Directory the video files are written into.
        model_name: Name passed to ``get_model``.
        add_text: Forwarded unchanged to ``handle_frame``.
        num_runs: Number of episodes to play.
        layer: Accepted for interface compatibility; not used here.

    The per-episode rewards, their mean and their standard deviation are
    printed; nothing is returned.
    """
    seeds = default_seeds
    viewer = None  # no on-screen viewer is created in this variant
    iteration = None
    state = None

    if filename:
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file -- only load snapshots from a trusted source.
        # The snapshot is only read, so plain read-only mode is used; this
        # also works for files without write permission.
        with open(filename, 'rb') as file:
            state = pickle.load(file)

        if hasattr(state, 'elite'):
            seeds = state.elite.seeds
            iteration = state.it
            print("Loading GA snapshot from elite, iteration: {} / {}".format(len(seeds), iteration))
        else:
            seeds = None
            iteration = state.it
            print("Loading ES snapshot, iteration: {}".format(state.it))

    fourcc = cv.VideoWriter_fourcc(*'H264')

    env = gym_tensorflow.make(game, 1)
    model = get_model(model_name)
    obs_op = env.observation()
    reset_op = env.reset()

    if model.requires_ref_batch:
        def make_env(b):
            return gym_tensorflow.make(game=game, batch_size=1)

        # The reference batch is computed in its own short-lived session.
        with tf.Session() as sess:
            ref_batch = gym_tensorflow.get_ref_batch(make_env, sess, 128)
            ref_batch = ref_batch[:, ...]
    else:
        ref_batch = None

    input_op = tf.expand_dims(obs_op, axis=1)
    action_op = model.make_net(input_op, env.action_space, batch_size=1, ref_batch=ref_batch)
    if env.discrete_action:
        action_op = tf.argmax(action_op, axis=-1, output_type=tf.int32)
    rew_op, done_op = env.step(action_op)

    # Maps a visualisation key to its video writer; filled in below so that
    # every writer is created inside the try/finally that releases it.
    out_vids = {}

    def write_frames(im, viz):
        # NOTE(review): `im` is rebound on every iteration, so each writer
        # receives the output of combine_viz applied on top of the previous
        # key's result -- confirm that this compounding is intended.
        for key, writer in out_vids.items():
            im = combine_viz(im, viz, key)
            handle_frame(im, writer, viewer, game, iteration, add_text)

    if hasattr(env.unwrapped, 'render'):
        # Prefer the environment's own rendering op over the raw observation.
        obs_op = env.unwrapped.render()

        def display_obs(im, viz):
            # With more than one entry along axis 1, the first two are
            # merged with a bitwise OR; otherwise the single entry is used.
            if im.shape[1] > 1:
                im = np.bitwise_or(im[0, 0, ...], im[0, 1, ...])
            else:
                im = im[0, 0, ...]
            write_frames(im, viz)
    else:
        def display_obs(im, viz):
            # Take the last channel of the first batch entry, replicate it
            # into three channels and scale it to uint8.
            im = im[0, :, :, -1]
            im = np.stack([im] * 3, axis=-1)
            im = (im * 255).astype(np.uint8)
            write_frames(im, viz)

    rewards = []

    try:
        out_vids['all'] = cv.VideoWriter(
            os.path.join(out_dir, 'all.mp4'), fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            model.initialize()
            tlogger.info(model.description)

            if seeds:
                noise = SharedNoiseTable()
                weights = model.compute_weights_from_seeds(noise, seeds)
                model.load(sess, 0, weights, seeds)
            else:
                weights = state.theta
                model.load(sess, 0, weights, (weights, 0))

            # One probe call to learn which keys exist, so that a writer can
            # be opened for each of them before playback starts.
            _, images = get_nn_images(sess, input_op, model)
            for key in images:
                out_vids[key] = cv.VideoWriter(
                    os.path.join(out_dir, '{}.mp4'.format(key.replace('/', '-'))),
                    fourcc, 16, (VIDEO_SIZE, VIDEO_SIZE))

            for _ in range(num_runs):
                sess.run(reset_op)
                total_rew = 0
                num_frames = 0
                while True:
                    img = sess.run(obs_op)
                    _, images = get_nn_images(sess, input_op, model)
                    rew, done = sess.run([rew_op, done_op])
                    num_frames += 1
                    total_rew += rew[0]
                    display_obs(img, images)
                    # An episode ends when the environment reports done or
                    # after exactly 50 frames, whichever comes first.
                    if done[0] or num_frames == 50:
                        rewards += [total_rew]
                        print('Final reward: ', total_rew, 'after', num_frames, 'steps')
                        break

            print(rewards)
            print("Mean: ", np.mean(rewards))
            print("Std: ", np.std(rewards))
    finally:
        # Release every writer even when playback fails part-way through.
        for writer in out_vids.values():
            writer.release()