def test_deep_q_all(self):
    self.config_test()
    self.build_cnn(self.num_actions)
    restorer = tf.train.Saver()
    total_rewards_ph = tf.placeholder(tf.float32)  # the average reward is a float
    tf.summary.scalar('test/total_reward', total_rewards_ph)
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(logdir=self.save_dir)
    sess = tf.Session()
    ckpt_list = list_getter(self.save_dir, 'index', 'model')
    for ckpt in ckpt_list:
        restorer.restore(sess, ckpt.replace('.index', ''))
        total_reward = 0
        for episode in range(self.num_repeat_episode):
            state = self.initialize_game()
            for step in range(self.max_step_per_episode):
                feed = {self.state: np.expand_dims(state, axis=0),
                        self.is_train: False}
                Qs = sess.run(self.output, feed_dict=feed)
                action = np.argmax(Qs)
                angle = action * 4 + 20  # map action index to shooting angle
                state, is_end, reward = self.take_action(angle)
                total_reward += reward  # was missing: reward was never accumulated
                if is_end:
                    break
        ckpt_num = int(ckpt.split('-')[-1].split('.')[0])
        print(ckpt_num)
        average_total_reward = float(total_reward) / float(self.num_repeat_episode)
        summary_writer.add_summary(
            sess.run(summary_op, {total_rewards_ph: average_total_reward}),
            ckpt_num)
    sess.close()
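# `list_getter` is called throughout this file but never defined here. Below is
# a minimal sketch of the assumed behavior, inferred from the call sites: walk
# `src_dir`, keep files whose names end with `extension` (a string or tuple of
# strings) and, optionally, contain `name_filter`, and return them sorted. The
# signature is an assumption, not the author's implementation.
import os

def list_getter(src_dir, extension, name_filter=None):
    matched = []
    for root, _, files in os.walk(src_dir):
        for name in files:
            # str.endswith accepts a tuple, covering calls like ("avi", "mp4")
            if name.endswith(extension) and (name_filter is None or name_filter in name):
                matched.append(os.path.join(root, name))
    matched.sort()
    return matched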
def test_deep_q(self):
    raise NotImplementedError('this method should be modified')  # was `NotImplemented`, which is not an exception
    self.config_test()
    self.build_cnn(self.num_actions)
    restorer = tf.train.Saver()
    total_rewards_ph = tf.placeholder(tf.int64)
    tf.summary.scalar('test/total_reward', total_rewards_ph)
    sess = tf.Session()
    ckpt = list_getter(self.save_dir, 'index', 'model')[-1]
    ckpt = ckpt.replace('.index', '')
    restorer.restore(sess, ckpt)
    total_reward = 0
    for episode in range(self.num_repeat_episode):
        self.initialize_game()
        # seed the state stack with the first frame repeated
        frame, _, _ = self.take_action(np.random.randint(0, self.num_actions), 0)
        state_queue = deque([frame for _ in range(self.state_stack_num)],
                            maxlen=self.state_stack_num)
        for step in range(self.max_step_per_episode):
            state = np.stack(state_queue, axis=2)
            action = np.argmax(
                sess.run(self.prob_distribution,
                         {self.state: np.expand_dims(state, axis=0),
                          self.is_train: False}))
            frame, is_end, reward = self.take_action(action, step)
            # if is_end:
            #     break
            state_queue.append(frame)
            total_reward += reward
    sess.close()
    average_total_reward = float(total_reward) / float(self.num_repeat_episode)
    print("average total reward: %.4f" % average_total_reward)
def test_deep_q(self):
    self.config_test()
    self.build_cnn(self.num_actions)
    restorer = tf.train.Saver()
    total_rewards_ph = tf.placeholder(tf.int64)
    tf.summary.scalar('test/total_reward', total_rewards_ph)
    sess = tf.Session()
    ckpt = list_getter(self.save_dir, 'index', 'model')[-1]
    ckpt = ckpt.replace('.index', '')
    restorer.restore(sess, ckpt)
    total_reward = 0
    for episode in range(self.num_repeat_episode):
        self.initialize_game()
        state_queue = deque(maxlen=self.state_stack_num)
        # iterate to fill the very first stacked state
        for i in range(self.state_stack_num):
            frame, _, _ = self.take_action(0)
            state_queue.append(frame)
        for step in range(self.max_step_per_episode):
            state = np.stack(state_queue, axis=2)
            action = np.argmax(
                sess.run(self.output,
                         {self.state: np.expand_dims(state, axis=0),
                          self.is_train: False}))
            frame, is_end, reward = self.take_action(action)
            # if is_end:
            #     break
            state_queue.append(frame)
            total_reward += reward
    sess.close()
    average_total_reward = float(total_reward) / float(self.num_repeat_episode)
    print("average total reward: %.4f" % average_total_reward)
def _input_from_image(self):
    def inspect_file_extension(target_list):
        extensions = list(set([
            os.path.basename(img_name).split(".")[-1]
            for img_name in target_list
        ]))
        if len(extensions) > 1:
            raise ValueError("Multiple image formats are used: %s" % extensions)
        elif len(extensions) == 0:
            raise ValueError("no image files exist")

    def inspect_pairness(list1, list2):
        if not len(list1) == len(list2):
            raise ValueError("numbers of images are different")
        for file1, file2 in zip(list1, list2):
            file1_name = os.path.basename(file1).split(".")[-2]
            file2_name = os.path.basename(file2).split(".")[-2]
            if not file1_name == file2_name:
                raise ValueError("image names are different: %s | %s" % (file2, file1))

    img_list = list_getter(self.config.img_dir, "jpg")
    img_list_tensor = tf.convert_to_tensor(img_list, dtype=tf.string)
    img_data = tf.data.Dataset.from_tensor_slices(img_list_tensor)
    if self.config.phase == "eval":
        gt_list = list_getter(self.seg_dir, "png")
        inspect_pairness(gt_list, img_list)
        inspect_file_extension(gt_list)
        inspect_file_extension(img_list)
        gt_list_tensor = tf.convert_to_tensor(gt_list, dtype=tf.string)
        gt_data = tf.data.Dataset.from_tensor_slices(gt_list_tensor)
        data = tf.data.Dataset.zip((img_data, gt_data))
        data = data.map(self._image_gt_parser, 4).batch(self.config.batch_size, False)
    else:
        data = img_data.map(self._image_parser, 4).batch(self.config.batch_size, False)
    data = data.prefetch(4)  # or tf.data.experimental.AUTOTUNE
    iterator = data.make_initializable_iterator()
    dataset = iterator.get_next()
    self.input_data = dataset["input_data"]
    self.gt = dataset["gt"] if self.config.phase == "eval" else None
    self.filename = dataset["filename"]
    self.data_init = iterator.initializer
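# `_image_parser` and `_image_gt_parser` are referenced above but not defined
# in this file. Below is a hedged sketch of `_image_parser`, consistent with
# the keys the pipeline consumes ("input_data", "filename"); the decode details
# (JPEG, 3 channels, float cast) are assumptions.
def _image_parser(self, filename):
    raw = tf.read_file(filename)
    image = tf.image.decode_jpeg(raw, channels=3)  # assumed 3-channel JPEG input
    image = tf.cast(image, tf.float32)
    return {"input_data": image, "filename": filename}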
def build(self):
    auto = tf.data.experimental.AUTOTUNE  # `auto` was undefined in the original
    tfrecord_list = list_getter(self.tfrecord_dir, extension="tfrecord")
    if not tfrecord_list:
        raise ValueError("tfrecord does not exist: %s" % self.tfrecord_dir)
    data = tf.data.TFRecordDataset(tfrecord_list, num_parallel_reads=auto)
    data = data.map(self._tfrecord_parser, auto)
    if self.is_train_set:
        data = data.shuffle(self.batch_size * 10)
    data = data.prefetch(auto)
    data = data.batch(self.batch_size, drop_remainder=self.is_train_set)
    return data
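# Example of consuming the dataset returned by `build` under TF1's
# initializable-iterator pattern (the same pattern `_build` below uses
# internally). `pipeline` is a hypothetical instance of the owning class.
def run_one_batch(pipeline):
    data = pipeline.build()
    iterator = data.make_initializable_iterator()
    batch = iterator.get_next()
    with tf.Session() as sess:
        sess.run(iterator.initializer)
        return sess.run(batch)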
def _get_ckpt_in_range(self):
    all_ckpt_list = [_.split(".index")[0]
                     for _ in list_getter(self.config.model_dir, 'index')]
    ckpt_pattern = './model/checkpoints/model_step-%d'
    if self.config.ckpt_start == 'beginning':
        start_idx = 0
    else:
        start_idx = all_ckpt_list.index(ckpt_pattern % self.config.ckpt_start)
    if self.config.ckpt_end == 'end':
        end_idx = None
    else:
        end_idx = all_ckpt_list.index(ckpt_pattern % self.config.ckpt_end) + 1
    return all_ckpt_list[start_idx:end_idx:self.config.ckpt_step]
def _build(self):
    tfrecord_list = list_getter(self._src_dir, "tfrecord")
    if not tfrecord_list:
        raise ValueError("tfrecord is not given")
    data = tf.data.TFRecordDataset(tfrecord_list)
    data = data.shuffle(self._batch * 10)
    data = data.map(self._parser, 4).batch(self._batch, True)
    data = data.prefetch(2)
    iterator = data.make_initializable_iterator()
    data_batched = iterator.get_next()
    self.image = data_batched["image"]
    self.gt = data_batched["gt"]
    self.data_init = iterator.initializer
def test_deep_q_all(self):
    self.config_test()
    self.build_cnn(self.num_actions)
    restorer = tf.train.Saver()
    total_rewards_ph = tf.placeholder(tf.int64)
    tf.summary.scalar('test/total_reward', total_rewards_ph)
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(logdir=self.save_dir)
    sess = tf.Session()
    ckpt_list = list_getter(self.save_dir, 'index', 'model')
    for ckpt in ckpt_list:
        ckpt = ckpt.replace('.index', '')
        restorer.restore(sess, ckpt)
        total_reward = 0
        for episode in range(self.num_repeat_episode):
            self.initialize_game()
            state_queue = deque(maxlen=self.state_stack_num)
            # iterate to fill the very first stacked state
            for i in range(self.state_stack_num):
                frame, _, _ = self.take_action(0)
                state_queue.append(frame)
            for step in range(self.max_step_per_episode):
                state = np.stack(state_queue, axis=2)
                action = np.argmax(
                    sess.run(self.output,
                             {self.state: np.expand_dims(state, axis=0),
                              self.is_train: False}))
                frame, is_end, reward = self.take_action(action)
                state_queue.append(frame)
                total_reward += reward
                if is_end:  # was checked before `is_end` was assigned
                    break
        ckpt_num = int(ckpt.split('-')[-1])
        print(ckpt_num)
        average_total_reward = float(total_reward) / float(self.num_repeat_episode)
        summary_writer.add_summary(
            sess.run(summary_op, {total_rewards_ph: average_total_reward}),
            ckpt_num)
    sess.close()
def _vis_with_video(self, sess):
    vid_list = list_getter(self.config.img_dir, ("avi", "mp4"))
    for vid_name in vid_list:
        vid = VideoCapture(vid_name)
        fps = round(vid.get(5))  # property 5 is CAP_PROP_FPS
        should_continue, frame = vid.read()
        basename = os.path.basename(vid_name)[:-4]
        dst_name = self.config.vis_result_dir + "/" + basename + ".avi"
        h, w, _ = frame.shape
        vid_out = VideoWriter(dst_name, VideoWriter_fourcc(*"XVID"), fps, (w, h))
        # the original duplicated the predict-and-write logic for the first
        # frame; a single loop covers every frame identically
        while should_continue:
            pred = sess.run(self.pred, {self.input_data: np.expand_dims(frame, 0)})
            superimposed = self._superimpose(frame, pred)
            vid_out.write(superimposed.astype(np.uint8))
            should_continue, frame = vid.read()
        vid.release()
        vid_out.release()
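# `_superimpose` is not defined in this file. A minimal sketch of the assumed
# behavior: map predicted class ids to a color mask and alpha-blend it over
# the frame. The palette and blend ratio are assumptions.
def _superimpose(self, frame, pred, alpha=0.5):
    palette = np.array([[0, 0, 0], [0, 0, 255], [0, 255, 0]], np.uint8)  # assumed class colors
    color_mask = palette[np.squeeze(pred) % len(palette)]  # (h, w) ids -> (h, w, 3) colors
    return (1.0 - alpha) * frame + alpha * color_mask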
def test_deep_q(self):
    self.config_test()
    self.build_cnn(self.num_actions)
    restorer = tf.train.Saver()
    sess = tf.Session()
    ckpt = list_getter(self.save_dir, 'index', 'model')[-1]
    restorer.restore(sess, ckpt.replace('.index', ''))
    total_reward = 0
    for episode in range(self.num_repeat_episode):
        state = self.initialize_game()
        for step in range(self.max_step_per_episode):
            feed = {self.state: np.expand_dims(state, axis=0),
                    self.is_train: False}
            Qs = sess.run(self.output, feed_dict=feed)
            action = np.argmax(Qs)
            angle = action * 4 + 20
            state, is_end, reward = self.take_action(angle)
            total_reward += reward
            if is_end:
                break
        # the original added `reward` a second time here, double-counting the last step
    print("average_total_reward = %.4f"
          % (float(total_reward) / float(self.num_repeat_episode)))
    sess.close()
def _get_ckpt(self):
    all_ckpt_list = [_.split(".index")[0]
                     for _ in list_getter(self.config.model_dir, 'index')]
    ckpt_pattern = './model/checkpoints/model_step-%d'
    # indexing back into the list doubles as an existence check for the requested step
    return all_ckpt_list[all_ckpt_list.index(ckpt_pattern % self.config.ckpt_id)]
def train_deep_q(self):
    self.config_train()
    self.build_cnn(self.num_actions)
    self.state_population = Queue()
    onehot_actions = tf.one_hot(self.actions, self.num_actions)
    q_value = tf.reduce_sum(tf.multiply(self.output, onehot_actions), axis=1)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    self.loss = tf.reduce_mean(tf.square(self.q_hat - q_value))
    optm = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
    self.optm = tf.group([optm, update_ops])
    saver = tf.train.Saver(max_to_keep=1000)
    total_reward_ph = tf.placeholder(tf.float32)  # rewards are accumulated as floats
    tf.summary.scalar('train/reward_gain', total_reward_ph)
    action_ph = tf.placeholder(tf.int64)
    tf.summary.histogram('train/action', action_ph)
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(logdir=self.save_dir)
    sess = tf.Session()
    length = 0
    ckpt_list = list_getter(self.save_dir, 'index', 'model')
    if not ckpt_list:
        sess.run(tf.global_variables_initializer())
        self.fill_state_population()
        ep_start = 0
    else:
        length = 999999  # resume with exploration fully decayed
        saver.restore(sess, ckpt_list[-1].replace('.index', ''))
        self.fill_state_population(sess, True)
        ep_start = int(ckpt_list[-1].split('-')[-1].split('.')[0])
    for episode in range(ep_start + 1, self.train_episodes):
        tic = time.time()
        action_record = []
        total_reward = 0.0
        current_state = self.initialize_game()
        # Note: consecutive stacked states share frames:
        # current_state[:, :, 1] = next_state[:, :, 0]
        # current_state[:, :, 2] = next_state[:, :, 1]
        # current_state[:, :, 3] = next_state[:, :, 2]
        for step in range(self.max_step_per_episode):
            length += 1
            # Explore or exploit: epsilon decays exponentially with total steps
            explore_prob = self.explore_stop + \
                (self.explore_start - self.explore_stop) * \
                np.exp(-self.decay_rate * length)
            if explore_prob > np.random.rand():
                # explore: random action index in [0, 35] (min and max shooting angle)
                action = random.randint(0, 35)
            else:
                # exploit: greedy action from the model
                feed = {self.state: np.expand_dims(current_state, axis=0),
                        self.is_train: False}
                Qs = sess.run(self.output, feed_dict=feed)
                action = np.argmax(Qs)
            action_record.append(action)
            angle = action * 4 + 20  # angle changes by 4, minimum of 20, maximum of 160
            next_state, is_end, reward = self.take_action(angle)
            if is_end:
                self.state_population.add((current_state, action, reward, next_state))
                break
            total_reward += reward
            self.state_population.add((current_state, action, reward, next_state))
            current_state = next_state
        # Sample a mini-batch from the replay buffer
        batch = self.state_population.sample(self.batch_size)
        current_state_batch = np.array([each[0] for each in batch])
        actions_batch = np.array([each[1] for each in batch])
        rewards_batch = np.array([each[2] for each in batch])
        next_state_batch = np.array([each[3] for each in batch])
        # Q values for next_state, which form the target Q
        target_Qs = sess.run(self.output,
                             feed_dict={self.state: next_state_batch,
                                        self.is_train: True})
        end_game_index = rewards_batch < 0  # negative reward marks a terminal state
        target_Qs[end_game_index] = np.zeros(self.num_actions)
        q_hat = rewards_batch + self.gamma * np.max(target_Qs, axis=1)
        loss, _ = sess.run([self.loss, self.optm],
                           feed_dict={self.state: current_state_batch,
                                      self.q_hat: q_hat,
                                      self.actions: actions_batch,
                                      self.is_train: True})
        print('Episode: {},'.format(episode),
              'total_reward: {:.4f},'.format(total_reward),
              'explore prob: {:.4f},'.format(explore_prob),
              'duration: {:.4f}'.format(time.time() - tic))
        summary_writer.add_summary(
            sess.run(summary_op, {total_reward_ph: total_reward,
                                  action_ph: action_record}),
            episode)
        if episode % 50 == 0:
            saver.save(sess, self.save_dir + '/model', episode)
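# The replay buffer `Queue` used in `train_deep_q` is not Python's queue.Queue:
# it needs `add` and `sample` methods. Below is a minimal sketch of the assumed
# interface, backed by a bounded deque with uniform sampling; the capacity
# default is an assumption.
import random
from collections import deque

class Queue:
    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)

    def add(self, experience):
        # experience: (current_state, action, reward, next_state)
        self.buffer.append(experience)

    def sample(self, batch_size):
        # uniform random mini-batch without replacement
        return random.sample(self.buffer, batch_size)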