def main():
    """Train a DQN agent on the gym environment and periodically evaluate it.

    Each episode runs up to ``opt.max_step`` epsilon-greedy steps and logs the
    last loss returned by the agent. Every ``opt.test_interval`` episodes the
    greedy policy is rolled out ``opt.test`` times and the mean total reward is
    logged. When ``opt.log`` is set, the collected summary is written there.
    """
    options = parse_opt()

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    env = gym.make(game)

    # Seed every random source with the same fixed value.
    seed = 7122
    env.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    agent = DQN(env, options, device=device)
    agent.network.apply(weights_init)
    agent.sync_weight()

    episode_bar = trange(options.episode, ascii=True)
    summary = Summary()
    last_rewards = 0

    for episode in episode_bar:
        # --- training rollout (epsilon-greedy) ---
        observation = env.reset()
        for _ in range(options.max_step):
            chosen = agent.egreedy_action(observation)
            following, reward, done, _ = env.step(chosen)
            loss = agent.perceive(observation, chosen, reward, following,
                                  done)
            observation = following
            if done:
                break
        summary.add(episode, 'loss', loss)

        # --- periodic evaluation with the greedy policy ---
        interval = options.test_interval
        is_test_episode = interval > 0 and (episode + 1) % interval == 0
        if is_test_episode:
            rewards = 0
            for _ in trange(options.test, ascii=True, leave=False):
                observation = env.reset()
                for _ in range(options.max_step):
                    chosen = agent.action(observation)
                    following, reward, done, _ = env.step(chosen)
                    observation = following
                    rewards += reward
                    if done:
                        break
            if options.test > 0:
                # Turn the accumulated total into a per-rollout mean.
                rewards /= options.test
            last_rewards = rewards
            summary.add(episode, 'reward', rewards)

        description = 'Loss: {:.4f} | Reward: {:2}'.format(loss, last_rewards)
        episode_bar.set_description(description)

    if options.log:
        summary.write(options.log)
class DQNTrainer:
    """Train a DQN agent on the Snake environment with epsilon-greedy exploration.

    The trainer owns the environment, the agent and a running summary. It
    decays epsilon linearly after every episode, periodically checkpoints the
    models together with the training state, and can render preview games.
    """

    def __init__(self,
                 level_filepath,
                 episodes=30000,
                 initial_epsilon=1.,
                 min_epsilon=0.1,
                 exploration_ratio=0.5,
                 max_steps=2000,
                 render_freq=500,
                 enable_render=True,
                 render_fps=20,
                 save_dir='checkpoints',
                 enable_save=True,
                 save_freq=500,
                 gamma=0.99,
                 batch_size=64,
                 min_replay_memory_size=1000,
                 replay_memory_size=100000,
                 target_update_freq=5,
                 seed=42):
        """Create the environment, the agent and the training bookkeeping.

        Args:
            level_filepath: path handed to ``LevelLoader``.
            episodes: total number of training episodes.
            initial_epsilon: exploration probability at the first episode.
            min_epsilon: floor that epsilon is never decayed below.
            exploration_ratio: fraction of ``episodes`` over which epsilon is
                linearly decayed from ``initial_epsilon`` to ``min_epsilon``.
            max_steps: maximum number of steps per episode.
            render_freq: a preview game is rendered every this many episodes.
            enable_render: whether previews are rendered during training.
            render_fps: frame rate passed to the environment renderer.
            save_dir: directory for checkpoints and training info.
            enable_save: whether checkpoints are written during training.
            save_freq: a checkpoint is written every this many episodes.
            gamma, batch_size, min_replay_memory_size, replay_memory_size,
            target_update_freq: forwarded unchanged to ``DQNAgent``.
            seed: seed applied to every random source in ``set_random_seed``.
        """
        self.set_random_seed(seed)

        self.episodes = episodes
        self.max_steps = max_steps
        self.epsilon = initial_epsilon
        self.min_epsilon = min_epsilon
        self.exploration_ratio = exploration_ratio
        self.render_freq = render_freq
        self.enable_render = enable_render
        self.render_fps = render_fps
        self.save_dir = save_dir
        self.enable_save = enable_save
        self.save_freq = save_freq

        if enable_save:
            # exist_ok avoids the check-then-create race of exists()+makedirs().
            os.makedirs(save_dir, exist_ok=True)

        level_loader = LevelLoader(level_filepath)

        self.agent = DQNAgent(level_loader.get_field_size(),
                              gamma=gamma,
                              batch_size=batch_size,
                              min_replay_memory_size=min_replay_memory_size,
                              replay_memory_size=replay_memory_size,
                              target_update_freq=target_update_freq)
        self.env = Snake(level_loader)
        self.summary = Summary()
        self.current_episode = 0
        self.max_average_length = 0

        self.epsilon_decay = self._compute_epsilon_decay(
            initial_epsilon, min_epsilon, exploration_ratio, episodes)

    @staticmethod
    def _compute_epsilon_decay(initial_epsilon, min_epsilon,
                               exploration_ratio, episodes):
        """Return the amount subtracted from epsilon after each episode.

        The decay is linear over ``exploration_ratio * episodes`` episodes.
        When that span is zero or negative there is no exploration phase to
        spread the decay over, so the whole gap is returned and epsilon reaches
        ``min_epsilon`` after the first episode instead of raising
        ``ZeroDivisionError``.
        """
        decay_episodes = exploration_ratio * episodes
        if decay_episodes <= 0:
            return initial_epsilon - min_epsilon
        return (initial_epsilon - min_epsilon) / decay_episodes

    def set_random_seed(self, seed):
        """Seed Python, NumPy, the hash seed env variable and TensorFlow."""
        random.seed(seed)
        np.random.seed(seed)
        os.environ['PYTHONHASHSEED'] = str(seed)
        tf.set_random_seed(seed)

    def train(self):
        """Run training episodes until ``self.episodes`` is reached.

        Resumes from ``self.current_episode``, so it can be called after
        ``load``. The progress bar is closed even if training is interrupted.
        """
        with tqdm(initial=self.current_episode,
                  total=self.episodes,
                  unit='episodes') as pbar:
            while self.current_episode < self.episodes:
                current_state = self.env.reset()

                done = False
                steps = 0
                while not done and steps < self.max_steps:
                    # Epsilon-greedy: exploit with probability 1 - epsilon.
                    if random.random() > self.epsilon:
                        action = np.argmax(
                            self.agent.get_q_values(np.array([current_state])))
                    else:
                        action = np.random.randint(NUM_ACTIONS)

                    next_state, reward, done = self.env.step(action)

                    self.agent.update_replay_memory(current_state, action,
                                                    reward, next_state, done)
                    self.summary.add('loss', self.agent.train())

                    current_state = next_state
                    steps += 1

                self.agent.increase_target_update_counter()

                self.summary.add('length', self.env.get_length())
                self.summary.add('reward', self.env.tot_reward)
                self.summary.add('steps', steps)

                # decay epsilon
                self.epsilon = max(self.epsilon - self.epsilon_decay,
                                   self.min_epsilon)

                self.current_episode += 1

                # save model, training info
                if (self.enable_save
                        and self.current_episode % self.save_freq == 0):
                    self.save(str(self.current_episode))

                    average_length = self.summary.get_average('length')
                    if average_length > self.max_average_length:
                        self.max_average_length = average_length
                        self.save('best')
                        print('best model saved - average_length: {}'.format(
                            average_length))

                    self.summary.write(self.current_episode, self.epsilon)
                    self.summary.clear()

                # update pbar
                pbar.update(1)

                # preview
                if (self.enable_render
                        and self.current_episode % self.render_freq == 0):
                    self.preview(self.render_fps)

    def preview(self, render_fps, disable_exploration=False, save_dir=None):
        """Play and render one game with the current agent.

        Args:
            render_fps: frame rate passed to ``env.render``.
            disable_exploration: when True always act greedily; otherwise the
                current epsilon is used for epsilon-greedy action selection.
            save_dir: when given, every rendered frame is saved there as
                ``<step>.png`` (the directory is created if needed).

        Returns:
            The value of ``env.get_length()`` at the end of the game.
        """
        if save_dir is not None:
            os.makedirs(save_dir, exist_ok=True)

        current_state = self.env.reset()

        self.env.render(fps=render_fps)
        if save_dir is not None:
            self.env.save_image(save_path=save_dir + '/0.png')

        done = False
        steps = 0
        while not done and steps < self.max_steps:
            if disable_exploration or random.random() > self.epsilon:
                action = np.argmax(
                    self.agent.get_q_values(np.array([current_state])))
            else:
                action = np.random.randint(NUM_ACTIONS)

            next_state, reward, done = self.env.step(action)
            current_state = next_state
            steps += 1

            self.env.render(fps=render_fps)
            if save_dir is not None:
                self.env.save_image(save_path=save_dir +
                                    '/{}.png'.format(steps))

        return self.env.get_length()

    def quit(self):
        """Shut down the environment."""
        self.env.quit()

    def save(self, suffix):
        """Write both models and the pickled training state for ``suffix``.

        Files are ``model_<suffix>.h5``, ``target_model_<suffix>.h5`` and
        ``training_info_<suffix>.pkl`` inside ``self.save_dir``.
        """
        # The directory is only created in __init__ when enable_save is True;
        # make direct calls to save() work regardless.
        os.makedirs(self.save_dir, exist_ok=True)

        self.agent.save(self.save_dir + '/model_{}.h5'.format(suffix),
                        self.save_dir + '/target_model_{}.h5'.format(suffix))

        dic = {
            'replay_memory': self.agent.replay_memory,
            'target_update_counter': self.agent.target_update_counter,
            'current_episode': self.current_episode,
            'epsilon': self.epsilon,
            'summary': self.summary,
            'max_average_length': self.max_average_length
        }

        with open(self.save_dir + '/training_info_{}.pkl'.format(suffix),
                  'wb') as fout:
            pickle.dump(dic, fout)

    def load(self, suffix):
        """Restore models and training state previously written by ``save``.

        SECURITY NOTE: the training info is read with ``pickle.load``, which
        can execute arbitrary code. Only load checkpoints from trusted sources.
        """
        self.agent.load(self.save_dir + '/model_{}.h5'.format(suffix),
                        self.save_dir + '/target_model_{}.h5'.format(suffix))

        with open(self.save_dir + '/training_info_{}.pkl'.format(suffix),
                  'rb') as fin:
            dic = pickle.load(fin)

        self.agent.replay_memory = dic['replay_memory']
        self.agent.target_update_counter = dic['target_update_counter']
        self.current_episode = dic['current_episode']
        self.epsilon = dic['epsilon']
        self.summary = dic['summary']
        self.max_average_length = dic['max_average_length']
class ConvolutionalModel:
    """U-Net based segmentation model (TensorFlow 1.x graph mode).

    Builds the graph on construction, and offers training for one epoch,
    patch-wise prediction on whole images, and checkpoint save/restore.
    """

    def __init__(self, options, session):
        """Seed the RNGs, create the summary helper and build the graph.

        Args:
            options: configuration object; the attributes read are the ones
                accessed as ``opts.<name>`` throughout this class.
            session: TensorFlow session used for every ``run`` call.
        """
        self._options = options
        self._session = session

        np.random.seed(options.seed)
        tf.set_random_seed(options.seed)
        print(options.num_layers, options.patch_size)
        self.input_size = unet.input_size_needed(options.patch_size,
                                                 options.num_layers)

        self.experiment_name = datetime.now().strftime("%Y-%m-%dT%Hh%Mm%Ss")
        summary_path = os.path.join(options.logdir, self.experiment_name)
        self._summary = Summary(options, session, summary_path)

        self.build_graph()

    def cross_entropy_loss(self, labels, pred_logits):
        """Return the mean sparse softmax cross-entropy of logits vs labels."""
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=pred_logits, labels=labels)
        loss = tf.reduce_mean(cross_entropy)

        return loss

    def optimize(self, loss):
        """Build the part of the graph that optimizes the loss function.

        Returns:
            (train_op, learning_rate) where the learning rate decays by a
            factor 0.95 every 1000 global steps (staircase).
        """
        opts = self._options

        learning_rate = tf.train.exponential_decay(opts.lr,
                                                   self._global_step,
                                                   1000,
                                                   0.95,
                                                   staircase=True)

        # Use simple momentum for the optimization.
        optimizer = tf.train.MomentumOptimizer(learning_rate, opts.momentum)
        train = optimizer.minimize(loss, global_step=self._global_step)
        return train, learning_rate

    def build_graph(self):
        """Build the graph for the full model and initialize its variables."""
        opts = self._options

        # Global step: scalar, i.e., shape [].
        global_step = tf.Variable(0, name="global_step")
        self._global_step = global_step

        # data placeholders
        patches_node = tf.placeholder(tf.float32,
                                      shape=(opts.batch_size, self.input_size,
                                             self.input_size, NUM_CHANNELS),
                                      name="patches")
        labels_node = tf.placeholder(tf.int64,
                                     shape=(opts.batch_size, opts.patch_size,
                                            opts.patch_size),
                                     name="groundtruth")

        patches_node, labels_node = self.stochastic_images_augmentation(
            patches_node, labels_node)

        dropout_keep = tf.placeholder_with_default(1.0,
                                                   shape=(),
                                                   name="dropout_keep")
        self._dropout_keep = dropout_keep

        predict_logits = unet.forward(patches_node,
                                      root_size=opts.root_size,
                                      num_layers=opts.num_layers,
                                      dilated_layers=opts.dilated_layers,
                                      dropout_keep=dropout_keep)
        predictions = tf.nn.softmax(predict_logits, dim=3)
        # Keep only the probability of class 1.
        predictions = predictions[:, :, :, 1]
        loss = self.cross_entropy_loss(labels_node, predict_logits)

        self._train, self._learning_rate = self.optimize(loss)

        self._loss = loss
        self._predictions = predictions
        self._patches_node = patches_node
        self._labels_node = labels_node
        self._predict_logits = predict_logits

        self._summary.initialize_eval_summary()
        self._summary.initialize_train_summary()
        self._summary.initialize_overlap_summary()
        self._summary.initialize_missclassification_summary()

        summary_scalars = {"loss": loss, "learning_rate": self._learning_rate}
        self.summary_op = self._summary.get_summary_op(summary_scalars)

        # Properly initialize all variables.
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        self.saver = tf.train.Saver(max_to_keep=100)

    def stochastic_images_augmentation(self, imgs, masks):
        """Add stochastic transformations to imgs and masks.

        Each sample is independently flipped up/down, flipped left/right,
        transposed (each with probability 0.5) and rotated by a random
        multiple of 90 degrees. The same transformation is applied to an image
        and its mask. Augmentation is only active when the
        ``image_augmentation_flag`` placeholder is fed True; otherwise the
        inputs are passed through unchanged.
        """
        original_imgs, original_masks = imgs, masks
        batch_size = int(imgs.shape[0])
        self._image_augmentation = tf.placeholder_with_default(
            False, shape=(), name='image_augmentation_flag')

        def apply_transform(transform, pim):
            # pim = (probability draw, image, mask) for a single sample.
            proba, img, mask = pim
            return tf.cond(proba > 0.5, lambda: transform(img), lambda: img), \
                   tf.cond(proba > 0.5, lambda: transform(mask), lambda: mask)

        def stochastic_transform(transform, imgs, masks, name):
            proba = tf.random_uniform(shape=(batch_size, ),
                                      name="should_" + name)
            # Apply the transform that was requested (the argument used to be
            # ignored in favour of a hard-coded flip_up_down).
            imgs, masks = tf.map_fn(
                lambda pim: apply_transform(transform, pim),
                [proba, imgs, masks],
                dtype=(imgs.dtype, masks.dtype))
            return imgs, masks

        with tf.variable_scope("data_augm"):
            # The image ops expect a channel axis on the masks as well.
            masks = tf.expand_dims(masks, -1)
            imgs, masks = stochastic_transform(tf.image.flip_up_down,
                                               imgs,
                                               masks,
                                               name="flip_up_down")
            imgs, masks = stochastic_transform(tf.image.flip_left_right,
                                               imgs,
                                               masks,
                                               name="flip_left_right")
            imgs, masks = stochastic_transform(tf.image.transpose_image,
                                               imgs,
                                               masks,
                                               name="transpose")

            number_rotation = tf.cast(
                tf.floor(
                    tf.random_uniform(shape=(batch_size, ),
                                      name="number_rotation") * 4), tf.int32)
            imgs, masks = tf.map_fn(
                lambda kim: (tf.image.rot90(kim[1], kim[0]),
                             tf.image.rot90(kim[2], kim[0])),
                [number_rotation, imgs, masks],
                dtype=(imgs.dtype, masks.dtype))

            masks = tf.squeeze(masks, -1)
            imgs, masks = tf.cond(self._image_augmentation, lambda:
                                  (imgs, masks), lambda:
                                  (original_imgs, original_masks))

        return imgs, masks

    def train(self, patches, labels_patches, imgs, labels):
        """Train the model for one epoch.

        params:
            patches: training patches, indexed along axis 0
            labels_patches: label patches matching ``patches``
            imgs: [num_images, img_height, img_width, num_channel]
            labels: [num_images, num_patches_side, num_patches_side]
        """
        opts = self._options

        # Binarize the labels at 0.5.
        labels_patches = (labels_patches >= 0.5) * 1.
        labels = (labels >= 0.5) * 1.

        num_train_patches = patches.shape[0]

        indices = np.arange(0, num_train_patches)
        np.random.shuffle(indices)

        num_errors = 0
        total = 0

        # "+ 1" keeps the last full batch when the number of patches is an
        # exact multiple of the batch size; incomplete batches are dropped.
        batch_offsets = range(0, num_train_patches - opts.batch_size + 1,
                              opts.batch_size)
        for batch_i, offset in enumerate(batch_offsets):
            batch_indices = indices[offset:offset + opts.batch_size]
            feed_dict = {
                self._patches_node: patches[batch_indices, :, :, :],
                self._labels_node: labels_patches[batch_indices],
                self._dropout_keep: opts.dropout,
                self._image_augmentation: opts.image_augmentation,
            }

            summary_str, _, l, predictions, step = self._session.run(
                [
                    self.summary_op, self._train, self._loss,
                    self._predictions, self._global_step
                ],
                feed_dict=feed_dict)

            print("Batch {} Step {}".format(batch_i, step), end="\r")
            self._summary.add(summary_str, global_step=step)

            # Accumulated absolute difference between the binary labels and
            # the predicted class-1 probabilities.
            num_errors += np.abs(labels_patches[batch_indices] -
                                 predictions).sum()
            total += opts.batch_size
            self._summary.add_to_pixel_missclassification_summary(
                num_errors, total, self._global_step)

            # from time to time do full prediction on some images
            if step > 0 and step % opts.eval_every == 0:
                print()

                images_to_predict = imgs[:opts.num_eval_images, :, :, :]
                masks = self.predict(images_to_predict)
                overlays = images.overlays(images_to_predict, masks)
                pred_masks = ((masks > 0.5) * 1).squeeze()
                true_masks = labels[:opts.num_eval_images, :, :].squeeze()

                self._summary.add_to_eval_summary(masks, overlays, labels,
                                                  self._global_step)
                self._summary.add_to_overlap_summary(true_masks, pred_masks,
                                                     self._global_step)

            if step > 0 and step % opts.train_score_every == 0:
                self._summary.add_to_training_summary(self.predict(imgs),
                                                      labels,
                                                      self._global_step)

        self._summary.flush()

    @staticmethod
    def _pad_to_full_batches(patches, batch_size):
        """Append all-zero patches so the patch count is a multiple of batch_size.

        The padding has the same per-patch shape and dtype as ``patches``.
        ``patches`` is returned unchanged when no padding is needed.
        """
        remainder = patches.shape[0] % batch_size
        if remainder == 0:
            return patches
        padding = np.zeros((batch_size - remainder, ) + patches.shape[1:],
                           dtype=patches.dtype)
        return np.concatenate([patches, padding], axis=0)

    def predict(self, imgs):
        """Run inference on `imgs` and return predicted masks.

        imgs: [num_images, image_height, image_width, num_channel]
        returns: masks [num_images, images_height, image_width] with road
            probabilities
        """
        opts = self._options

        num_images = imgs.shape[0]
        print("Running prediction on {} images... ".format(num_images),
              end="")

        if opts.ensemble_prediction:
            print("Start data augmentation for prediction...")
            imgs = images.image_augmentation_ensemble(imgs)
            print("Done")
            num_images = imgs.shape[0]

        input_size = unet.input_size_needed(opts.patch_size, opts.num_layers)
        # Mirror the border so patches of the (larger) network input size can
        # be extracted around every predicted patch.
        offset = int((input_size - opts.patch_size) / 2)
        imgs_exp = images.mirror_border(imgs, offset)
        patches = images.extract_patches(imgs_exp,
                                         patch_size=input_size,
                                         predict_patch_size=opts.patch_size,
                                         stride=opts.stride)
        num_patches = patches.shape[0]

        # patches padding to have full batches; the padding must match the
        # shape of the extracted patches, not the prediction patch size.
        patches = self._pad_to_full_batches(patches, opts.batch_size)

        num_batches = patches.shape[0] // opts.batch_size
        eval_predictions = np.empty((patches.shape[0], opts.patch_size,
                                     opts.patch_size))

        for batch in range(num_batches):
            offset = batch * opts.batch_size

            feed_dict = {
                self._patches_node:
                patches[offset:offset + opts.batch_size, :, :, :],
            }
            eval_predictions[offset:offset + opts.batch_size, :, :] = \
                self._session.run(self._predictions, feed_dict)

        # remove padding
        eval_predictions = eval_predictions[0:num_patches]
        patches_per_image = num_patches // num_images

        # construct masks
        new_shape = (num_images, patches_per_image, opts.patch_size,
                     opts.patch_size, 1)
        masks = images.images_from_patches(eval_predictions.reshape(new_shape),
                                           stride=opts.stride)

        if opts.ensemble_prediction:
            print("Invert Data augmentation and average predictions...")
            masks = images.invert_image_augmentation_ensemble(masks)
            print("Averaging done...")

        print("Prediction Done")
        return masks

    def predict_batchwise(self, imgs, pred_batch_size):
        """Predict `imgs` in chunks of `pred_batch_size` images.

        Returns the concatenation (axis 0) of the per-chunk masks.
        """
        masks = []
        for i in range(int(np.ceil(imgs.shape[0] / pred_batch_size))):
            start = i * pred_batch_size
            end = start + pred_batch_size
            masks.append(self.predict(imgs[start:end]))

        if len(masks) > 1:
            masks = np.concatenate(masks, axis=0)
            return masks
        else:
            return masks[0]

    def save(self, epoch=0):
        """Save a checkpoint for `epoch` under the experiment directory."""
        opts = self._options
        model_data_dir = os.path.abspath(
            os.path.join(opts.save_path, self.experiment_name,
                         'model-epoch-{:03d}.chkpt'.format(epoch)))
        saved_path = self.saver.save(self._session, model_data_dir)
        # create checkpoint
        print("Model saved in file: {}".format(saved_path))

    def restore(self, date=None, epoch=None, file=None):
        """Restore the model from a saved checkpoint.

        date: which model should be restored (most recent if None)
        epoch: at which epoch model should be restored (most recent if None)
        file: provide directly the checkpoint file to restore
        """
        opts = self._options

        if file is not None:
            model_data_dir = file
        else:
            # get experiment name to restore from
            if date is None:
                experiment_dirs = [
                    path
                    for path in glob.glob(os.path.join(opts.save_path, "*"))
                    if os.path.isdir(path)
                ]
                # Experiment names are timestamps, so the last one in sorted
                # order is the most recent.
                model_data_dir = sorted(experiment_dirs)[-1]
            else:
                model_data_dir = os.path.abspath(
                    os.path.join(opts.save_path, date))

            # get epoch construct final path
            if epoch is None:
                model_data_dir = os.path.abspath(
                    os.path.join(model_data_dir, 'model-epoch-*.chkpt.meta'))
                # [:-5] strips the trailing ".meta" from the newest match.
                model_data_dir = sorted(glob.glob(model_data_dir))[-1][:-5]
            else:
                model_data_dir = os.path.abspath(
                    os.path.join(model_data_dir,
                                 'model-epoch-{:03d}.chkpt'.format(epoch)))

        self.saver.restore(self._session, model_data_dir)
        print("Model restored from from file: {}".format(model_data_dir))
class ConvolutionalModel:
    """U-Net based image regression model trained on an SNR loss (TF 1.x).

    Builds the graph on construction, and offers training for one epoch,
    patch-wise prediction on whole images, and checkpoint saving.
    """

    def __init__(self, options, session):
        """Seed the RNGs, create the summary helper and build the graph.

        Args:
            options: configuration object; the attributes read are the ones
                accessed as ``opts.<name>`` / ``self._options.<name>`` in
                this class.
            session: TensorFlow session used for every ``run`` call.
        """
        self._options = options
        self._session = session
        self.train_images_shape = None

        np.random.seed(options.seed)
        tf.set_random_seed(options.seed)
        self.input_size = self._options.patch_size

        self.experiment_name = datetime.now().strftime("%Y%m%d%H%M%S")
        self.summary_path = os.path.join(
            options.logdir, self.experiment_name + options.log_suffix)

        self._summary = Summary(options, session)
        self.build_graph()

    def calculate_loss_abs(self, labels, prediction):
        """Calculate absolute difference loss."""
        loss = tf.losses.absolute_difference(labels, prediction)
        return loss

    def calculate_loss_mse(self, labels, prediction):
        """Calculate mean squared error loss."""
        loss = tf.losses.mean_squared_error(labels, prediction)
        return loss

    def calculate_loss_snr(self, labels, prediction):
        """Calculate a loss based on signal to noise ratio.

        The loss is ``-20 * (log10(range(labels)) - log10(sqrt(MSE)))``, so
        minimizing it maximizes the signal to noise ratio.
        """
        loss = tf.negative(tf.multiply(
            tf.constant(20.0),
            tf.subtract(
                self.tf_log_10(self.tf_range(labels)),
                self.tf_log_10(
                    tf.sqrt(tf.losses.mean_squared_error(labels,
                                                         prediction))))),
                           name="snr")
        print(loss)
        return loss

    def tf_log_10(self, x):
        """Return log10 of `x`, implemented with TensorFlow ops."""
        return tf.divide(tf.log(x), tf.log(tf.constant(10.0)))

    def tf_range(self, img):
        """Return the dynamic range (max - min) of `img` as a tensor."""
        return tf.subtract(tf.reduce_max(img), tf.reduce_min(img))

    def optimize(self, loss):
        """Optimize with MomentumOptimizer.

        Returns:
            (train_op, learning_rate) where the learning rate decays by a
            factor 0.99 every 100 global steps (staircase).
        """
        learning_rate = tf.train.exponential_decay(self._options.learning_rate,
                                                   self._global_step,
                                                   100,
                                                   0.99,
                                                   staircase=True)

        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
        train = optimizer.minimize(loss, global_step=self._global_step)
        return train, learning_rate

    def adam_optimize(self, loss):
        """Optimize with AdamOptimizer.

        Returns:
            (train_op, learning_rate) where the learning rate is read from
            the optimizer's ``_lr`` attribute.
        """
        optimizer = tf.train.AdamOptimizer(self._options.learning_rate,
                                           epsilon=10e-3)
        train = optimizer.minimize(loss, global_step=self._global_step)
        return train, optimizer._lr

    def build_graph(self):
        """Build the tensorflow graph for the model and initialize variables."""
        opts = self._options

        global_step = tf.Variable(0, name="global_step")
        self._global_step = global_step

        # data placeholders
        patches_node = tf.placeholder(tf.float32,
                                      shape=(self._options.batch_size,
                                             self.input_size, self.input_size,
                                             1),
                                      name="patches")
        labels_node = tf.placeholder(tf.float32,
                                     shape=(self._options.batch_size,
                                            self._options.patch_size,
                                            self._options.patch_size, 1),
                                     name="labels")

        dropout_keep = tf.placeholder_with_default(1.0,
                                                   shape=(),
                                                   name="dropout_keep")
        self._dropout_keep = dropout_keep

        print("Patches node: {}".format(patches_node))
        predict_logits = unet.forward(patches_node,
                                      root_size=opts.root_size,
                                      num_layers=opts.num_layers,
                                      dropout_keep=dropout_keep,
                                      dilation_size=opts.dilation_size,
                                      conv_size=opts.conv_size)

        # The network output is used directly as the prediction.
        predictions = predict_logits

        print("Predicted logits: {}".format(predict_logits))
        loss = self.calculate_loss_snr(labels_node, predict_logits)

        self._train, self._learning_rate = self.adam_optimize(loss)

        self._loss = loss
        self._predictions = predictions
        self._patches_node = patches_node
        self._labels_node = labels_node
        self._predict_logits = predict_logits

        self._summary.create_writer(self.summary_path)
        self._summary.initialize_snr_summary()
        self._summary.initialize_eval_summary()

        summary_scalars = {"loss": loss, "learning_rate": self._learning_rate}
        self.summary_op = self._summary.get_summary_op(summary_scalars)

        # Properly initialize all variables.
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        self.saver = tf.train.Saver(max_to_keep=100)

    def train(self, patches, labels_patches, eval_images,
              downsampled_eval_images):
        """Train the model for one epoch.

        params:
            patches: [num_patches, patch_height, patch_width, num_channel]
            labels_patches: [num_patches, patch_height, patch_width, num_channel]
            eval_images: [num_images, img_height, img_width, num_channel]
            downsampled_eval_images: [num_images, img_height, img_width, num_channel]
        """
        opts = self._options

        num_train_patches = patches.shape[0]
        # Number of full batches in one pass over the patches.
        steps_per_epoch = num_train_patches // opts.batch_size

        indices = np.arange(0, num_train_patches)
        # randomize indices for training
        np.random.shuffle(indices)

        # "+ 1" keeps the last full batch when the number of patches is an
        # exact multiple of the batch size, so an epoch is exactly
        # steps_per_epoch steps; incomplete batches are dropped.
        batch_offsets = range(0, num_train_patches - opts.batch_size + 1,
                              opts.batch_size)
        for batch_i, offset in enumerate(batch_offsets):
            batch_indices = indices[offset:offset + opts.batch_size]
            feed_dict = {
                self._patches_node: patches[batch_indices, :, :, :],
                self._labels_node: labels_patches[batch_indices, :, :, :],
                self._dropout_keep: opts.dropout,
            }

            summary_str, _, l, predictions, step = self._session.run(
                [
                    self.summary_op, self._train, self._loss,
                    self._predictions, self._global_step
                ],
                feed_dict=feed_dict)

            print("Batch {} Step {}".format(batch_i, step), end="\r")
            self._summary.add(summary_str, global_step=step)

            snr = images.psnr(labels_patches[batch_indices], predictions)
            self._summary.add_to_snr_summary(snr, self._global_step)

            # Do evaluation once per epoch
            if step > 0 and step % steps_per_epoch == 0:
                predictions = self.predict(downsampled_eval_images)
                self._summary.add_to_eval_summary(eval_images,
                                                  downsampled_eval_images,
                                                  predictions,
                                                  self._global_step)

        self._summary.flush()

    @staticmethod
    def _pad_to_full_batches(patches, batch_size):
        """Append all-zero patches so the patch count is a multiple of batch_size.

        The padding has the same per-patch shape and dtype as ``patches``.
        ``patches`` is returned unchanged when no padding is needed.
        """
        remainder = patches.shape[0] % batch_size
        if remainder == 0:
            return patches
        padding = np.zeros((batch_size - remainder, ) + patches.shape[1:],
                           dtype=patches.dtype)
        return np.concatenate([patches, padding], axis=0)

    def predict(self, imgs):
        """Run inference on `imgs` and return predictions.

        imgs: [num_images, image_height, image_width, num_channel]
        returns: predictions [num_images, images_height, image_width,
            num_channel], clipped to [0, 1]
        """
        opts = self._options

        num_images = imgs.shape[0]
        print()
        print("Running prediction on {} images with shape {}... ".format(
            num_images, imgs.shape))

        # NOTE(review): this adds a new op to the graph on every call, so the
        # graph grows with repeated predictions — consider building it once.
        patches = tf.extract_image_patches(
            imgs, [1, self.input_size, self.input_size, 1],
            [1, opts.stride, opts.stride, 1], [1, 1, 1, 1], 'VALID').eval()
        patches = patches.reshape((-1, self.input_size, self.input_size, 1))

        # Remember the real patch count so the padding can be stripped again.
        num_patches = patches.shape[0]

        # patches padding to have full batches
        patches = self._pad_to_full_batches(patches, opts.batch_size)
        num_padded_patches = patches.shape[0]

        num_batches = num_padded_patches // opts.batch_size
        eval_predictions = np.empty((num_padded_patches, opts.patch_size,
                                     opts.patch_size, 1))
        print("Patches to predict: ", num_padded_patches)
        print("Shape eval predictions: ", eval_predictions.shape)

        # do batchwise prediction
        for batch in range(num_batches):
            offset = batch * opts.batch_size

            feed_dict = {
                self._patches_node:
                patches[offset:offset + opts.batch_size, :, :, :],
            }
            eval_predictions[offset:offset + opts.batch_size, :, :, :] = \
                self._session.run(self._predictions, feed_dict)

        # remove padding
        eval_predictions = eval_predictions[0:num_patches]

        # construct predicted images
        predictions = images.images_from_patches(eval_predictions,
                                                 imgs.shape,
                                                 stride=opts.stride)

        # Clipping for display in tensorboard
        predictions[predictions < 0] = 0
        predictions[predictions > 1] = 1
        return predictions

    def save(self, epoch=0):
        """Save the training state of the model to disk.

        The checkpoint for `epoch` is written under the experiment directory
        so training can be continued at a later point.
        """
        opts = self._options
        model_data_dir = os.path.abspath(
            os.path.join(opts.save_path, self.experiment_name,
                         'model-epoch-{:03d}.chkpt'.format(epoch)))
        saved_path = self.saver.save(self._session, model_data_dir)
        # create checkpoint
        print("Model saved in file: {}".format(saved_path))