def learn(self, total_timesteps, callback=None, log_interval=100, tb_log_name="DQN",
          reset_num_timesteps=True, replay_wrapper=None):
    """Run the DQN training loop for at most ``total_timesteps`` environment steps.

    Each iteration picks an action (epsilon-greedy, or parameter-noise
    exploration when ``self.param_noise`` is set), steps the environment,
    stores the unnormalized transition in the replay buffer, and, once the
    warm-up is over, trains on a sampled batch every ``self.train_freq`` steps
    and refreshes the target network every
    ``self.target_network_update_freq`` steps.

    :param total_timesteps: (int) maximum number of environment steps; also
        used to size the exploration schedule and, by default, the
        prioritized-replay beta schedule
    :param callback: forwarded to ``self._init_callback``; training stops
        early when its ``on_step()`` returns ``False``
    :param log_interval: (int or None) number of episodes between tabular
        log dumps; ``None`` disables them
    :param tb_log_name: (str) run name handed to ``TensorboardWriter``
    :param reset_num_timesteps: (bool) forwarded to
        ``self._init_num_timesteps``
    :param replay_wrapper: (callable or None) if given, wraps the replay
        buffer; must not be combined with prioritized replay
    :return: self
    """
    new_tb_log = self._init_num_timesteps(reset_num_timesteps)
    callback = self._init_callback(callback)

    with SetVerbosity(self.verbose), \
            TensorboardWriter(self.graph, self.tensorboard_log, tb_log_name, new_tb_log) as writer:
        self._setup_learn()

        # Create the replay buffer
        if self.prioritized_replay:
            self.replay_buffer = PrioritizedReplayBuffer(
                self.buffer_size, alpha=self.prioritized_replay_alpha)
            if self.prioritized_replay_beta_iters is None:
                prioritized_replay_beta_iters = total_timesteps
            else:
                prioritized_replay_beta_iters = self.prioritized_replay_beta_iters
            self.beta_schedule = LinearSchedule(
                prioritized_replay_beta_iters,
                initial_p=self.prioritized_replay_beta0,
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(self.buffer_size)
            self.beta_schedule = None

        if replay_wrapper is not None:
            assert not self.prioritized_replay, "Prioritized replay buffer is not supported by HER"
            self.replay_buffer = replay_wrapper(self.replay_buffer)

        # Create the schedule for exploration starting from 1.
        self.exploration = LinearSchedule(
            schedule_timesteps=int(self.exploration_fraction * total_timesteps),
            initial_p=self.exploration_initial_eps,
            final_p=self.exploration_final_eps)

        episode_rewards = [0.0]
        episode_successes = []

        # NOTE: local variable names above are part of what the callback sees
        # through locals(); keep them stable.
        callback.on_training_start(locals(), globals())
        callback.on_rollout_start()

        def log_scalar(tag, value):
            """Write one scalar summary to TensorBoard at the current timestep."""
            scalar = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
            writer.add_summary(scalar, self.num_timesteps)

        reset = True
        obs = self.env.reset()
        # Retrieve unnormalized observation for saving into the buffer
        if self._vec_normalize_env is not None:
            obs_ = self._vec_normalize_env.get_original_obs().squeeze()

        for _ in range(total_timesteps):
            # Take action and update exploration to the newest value
            kwargs = {}
            if not self.param_noise:
                update_eps = self.exploration.value(self.num_timesteps)
                update_param_noise_threshold = 0.
            else:
                update_eps = 0.
                # Compute the threshold such that the KL divergence between perturbed and
                # non-perturbed policy is comparable to eps-greedy exploration with
                # eps = exploration.value(t).
                # See Appendix C.1 in Parameter Space Noise for Exploration,
                # Plappert et al., 2017 for detailed explanation.
                update_param_noise_threshold = \
                    -np.log(1. - self.exploration.value(self.num_timesteps) +
                            self.exploration.value(self.num_timesteps) / float(self.env.action_space.n))
                kwargs['reset'] = reset
                kwargs['update_param_noise_threshold'] = update_param_noise_threshold
                kwargs['update_param_noise_scale'] = True
            with self.sess.as_default():
                action = self.act(np.array(obs)[None], update_eps=update_eps, **kwargs)[0]
            env_action = action
            reset = False
            new_obs, rew, done, info = self.env.step(env_action)

            self.num_timesteps += 1

            # Stop training if return value is False
            if callback.on_step() is False:
                break

            # Store only the unnormalized version
            if self._vec_normalize_env is not None:
                new_obs_ = self._vec_normalize_env.get_original_obs().squeeze()
                reward_ = self._vec_normalize_env.get_original_reward().squeeze()
            else:
                # Avoid changing the original ones
                obs_, new_obs_, reward_ = obs, new_obs, rew
            # Store transition in the replay buffer.
            self.replay_buffer.add(obs_, action, reward_, new_obs_, float(done))
            obs = new_obs
            # Save the unnormalized observation
            if self._vec_normalize_env is not None:
                obs_ = new_obs_

            if writer is not None:
                ep_rew = np.array([reward_]).reshape((1, -1))
                ep_done = np.array([done]).reshape((1, -1))
                tf_util.total_episode_reward_logger(self.episode_reward, ep_rew, ep_done, writer,
                                                    self.num_timesteps)

            if done and writer:
                # Per-episode statistics are only present when an environment
                # wrapper fills info["episode"]; skip them instead of raising
                # KeyError when it does not.
                episode_info = info.get("episode")
                if episode_info is not None:
                    log_scalar('run_time', episode_info["run_time"][0])
                    log_scalar('length_episode', episode_info["l"])

            episode_rewards[-1] += reward_
            if done:
                maybe_is_success = info.get('is_success')
                if maybe_is_success is not None:
                    episode_successes.append(float(maybe_is_success))
                if not isinstance(self.env, VecEnv):
                    obs = self.env.reset()
                episode_rewards.append(0.0)
                reset = True

            # Do not train if the warmup phase is not over
            # or if there are not enough samples in the replay buffer
            can_sample = self.replay_buffer.can_sample(self.batch_size)
            if can_sample and self.num_timesteps > self.learning_starts \
                    and self.num_timesteps % self.train_freq == 0:

                callback.on_rollout_end()
                # Minimize the error in Bellman's equation on a batch sampled from replay buffer.
                # pytype:disable=bad-unpacking
                if self.prioritized_replay:
                    assert self.beta_schedule is not None, \
                        "BUG: should be LinearSchedule when self.prioritized_replay True"
                    experience = self.replay_buffer.sample(
                        self.batch_size,
                        beta=self.beta_schedule.value(self.num_timesteps),
                        env=self._vec_normalize_env)
                    (obses_t, actions, rewards, obses_tp1, dones, weights, batch_idxes) = experience
                else:
                    obses_t, actions, rewards, obses_tp1, dones = self.replay_buffer.sample(
                        self.batch_size, env=self._vec_normalize_env)
                    weights, batch_idxes = np.ones_like(rewards), None
                # pytype:enable=bad-unpacking

                # One gradient step; the returned summary is only written
                # when TensorBoard logging is enabled.
                summary, td_errors = self._train_step(obses_t, actions, rewards, obses_tp1, obses_tp1,
                                                      dones, weights, sess=self.sess)
                if writer is not None:
                    writer.add_summary(summary, self.num_timesteps)

                if self.prioritized_replay:
                    new_priorities = np.abs(td_errors) + self.prioritized_replay_eps
                    assert isinstance(self.replay_buffer, PrioritizedReplayBuffer)
                    self.replay_buffer.update_priorities(batch_idxes, new_priorities)

                callback.on_rollout_start()

            if can_sample and self.num_timesteps > self.learning_starts and \
                    self.num_timesteps % self.target_network_update_freq == 0:
                # Update target network periodically.
                self.update_target(sess=self.sess)

            # Mean over the last (up to) 100 *finished* episodes; the running
            # episode at index -1 is excluded.
            if len(episode_rewards[-101:-1]) == 0:
                mean_100ep_reward = -np.inf
            else:
                mean_100ep_reward = round(float(np.mean(episode_rewards[-101:-1])), 1)

            num_episodes = len(episode_rewards)
            if self.verbose >= 1 and done and log_interval is not None \
                    and num_episodes % log_interval == 0:
                logger.record_tabular("steps", self.num_timesteps)
                logger.record_tabular("episodes", num_episodes)
                if len(episode_successes) > 0:
                    logger.logkv("success rate", np.mean(episode_successes[-100:]))
                logger.record_tabular("mean 100 episode reward", mean_100ep_reward)
                if writer:
                    log_scalar('mean_100ep_reward', mean_100ep_reward)
                logger.record_tabular("% time spent exploring",
                                      int(100 * self.exploration.value(self.num_timesteps)))
                logger.dump_tabular()

    callback.on_training_end()
    return self
model, xs, ys, gradient_accumulation=True) if args.eval: val_iterator = val_dataset.make_initializable_iterator() val_stream = val_iterator.get_next() xs, ys = val_stream[:3], val_stream[3:] y, y_hat, eval_loss, eval_summaries = eval(model, xs, ys) saver = tf.train.Saver(max_to_keep=config.NUM_EPOCHS) # config_tf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True) config_tf = tf.ConfigProto(allow_soft_placement=True) config_tf.gpu_options.allow_growth = True run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True) with tf.Session(config=config_tf) as sess: if os.path.isdir(config.LOGDIR): shutil.rmtree(config.LOGDIR) os.mkdir(config.LOGDIR) ckpt = tf.train.latest_checkpoint(config.CHECKPOINTDIR) rouge = Rouge() if ckpt is None: logging.info("Initializing from scratch") sess.run(tf.global_variables_initializer())
def train_model(self, sess, max_iters):
    """Network training loop.

    Builds the four loss terms (RPN classification, RPN box regression,
    R-CNN classification, R-CNN box regression) from the outputs of
    ``self.net``, optimizes their sum with momentum SGD under a staircase
    exponential learning-rate decay, and runs ``max_iters`` updates.

    A snapshot is taken every ``cfg.TRAIN.SNAPSHOT_ITERS`` iterations and once
    more after the last iteration if that one was not already snapshotted.

    Args:
        sess: TensorFlow session used for initialization and every update.
        max_iters: number of SGD updates to run; with 0 nothing is trained
            and no snapshot is written.
    """
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # RPN
    # classification loss
    rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score_reshape'), [-1, 2])
    rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])
    # Anchors labelled -1 are dropped from the classification loss.
    rpn_keep = tf.where(tf.not_equal(rpn_label, -1))
    rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_keep), [-1, 2])
    rpn_label = tf.reshape(tf.gather(rpn_label, rpn_keep), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label))

    # bounding box regression L1 loss
    rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
    rpn_data = self.net.get_output('rpn-data')
    rpn_bbox_targets = tf.transpose(rpn_data[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(rpn_data[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(rpn_data[3], [0, 2, 3, 1])

    rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets,
                                             rpn_bbox_inside_weights, rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # R-CNN
    # classification loss
    cls_score = self.net.get_output('cls_score')
    roi_data = self.net.get_output('roi-data')
    label = tf.reshape(roi_data[1], [-1])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label))

    # bounding box regression L1 loss
    bbox_pred = self.net.get_output('bbox_pred')
    bbox_targets = roi_data[2]
    bbox_inside_weights = roi_data[3]
    bbox_outside_weights = roi_data[4]

    smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets,
                                         bbox_inside_weights, bbox_outside_weights)
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    # final loss
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    # optimizer and learning rate
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
                                    cfg.TRAIN.STEPSIZE, 0.1, staircase=True)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss, global_step=global_step)

    # initialize variables
    sess.run(tf.global_variables_initializer())
    if self.pretrained_model is not None:
        # Format first, then print: works both as a Python 2 print statement
        # and as the Python 3 print function.
        print(('Loading pretrained model '
               'weights from {:s}').format(self.pretrained_model))
        self.net.load(self.pretrained_model, sess, self.saver, True)

    last_snapshot_iter = -1
    # Pre-set so the final-snapshot check below is well defined (and skipped)
    # when max_iters is 0 and the loop body never runs.
    step = -1
    timer = Timer()
    for step in range(max_iters):
        # get one batch
        blobs = data_layer.forward()

        # Make one SGD update
        feed_dict = {
            self.net.data: blobs['data'],
            self.net.im_info: blobs['im_info'],
            self.net.keep_prob: 0.5,
            self.net.gt_boxes: blobs['gt_boxes'],
        }

        run_options = None
        run_metadata = None
        if cfg.TRAIN.DEBUG_TIMELINE:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        timer.tic()

        rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, _ = sess.run(
            [rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box, train_op],
            feed_dict=feed_dict,
            options=run_options,
            run_metadata=run_metadata)

        timer.toc()

        if cfg.TRAIN.DEBUG_TIMELINE:
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            # One Chrome-trace file per iteration, named by wall-clock milliseconds.
            trace_path = str(int(time.time() * 1000)) + '-train-timeline.ctf.json'
            with open(trace_path, 'w') as trace_file:
                trace_file.write(trace.generate_chrome_trace_format(show_memory=False))

        if (step + 1) % (cfg.TRAIN.DISPLAY) == 0:
            total_loss_value = (rpn_loss_cls_value + rpn_loss_box_value +
                                loss_cls_value + loss_box_value)
            # Evaluate the learning rate through the explicit session rather
            # than relying on an implicit default session.
            print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, '
                  'rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' %
                  (step + 1, max_iters, total_loss_value, rpn_loss_cls_value,
                   rpn_loss_box_value, loss_cls_value, loss_box_value, sess.run(lr)))
            print('speed: {:.3f}s / iter'.format(timer.average_time))

        if (step + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = step
            self.snapshot(sess, step)

    # Make sure the final iteration is always snapshotted.
    if last_snapshot_iter != step:
        self.snapshot(sess, step)
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # The input pipeline is pinned to CPU:0 so that its operations do not
        # sometimes land on the GPU and slow training down.
        with tf.device('/cpu:0'):
            images, labels = cifar10.distorted_inputs()

        # Inference graph -> loss -> single-batch training op.
        logits = cifar10.inference(images)
        loss = cifar10.loss(logits, labels)
        train_op = cifar10.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value on every run.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                # Only report every FLAGS.log_frequency steps.
                if self._step % FLAGS.log_frequency != 0:
                    return

                now = time.time()
                duration = now - self._start_time
                self._start_time = now

                loss_value = run_values.results
                examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                sec_per_batch = float(duration / FLAGS.log_frequency)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), self._step, loss_value,
                                    examples_per_sec, sec_per_batch))

        # Session configuration: soft placement, restricted device list,
        # GPU memory grown on demand.
        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement)
        session_config.gpu_options.visible_device_list = FLAGS.gpu
        session_config.gpu_options.allow_growth = True

        run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)

        hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        with tf.train.MonitoredTrainingSession(checkpoint_dir=FLAGS.train_dir,
                                               hooks=hooks,
                                               config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op, options=run_options)
def train_model(self, sess, max_iters):
    """Restore the pretrained multi-view model and dump its attention maps.

    Despite the name, no parameter update is performed here: the graph for
    part-level and view-level attention is built on top of the twelve view
    sub-networks (``self.net``, ``self.net1`` ... ``self.net11``), weights are
    restored from ``self.pretrained_model``, and for every model in the
    dataset the part-level similarity matrices (one per view) and the
    view-level similarity matrix are evaluated and collected. Both
    collections are written to ``.mat`` files once all models have been
    processed.

    Args:
        sess: TensorFlow session used to restore the weights and run the graph.
        max_iters: unused; the number of iterations is the hard-coded model
            count of the selected data split. Kept for interface compatibility.
    """
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # One sub-network per view: self.net, self.net1, ..., self.net11.
    nets = [self.net] + [getattr(self, 'net%d' % view) for view in range(1, 12)]
    num_views = len(nets)
    num_parts = self.proposal_number

    # Keep only the first `proposal_number` region features of every view.
    part_features = [net.get_output('pool_5')[:num_parts, :] for net in nets]

    # learning matrix 1 (part-level attention, shared by all views)
    Matrix_L1_S1 = tf.get_variable(
        'L1_S1', [self.feature_size, self.feature_size],
        initializer=tf.random_normal_initializer(
            stddev=1 / math.sqrt(self.feature_size * self.feature_size)))
    # learning matrix 2 (view-level attention)
    Matrix_L1_S2 = tf.get_variable(
        'L1_S2', [self.feature_size, self.feature_size],
        initializer=tf.random_normal_initializer(
            stddev=1 / math.sqrt(self.feature_size * self.feature_size)))

    # Region feature: max pooling over the 49 positions of each region.
    part_features = [
        tf.reduce_max(tf.reshape(features, [num_parts, 49, 512]), axis=1)
        for features in part_features
    ]

    # L1_S1: part-level attention, computed independently for every view.
    part_similarities = []
    part_sums = []
    for features in part_features:
        similarity_matrix = tf.nn.softmax(
            tf.matmul(tf.matmul(features, Matrix_L1_S1), tf.transpose(features)))
        similarity = tf.transpose(
            tf.reduce_sum(similarity_matrix, axis=0, keep_dims=True) / self.proposal_number)
        part_similarities.append(similarity_matrix)
        # Attention-weighted sum of the parts gives one row per view.
        part_sums.append(
            tf.reduce_sum(tf.multiply(similarity, features), axis=0, keep_dims=True))

    # concat views
    view_parts = tf.nn.l2_normalize(tf.concat(part_sums, axis=0), 1)

    # L1_S2: view-level attention over the per-view descriptors.
    L1_S2_Similarity = tf.nn.softmax(
        tf.matmul(tf.matmul(view_parts, Matrix_L1_S2), tf.transpose(view_parts)))
    view_similarity = tf.transpose(
        tf.reduce_sum(L1_S2_Similarity, axis=0, keep_dims=True) / self.views)
    view_sums = tf.reduce_sum(tf.multiply(view_similarity, view_parts), axis=0, keep_dims=True)
    view_sums = tf.nn.l2_normalize(view_sums, 1)

    # classification layer
    # The variables are created before the Saver so that the Saver covers
    # them; the class probabilities themselves are not fetched below.
    w_init = tf.truncated_normal_initializer(stddev=0.1)
    b_init = tf.constant_initializer(0.1)
    fc2_w = tf.get_variable('fc2_w', [self.hidden_size, self.classes],
                            dtype=tf.float32, initializer=w_init)
    fc2_b = tf.get_variable('fc2_b', [self.classes], dtype=tf.float32, initializer=b_init)
    cls_logits = tf.matmul(view_sums, fc2_w) + fc2_b
    cls_prob = tf.nn.softmax(cls_logits)

    # initializing variables, then overwrite them with the checkpoint
    self.saver = tf.train.Saver(max_to_keep=150)
    sess.run(tf.global_variables_initializer())
    self.saver.restore(sess, self.pretrained_model)
    print('loaded:%s' % (self.pretrained_model))

    timer = Timer()

    # Hard-coded data split: model count and per-class model counts.
    class_ac_test = True
    if class_ac_test:
        model_num = 732
        classes_num = [100, 10, 50, 50, 100, 100, 100, 100, 20, 50, 7, 30, 15]
    else:
        model_num = 3991
        classes_num = [106, 515, 889, 200, 200, 465, 200, 680, 392, 344]
    num_classes = len(classes_num)
    class_acc = np.zeros(num_classes, np.float32)

    # Output buffers are sized from the actual configuration so that every
    # loop index below is valid for either split.
    part_attention = np.zeros([model_num, num_views, num_parts, num_parts], np.float32)
    view_attention = np.zeros([model_num, num_views, num_views], np.float32)

    # Fetch everything in one session call so each model needs a single forward pass.
    fetches = part_similarities + [L1_S2_Similarity]

    for model_idx in range(model_num):
        # get one batch
        # Return value is not used; the call is kept in case it advances the
        # data layer's internal state (TODO confirm).
        data_layer.netvlad_target()
        blobs = [data_layer.forward() for _ in nets]

        feed_dict = {}
        for net, blob in zip(nets, blobs):
            feed_dict[net.data] = blob['data']
            feed_dict[net.im_info] = blob['im_info']
            feed_dict[net.keep_prob] = 1.0

        run_options = None
        run_metadata = None
        if cfg.TRAIN.DEBUG_TIMELINE:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        timer.tic()
        results = sess.run(fetches,
                           feed_dict=feed_dict,
                           options=run_options,
                           run_metadata=run_metadata)
        part_attention[model_idx] = np.asarray(results[:-1])
        view_attention[model_idx] = results[-1]
        timer.toc()
        print(model_idx)

    sio.savemat('/data/lxh/Models/fine-grained/attention/part_att_air1.mat',
                {'pt': part_attention})
    sio.savemat('/data/lxh/Models/fine-grained/attention/view_att_air1.mat',
                {'vt': view_attention})

    # NOTE: class_acc is never incremented in this function, so the
    # accuracies reported below are all zero.
    for i in range(num_classes):
        print("the %d class:%f" % (i, class_acc[i] / classes_num[i]))
    print('class acc: %f' % (sum(class_acc / classes_num) / num_classes))
def k_m_tf(defect_tensor, clusters, max_iters, summaries_dir, stage_str, name_str, go_to_max=False):
    """Cluster the rows of ``defect_tensor`` with k-means built as a TF graph.

    Args:
        defect_tensor: 2-D array indexed as ``defect_tensor[:, 0]`` and
            ``defect_tensor[0, :]``; it is fed as the value of the ``points``
            variable (one row per point, one column per feature).
        clusters: number of centroids.
        max_iters: upper bound on the number of update iterations.
        summaries_dir: root directory for summaries.
        stage_str: sub-directory under ``summaries_dir``.
        name_str: run name; events go to
            ``summaries_dir/stage_str/kmeans/name_str``.
        go_to_max: when true, keep iterating until ``max_iters`` even after
            the assignments have stopped changing.

    Returns:
        ``(centers, assignments)``: the final values of the centroid variable
        and of the assignment variable (cluster indices stored as float32).
    """
    length = len(defect_tensor[:, 0])
    num_clus = clusters
    tiles = len(defect_tensor[0, :])
    start = time.time()

    sess = tf.InteractiveSession()
    writer = None
    try:
        with tf.name_scope('input'):
            points = tf.Variable(tf.random_uniform([length, tiles]), dtype=tf.float32)
        with tf.name_scope('cluster_assigns'):
            cluster_assignments = tf.Variable(tf.zeros([length], dtype=tf.float32))
        with tf.name_scope('cents'):
            centroids = tf.Variable(
                tf.random_crop(points.initialized_value(), [num_clus, tiles]),
                dtype=tf.float32)

        # Replicate to N copies of each centroid and K copies of each
        # point, then subtract and compute the sum of squared distances.
        with tf.name_scope('Replicate'):
            rep_centroids = tf.reshape(tf.tile(centroids, [length, 1]),
                                       [length, num_clus, tiles])
            rep_points = tf.reshape(tf.tile(points, [1, num_clus]),
                                    [length, num_clus, tiles])
        with tf.name_scope('Sum_squares'):
            squares = tf.square(rep_points - rep_centroids)
            # The differences are squared exactly once; squaring `squares`
            # again would rank centroids by a fourth-power distance, which is
            # inconsistent with updating centroids to the arithmetic mean.
            sum_squares = tf.reduce_sum(squares, reduction_indices=2)
            squares_1d = tf.scalar_summary('sum_squares', tf.reduce_mean(sum_squares))

        # Use argmin to select the lowest-distance centroid for each point.
        with tf.name_scope('argmin'):
            best_centroids = tf.argmin(sum_squares, 1)
        did_assignments_change = tf.reduce_any(
            tf.not_equal(tf.cast(best_centroids, tf.float32), cluster_assignments))

        # Per-cluster membership counts, exposed as scalar summaries and as a
        # histogram. Shapes follow `length` so any number of points works.
        with tf.name_scope('counting'):
            found_1d = {}
            scalar_1d = {}
            assignments_column = tf.reshape(best_centroids, [length, 1])
            for i in range(num_clus):
                cluster_id = tf.constant(i, shape=[length, 1], dtype=tf.int64)
                is_member = tf.equal(assignments_column, cluster_id)
                member_count = tf.reduce_sum(tf.cast(is_member, tf.int32))
                found_1d[i] = tf.expand_dims(member_count, -1)
                scalar_1d[i] = tf.scalar_summary(str(i), tf.squeeze(found_1d[i]))
            found_tensor = tf.concat(0, [found_1d[i] for i in range(num_clus)])
            distro = tf.histogram_summary('Distribution', found_tensor)

        # Calculate the means at the indices of best_centroids.
        with tf.name_scope('means'):
            total = tf.unsorted_segment_sum(points, best_centroids, num_clus)
            count = tf.unsorted_segment_sum(tf.ones_like(points), best_centroids, num_clus)
            means = total / count
            # An empty cluster yields 0/0 = NaN; replace those entries by 0.
            means = tf.select(tf.is_nan(means), tf.ones_like(means) * 0, means)
            means_1d = tf.scalar_summary('means', tf.reduce_mean(means))

        # Do not write to the assigned clusters variable until after
        # computing whether the assignments have changed - hence the
        # control dependency.
        with tf.name_scope('Do_updates'):
            with tf.control_dependencies([did_assignments_change]):
                do_updates = tf.group(
                    centroids.assign(means),
                    cluster_assignments.assign(tf.cast(best_centroids, tf.float32)))

        # Merge summaries
        scalar_summary = tf.merge_summary([scalar_1d[i] for i in range(num_clus)])
        other_summary = tf.merge_summary([means_1d, squares_1d])
        histogram_summary = tf.merge_summary([distro])
        writer = tf.train.SummaryWriter(
            summaries_dir + '/' + stage_str + '/kmeans/' + name_str, sess.graph)

        sess.run(tf.initialize_all_variables())

        # Re-assign and update until nothing changes (or max_iters is hit).
        changed = True
        iters = 0
        while changed and iters < max_iters:
            iters += 1
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            # options/run_metadata must be handed to run(); otherwise the
            # metadata written below is an empty proto.
            changed, _, histogram_sum_run, scalar_sum_run, other_sum_run = sess.run(
                [did_assignments_change, do_updates, histogram_summary,
                 scalar_summary, other_summary],
                feed_dict={points: defect_tensor},
                options=run_options,
                run_metadata=run_metadata)
            writer.add_run_metadata(run_metadata, 'step%03d' % iters)
            writer.add_summary(histogram_sum_run, iters)
            writer.add_summary(scalar_sum_run, iters)
            writer.add_summary(other_sum_run, iters)
            if go_to_max:
                changed = True

        centers, assignments = sess.run([centroids, cluster_assignments])
        end = time.time()
        print("Found in %.2f seconds" % (end - start), iters, "iterations")
        print('Distribution:', sess.run(found_tensor, feed_dict={points: defect_tensor}))
    finally:
        if writer is not None:
            writer.close()
        # Close the session before resetting the default graph: resetting
        # while a session is active is documented as undefined behaviour.
        sess.close()
        tf.reset_default_graph()
    return centers, assignments
def train(net_configname, batch_size, devices=None, target=None, batch_num=None, tb_dir=None, train_dir=None, benchmark_name=None):
    """Build an AlexNet training graph and run it for ``batch_num`` steps.

    Args:
        net_configname: key into ``net_configs``; ``"single"`` selects
            ``alexnetmodes.original``, anything else
            ``alexnetmodes.distribute``.
        batch_size: base batch size; it is multiplied by entry 3 of the
            selected net config.
        devices: device list; defaults to entry 1 of the net config. The
            input pipeline is placed on the last device.
        target: session target; defaults to entry 2 of the net config.
        batch_num: number of steps; defaults to entry 4 of the net config.
        tb_dir: TensorBoard log directory (default ``/tmp/workspace/tflogs``).
        train_dir: checkpoint directory (default ``./model``); created when
            missing.
        benchmark_name: key into ``benchmarks`` (default ``fake_data``).
    """
    with tf.Graph().as_default():
        if tb_dir is None:
            tb_dir = '/tmp/workspace/tflogs'
        if train_dir is None:
            train_dir = './model'
        if benchmark_name is None:
            benchmark_name = 'fake_data'
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)

        input_data = benchmarks[benchmark_name]
        net_config = net_configs[net_configname]
        if devices is None:
            devices = net_config[1]
        if target is None:
            target = net_config[2]
        batch_size = net_config[3] * batch_size
        if batch_num is None:
            batch_num = net_config[4]

        with tf.device(devices[-1]):
            images, labels, num_classes = input_data(batch_size, batch_num)
        print('Input batch shape: images: {} labels: {}'.format(images.get_shape(), labels.get_shape()))

        if net_configname == "single":
            build_model = alexnetmodes.original
        else:
            build_model = alexnetmodes.distribute
        (net, logprob, total_loss, train_op, global_step) = build_model(
            images, labels, num_classes, batch_num * batch_size, devices)

        tfhelper.scalar_summary('total_loss', total_loss)
        summary_op = tfhelper.merge_all_summaries()

        # model saver
        saver = tf.train.Saver(tf.trainable_variables())

        # print some information
        for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
            print(qr.name)

        # Kept separate from `net_config` so the two are not confused.
        session_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
        speeds = []
        execution_time = []
        with tf.Session(target, config=session_config) as sess:
            sess.run(tfhelper.initialize_op())
            coord = tf.train.Coordinator()
            queue_threads = tf.train.start_queue_runners(sess, coord)
            print('{} threads started for queue'.format(len(queue_threads)))
            summary_writer = tf.summary.FileWriter(tb_dir, sess.graph)

            try:
                for step in range(batch_num):
                    if coord.should_stop():
                        break

                    # Run-time tracing is super slow, so it is disabled:
                    # `step % 4` can never equal 1000.
                    trace_this_step = step % 4 == 1000
                    run_options = None
                    run_metadata = None
                    if trace_this_step:
                        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()

                    start_time = default_timer()
                    _, loss_value, summary = sess.run(
                        [train_op, total_loss, summary_op],
                        options=run_options, run_metadata=run_metadata)
                    duration = default_timer() - start_time

                    if trace_this_step:
                        summary_writer.add_run_metadata(run_metadata, 'step{}'.format(step), step)
                    summary_writer.add_summary(summary, step)

                    num_examples_per_step = batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    speeds.append(examples_per_sec)
                    execution_time.append(sec_per_batch)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch))
                    sys.stdout.flush()

                    # Save the model checkpoint periodically.
                    if step % 100 == 0 or (step + 1) == batch_num:
                        checkpoint_path = os.path.join(train_dir, 'model-{}.ckpt'.format(step))
                        saver.save(sess, checkpoint_path, write_meta_graph=True)
            finally:
                # Always stop the queue-runner threads and flush the event
                # file, even when a training step raised.
                coord.request_stop()
                coord.join(queue_threads)
                summary_writer.close()

        # Averaging an empty list only yields NaN plus a warning.
        if speeds:
            print('Average %.1f examples/sec' % np.average(speeds))
            print('Average %.2f execution time' % np.average(execution_time))
logits=logits, labels=Y)) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) global_step_tensor = tf.Variable(0, trainable=False, name='global_step') train_op = optimizer.minimize(loss_op, global_step=global_step_tensor) # Evaluate model correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) tf.summary.scalar("accuracy", accuracy) # Initialize the variables (i.e. assign their default value) init = tf.global_variables_initializer() run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE, output_partition_graphs=True) print(run_options) run_metadata = tf.RunMetadata() merged = tf.summary.merge_all() #scaffold = tf.train.Scaffold(summary_op=merged) summary_hook = tf.train.SummarySaverHook(save_steps=1, output_dir="test/logs", summary_op=merged) profiler_hook = tf.train.ProfilerHook(save_steps=10, output_dir="test/logs", show_memory=True) #scaffold.finalize()
def train(self, sess, x, y_, accuracy, train_step, train_feed_dict, test_feed_dict):
    """Train for ``self.epochs`` epochs, logging accuracy before and after each.

    Args:
        sess: session used for initialization, evaluation and saving.
        x: placeholder that receives the processed images.
        y_: placeholder that receives the processed labels.
        accuracy: accuracy tensor that is evaluated and reported.
        train_step: training op, run once per training batch.
        train_feed_dict: feed dict for training runs; its ``x``/``y_``
            entries are overwritten in place.
        test_feed_dict: feed dict for validation runs; its ``x``/``y_``
            entries are overwritten in place.

    Side effects: copies ``self.model_file`` into ``self.tfboard_run_dir``,
    appends one line per report to ``self.results_file``, saves
    ``model.ckpt`` and finally touches a ``SUCCESS`` marker in
    ``self.tfboard_run_dir``.
    """
    # To view graph: tensorboard --logdir=/Users/ryanzotti/Documents/repos/Self_Driving_RC_Car/tf_visual_data/runs
    tf.scalar_summary('accuracy', accuracy)
    merged = tf.merge_all_summaries()

    # Archive this script to document model design in event of good results that need to be replicated
    cmd = 'cp {model_file} {archive_path}'
    shell_command(
        cmd.format(model_file=self.model_file,
                   archive_path=self.tfboard_run_dir + '/'))

    sess.run(tf.initialize_all_variables())
    dataset = Dataset(input_file_path=self.data_path,
                      max_sample_records=self.max_sample_records)

    message = "epoch: {0}, training accuracy: {1}, validation accuracy: {2}"

    def sample_accuracy(feed_dict, use_train_split):
        # Evaluate `accuracy` on one sample drawn from the requested split.
        # A full trace is requested for the run; the collected metadata is
        # not written anywhere by this method.
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_opts_metadata = tf.RunMetadata()
        images, labels = process_data(dataset.get_sample(train=use_train_split))
        feed_dict[x] = images
        feed_dict[y_] = labels
        _, value = sess.run([merged, accuracy],
                            feed_dict=feed_dict,
                            options=run_opts,
                            run_metadata=run_opts_metadata)
        return value

    def report(epoch):
        # Print and append to the results file one accuracy line for `epoch`.
        train_accuracy = sample_accuracy(train_feed_dict, True)
        test_accuracy = sample_accuracy(test_feed_dict, False)
        line = message.format(epoch, train_accuracy, test_accuracy)
        print(line)
        with open(self.results_file, 'a') as f:
            f.write(line + '\n')

    # Baseline accuracy before any training step is reported as epoch -1.
    report(-1)

    for epoch in range(self.epochs):
        for batch in dataset.get_batches(train=True):
            images, labels = process_data(batch)
            train_feed_dict[x] = images
            train_feed_dict[y_] = labels
            train_step.run(feed_dict=train_feed_dict)
        report(epoch)

    # Save the trained model to a file
    saver = tf.train.Saver()
    saver.save(sess, self.tfboard_run_dir + "/model.ckpt")

    # Marks unambiguous successful completion to prevent deletion by cleanup script
    shell_command('touch ' + self.tfboard_run_dir + '/SUCCESS')
def train():
    """Train a two-layer MNIST classifier while writing TensorBoard summaries.

    The graph consists of input placeholders, one ReLU hidden layer, dropout
    and a linear output layer. The loop then runs ``FLAGS.max_steps`` steps:
    every 10th step evaluates on the test set, steps with ``step % 100 == 99``
    train with full tracing enabled, and all other steps train normally.
    """
    # Load the dataset.
    mnist = input_data.read_data_sets(FLAGS.data_dir,
                                      one_hot=True,
                                      fake_data=FLAGS.fake_data)

    sess = tf.InteractiveSession()

    # Placeholders for the flattened images and the one-hot labels.
    with tf.name_scope('input'):
        images_ph = tf.placeholder(tf.float32, [None, 784], name='x-input')
        labels_ph = tf.placeholder(tf.float32, [None, 10], name='y-input')

    with tf.name_scope('input_reshape'):
        as_images = tf.reshape(images_ph, [-1, 28, 28, 1])
        tf.summary.image('input', as_images, 10)

    # Weights must not start at 0, otherwise the network gets stuck.
    def weight_variable(shape):
        """Return a weight variable drawn from a truncated normal."""
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def bias_variable(shape):
        """Return a bias variable filled with the constant 0.1."""
        return tf.Variable(tf.constant(0.1, shape=shape))

    def variable_summaries(var):
        """Attach mean/stddev/max/min/histogram summaries to ``var``."""
        with tf.name_scope('summaries'):
            average = tf.reduce_mean(var)
            tf.summary.scalar('mean', average)
            with tf.name_scope('stddev'):
                spread = tf.sqrt(tf.reduce_mean(tf.square(var - average)))
            tf.summary.scalar('stddev', spread)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
        """Build one fully connected layer: matmul, bias add, activation.

        Everything is created under the ``layer_name`` name scope and
        summaries are attached to the weights, biases, pre-activations and
        activations.
        """
        with tf.name_scope(layer_name):
            with tf.name_scope('weights'):
                layer_w = weight_variable([input_dim, output_dim])
                variable_summaries(layer_w)
            with tf.name_scope('biases'):
                layer_b = bias_variable([output_dim])
                variable_summaries(layer_b)
            with tf.name_scope('Wx_plus_b'):
                linear_out = tf.matmul(input_tensor, layer_w) + layer_b
                tf.summary.histogram('pre_activations', linear_out)
            layer_out = act(linear_out, name='activation')
            tf.summary.histogram('activations', layer_out)
            return layer_out

    hidden = nn_layer(images_ph, 784, 500, 'layer1')

    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        tf.summary.scalar('dropout_keep_probability', keep_prob)
        hidden_dropped = tf.nn.dropout(hidden, keep_prob)

    # The output layer stays linear; softmax is folded into the loss below.
    logits = nn_layer(hidden_dropped, 500, 10, 'layer2', act=tf.identity)

    with tf.name_scope('cross_entropy'):
        # Computing -sum(labels * log(softmax(logits))) by hand can be
        # numerically unstable, so the fused op is applied to the raw logits
        # and the result is averaged across the batch.
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=labels_ph, logits=logits)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(per_example_loss)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        train_step = optimizer.minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            predicted_class = tf.argmax(logits, 1)
            true_class = tf.argmax(labels_ph, 1)
            correct_prediction = tf.equal(predicted_class, true_class)
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all summaries and open the writers under FLAGS.log_dir.
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    def make_feed(train):
        """Map a training batch or the full test set onto the placeholders."""
        if not (train or FLAGS.fake_data):
            batch_x, batch_y = mnist.test.images, mnist.test.labels
            return {images_ph: batch_x, labels_ph: batch_y, keep_prob: 1.0}
        batch_x, batch_y = mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
        return {images_ph: batch_x, labels_ph: batch_y, keep_prob: FLAGS.dropout}

    for step in range(FLAGS.max_steps):
        if step % 10 == 0:
            # Evaluation step: record summaries and test-set accuracy.
            summary, acc = sess.run([merged, accuracy], feed_dict=make_feed(False))
            test_writer.add_summary(summary, step)
            print('Accuracy at step %s: %s' % (step, acc))
            continue

        if step % 100 != 99:
            # Ordinary training step: train and record a summary.
            summary, _ = sess.run([merged, train_step], feed_dict=make_feed(True))
            train_writer.add_summary(summary, step)
            continue

        # Traced training step: also record execution stats.
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        summary, _ = sess.run([merged, train_step],
                              feed_dict=make_feed(True),
                              options=run_options,
                              run_metadata=run_metadata)
        train_writer.add_run_metadata(run_metadata, 'step%03d' % step)
        train_writer.add_summary(summary, step)
        print('Adding run metadata for', step)

    train_writer.close()
    test_writer.close()
def train():
    """Build and train the MNIST summaries model, logging to TensorBoard.

    Graph: input placeholders -> ReLU hidden layer -> dropout -> linear
    output layer, with a softmax cross-entropy loss and an Adam train op.
    Loop: ``FLAGS.max_steps`` steps; every 10th step measures test-set
    accuracy, steps where ``step % 100 == 99`` train with a full trace, and
    the remaining steps train and record a summary.
    """
    # Read the input data.
    mnist = input_data.read_data_sets(FLAGS.data_dir,
                                      one_hot=True,
                                      fake_data=FLAGS.fake_data)

    sess = tf.InteractiveSession()

    # Build the multilayer model, starting with the input placeholders.
    with tf.name_scope('input'):
        pixels = tf.placeholder(tf.float32, [None, 784], name='x-input')
        targets = tf.placeholder(tf.float32, [None, 10], name='y-input')

    with tf.name_scope('input_reshape'):
        pixels_as_image = tf.reshape(pixels, [-1, 28, 28, 1])
        tf.summary.image('input', pixels_as_image, 10)

    # Weight variables cannot be initialised to 0: the network would not
    # train properly.
    def weight_variable(shape):
        """Create a suitably initialised weight variable."""
        start_value = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(start_value)

    def bias_variable(shape):
        """Create a suitably initialised bias variable."""
        start_value = tf.constant(0.1, shape=shape)
        return tf.Variable(start_value)

    def variable_summaries(var):
        """Attach a set of summaries to a tensor (for TensorBoard)."""
        with tf.name_scope('summaries'):
            var_mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', var_mean)
            with tf.name_scope('stddev'):
                var_stddev = tf.sqrt(tf.reduce_mean(tf.square(var - var_mean)))
            tf.summary.scalar('stddev', var_stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)

    def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
        """Reusable builder for a simple neural-net layer.

        Performs a matrix multiply and bias add, then applies ``act`` (ReLU
        by default). Name scopes keep the graph readable and summary ops are
        added along the way.
        """
        # The name scope groups the layer's ops logically in the graph.
        with tf.name_scope(layer_name):
            # These variables hold the state of the layer's weights.
            with tf.name_scope('weights'):
                w = weight_variable([input_dim, output_dim])
                variable_summaries(w)
            with tf.name_scope('biases'):
                b = bias_variable([output_dim])
                variable_summaries(b)
            with tf.name_scope('Wx_plus_b'):
                pre_act = tf.matmul(input_tensor, w) + b
                tf.summary.histogram('pre_activations', pre_act)
            post_act = act(pre_act, name='activation')
            tf.summary.histogram('activations', post_act)
            return post_act

    first_layer = nn_layer(pixels, 784, 500, 'layer1')

    with tf.name_scope('dropout'):
        keep_prob = tf.placeholder(tf.float32)
        tf.summary.scalar('dropout_keep_probability', keep_prob)
        first_layer_dropped = tf.nn.dropout(first_layer, keep_prob)

    # The softmax activation is not applied yet (see the loss below).
    scores = nn_layer(first_layer_dropped, 500, 10, 'layer2', act=tf.identity)

    with tf.name_scope('cross_entropy'):
        # The textbook formulation, -sum(targets * log(softmax(scores))),
        # can be numerically unstable, so
        # tf.nn.softmax_cross_entropy_with_logits is applied to the raw layer
        # output and then averaged across the batch.
        losses = tf.nn.softmax_cross_entropy_with_logits(labels=targets,
                                                         logits=scores)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(losses)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
        train_step = adam.minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            guess = tf.argmax(scores, 1)
            truth = tf.argmax(targets, 1)
            correct_prediction = tf.equal(guess, truth)
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge every summary and write them under FLAGS.log_dir.
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    def build_feed(train):
        """Make a TensorFlow feed dict: map data onto the placeholders."""
        use_training_data = train or FLAGS.fake_data
        if use_training_data:
            xs, ys = mnist.train.next_batch(100, fake_data=FLAGS.fake_data)
            keep = FLAGS.dropout
        else:
            xs, ys = mnist.test.images, mnist.test.labels
            keep = 1.0
        return {pixels: xs, targets: ys, keep_prob: keep}

    # Train the model and write summaries.
    for step in range(FLAGS.max_steps):
        is_eval_step = step % 10 == 0
        is_traced_step = step % 100 == 99

        if is_eval_step:
            # Record summaries and the test-set accuracy.
            summary, acc = sess.run([merged, accuracy], feed_dict=build_feed(False))
            test_writer.add_summary(summary, step)
            print('Accuracy at step %s: %s' % (step, acc))
        elif is_traced_step:
            # Train and additionally record execution stats.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            summary, _ = sess.run([merged, train_step],
                                  feed_dict=build_feed(True),
                                  options=run_options,
                                  run_metadata=run_metadata)
            train_writer.add_run_metadata(run_metadata, 'step%03d' % step)
            train_writer.add_summary(summary, step)
            print('Adding run metadata for', step)
        else:
            # Train and record a summary.
            summary, _ = sess.run([merged, train_step], feed_dict=build_feed(True))
            train_writer.add_summary(summary, step)

    train_writer.close()
    test_writer.close()
def train_optimizer(logdir, optimizer_spec, problems_and_data, num_problems, num_meta_iterations, num_unroll_func, num_partial_unroll_itrs_func, learning_rate=1e-4, gradient_clip=5., is_chief=False, select_random_problems=True, callbacks=None, obj_train_max_multiplier=-1, out=sys.stdout):
    """Trains the meta-parameters of this optimizer.

    Args:
      logdir: a directory filepath for storing model checkpoints (must exist)
      optimizer_spec: specification for an Optimizer (see utils.Spec)
      problems_and_data: a list of tuples containing three elements: a problem
        specification (see utils.Spec), a dataset (see datasets.Dataset), and
        a batch_size (int) for generating a problem and corresponding dataset.
        If the problem doesn't have data, set dataset to None. The list itself
        is never modified.
      num_problems: the number of problems to sample during meta-training
      num_meta_iterations: the number of iterations (steps) to run the
        meta-optimizer for on each subproblem.
      num_unroll_func: called once per meta iteration and returns the number
        of unrolls to do for that meta iteration.
      num_partial_unroll_itrs_func: called once per unroll and returns the
        number of iterations to do for that unroll.
      learning_rate: learning rate of the RMSProp meta-optimizer
        (Default: 1e-4)
      gradient_clip: value to clip gradients at (Default: 5.0)
      is_chief: whether this is the chief task (Default: False)
      select_random_problems: whether to select training problems randomly
        (Default: True)
      callbacks: a list of callback functions that is run after every random
        problem draw
      obj_train_max_multiplier: the maximum increase in the objective value
        over a single training run. Ignored if < 0.
      out: where to write output to, e.g. a file handle (Default: sys.stdout)

    Raises:
      ValueError: If one of the subproblems has a negative objective value.
    """
    if select_random_problems:
        # iterate over random draws of problem / dataset pairs
        sampler = (random.choice(problems_and_data) for _ in range(num_problems))
    else:
        # iterate over a random shuffle of problems, looping if necessary.
        # Floor division keeps the repeat count an int (a float cannot
        # multiply a list), and a copy is shuffled so the caller's list keeps
        # its order.
        num_repeats = num_problems // len(problems_and_data) + 1
        shuffled_problems = list(problems_and_data)
        random.shuffle(shuffled_problems)
        sampler = (shuffled_problems * num_repeats)[:num_problems]

    for problem_itr, (problem_spec, dataset, batch_size) in enumerate(sampler):

        # timer used to time how long it takes to initialize a problem
        problem_start_time = time.time()

        # if dataset is None, use the EMPTY_DATASET
        if dataset is None:
            dataset = datasets.EMPTY_DATASET
            batch_size = dataset.size

        # build a new graph for this problem
        graph = tf.Graph()
        real_device_setter = tf.train.replica_device_setter(FLAGS.ps_tasks)

        def custom_device_setter(op):
            # Places the local variables onto the workers.
            if trainable_optimizer.is_local_state_variable(op):
                return "/job:worker"
            else:
                return real_device_setter(op)

        if real_device_setter:
            device_setter = custom_device_setter
        else:
            device_setter = None

        with graph.as_default(), graph.device(device_setter):

            # initialize a problem
            problem = problem_spec.build()

            # build the optimizer
            opt = optimizer_spec.build()

            # get the meta-objective for training the optimizer
            train_output = opt.train(problem, dataset)

            state_keys = opt.state_keys
            for key, val in zip(state_keys, train_output.output_state[0]):
                finite_val = utils.make_finite(val, replacement=tf.zeros_like(val))
                tf.summary.histogram("State/{}".format(key), finite_val,
                                     collections=[OPT_SUM_COLLECTION])

            tf.summary.scalar("MetaObjective", train_output.metaobj,
                              collections=[OPT_SUM_COLLECTION])

            # Per-problem meta-objective
            tf.summary.scalar(problem_spec.callable.__name__ + "_MetaObjective",
                              train_output.metaobj,
                              collections=[OPT_SUM_COLLECTION])

            # create the meta-train_op
            global_step = tf.Variable(0, name="global_step", trainable=False)
            meta_parameters = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                scope=OPTIMIZER_SCOPE)
            # parameter regularization
            reg_l2 = FLAGS.l2_reg * sum([tf.reduce_sum(param ** 2)
                                         for param in meta_parameters])

            # compute the meta-gradients
            meta_opt = tf.train.RMSPropOptimizer(learning_rate,
                                                 decay=FLAGS.rms_decay,
                                                 use_locking=True,
                                                 epsilon=FLAGS.rms_epsilon)
            grads_and_vars = meta_opt.compute_gradients(
                train_output.metaobj + reg_l2, meta_parameters)

            # clip the gradients
            clipped_grads_and_vars = []
            for grad, var in grads_and_vars:
                clipped_grad = tf.clip_by_value(
                    utils.make_finite(grad, replacement=tf.zeros_like(var)),
                    -gradient_clip, gradient_clip)
                clipped_grads_and_vars.append((clipped_grad, var))

            # histogram summary of grads and vars
            for grad, var in grads_and_vars:
                tf.summary.histogram(
                    var.name + "_rawgrad",
                    utils.make_finite(grad, replacement=tf.zeros_like(grad)),
                    collections=[OPT_SUM_COLLECTION])
            for grad, var in clipped_grads_and_vars:
                tf.summary.histogram(var.name + "_var", var,
                                     collections=[OPT_SUM_COLLECTION])
                tf.summary.histogram(var.name + "_grad", grad,
                                     collections=[OPT_SUM_COLLECTION])

            # builds the train and summary operations
            train_op = meta_opt.apply_gradients(clipped_grads_and_vars,
                                                global_step=global_step)

            # only grab summaries defined for LOL, not inside the problem
            summary_op = tf.summary.merge_all(key=OPT_SUM_COLLECTION)

            # make sure the state gets propagated after the gradients and
            # summaries were computed.
            with tf.control_dependencies([train_op, summary_op]):
                propagate_loop_state_ops = []
                for dest, src in zip(train_output.init_loop_vars,
                                     train_output.output_loop_vars):
                    propagate_loop_state_ops.append(dest.assign(src))
                propagate_loop_state_op = tf.group(*propagate_loop_state_ops)

            # create the supervisor
            sv = tf.train.Supervisor(
                graph=graph,
                is_chief=is_chief,
                logdir=logdir,
                summary_op=None,
                save_model_secs=0,  # we save checkpoints manually
                global_step=global_step,
            )

            with sv.managed_session() as sess:

                init_time = time.time() - problem_start_time
                out.write("--------- Problem #{} ---------\n".format(problem_itr))
                out.write("{callable.__name__}{args}{kwargs}\n".format(
                    **problem_spec.__dict__))
                out.write("Took {} seconds to initialize.\n".format(init_time))
                out.flush()

                # For profiling summaries
                if FLAGS.set_profiling:
                    summary_writer = tf.summary.FileWriter(logdir, graph=sess.graph)

                # used to store information during training
                metadata = defaultdict(list)

                for k in range(num_meta_iterations):

                    if sv.should_stop():
                        break

                    problem.init_fn(sess)

                    # set run options (for profiling)
                    full_trace_opt = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_options = full_trace_opt if FLAGS.set_profiling else None
                    run_metadata = tf.RunMetadata() if FLAGS.set_profiling else None

                    num_unrolls = num_unroll_func()
                    # `range` instead of the Python-2-only `xrange`.
                    partial_unroll_iters = [
                        num_partial_unroll_itrs_func() for _ in range(num_unrolls)
                    ]
                    total_num_iter = sum(partial_unroll_iters)

                    objective_weights = [np.ones(num) / float(num)
                                         for num in partial_unroll_iters]
                    db = dataset.batch_indices(total_num_iter, batch_size)
                    # Split the batch indices into one slice per unroll.
                    dataset_batches = []
                    last_index = 0
                    for num in partial_unroll_iters:
                        dataset_batches.append(db[last_index:last_index + num])
                        last_index += num

                    train_start_time = time.time()

                    unroll_itr = 0
                    additional_log_info = ""

                    for unroll_itr in range(num_unrolls):
                        first_unroll = unroll_itr == 0
                        if FLAGS.reset_rnn_params:
                            reset_state = first_unroll and k == 0
                        else:
                            reset_state = first_unroll

                        feed = {
                            train_output.obj_weights: objective_weights[unroll_itr],
                            train_output.batches: dataset_batches[unroll_itr],
                            train_output.first_unroll: first_unroll,
                            train_output.reset_state: reset_state,
                        }

                        # run the train and summary ops
                        # when a "save_diagnostics" flag is turned on
                        fetches_list = [
                            train_output.metaobj,
                            train_output.problem_objectives,
                            train_output.initial_obj,
                            summary_op,
                            clipped_grads_and_vars,
                            train_op
                        ]
                        # The loop state is carried over to every unroll but
                        # the last one.
                        if unroll_itr + 1 < num_unrolls:
                            fetches_list += [propagate_loop_state_op]

                        fetched = sess.run(fetches_list, feed_dict=feed,
                                           options=run_options,
                                           run_metadata=run_metadata)
                        meta_obj = fetched[0]
                        sub_obj = fetched[1]
                        init_obj = fetched[2]
                        summ = fetched[3]
                        meta_grads_and_params = fetched[4]

                        # assert that the subproblem objectives are
                        # non-negative (this is so that we can rescale the
                        # objective by the initial value and not worry about
                        # rescaling by a negative value)
                        if np.any(sub_obj < 0):
                            raise ValueError(
                                "Training problem objectives must be nonnegative.")
                        # If the objective has increased more than we want,
                        # exit this training run and start over on another
                        # meta iteration.
                        if obj_train_max_multiplier > 0 and (
                                sub_obj[-1] > (init_obj + abs(init_obj) *
                                               (obj_train_max_multiplier - 1))):
                            msg = " Broke early at {} out of {} unrolls. ".format(
                                unroll_itr + 1, num_unrolls)
                            additional_log_info += msg
                            break

                        # only the chief task is allowed to write the summary
                        if is_chief:
                            sv.summary_computed(sess, summ)

                        metadata["subproblem_objs"].append(sub_obj)
                        # store training metadata to pass to the callback
                        metadata["meta_objs"].append(meta_obj)
                        metadata["meta_grads_and_params"].append(
                            meta_grads_and_params)

                    optimization_time = time.time() - train_start_time

                    if FLAGS.set_profiling:
                        summary_name = "%02d_iter%04d_%02d" % (FLAGS.task, problem_itr, k)
                        summary_writer.add_run_metadata(run_metadata, summary_name)

                    metadata["global_step"].append(sess.run(global_step))
                    metadata["runtimes"].append(optimization_time)

                    # write a diagnostic message to the output
                    args = (k, meta_obj, optimization_time,
                            sum(partial_unroll_iters[:unroll_itr + 1]))
                    out.write(" [{:02}] {}, {} seconds, {} iters ".format(*args))
                    out.write("(unrolled {} steps)".format(
                        ", ".join([str(s) for s in partial_unroll_iters[:unroll_itr + 1]])))
                    out.write("{}\n".format(additional_log_info))
                    out.flush()

                if FLAGS.set_profiling:
                    summary_writer.close()

                # force a checkpoint save before we load a new problem
                # only the chief task has the save_path and can write the
                # checkpoint
                if is_chief:
                    sv.saver.save(sess, sv.save_path, global_step=global_step)

            # run the callbacks on the chief
            if is_chief and callbacks is not None:
                for callback in callbacks:
                    if callable(callback):
                        problem_name = problem_spec.callable.__name__
                        callback(problem_name, problem_itr, logdir, metadata)
def main():
    """Run the export, test or training pipeline selected by ``a.mode``.

    All configuration is read from the module-level options object ``a``.
    The function seeds TensorFlow, NumPy and ``random`` with ``a.seed``
    (drawing a seed first when none is set), creates ``a.output_dir`` if
    needed and writes the effective options to ``options.json`` inside it.

    * ``"export"``: builds a graph that takes a base64 PNG string and yields
      a base64-encoded PNG/JPEG, restores the latest checkpoint found in
      ``a.checkpoint`` and writes ``export.meta`` plus the ``export``
      checkpoint into ``a.output_dir``; then returns.
    * ``"test"``: runs ``display_fetches`` for at most
      ``examples.steps_per_epoch`` steps, saving images and the index.
    * any other mode: runs the training loop, with tracing, summaries,
      display images, progress output and checkpoints each controlled by
      their ``a.*_freq`` option.

    Raises:
        Exception: if no checkpoint is given in test/export mode, if export
            is combined with ``lab_colorization``, or if
            ``a.output_filetype`` is neither ``"png"`` nor ``"jpeg"``.
    """
    if a.seed is None:
        a.seed = random.randint(0, 2**31 - 1)

    # Seed TensorFlow, NumPy and the stdlib generator with the same value.
    tf.set_random_seed(a.seed)
    np.random.seed(a.seed)
    random.seed(a.seed)

    if not os.path.exists(a.output_dir):
        os.makedirs(a.output_dir)

    if a.mode == "test" or a.mode == "export":
        if a.checkpoint is None:
            # The message names the actual mode (it used to say "test" even
            # when exporting).
            raise Exception("checkpoint required for %s mode" % a.mode)

        # load some options from the checkpoint
        options = {"which_direction", "ngf", "ndf", "lab_colorization"}
        with open(os.path.join(a.checkpoint, "options.json")) as f:
            for key, val in json.load(f).items():
                if key in options:
                    print("loaded", key, "=", val)
                    setattr(a, key, val)
        # disable these features in test mode
        a.scale_size = CROP_SIZE
        a.flip = False

    for k, v in a._get_kwargs():
        print(k, "=", v)

    with open(os.path.join(a.output_dir, "options.json"), "w") as f:
        f.write(json.dumps(vars(a), sort_keys=True, indent=4))

    if a.mode == "export":
        # export the generator to a meta graph that can be imported later
        # for standalone generation
        if a.lab_colorization:
            raise Exception("export not supported for lab_colorization")

        # Named input_ph so the builtin ``input`` is not shadowed.
        input_ph = tf.placeholder(tf.string, shape=[1])
        input_data = tf.decode_base64(input_ph[0])
        input_image = tf.image.decode_png(input_data)

        # The alpha-stripping / grayscale-to-RGB steps below are disabled in
        # this version; the decoded image is declared to have 4 channels.
        # # remove alpha channel if present
        # input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 4), lambda: input_image[:,:,:3], lambda: input_image)
        # # convert grayscale to RGB
        # input_image = tf.cond(tf.equal(tf.shape(input_image)[2], 1), lambda: tf.image.grayscale_to_rgb(input_image), lambda: input_image)

        input_image = tf.image.convert_image_dtype(input_image,
                                                   dtype=tf.float32)
        input_image.set_shape([CROP_SIZE, CROP_SIZE, 4])
        batch_input = tf.expand_dims(input_image, axis=0)

        with tf.variable_scope("generator"):
            batch_output = deprocess(
                create_generator(preprocess(batch_input), 4))

        output_image = tf.image.convert_image_dtype(batch_output,
                                                    dtype=tf.uint8)[0]
        if a.output_filetype == "png":
            output_data = tf.image.encode_png(output_image)
        elif a.output_filetype == "jpeg":
            output_data = tf.image.encode_jpeg(output_image, quality=80)
        else:
            raise Exception("invalid filetype")
        output = tf.convert_to_tensor([tf.encode_base64(output_data)])

        # Tensor names are stored as JSON in graph collections so a consumer
        # of the meta graph can look the endpoints up.
        key = tf.placeholder(tf.string, shape=[1])
        inputs = {"key": key.name, "input": input_ph.name}
        tf.add_to_collection("inputs", json.dumps(inputs))
        outputs = {
            "key": tf.identity(key).name,
            "output": output.name,
        }
        tf.add_to_collection("outputs", json.dumps(outputs))

        init_op = tf.global_variables_initializer()
        restore_saver = tf.train.Saver()
        export_saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            restore_saver.restore(sess, checkpoint)
            print("exporting model")
            export_saver.export_meta_graph(
                filename=os.path.join(a.output_dir, "export.meta"))
            export_saver.save(sess,
                              os.path.join(a.output_dir, "export"),
                              write_meta_graph=False)

        return

    examples = load_examples()
    print("examples count = %d" % examples.count)

    # inputs and targets are [batch_size, height, width, channels]
    model = create_model(examples.inputs, examples.targets)

    inputs = deprocess(examples.inputs)
    targets = deprocess(examples.targets)
    outputs = deprocess(model.outputs)
    print(inputs.shape)
    print(targets.shape)
    print(outputs.shape)

    def convert(image):
        """Resize to the configured aspect ratio and convert to uint8."""
        if a.aspect_ratio != 1.0:
            # upscale to correct aspect ratio
            size = [CROP_SIZE, int(round(CROP_SIZE * a.aspect_ratio))]
            image = tf.image.resize_images(
                image, size=size, method=tf.image.ResizeMethod.BICUBIC)

        return tf.image.convert_image_dtype(image,
                                            dtype=tf.uint8,
                                            saturate=True)

    # reverse any processing on images so they can be written to disk or
    # displayed to user
    with tf.name_scope("convert_inputs"):
        converted_inputs = convert(inputs)

    with tf.name_scope("convert_targets"):
        converted_targets = convert(targets)

    with tf.name_scope("convert_outputs"):
        converted_outputs = convert(outputs)

    with tf.name_scope("encode_images"):
        display_fetches = {
            "paths": examples.paths,
            "inputs": tf.map_fn(tf.image.encode_png, converted_inputs,
                                dtype=tf.string, name="input_pngs"),
            "targets": tf.map_fn(tf.image.encode_png, converted_targets,
                                 dtype=tf.string, name="target_pngs"),
            "outputs": tf.map_fn(tf.image.encode_png, converted_outputs,
                                 dtype=tf.string, name="output_pngs"),
        }

    # summaries
    with tf.name_scope("inputs_summary"):
        tf.summary.image("inputs", converted_inputs)

    with tf.name_scope("targets_summary"):
        tf.summary.image("targets", converted_targets)

    with tf.name_scope("outputs_summary"):
        tf.summary.image("outputs", converted_outputs)

    with tf.name_scope("predict_real_summary"):
        tf.summary.image(
            "predict_real",
            tf.image.convert_image_dtype(model.predict_real, dtype=tf.uint8))

    with tf.name_scope("predict_fake_summary"):
        tf.summary.image(
            "predict_fake",
            tf.image.convert_image_dtype(model.predict_fake, dtype=tf.uint8))

    tf.summary.scalar("discriminator_loss", model.discrim_loss)
    tf.summary.scalar("generator_loss_GAN", model.gen_loss_GAN)
    tf.summary.scalar("generator_loss_L1", model.gen_loss_L1)

    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name + "/values", var)

    for grad, var in model.discrim_grads_and_vars + model.gen_grads_and_vars:
        tf.summary.histogram(var.op.name + "/gradients", grad)

    with tf.name_scope("parameter_count"):
        parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    saver = tf.train.Saver(max_to_keep=1)

    # The Supervisor only gets a log directory when traces or summaries are
    # enabled; checkpoints are written through ``saver`` above instead
    # (saver=None is passed to the Supervisor).
    logdir = a.output_dir if (a.trace_freq > 0
                              or a.summary_freq > 0) else None
    sv = tf.train.Supervisor(logdir=logdir, save_summaries_secs=0, saver=None)
    with sv.managed_session() as sess:
        print("parameter_count =", sess.run(parameter_count))

        if a.checkpoint is not None:
            print("loading model from checkpoint")
            checkpoint = tf.train.latest_checkpoint(a.checkpoint)
            saver.restore(sess, checkpoint)

        # max_steps takes precedence over max_epochs when both are set.
        max_steps = 2**32
        if a.max_epochs is not None:
            max_steps = examples.steps_per_epoch * a.max_epochs
        if a.max_steps is not None:
            max_steps = a.max_steps

        if a.mode == "test":
            # testing
            # at most, process the test data once
            start = time.time()
            max_steps = min(examples.steps_per_epoch, max_steps)
            for step in range(max_steps):
                results = sess.run(display_fetches)
                filesets = save_images(results)
                for fileset in filesets:
                    print("evaluated image", fileset["name"])
                index_path = append_index(filesets)

            if max_steps > 0:
                print("wrote index at", index_path)
                print("rate", (time.time() - start) / max_steps)
            else:
                # Without this guard an empty run failed on the unbound
                # index_path and on a division by zero.
                print("no test steps were run")
        else:
            # training
            start = time.time()

            for step in range(max_steps):

                def should(freq):
                    # True every ``freq`` steps and on the final step;
                    # a non-positive freq disables the action.
                    return freq > 0 and ((step + 1) % freq == 0
                                         or step == max_steps - 1)

                options = None
                run_metadata = None
                if should(a.trace_freq):
                    options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                fetches = {
                    "train": model.train,
                    "global_step": sv.global_step,
                }

                if should(a.progress_freq):
                    fetches["discrim_loss"] = model.discrim_loss
                    fetches["gen_loss_GAN"] = model.gen_loss_GAN
                    fetches["gen_loss_L1"] = model.gen_loss_L1

                if should(a.summary_freq):
                    fetches["summary"] = sv.summary_op

                if should(a.display_freq):
                    fetches["display"] = display_fetches

                results = sess.run(fetches,
                                   options=options,
                                   run_metadata=run_metadata)

                if should(a.summary_freq):
                    print("recording summary")
                    sv.summary_writer.add_summary(results["summary"],
                                                  results["global_step"])

                if should(a.display_freq):
                    print("saving display images")
                    filesets = save_images(results["display"],
                                           step=results["global_step"])
                    append_index(filesets, step=True)

                if should(a.trace_freq):
                    print("recording trace")
                    sv.summary_writer.add_run_metadata(
                        run_metadata, "step_%d" % results["global_step"])

                if should(a.progress_freq):
                    # global_step will have the correct step count if we
                    # resume from a checkpoint
                    train_epoch = math.ceil(results["global_step"] /
                                            examples.steps_per_epoch)
                    train_step = (results["global_step"] -
                                  1) % examples.steps_per_epoch + 1
                    rate = (step + 1) * a.batch_size / (time.time() - start)
                    remaining = (max_steps - step) * a.batch_size / rate
                    print(
                        "progress epoch %d step %d image/sec %0.1f remaining %dm"
                        % (train_epoch, train_step, rate, remaining / 60))
                    print("discrim_loss", results["discrim_loss"])
                    print("gen_loss_GAN", results["gen_loss_GAN"])
                    print("gen_loss_L1", results["gen_loss_L1"])

                if should(a.save_freq):
                    print("saving model")
                    saver.save(sess,
                               os.path.join(a.output_dir, "model"),
                               global_step=sv.global_step)

                if sv.should_stop():
                    break
def train(self, session, X_train, Y_train, checkpoint_every=1000, log_dir='log', display_step=5, verbose=True):
    """
    Training the network

    Runs ``self.train_step`` over every batch yielded by
    ``self._get_epochs``, writes TensorBoard summaries under ``log_dir``,
    saves checkpoints to ``self.directories.checkpoint_linear_dir`` and stops
    early once recall has not improved for 20 consecutive checks.

    :param session: TensorFlow session used to run the graph
    :param X_train: features matrix
    :type 2-D Numpy array of float values
    :param Y_train: one-hot encoded labels matrix
    :type 2-D Numpy array of int values
    :param checkpoint_every: RNN model checkpoint frequency
    :type int
    :param log_dir: TensorBoard log directory
    :type string
    :param display_step: number of training epochs executed before logging messages
    :type int
    :param verbose: display log messages on screen at each training epoch
    :type boolean
    :returns: ``(cost_history, epoch_accuracy, epoch_update_op_recall,
        epoch_update_op_precision)``; the last three hold the values of the
        most recent metric evaluation, or 0 if none ran
    :raises: - (any exception raised while training is caught, printed and
        ends training)
    """
    print("\nTraining the network...\n")

    epoch_cost = 0
    epoch_accuracy = 0
    epoch_recall = 0
    # These three were previously unbound at the final print/return when no
    # metric evaluation had run ('epoch_precison' was a typo).
    epoch_precision = 0
    epoch_update_op_recall = 0
    epoch_update_op_precision = 0
    epoch_iteration = 0
    winner_recall = 0
    winner_since = 0
    current_epoch = 0
    current_iteration = 0
    # NOTE(review): np.empty leaves element 0 of each history uninitialised;
    # kept as-is because callers may rely on the resulting length/offset.
    cost_history = np.empty(shape=[1], dtype=float)
    accuracy_history = np.empty(shape=[1], dtype=float)
    recall_history = np.empty(shape=[1], dtype=float)
    precision_history = np.empty(shape=[1], dtype=float)

    # Bound up front so the cleanup below is safe even if creation fails.
    train_writer = None
    test_writer = None
    writer = None

    try:
        # Merge all the summaries and write them out
        self.summary = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(log_dir, 'train', datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        test_writer = tf.summary.FileWriter(os.path.join(log_dir, 'test', datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        writer = tf.summary.FileWriter(os.path.join(log_dir, datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        writer.add_graph(session.graph)

        session.run(self.initializer)
        saver = tf.train.Saver(tf.global_variables())

        # Same ops are fetched for traced and untraced steps.
        fetches = [self.summary, self.train_step, self.cost,
                   self.smoothed_predictions, self.label_predictions,
                   self.logits]

        for epoch_idx, epoch in enumerate(
                self._get_epochs(self.training_parameters.training_epochs,
                                 X_train, Y_train)):
            current_epoch = epoch_idx
            avg_cost = 0.
            current_iteration = 0
            done = False  # set once this epoch's metrics have been logged

            for batch_step, (batch_x, batch_y) in enumerate(epoch):
                current_iteration = batch_step

                feed = {
                    self.inputs: batch_x,
                    self.targets: batch_y,
                    self.learning_rate: self.model_parameters.learning_rate,
                    self.momentum: self.model_parameters.momentum,
                    self.input_keep_probability: self.model_parameters.input_keep_probability,
                    self.output_keep_probability: self.model_parameters.output_keep_probability,
                    self.is_training: True,
                }

                # Record execution stats on the first batch of every 100th epoch.
                trace_step = epoch_idx % 100 == 99 and batch_step == 0
                run_options = None
                run_metadata = None
                if trace_step:
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                # Run optimization op (backprop) and cost op (to get loss value)
                fetched = session.run(fetches, feed_dict=feed,
                                      options=run_options,
                                      run_metadata=run_metadata)
                _summary = fetched[0]
                _cost = fetched[2]

                if trace_step:
                    train_writer.add_run_metadata(run_metadata, 'step%03d' % epoch_idx)
                train_writer.add_summary(_summary, batch_step)

                # Compute average loss
                # (A tf.summary.scalar('train_loss', ...) call used to sit
                # here; it added a new op to the graph on every batch and the
                # op was never run, so it has been removed.)
                avg_cost += _cost / self.model_parameters.batch_size

                global_batch = epoch_idx * self.model_parameters.batch_size + batch_step
                last_batch = (epoch_idx == self.training_parameters.training_epochs - 1
                              and batch_step == self.model_parameters.batch_size - 1)
                if global_batch % checkpoint_every == 0 or last_batch:
                    # Save for the last result
                    checkpoint_path = os.path.join(self.directories.checkpoint_linear_dir, 'model.ckpt')
                    saver.save(session, checkpoint_path, global_step=global_batch)
                    print("model saved to {}".format(checkpoint_path))

                epoch_cost += _cost
                epoch_iteration += self.model_parameters.batch_size

                # Display logs per epoch step
                # NOTE(review): the original indentation was lost; this
                # nesting (metrics logged on the first batch of a displayed
                # epoch, histories/early stopping once per epoch) is a
                # reconstruction - confirm against the upstream file.
                if epoch_idx % display_step == 0 and verbose and not done:
                    # Calculate batch accuracy
                    (epoch_accuracy, epoch_recall, epoch_update_op_recall,
                     epoch_precision, epoch_update_op_precision,
                     _threshold) = session.run(
                        [self.accuracy, self.recall, self.update_op_recall,
                         self.precision, self.update_op_precision,
                         self.decision_threshold],
                        feed_dict={
                            self.inputs: batch_x,
                            self.targets: batch_y,
                            self.is_training: True})
                    # Named timestamp so the ``time`` module is not shadowed.
                    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    done = True
                    test_writer.add_summary(_summary, epoch_idx)
                    print(str(timestamp), ": Epoch:", '%04d' % (epoch_idx), "cost=", "{:.9f}".format(avg_cost), ", Accuracy= ", "{:.5f}".format(epoch_accuracy), ", Recall= ", "{:.5f}".format(epoch_recall), ", Precision= ", "{:.5f}".format(epoch_precision), ", Threshold= ", _threshold)

            cost_history = np.append(cost_history, avg_cost)  # Epoch cost
            accuracy_history = np.append(accuracy_history, epoch_accuracy)  # Epoch accuracy
            recall_history = np.append(recall_history, epoch_recall)
            precision_history = np.append(precision_history, epoch_precision)

            # Early stopping on recall: reset the counter on improvement.
            if winner_recall < epoch_recall:
                winner_recall = epoch_recall
                winner_since = 0
            else:
                winner_since = winner_since + 1

            if winner_since >= 20:
                raise Exception('No Recall improvements since 20 epochs ... Force stopping!')

    except KeyboardInterrupt:
        pass
    except Exception as error:
        # Deliberate best-effort: any failure (including the early-stopping
        # exception above) ends training and is reported, not re-raised.
        print('Early stopping mechanism enabled ...')
        print(error)
    finally:
        # Close every writer that was opened, on all exit paths.
        for summary_writer in (train_writer, test_writer, writer):
            if summary_writer is not None:
                summary_writer.close()

    print("Stop training at epoch %d, iteration %d" % (current_epoch, current_iteration), ", Accuracy= ", "{:.5f}".format(epoch_accuracy), ", Recall= ", "{:.5f}".format(epoch_recall), ", Precision= ", "{:.5f}".format(epoch_precision))
    print("Optimization Finished!")
    # Plotting of the histories (plot_training_losses / plot_metrics) is
    # currently disabled.

    return cost_history, epoch_accuracy, epoch_update_op_recall, epoch_update_op_precision
def train(dataset):
    """Train on dataset for a number of steps.

    Builds one model tower per GPU (``FLAGS.num_gpus``), averages the tower
    gradients, applies them with RMSProp under an exponentially decayed
    learning rate and runs ``FLAGS.max_steps`` steps. On every step the
    per-layer bit widths reported by the norm monitor are fed through
    ``bits_ph`` and then re-adjusted from the fetched norms. Summaries and
    checkpoints go to ``FLAGS.train_dir``; the collected layer info is
    pickled to a ``layerinfo_*.dat`` file in the working directory.

    Args:
        dataset: object providing ``num_examples_per_epoch()`` and
            ``num_classes()``; it is also passed to
            ``image_processing.distorted_inputs``.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # The graph seed is fixed. (A time-based seed used to be set right
        # before this call and was immediately overwritten by it.)
        tf.set_random_seed(198918)

        # Create a variable to count the number of train() calls. This equals
        # the number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # One int32 placeholder per monitored layer; fed every step below.
        bits_ph = [tf.placeholder(tf.int32) for _ in range(31)]

        nm = norm_monitor.norm_monitor(FLAGS.digits, len(bits_ph),
                                       FLAGS.rel_res, FLAGS.interval,
                                       FLAGS.stride)
        if FLAGS.layerinfo_file:
            assert tf.gfile.Exists(FLAGS.layerinfo_file)
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only point layerinfo_file at trusted data.
            with open(FLAGS.layerinfo_file, 'rb') as layerinfo_in:
                tmp = pickle.load(layerinfo_in)
            nm.set_layerinfo(tmp[-1])
            print("Restore layerinfo")
            print(nm.get_layerinfo())

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                 FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)
        print("num_batches_per_epoch: {}".format(num_batches_per_epoch))
        print("use bitpack: {}".format(FLAGS.use_bitpack))
        print("learning rate: {}".format(FLAGS.initial_learning_rate))
        print("produce trace: {}".format(FLAGS.profile))
        print("digits: {}".format(FLAGS.digits))
        print("rel_res: {}".format(FLAGS.rel_res))
        print("interval: {}".format(FLAGS.interval))
        print("stride: {}".format(FLAGS.stride))

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.RMSPropOptimizer(lr,
                                        RMSPROP_DECAY,
                                        momentum=RMSPROP_MOMENTUM,
                                        epsilon=RMSPROP_EPSILON)

        # Get images and labels for ImageNet and split the batch across GPUs.
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, (
            'Batch size must be divisible by number of GPUs')
        split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus)

        # Override the number of preprocessing threads to account for the
        # increased number of GPU towers.
        num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus
        images, labels = image_processing.distorted_inputs(
            dataset, num_preprocess_threads=num_preprocess_threads)

        input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Split the batch of images and labels for towers.
        images_splits = tf.split(axis=0,
                                 num_or_size_splits=FLAGS.num_gpus,
                                 value=images)
        labels_splits = tf.split(axis=0,
                                 num_or_size_splits=FLAGS.num_gpus,
                                 value=labels)

        # Calculate the gradients for each model tower.
        tower_norms = []
        tower_grads = []
        tower_preds_1 = []
        tower_preds_5 = []
        tower_losses = []
        reuse_variables = None
        for i in range(FLAGS.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' %
                                   (inception.TOWER_NAME, i)) as scope:
                    # Calculate the loss for one tower of the ImageNet model.
                    # This function constructs the entire ImageNet model but
                    # shares the variables across all towers.
                    loss, norms, logits_split = _tower_loss(
                        images_splits[i], labels_splits[i], num_classes,
                        scope, reuse_variables, bits_ph)
                    top_1_correct = tf.nn.in_top_k(logits_split,
                                                   labels_splits[i], 1)
                    top_5_correct = tf.nn.in_top_k(logits_split,
                                                   labels_splits[i], 5)

                    # Reuse variables for the next tower.
                    reuse_variables = True

                    # Retain the summaries from the final tower.
                    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                  scope)

                    # Retain the Batch Normalization updates operations only
                    # from the final tower. Ideally, we should grab the
                    # updates from all towers but these stats accumulate
                    # extremely fast so we can ignore the other stats from
                    # the other towers without significant detriment.
                    # NOTE(review): the collection is read without the tower
                    # scope, so it is not actually limited to one tower.
                    batchnorm_updates = tf.get_collection(
                        tf.GraphKeys.UPDATE_OPS)

                    # Calculate the gradients for the batch of data on this
                    # ImageNet tower.
                    grads = opt.compute_gradients(loss)

                    # Keep track of the gradients across all towers.
                    tower_grads.append(grads)
                    tower_norms.append(norms)
                    tower_preds_1.append(
                        tf.reduce_sum(tf.cast(top_1_correct, tf.int32)))
                    tower_preds_5.append(
                        tf.reduce_sum(tf.cast(top_5_correct, tf.int32)))
                    tower_losses.append(loss)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = _average_gradients(tower_grads)
        top_1_sum = tf.add_n(tower_preds_1)
        top_5_sum = tf.add_n(tower_preds_5)
        losses_sum = tf.add_n(tower_losses)

        # Add a summaries for the input processing and global_step.
        summaries.extend(input_summaries)

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated then need be but we
        # employ this for backward-compatibility with our previous models.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY, global_step)

        # Another possibility is to use tf.slim.get_variables().
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates to into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge(summaries)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be
        # set to True to build towers on GPU, as some of the ops do not have
        # GPU implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        if FLAGS.pretrained_model_checkpoint_path:
            assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
            restorer = tf.train.Saver(tf.global_variables(), max_to_keep=100)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        # tf.global_variables replaces the deprecated tf.all_variables alias.
        for v in tf.global_variables():
            print("%s %s %s %s" % (v.name, v.get_shape(), v.dtype, v.device))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        bits_dict = dict()
        elapse = []
        glayerinfo = []
        lip_name = 'layerinfo_{}_{}_{}_{}_{}_{}_{}.dat'.format(
            9, 4096, 0, FLAGS.digits, FLAGS.stride, FLAGS.interval,
            FLAGS.use_bitpack)

        for step in range(FLAGS.max_steps):
            run_metadata = tf.RunMetadata()
            start_time = time.time()

            # Feed the monitor's current per-layer value into each placeholder.
            info = nm.get_layerinfo()
            for i, bits in enumerate(bits_ph):
                bits_dict[bits] = info[i][0]

            if FLAGS.profile is False:
                _, loss_value, norms, top_1, top_5 = sess.run(
                    [train_op, losses_sum, tower_norms, top_1_sum, top_5_sum],
                    feed_dict=bits_dict)
            else:
                # Profiling run: full trace, last tower's loss only, and
                # placeholder accuracy counts.
                _, loss_value, norms = sess.run(
                    [train_op, loss, tower_norms],
                    feed_dict=bits_dict,
                    options=tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE),
                    run_metadata=run_metadata)
                top_1 = 5
                top_5 = 25

            nm.adjust_digits(norms)
            duration = time.time() - start_time
            elapse.append(duration)

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # NOTE(review): the original indentation was lost; treating the
            # logging and trace dump as part of the every-10-steps branch is
            # a reconstruction - confirm against the upstream file.
            if step % 10 == 0:
                glayerinfo.append(copy.deepcopy(nm.get_layerinfo()))
                # Print layerinfo
                print(info)
                examples_per_sec = FLAGS.batch_size / float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch) elapse %.5f s top_1 %.5f top_5 %.5f')
                # NOTE(review): top_1/top_5 are summed over all towers;
                # dividing by batch_size * num_gpus assumes every tower sees
                # batch_size examples - confirm against distorted_inputs.
                pred_1 = top_1 / (FLAGS.batch_size * FLAGS.num_gpus)
                pred_5 = top_5 / (FLAGS.batch_size * FLAGS.num_gpus)
                print(format_str %
                      (datetime.now(), step, loss_value, examples_per_sec,
                       duration, sum(elapse), pred_1, pred_5))
                sys.stdout.flush()

                if FLAGS.profile is True:
                    # The timeline is only built when it is written out.
                    tl = timeline.Timeline(run_metadata.step_stats)
                    if FLAGS.use_bitpack is False:
                        trace_name = 'timeline%03d.json' % step
                    else:
                        trace_name = 'bitpack_timeline%03d.json' % step
                    # Context manager closes (and flushes) the trace file.
                    with tf.gfile.Open(name=trace_name,
                                       mode='w') as trace_file:
                        trace_file.write(
                            tl.generate_chrome_trace_format(show_memory=True))

            if step % 100 == 0:
                summary_str = sess.run(summary_op, feed_dict=bits_dict)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 4000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        # Record the final layer info and persist the whole history.
        glayerinfo.append(copy.deepcopy(nm.get_layerinfo()))
        with open(lip_name, 'wb') as layerinfo_out:
            pickle.dump(glayerinfo, layerinfo_out)
def _train_step(self, learning_rate, cliprange, obs, returns, masks, actions, values, neglogpacs, update, writer, states=None, cliprange_vf=None): """ Training of PPO2 Algorithm :param learning_rate: (float) learning rate :param cliprange: (float) Clipping factor :param obs: (np.ndarray) The current observation of the environment :param returns: (np.ndarray) the rewards :param masks: (np.ndarray) The last masks for done episodes (used in recurent policies) :param actions: (np.ndarray) the actions :param values: (np.ndarray) the values :param neglogpacs: (np.ndarray) Negative Log-likelihood probability of Actions :param update: (int) the current step iteration :param writer: (TensorFlow Summary.writer) the writer for tensorboard :param states: (np.ndarray) For recurrent policies, the internal state of the recurrent model :return: policy gradient loss, value function loss, policy entropy, approximation of kl divergence, updated clipping range, training update operation :param cliprange_vf: (float) Clipping factor for the value function """ advs = returns - values advs = (advs - advs.mean()) / (advs.std() + 1e-8) td_map = { self.train_model.obs_ph: obs, self.action_ph: actions, self.advs_ph: advs, self.rewards_ph: returns, self.learning_rate_ph: learning_rate, self.clip_range_ph: cliprange, self.old_neglog_pac_ph: neglogpacs, self.old_vpred_ph: values } if states is not None: td_map[self.train_model.states_ph] = states td_map[self.train_model.dones_ph] = masks if cliprange_vf is not None and cliprange_vf >= 0: td_map[self.clip_range_vf_ph] = cliprange_vf if states is None: update_fac = self.n_batch // self.nminibatches // self.noptepochs + 1 else: update_fac = self.n_batch // self.nminibatches // self.noptepochs // self.n_steps + 1 if writer is not None: # run loss backprop with summary, but once every 10 runs save the metadata (memory, compute time, ...) 
if self.full_tensorboard_log and (1 + update) % 10 == 0: run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() summary, policy_loss, value_loss, policy_entropy, approxkl, clipfrac, bss_loss, l2_loss, _ = self.sess.run( [ self.summary, self.pg_loss, self.vf_loss, self.entropy, self.approxkl, self.clipfrac, self.bss_loss, self.l2_loss, self._train ], td_map, options=run_options, run_metadata=run_metadata) writer.add_run_metadata(run_metadata, 'step%d' % (update * update_fac)) else: summary, policy_loss, value_loss, policy_entropy, approxkl, clipfrac, bss_loss, l2_loss, _ = self.sess.run( [ self.summary, self.pg_loss, self.vf_loss, self.entropy, self.approxkl, self.clipfrac, self.bss_loss, self.l2_loss, self._train ], td_map) writer.add_summary(summary, (update * update_fac)) else: policy_loss, value_loss, policy_entropy, approxkl, clipfrac, bss_loss, l2_loss, _ = self.sess.run( [ self.pg_loss, self.vf_loss, self.entropy, self.approxkl, self.clipfrac, self.bss_loss, self.l2_loss, self._train ], td_map) return policy_loss, value_loss, policy_entropy, approxkl, clipfrac, bss_loss, l2_loss
def train_model(self, sess, max_iters):
    """Network training loop.

    Builds the RPN classification / box-regression losses and the R-CNN
    classification / box-regression losses from the outputs of ``self.net``,
    minimises their sum with momentum SGD under a staircase exponential
    learning-rate decay, and runs ``max_iters`` updates, printing progress
    every ``cfg.TRAIN.DISPLAY`` iterations and snapshotting every
    ``cfg.TRAIN.SNAPSHOT_ITERS`` iterations and once more at the end.

    The shape and sampling notes in the comments below are translated from
    the original author's annotations; they describe the upstream layers and
    cannot be verified from this function alone.

    :param sess: TensorFlow session used to initialise variables and train
    :param max_iters: number of training iterations to run
    """
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # ---- RPN classification loss ----
    # (original note: only 256 anchors are used for this loss, 128 fg and
    # 128 bg)
    # rpn_cls_score_reshape is noted as [1, 126 (9*14), 14, 2]; after the
    # reshape it is [1764 (9*14*14), 2]. 9 is the number of anchors.
    rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score_reshape'), [-1, 2])
    rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])
    # Original notes on the four 'rpn-data' outputs:
    #   rpn_labels: (1, 1, 14*9, 14), elements 1 / 0 / -1 (14*14*9 in total).
    #     128 fg anchors (1) and 128 bg anchors (0), chosen as in the paper;
    #     all remaining anchors are -1 and are ignored. Anchors whose overlap
    #     exceeds a threshold (0.7) are labelled 1, those with overlap below
    #     0.5 are labelled 0; if there are too many, 256 are picked at random.
    #   rpn_bbox_targets: (1, 9*4, 14, 14), elements are the centre x / y
    #     shifts and width / height transforms of the anchors relative to the
    #     gt box (inside anchors only); all other boxes are [0 0 0 0].
    #   rpn_bbox_inside_weights: (1, 9*4, 14, 14), [1,1,1,1] for the kept fg
    #     anchors (label == 1, about 128); all other boxes are [0 0 0 0].
    #   rpn_bbox_outside_weights: (1, 9*4, 14, 14), [1/128, ...] for the kept
    #     fg and bg anchors (label 1 or 0, 256 in total); others [0 0 0 0].

    # Keep only the anchors whose label is not -1.
    # tf.not_equal gives element-wise booleans, tf.where the indices of the
    # True entries, and tf.gather the entries of its first argument at those
    # indices.
    rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))), [-1, 2])
    rpn_label = tf.reshape(tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])

    # Objectness loss over the sampled fg/bg anchors (about 256) and the
    # predictions at the matching indices, averaged over those anchors.
    rpn_cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label))

    # ---- RPN bounding box regression L1 loss ----
    # (original note: only the ~128 fg boxes contribute)
    # rpn_bbox_pred is noted as [1, 14, 14, 36] (9 anchors * 4); the three
    # 'rpn-data' tensors are transposed to that same layout.
    rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
    rpn_bbox_targets = tf.transpose(self.net.get_output('rpn-data')[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(self.net.get_output('rpn-data')[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(self.net.get_output('rpn-data')[3], [0, 2, 3, 1])

    rpn_smooth_l1 = self._modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights)
    # Sum over the three non-batch dimensions, then average over the batch.
    rpn_loss_box = tf.reduce_mean(tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))
    # Original explanation of the RPN stage: 128 anchors that overlap a gt
    # box strongly and 128 that overlap weakly are selected, together with
    # their (dx, dy, dw, dh) targets and objectness labels. The forward pass
    # gives the predictions at the indices of those 256 anchors, and the
    # (dx, dy, dw, dh) and objectness losses are minimised by gradient
    # descent. After many such steps the network tends, on an unseen image,
    # to predict the correct offsets (relative to the anchor) in
    # rpn-bbox-pred and the correct objectness in rpn-cls-score-reshape for
    # the anchors that best overlap foreground objects. Each such prediction
    # corresponds to one box relative to one anchor; many well-predicted
    # boxes should result and they are filtered further below.

    # ---- R-CNN classification loss ----
    # (original note: class loss over only 128 RoIs, about 42 fg and
    # 128-42 bg)
    cls_score = self.net.get_output('cls_score')  # noted as (num kept proposals (128), 21)
    label = tf.reshape(self.net.get_output('roi-data')[1], [-1])
    # Original notes on the five 'roi-data' outputs:
    #   (1) rois: the finally kept proposals (fg and bg are kept according to
    #       each RoI's overlap with the gt boxes; surplus ones are chosen at
    #       random). blob[:, 0] = 0; blob[:-2, 1:5] = x1, y1, x2, y2 of the
    #       predicted box in the original image; blob[-2:, 1:5] = gt box.
    #       [0:fg_rois_per_this_image (about 42)] is the kept foreground,
    #       the rest is the kept background.
    #   (2) labels: ground-truth class of each predicted box, shape
    #       [num kept proposals (128)], e.g. [9, 15, 15, 15, 9, 9, ...].
    #   (3) num kept proposals (128) x 4*21: [dx, dy, dw, dh] (gt boxes
    #       relative to the RoIs) for one class.
    #   (4) num kept proposals x 4*21: [1, 1, 1, 1] for one class.
    #   (5) num kept proposals x 4*21: [true] * 4 for one class out of 21 and
    #       [false] * 4 for the remaining classes.
    cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label))

    # ---- R-CNN bounding box regression L1 loss ----
    bbox_pred = self.net.get_output('bbox_pred')  # noted as (num kept proposals (128), 21*4)
    bbox_targets = self.net.get_output('roi-data')[2]
    bbox_inside_weights = self.net.get_output('roi-data')[3]
    bbox_outside_weights = self.net.get_output('roi-data')[4]

    smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights)
    loss_box = tf.reduce_mean(tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    # final loss
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    # optimizer and learning rate
    # decayed_lr = learning_rate * 0.1 ^ floor(global_step / STEPSIZE)
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
                                    cfg.TRAIN.STEPSIZE, 0.1, staircase=True)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(loss, global_step=global_step)

    # initialize variables
    sess.run(tf.global_variables_initializer())
    if self.pretrained_model is not None:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Loading pretrained model '
              'weights from {:s}'.format(self.pretrained_model))
        self.net.load(self.pretrained_model, sess, self.saver, True)

    last_snapshot_iter = -1
    timer = Timer()
    # Loop variable named step so the builtin ``iter`` is not shadowed.
    # (original note: 70000 iterations; about 10000 images in total)
    for step in range(max_iters):
        # get one batch
        # Original notes: blobs['data'] is [1, maxL, maxH, 3];
        # blobs['gt_boxes'] is [[x1, y1, x2, y2, class], ...];
        # blobs['im_info'] is [[max_length, max_width, im_scale]] where
        # im_scale is the scaling ratio.
        blobs = data_layer.forward()

        # Make one SGD update
        feed_dict = {self.net.data: blobs['data'],
                     self.net.im_info: blobs['im_info'],
                     self.net.keep_prob: 0.5,
                     self.net.gt_boxes: blobs['gt_boxes']}

        run_options = None
        run_metadata = None
        if cfg.TRAIN.DEBUG_TIMELINE:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        timer.tic()

        rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, _ = sess.run(
            [rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box, train_op],
            feed_dict=feed_dict,
            options=run_options,
            run_metadata=run_metadata)

        timer.toc()

        if cfg.TRAIN.DEBUG_TIMELINE:
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            # int() gives the same digits as the former long() on Python 2
            # and also works on Python 3.
            trace_name = str(int(time.time() * 1000)) + '-train-timeline.ctf.json'
            with open(trace_name, 'w') as trace_file:
                trace_file.write(trace.generate_chrome_trace_format(show_memory=False))

        if (step + 1) % (cfg.TRAIN.DISPLAY) == 0:
            # The learning rate is evaluated in the session passed in rather
            # than relying on an implicit default session.
            print('iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' %
                  (step + 1, max_iters,
                   rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value,
                   rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value,
                   loss_box_value, lr.eval(session=sess)))
            print('speed: {:.3f}s / iter'.format(timer.average_time))

        if (step + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = step
            self.snapshot(sess, step)

    # Final snapshot unless the last iteration already produced one. With
    # max_iters == 0 nothing was trained, so no snapshot is taken.
    if max_iters > 0 and last_snapshot_iter != step:
        self.snapshot(sess, step)
def add_to_queue(session, queue_operation, coordinator, normed_outputs,
                 pix_values, sn_range, interp_sn, y_off_range, fetch_size,
                 randomize, preprocess, queuetype):
    """Build batches of examples and enqueue them until the coordinator stops.

    Designed to be used as a thread target: each pass of the loop selects
    ``fetch_size`` rows, optionally preprocesses the fluxes, and runs the
    enqueue op for the requested queue.

    Args:
        session: TensorFlow session used to run the enqueue and queue-size ops.
        queue_operation: enqueue op that is run with the generated batch.
        coordinator: object exposing ``should_stop()``; the loop ends as soon
            as it returns a true value.
        normed_outputs: 2-D array of known parameters, one row per star. The
            first and last columns are dropped before enqueueing.
        pix_values: 2-D array of fluxes, one row per star.
        sn_range: ``(low, high)`` bounds for the uniformly drawn S/N values
            (only read when ``preprocess`` is true).
        interp_sn: forwarded unchanged to ``preprocess_spectra``.
        y_off_range: ``(low, high)`` bounds for the uniformly drawn offsets
            (only read when ``preprocess`` is true).
        fetch_size: number of rows per generated batch.
        randomize: if true, rows are drawn at random with replacement;
            otherwise the first ``fetch_size`` rows are used.
        preprocess: if true, fluxes go through ``preprocess_spectra``.
        queuetype: ``'train_q'`` or ``'xval_q'``. Any other value builds the
            batch but enqueues nothing.

    Returns:
        None.
    """
    while not coordinator.should_stop():
        # Called without arguments, this re-seeds NumPy's global generator
        # from fresh entropy on every pass.
        np.random.seed()
        num_stars = np.size(pix_values[:, 0])
        if randomize:
            # `high` is exclusive in np.random.randint, so pass num_stars
            # itself. The former `num_stars - 1` could never pick the last
            # star and raised ValueError when only one star was available.
            select_star = np.random.randint(0, num_stars,
                                            size=(fetch_size, 1))
        else:
            select_star = np.reshape(np.arange(0, fetch_size),
                                     (fetch_size, 1))
        rows = select_star[:, 0]
        fluxes = pix_values[rows, :]
        # strip off star num, dummy param info
        known = normed_outputs[rows, 1:-1]
        if preprocess:
            sn_values = np.random.uniform(sn_range[0], sn_range[1],
                                          size=(fetch_size, 1))
            y_offsets = np.random.uniform(y_off_range[0], y_off_range[1],
                                          size=(fetch_size, 1))
            proc_fluxes = preprocess_spectra(fluxes, interp_sn, sn_values,
                                             y_offsets)
        else:
            proc_fluxes = fluxes

        if queuetype == 'train_q':
            try:
                session.run(queue_operation,
                            feed_dict={
                                image_data: proc_fluxes,
                                known_params: known
                            })
            except tf.errors.CancelledError:
                if randomize:
                    print('Input queue closed, exiting training')
        elif queuetype == 'xval_q':
            xval_feed = {xval_data: proc_fluxes, xval_params: known}
            xval_enqueued = session.run(xval_queue.size(),
                                        feed_dict=xval_feed)
            # Only enqueue while the queue still has room for another
            # cross-validation set below the configured maximum depth.
            if xval_enqueued <= int(parameters['MAX_PP_DEPTH']) - int(
                    parameters['XVAL_SIZE']):
                try:
                    session.run(queue_operation,
                                feed_dict=xval_feed,
                                options=tf.RunOptions(timeout_in_ms=5000))
                except tf.errors.DeadlineExceededError:
                    sizes = session.run(xval_queue.size(),
                                        feed_dict=xval_feed)
                    print(
                        'Cross-validation enqueue error, current queue size: '
                        + str(sizes))
                except tf.errors.CancelledError:
                    if randomize:
                        print('Input queue closed, exiting training')
def train(config):
    """Build the training, sampling and optional validation graphs, then train.

    The function creates the input pipelines, the models, the optimizer with
    optional gradient clipping, restores or creates a model directory, dumps
    the configuration next to the checkpoints and finally runs the epoch loop
    with periodic checkpointing, summaries and validation.

    Args:
        config: dict of experiment settings. Keys read here include
            'training_data', 'use_bow_labels', 'batch_size', 'num_epochs',
            'loss_weights', 'learning_rate_type', 'learning_rate',
            'grad_clip_by_norm', 'grad_clip_by_value', 'model_dir',
            'tensorboard_verbose' and the '*_every_step' intervals, plus the
            optional flags 'use_bucket_feeder', 'use_staging_area',
            'validate_model' and 'create_timeline'. On a fresh start
            ``config['model_dir']`` is overwritten with the new directory.

    Returns:
        None.

    Raises:
        Exception: if ``config['learning_rate_type']`` is neither
            'exponential' nor 'fixed'.
    """
    Model_cls = HandwritingVRNNGmmModel
    Dataset_cls = HandWritingDatasetConditionalTF

    # Optional flags are read once so every use below agrees on the default.
    use_staging_area = config.get('use_staging_area', False)
    validate_model = config.get('validate_model', False)

    # Dataset
    training_dataset = Dataset_cls(config['training_data'],
                                   use_bow_labels=config['use_bow_labels'])
    num_training_iterations = int(training_dataset.num_samples /
                                  config['batch_size'])
    print("# training steps per epoch: " + str(num_training_iterations))

    # Create a tensorflow sub-graph that loads batches of samples.
    if config.get('use_bucket_feeder', True) and training_dataset.is_dynamic:
        bucket_edges = training_dataset.get_seq_len_histogram(
            num_bins=15, collapse_first_and_last_bins=[2, -2])
        data_feeder = DataFeederTF(training_dataset,
                                   config['num_epochs'],
                                   config['batch_size'],
                                   queue_capacity=1024)
        sequence_length, inputs, targets = data_feeder.batch_queue_bucket(
            bucket_edges,
            dynamic_pad=training_dataset.is_dynamic,
            queue_capacity=300,
            queue_threads=4)
    else:
        # Training data
        data_feeder = DataFeederTF(training_dataset,
                                   config['num_epochs'],
                                   config['batch_size'],
                                   queue_capacity=1024)
        sequence_length, inputs, targets = data_feeder.batch_queue(
            dynamic_pad=training_dataset.is_dynamic,
            queue_capacity=512,
            queue_threads=4)

    if use_staging_area:
        staging_area = TFStagingArea([sequence_length, inputs, targets],
                                     device_name="/gpu:0")
        sequence_length, inputs, targets = staging_area.tensors

    # Create step counter (used by optimization routine and learning rate function.)
    global_step = tf.get_variable(name='global_step',
                                  trainable=False,
                                  initializer=1)

    # Annealing KL-divergence loss.
    # The original value is kept so the config can be serialized later
    # without the tensor that temporarily replaces it.
    kld_loss_weight_backup = config['loss_weights']['kld_loss']
    if isinstance(config['loss_weights']['kld_loss'], np.ndarray):
        # Create a piecewise increasing kld loss weight.
        num_steps = len(config['loss_weights']['kld_loss'])
        values = np.linspace(0, 1, num_steps + 1).tolist()
        boundaries = (config['loss_weights']['kld_loss'] *
                      num_training_iterations).tolist()
        config['loss_weights']['kld_loss'] = tf.train.piecewise_constant(
            global_step, boundaries=boundaries, values=values)
        tf.summary.scalar('training/kld_loss_weight',
                          config['loss_weights']['kld_loss'],
                          collections=["training_status"])

    # Create training graph.
    with tf.name_scope("training"):
        model = Model_cls(config,
                          reuse=False,
                          input_op=inputs,
                          target_op=targets,
                          input_seq_length_op=sequence_length,
                          input_dims=training_dataset.input_dims,
                          target_dims=training_dataset.target_dims,
                          mode="training",
                          data_processor=training_dataset)
        model.build_graph()
        model.create_image_summary(training_dataset.prepare_for_visualization)

    # Create sampling graph.
    with tf.name_scope("sampling"):
        sampling_input_op = tf.placeholder(
            tf.float32,
            shape=[
                1, training_dataset.sequence_length,
                sum(training_dataset.input_dims)
            ])
        sampling_sequence_length_op = tf.placeholder(tf.int32, shape=[1])
        sampling_model = Model_cls(
            config,
            reuse=True,
            input_op=sampling_input_op,
            target_op=None,
            input_seq_length_op=sampling_sequence_length_op,
            input_dims=training_dataset.input_dims,
            target_dims=training_dataset.target_dims,
            batch_size=1,
            mode="sampling",
            data_processor=training_dataset)
        sampling_model.build_graph()
        sampling_model.create_image_summary(
            training_dataset.prepare_for_visualization)

    # Validation model.
    if validate_model:
        validation_dataset = Dataset_cls(
            config['validation_data'],
            use_bow_labels=config['use_bow_labels'])
        num_validation_iterations = int(validation_dataset.num_samples /
                                        config['batch_size'])
        print("# validation steps per epoch: " +
              str(num_validation_iterations))
        valid_data_feeder = DataFeederTF(validation_dataset,
                                         config['num_epochs'],
                                         config['batch_size'],
                                         queue_capacity=1024,
                                         shuffle=False)
        valid_sequence_length, valid_inputs, valid_targets = valid_data_feeder.batch_queue(
            dynamic_pad=validation_dataset.is_dynamic,
            queue_capacity=512,
            queue_threads=4)

        if use_staging_area:
            valid_staging_area = TFStagingArea(
                [valid_sequence_length, valid_inputs, valid_targets],
                device_name="/gpu:0")
            valid_sequence_length, valid_inputs, valid_targets = valid_staging_area.tensors

        with tf.name_scope("validation"):
            valid_model = Model_cls(
                config,
                reuse=True,
                input_op=valid_inputs,
                target_op=valid_targets,
                input_seq_length_op=valid_sequence_length,
                input_dims=validation_dataset.input_dims,
                target_dims=validation_dataset.target_dims,
                mode="training",
                data_processor=validation_dataset)
            valid_model.build_graph()

    # Create a session object and initialize parameters.
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            allow_soft_placement=True))

    if config['learning_rate_type'] == 'exponential':
        learning_rate = tf.train.exponential_decay(
            config['learning_rate'],
            global_step=global_step,
            decay_steps=config['learning_rate_decay_steps'],
            decay_rate=config['learning_rate_decay_rate'],
            staircase=False)
        tf.summary.scalar('training/learning_rate',
                          learning_rate,
                          collections=["training_status"])
    elif config['learning_rate_type'] == 'fixed':
        learning_rate = config['learning_rate']
    else:
        raise Exception("Invalid learning rate type")

    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Gradient clipping and a sanity check.
    grads = list(
        zip(tf.gradients(model.loss, tf.trainable_variables()),
            tf.trainable_variables()))
    grads_clipped = []
    with tf.name_scope("grad_clipping"):
        for grad, var in grads:
            if config['grad_clip_by_norm'] > 0:
                grads_clipped.append(
                    (tf.clip_by_norm(grad, config['grad_clip_by_norm']), var))
            elif config['grad_clip_by_value'] > 0:
                # Clip into the symmetric interval [-v, v]. The upper bound
                # used to be -v as well, which forced every gradient to -v.
                grads_clipped.append(
                    (tf.clip_by_value(grad, -config['grad_clip_by_value'],
                                      config['grad_clip_by_value']), var))
            else:
                grads_clipped.append((grad, var))
    train_op = optimizer.apply_gradients(grads_and_vars=grads_clipped,
                                         global_step=global_step)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)

    run_opts = None
    run_opts_metadata = None
    if config.get('create_timeline', False):
        run_opts = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE,
                                 timeout_in_ms=100000)
        run_opts_metadata = tf.RunMetadata()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=2, save_relative_paths=True)
    if config['model_dir']:
        # If model directory already exists, continue training by restoring computation graph.
        # Restore variables.
        if config['checkpoint_id'] is None:
            checkpoint_path = tf.train.latest_checkpoint(config['model_dir'])
        else:
            checkpoint_path = os.path.join(config['model_dir'],
                                           config['checkpoint_id'])
        print("Continue training with model " + checkpoint_path)
        saver.restore(sess, checkpoint_path)
        step = tf.train.global_step(sess, global_step)
        start_epoch = round(
            step / (training_dataset.num_samples / config['batch_size']))
    else:
        # Fresh start
        # Create a unique output directory for this experiment.
        config['model_dir'] = get_model_dir_timestamp(
            base_path=config['model_save_dir'],
            prefix="tf",
            suffix=config['experiment_name'],
            connector="-")
        print("Saving to {}\n".format(config['model_dir']))
        start_epoch = 1
        step = 1

    coord = tf.train.Coordinator()
    # Enqueue threads must be initialized after definition of train_op.
    data_feeder.init(sess, coord)
    if validate_model:
        valid_data_feeder.init(sess, coord)
    queue_threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    # NOTE(review): if `enqueue_threads` is a list this nests it inside
    # `queue_threads`; `extend` may have been intended — confirm against
    # DataFeederTF before changing.
    queue_threads.append(data_feeder.enqueue_threads)

    # Register and create summary ops.
    summary_dir = os.path.join(config['model_dir'], "summary")
    summary_writer = tf.summary.FileWriter(summary_dir, sess.graph)

    # Create summaries to visualize weights and gradients.
    if config['tensorboard_verbose'] > 1:
        for grad, var in grads:
            tf.summary.histogram(var.name,
                                 var,
                                 collections=["training_status"])
            tf.summary.histogram(var.name + '/gradient',
                                 grad,
                                 collections=["training_status"])

    if config['tensorboard_verbose'] > 1:
        tf.summary.scalar(
            "training/queue",
            math_ops.cast(data_feeder.input_queue.size(), dtypes.float32) *
            (1. / data_feeder.queue_capacity),
            collections=["training_status"])

    # Save configuration
    config['loss_weights']['kld_loss'] = kld_loss_weight_backup
    # Pickle and json dump.
    with open(os.path.join(config['model_dir'], 'config.pkl'),
              'wb') as pkl_file:
        pickle.dump(config, pkl_file)
    try:
        with open(os.path.join(config['model_dir'], 'config.json'),
                  'w') as json_file:
            json.dump(config, json_file, indent=4, sort_keys=True)
    except Exception as err:
        # The JSON copy is best-effort; the pickle above is the
        # authoritative dump, so only report the failure.
        print("Could not write config.json: " + str(err))

    training_summary = tf.summary.merge_all('training_status')
    training_run_ops = [
        model.loss_summary, training_summary, model.ops_loss, train_op
    ]
    training_run_ops_with_img_summary = [
        model.loss_summary, training_summary, model.ops_loss,
        model.ops_img_summary, train_op
    ]
    if validate_model:
        validation_run_ops = [valid_model.ops_loss]

    if use_staging_area:
        training_run_ops.append(staging_area.preload_op)
        training_run_ops_with_img_summary.append(staging_area.preload_op)
        # Fill staging area first.
        for _ in range(256):
            sess.run(staging_area.preload_op,
                     feed_dict={},
                     options=run_opts,
                     run_metadata=run_opts_metadata)

        if validate_model:
            validation_run_ops.append(valid_staging_area.preload_op)
            # Fill staging area first.
            for _ in range(256):
                sess.run(valid_staging_area.preload_op,
                         feed_dict={},
                         options=run_opts,
                         run_metadata=run_opts_metadata)

    for epoch in range(start_epoch, config['num_epochs'] + 1):
        for epoch_step in range(num_training_iterations):
            start_time = time.perf_counter()
            step = tf.train.global_step(sess, global_step)

            if (step % config['checkpoint_every_step']) == 0:
                ckpt_save_path = saver.save(
                    sess, os.path.join(config['model_dir'], 'model'),
                    global_step)
                print("Model saved in file: %s" % ckpt_save_path)

            if step % config['img_summary_every_step'] == 0:
                run_training_output = sess.run(
                    training_run_ops_with_img_summary,
                    feed_dict={},
                    options=run_opts,
                    run_metadata=run_opts_metadata)
                # Index 3 is model.ops_img_summary in the op list above.
                img_summary = model.get_image_summary(
                    sess,
                    ops_img_summary_evaluated=run_training_output[3],
                    seq_len=500)
                summary_writer.add_summary(img_summary, step)
            else:
                run_training_output = sess.run(training_run_ops,
                                               feed_dict={},
                                               options=run_opts,
                                               run_metadata=run_opts_metadata)

            # Loss summary
            summary_writer.add_summary(run_training_output[0], step)
            # Training status summary.
            summary_writer.add_summary(run_training_output[1], step)

            if step % config['print_every_step'] == 0:
                time_elapsed = (time.perf_counter() -
                                start_time) / config['print_every_step']
                model.log_loss(run_training_output[2], step, epoch,
                               time_elapsed)

            if step % config['img_summary_every_step'] == 0:
                sampling_img_summary = sampling_model.get_image_summary(
                    sess, ops_img_summary_evaluated=None, seq_len=500)
                summary_writer.add_summary(sampling_img_summary, step)

            if validate_model and step % config['validate_every_step'] == 0:
                start_time = time.perf_counter()
                for _ in range(num_validation_iterations):
                    run_validation_output = sess.run(
                        validation_run_ops,
                        feed_dict={},
                        options=run_opts,
                        run_metadata=run_opts_metadata)
                    valid_model.update_validation_loss(
                        run_validation_output[0])

                valid_summary, valid_eval_loss = valid_model.get_validation_summary(
                    session=sess)
                # Validation loss summary
                summary_writer.add_summary(valid_summary, step)

                time_elapsed = (time.perf_counter() -
                                start_time) / num_validation_iterations
                valid_model.log_loss(valid_eval_loss,
                                     step,
                                     epoch,
                                     time_elapsed,
                                     prefix="VALID: ")
                valid_model.reset_validation_loss()

            if config.get('create_timeline', False):
                create_tf_timeline(config['model_dir'], run_opts_metadata)

    print("End-of-Training.")
    ckpt_save_path = saver.save(sess,
                                os.path.join(config['model_dir'], 'model'),
                                global_step)
    print("Model saved in file: %s" % ckpt_save_path)
    print('Model is trained for %d epochs, %d steps.' %
          (config['num_epochs'], step))

    try:
        sess.run(data_feeder.input_queue.close(cancel_pending_enqueues=True))
        coord.request_stop()
        coord.join(queue_threads, stop_grace_period_secs=5)
    except Exception as err:
        # Shutdown is best-effort: report the problem but still close the
        # session below.
        print("Input queue shutdown did not complete cleanly: " + str(err))
    sess.close()
def train_neural_network(parameters):
    """Train the network in one or two stages, save checkpoints, then freeze.

    Reads the training (and optionally cross-validation) data, defines two
    nested helpers (``xval_subloop`` and ``train_loop``) that share this
    function's local state, runs training stage 1, optionally stage 2, and
    finally calls ``freeze_model(parameters)``.

    Graph-level names such as ``cost``, ``optimize``, ``queue_op``,
    ``xval_op``, ``input_queue``, ``xval_queue``, ``learning_rate``,
    ``dropout``, ``batch_size``, ``select_queue`` and the ``*_sum`` summaries
    are not defined in this function; they are expected to exist in an
    enclosing/module scope.

    Args:
        parameters: mapping of string settings. Flag-like entries are compared
            against the literal ``'YES'``; numeric entries are converted with
            ``int``/``float``/``np.fromstring`` where they are used.

    Returns:
        None.
    """
    # import parameter values for normalizing known input parameters and preprocessing spectra
    minvals = np.fromstring(parameters['MIN_PARAMS'],
                            sep=', ',
                            dtype=np.float32)
    maxvals = np.fromstring(parameters['MAX_PARAMS'],
                            sep=', ',
                            dtype=np.float32)
    # read in the training data
    wavelengths, normed_outputs, pix_values = \
        read_known_binary(parameters['TRAINING_DATA'], parameters, minvals, maxvals)
    if parameters['PREPROCESS_TRAIN'] == 'YES':
        sn_range = np.fromstring(parameters['SN_RANGE_TRAIN'],
                                 sep=', ',
                                 dtype=np.float32)
        y_off_range = np.fromstring(parameters['REL_CONT_E_TRAIN'],
                                    sep=', ',
                                    dtype=np.float32)
        sn_template_file = parameters['SN_TEMPLATE']
        interp_sn = interpolate_sn(sn_template_file, wavelengths)
    else:
        # placeholders; only read by add_to_queue when preprocessing is on
        sn_range, y_off_range, interp_sn = 0, 0, 0
    fetch_size = int(parameters['NUM_FETCH'])
    bsize_train1 = int(parameters['BATCH_SIZE1'])
    # if separate xval set specified, read that in
    if parameters['TRAINING_XVAL'] == 'YES':
        xv_wave, xv_norm_out, xv_px_val = read_known_binary(
            parameters['XVAL_DATA'], parameters, minvals, maxvals)
        xv_size = int(parameters['XVAL_SIZE'])
        if parameters['PREPROCESS_XVAL'] == 'YES':
            # this could reload parameters, but allows for case of not preprocessing training but preprocess xval
            # NOTE(review): the xval branch reuses the *_TRAIN ranges and the
            # training wavelengths — confirm that is intended.
            sn_template_file = parameters['SN_TEMPLATE']
            interp_sn = interpolate_sn(sn_template_file, wavelengths)
            sn_range = np.fromstring(parameters['SN_RANGE_TRAIN'],
                                     sep=', ',
                                     dtype=np.float32)
            y_off_range = np.fromstring(parameters['REL_CONT_E_TRAIN'],
                                        sep=', ',
                                        dtype=np.float32)

    # subloop definition to build a separate queue for xval data if it exists and calculate the cost
    def xval_subloop(learn_rate, bsize, step, inherit_iter_count):
        """Fill the xval queue with worker threads, evaluate the cost once,
        and return it rounded to two decimals.

        Writes the xval summary at ``step + inherit_iter_count`` when
        tensorboard output is enabled.
        """
        queuetype = 'xval_q'
        xvcoordinator = tf.train.Coordinator()
        randomize = False
        # if xval preprocessing desired, flip the correct flag in add_to_queue
        if parameters['PREPROCESS_XVAL'] == 'YES':
            xval_training = True
        else:
            xval_training = False
        num_xvthread = int(parameters['XV_THREADS'])
        # force preprocessing to run on the cpu - this is actually not optimal since the threads will be respawned
        # every time xval subloop run but for smaller xval sizes this shouldn't matter much
        with tf.device("/cpu:0"):
            xval_threads = [
                threading.Thread(target=add_to_queue,
                                 args=(session, xval_op, xvcoordinator,
                                       xv_norm_out, xv_px_val, sn_range,
                                       interp_sn, y_off_range, xv_size,
                                       randomize, xval_training, queuetype))
                for i in range(num_xvthread)
            ]
            for i in xval_threads:
                i.start()
        # dropout is fed as 1.0 and select_queue as 1 for the xval pass
        feed_dict_xval = {
            learning_rate: learn_rate,
            dropout: 1.0,
            batch_size: bsize,
            select_queue: 1
        }
        xval_cost, xval_sum = session.run([cost, xval_cost_sum],
                                          feed_dict=feed_dict_xval)
        if parameters['TBOARD_OUTPUT'] == 'YES':
            writer.add_summary(xval_sum, step + inherit_iter_count)
        # close the queue and join threads
        xvcoordinator.request_stop()
        xvcoordinator.join(xval_threads)
        # return cost
        return round(xval_cost, 2)

    # definition of the training loop in order to allow for multistage training
    def train_loop(iterations, learn_rate, keep_prob, bsize,
                   inherit_iter_count):
        """Run one training stage.

        Starts the enqueue threads, runs up to ``iterations`` optimize steps
        with periodic diagnostics and optional early stopping, then closes
        the queues and joins the threads.

        Returns:
            A tuple ``(elapsed_seconds_as_str, completed_iterations)``.
        """
        begin_time = time.time()
        coordinator = tf.train.Coordinator()
        # will always randomize batch selection for training
        randomize = True
        # if training preprocessing desired, flips appropriate flag
        if parameters['PREPROCESS_TRAIN'] == 'YES':
            pp_training = True
        else:
            pp_training = False
        # force preprocessing to run on the cpu
        with tf.device("/cpu:0"):
            num_threads = int(parameters['PP_THREADS'])
            queuetype = 'train_q'
            enqueue_threads = [
                threading.Thread(target=add_to_queue,
                                 args=(session, queue_op, coordinator,
                                       normed_outputs, pix_values, sn_range,
                                       interp_sn, y_off_range, fetch_size,
                                       randomize, pp_training, queuetype))
                for i in range(num_threads)
            ]
            for i in enqueue_threads:
                i.start()
        # delay running training by 1 second in order to prefill the queue
        #time.sleep(1)
        feed_dict_train = {
            learning_rate: learn_rate,
            dropout: keep_prob,
            batch_size: bsize,
            select_queue: 0
        }
        # controls early stopping threshold
        early_stop_counter = 0
        # stores completed iterations to pass to second stage of training for tensorboard visualization
        completed_iterations = 0
        # stores best cost in order to control early stopping
        best_cost = 0.0
        # fetches early stop threshold if early stopping is enabled, otherwise just sets early stop iters to total iters
        if parameters['EARLY_STOP'] == 'YES':
            early_stop_threshold = int(parameters['ES_SAMPLE'])
        else:
            early_stop_threshold = iterations
        # main training loop
        # NOTE: the loop never breaks; once the counter exceeds the threshold
        # the remaining iterations are no-ops.
        for i in range(iterations):
            # only continues if early stop threshold has not been met
            if early_stop_counter <= early_stop_threshold:
                completed_iterations += 1
                # first iteration will store the cost under best_cost for xval if xval specified, current batch if not
                if i == 0:
                    if parameters['TRAINING_XVAL'] == 'YES':
                        init_cost = xval_subloop(learn_rate, xv_size, i,
                                                 inherit_iter_count)
                        best_cost = init_cost
                        print('Initial xval cost: ' +
                              str(round(init_cost, 2)))
                        session.run(optimize, feed_dict=feed_dict_train)
                    else:
                        init_cost = session.run(cost,
                                                feed_dict=feed_dict_train)
                        best_cost = init_cost
                        print('Initial batch cost: ' +
                              str(round(init_cost, 2)))
                        session.run(optimize, feed_dict=feed_dict_train)
                # if not first iteration but iteration corresponding to sample interval, runs diagnostics
                elif (i + 1) % int(parameters['SAMPLE_INTERVAL']) == 0:
                    test_cost = session.run(cost, feed_dict=feed_dict_train)
                    session.run(optimize, feed_dict=feed_dict_train)
                    if parameters['TRAINING_XVAL'] == 'YES':
                        xvcost = xval_subloop(learn_rate, xv_size, i,
                                              inherit_iter_count)
                        print('done with batch ' + str(int(i + 1)) + '/' +
                              str(iterations) + ', current cost: ' +
                              str(round(test_cost, 2)) + ', xval cost: ' +
                              str(xvcost))
                    else:
                        # if xval set not specified, will just calculate cost for current batch and print
                        print('done with batch ' + str(int(i + 1)) + '/' +
                              str(iterations) + ', current cost: ' +
                              str(round(test_cost, 2)))
                    if parameters['EARLY_STOP'] == 'YES':
                        # if early stopping desired, will compare xval or current batch cost to the best cost
                        if parameters['TRAINING_XVAL'] == 'YES':
                            if float(xvcost) >= best_cost:
                                early_stop_counter += 1
                            else:
                                # reset early stopping counter if current cost is better than previous best cost
                                best_cost = xvcost
                                early_stop_counter = 0
                        else:
                            if float(test_cost) >= best_cost:
                                early_stop_counter += 1
                            else:
                                best_cost = test_cost
                                early_stop_counter = 0
                    if parameters['TBOARD_OUTPUT'] == 'YES':
                        # if tensorboard logging enabled, stores visualization data
                        outlog = session.run(merged_summaries,
                                             feed_dict=feed_dict_train)
                        writer.add_summary(outlog, i + 1 + inherit_iter_count)
                else:
                    # just runs optimize if none of the above criteria are met
                    session.run(optimize, feed_dict=feed_dict_train)
                if early_stop_counter == early_stop_threshold or (
                        i == (iterations - 1) and early_stop_counter <
                    (early_stop_threshold + 1)):
                    # if end of training reached, print a message and optionally save timeline
                    if parameters['TIMELINE_OUTPUT'] == 'YES':
                        # if timeline desired, prints to json file for most recent iteration
                        # NOTE(review): run_metadata is not passed to any
                        # session.run call in this function, so the trace may
                        # contain no step stats — TODO confirm.
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                        )
                        with open(parameters['LOG_LOC'] + 'timeline_01.json',
                                  'w') as f:
                            f.write(chrome_trace)
                        print('Timeline saved as timeline_01.json in folder ' +
                              parameters['LOG_LOC'])
                    print('Early stop threshold or specified iterations met')
                    # push the counter past the threshold so the guard at the
                    # top of the loop skips all remaining iterations
                    early_stop_counter += 1
        # close the preprocessing queues and join threads
        coordinator.request_stop()
        session.run(input_queue.close(cancel_pending_enqueues=True))
        if parameters['TRAINING_XVAL'] == 'YES':
            session.run(xval_queue.close(cancel_pending_enqueues=True))
        coordinator.join(enqueue_threads)
        end_time = time.time()
        # return the run time and total completed iterations
        return str(round(end_time - begin_time, 2)), completed_iterations

    # control flow for training - load in save location, etc. launch tensorflow session, prepare saver
    model_dir = parameters['SAVE_LOC']
    # `session`, `writer` and `merged_summaries` are looked up by the nested
    # helpers at call time, so rebinding them for stage 2 below takes effect.
    session = tf.Session(config=tf.ConfigProto())
    # NOTE(review): `options` is assigned but not used within this function.
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    # if tboard output desired start a writer
    if parameters['TBOARD_OUTPUT'] == 'YES':
        writer = tf.summary.FileWriter(parameters['LOG_LOC'] + 'logs',
                                       session.graph)
        merged_summaries = tf.summary.merge([
            cv1w_sum, cv1b_sum, cv1a_sum, fc1w_sum, fc1b_sum, fc1a_sum,
            fc2w_sum, fc2b_sum, fc2a_sum, fc3w_sum, fc3a_sum, batch_cost_sum
        ])
    # train the network on input training data
    execute_time, finished_iters = train_loop(int(
        parameters['NUM_TRAIN_ITERS1']),
                                              float(parameters['LEARN_RATE1']),
                                              float(parameters['KEEP_PROB1']),
                                              bsize_train1,
                                              inherit_iter_count=0)
    # save model and the graph and close session
    save_path = saver.save(session, model_dir + 'save.ckpt')
    session.close()
    print('Training stage 1 finished in ' + execute_time +
          's, model and graph saved in ' + save_path)
    if parameters['DO_TRAIN2'] == 'YES':
        # if multistage training specified, repeat above process except for the metagraph saving
        bsize_train2 = int(parameters['BATCH_SIZE2'])
        print('Training stage 2 beginning, loading model...')
        session = tf.Session()
        options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        saver = tf.train.Saver()
        saver.restore(session, model_dir + 'save.ckpt')
        if parameters['TBOARD_OUTPUT'] == 'YES':
            writer = tf.summary.FileWriter(parameters['LOG_LOC'] + 'logs',
                                           session.graph)
            merged_summaries = tf.summary.merge([
                cv1w_sum, cv1b_sum, cv1a_sum, fc1w_sum, fc1b_sum, fc1a_sum,
                fc2w_sum, fc2b_sum, fc2a_sum, fc3w_sum, fc3a_sum,
                batch_cost_sum
            ])
        print('Model loaded, beginning training...')
        execute_time, _ = train_loop(int(parameters['NUM_TRAIN_ITERS2']),
                                     float(parameters['LEARN_RATE2']),
                                     float(parameters['KEEP_PROB2']),
                                     bsize_train2,
                                     inherit_iter_count=finished_iters)
        # NOTE(review): stage 1 saves to model_dir + 'save.ckpt' but this
        # call uses model_dir alone as the checkpoint prefix — confirm
        # whether the stage-2 weights are meant to land in a different file.
        save_path = saver.save(session, model_dir)
        session.close()
        print('Training stage 2 finished in ' + execute_time +
              's, model saved in ' + save_path)
    # freeze the model and save it to disk after training
    freeze_model(parameters)
def fit(self, training_set, validation_set):
    """Train the model on ``training_set`` with periodic validation and saves.

    Builds the metric, loss and training ops on top of the last model output,
    then iterates ``self.epochs`` times over the training set in consecutive
    slices of ``self.batch_size`` items. A trailing partial batch is skipped,
    because the fed initial states have a fixed ``self.batch_size`` first
    dimension.

    Args:
        training_set: sequence supporting ``len()`` and slicing; each slice
            is converted to inputs and labels by ``self.load_batch``.
        validation_set: forwarded unchanged to ``self.validation_eval``.

    Returns:
        None.
    """
    outputs = self.model
    # Wrap the last output in print ops so labels and predictions are
    # emitted whenever it is evaluated.
    outputs[-1] = tf.Print(outputs[-1], [self.label],
                           summarize=self.batch_size * self.n_output,
                           message="Label: ")
    outputs[-1] = tf.Print(outputs[-1], [outputs[-1]],
                           summarize=self.batch_size * self.n_output,
                           message="Prediction: ")

    steps_per_output = self.time_steps if self.is_sequence_output else 1
    metrics = SequenceModel.compute_metrics(outputs[-1], self.label,
                                            steps_per_output)
    loss = SequenceModel.compute_loss(outputs[-1], self.label,
                                      self.loss_name, steps_per_output)
    train_op = SequenceModel.compute_gradient(self, loss, self.global_step)

    # Merge summaries
    summaries = tf.summary.merge_all()

    # Initialize variables
    init_g = tf.global_variables_initializer()
    init_l = tf.local_variables_initializer()

    with tf.Session() as sess:
        run_opts = tf.RunOptions(report_tensor_allocations_upon_oom=True)
        sess.run(init_g)
        sess.run(init_l)
        self.train_writer.add_graph(sess.graph)

        # Load existing model
        if self.from_pretrained:
            SequenceModel.load(self, sess)

        for _ in range(self.epochs):
            # `i` is the exclusive end index of the current batch. The stop
            # value is len + 1 so the last full batch is included when the
            # set size is an exact multiple of the batch size (range()
            # excludes its stop value, which used to drop that batch).
            for i in range(self.batch_size,
                           len(training_set) + 1, self.batch_size):
                # NOTE(review): this adds new ops to the graph on every
                # iteration, so the graph keeps growing; left unchanged
                # because the resulting `step` value is used downstream.
                self.global_step = tf.add(self.global_step, tf.constant(1))

                time0 = time()
                batch_input, batch_label = self.load_batch(
                    training_set[i - self.batch_size:i])
                zero_state = (self.batch_size, self.units_per_cell)
                _, loss_value, summaries_value, step = sess.run(
                    [train_op, loss, summaries, self.global_step],
                    feed_dict={
                        self.input: batch_input,
                        self.label: batch_label,
                        self.initial_output: np.zeros(shape=zero_state,
                                                      dtype=np.float32),
                        self.initial_cell: np.zeros(shape=zero_state,
                                                    dtype=np.float32)
                    },
                    options=run_opts,
                )
                self.train_writer.add_summary(summaries_value, step)
                time1 = time()

                if self.logger:
                    self.logger.info(
                        "Cost = {0} for batch {1} in {2:.2f} seconds".format(
                            loss_value, i / self.batch_size, time1 - time0))

                if i % self.validation_step == 0:
                    self.validation_eval(sess, summaries, validation_set,
                                         metrics, step)

                if i % self.checkpoint_step == 0:
                    SequenceModel.save(self, sess, step=self.global_step)
def learn(self, total_timesteps, callback=None, log_interval=100, tb_log_name="PPO1",
          reset_num_timesteps=True):
    """Run the PPO1 training loop until ``total_timesteps`` have been collected.

    Each iteration pulls one segment from the rollout generator, computes
    advantages, runs ``self.optim_epochs`` passes of minibatch updates through
    ``self.adam``, evaluates the losses once more, and records statistics
    gathered across MPI workers.

    Args:
        total_timesteps: the loop stops once the accumulated (all-reduced)
            timestep count reaches this value.
        callback: passed through ``self._init_callback``; its
            ``on_training_start`` / ``on_training_end`` hooks are called here
            and it is handed to the rollout generator.
        log_interval: not referenced in this method.
        tb_log_name: name handed to ``TensorboardWriter``.
        reset_num_timesteps: forwarded to ``self._init_num_timesteps``.

    Returns:
        ``self``.

    Raises:
        NotImplementedError: if ``self.schedule`` is neither ``'constant'``
            nor ``'linear'``.
    """
    new_tb_log = self._init_num_timesteps(reset_num_timesteps)
    callback = self._init_callback(callback)

    with SetVerbosity(self.verbose), TensorboardWriter(self.graph, self.tensorboard_log, tb_log_name, new_tb_log) \
            as writer:
        self._setup_learn()

        assert issubclass(self.policy, ActorCriticPolicy), "Error: the input policy for the PPO1 model must be " \
                                                           "an instance of common.policies.ActorCriticPolicy."

        with self.sess.as_default():
            self.adam.sync()
            # The callback receives this frame's locals/globals, so local
            # names defined up to this point are visible to it.
            callback.on_training_start(locals(), globals())

            # Prepare for rollouts
            seg_gen = traj_segment_generator(self.policy_pi, self.env, self.timesteps_per_actorbatch,
                                             callback=callback)

            episodes_so_far = 0
            timesteps_so_far = 0
            iters_so_far = 0
            t_start = time.time()

            # rolling buffer for episode lengths
            len_buffer = deque(maxlen=100)
            # rolling buffer for episode rewards
            reward_buffer = deque(maxlen=100)

            while True:
                if timesteps_so_far >= total_timesteps:
                    break

                # learning-rate multiplier for this iteration
                if self.schedule == 'constant':
                    cur_lrmult = 1.0
                elif self.schedule == 'linear':
                    cur_lrmult = max(1.0 - float(timesteps_so_far) / total_timesteps, 0)
                else:
                    raise NotImplementedError

                logger.log("********** Iteration %i ************" % iters_so_far)

                seg = seg_gen.__next__()

                # Stop training early (triggered by the callback)
                if not seg.get('continue_training', True):  # pytype: disable=attribute-error
                    break

                add_vtarg_and_adv(seg, self.gamma, self.lam)

                # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
                observations, actions = seg["observations"], seg["actions"]
                atarg, tdlamret = seg["adv"], seg["tdlamret"]

                # true_rew is the reward without discount
                if writer is not None:
                    total_episode_reward_logger(self.episode_reward,
                                                seg["true_rewards"].reshape((self.n_envs, -1)),
                                                seg["dones"].reshape((self.n_envs, -1)),
                                                writer, self.num_timesteps)

                # predicted value function before update
                vpredbefore = seg["vpred"]

                # standardized advantage function estimate
                atarg = (atarg - atarg.mean()) / atarg.std()
                dataset = Dataset(dict(ob=observations, ac=actions, atarg=atarg, vtarg=tdlamret),
                                  shuffle=not self.policy.recurrent)
                # fall back to one batch spanning the first dimension of the observations
                optim_batchsize = self.optim_batchsize or observations.shape[0]

                # set old parameter values to new parameter values
                self.assign_old_eq_new(sess=self.sess)
                logger.log("Optimizing...")
                logger.log(fmt_row(13, self.loss_names))

                # Here we do a bunch of optimization epochs over the data
                for k in range(self.optim_epochs):
                    # list of tuples, each of which gives the loss for a minibatch
                    losses = []
                    for i, batch in enumerate(dataset.iterate_once(optim_batchsize)):
                        # step index used to tag tensorboard summaries/metadata
                        steps = (self.num_timesteps +
                                 k * optim_batchsize +
                                 int(i * (optim_batchsize / len(dataset.data_map))))
                        if writer is not None:
                            # run loss backprop with summary, but once every 10 runs save the metadata
                            # (memory, compute time, ...)
                            if self.full_tensorboard_log and (1 + k) % 10 == 0:
                                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                                run_metadata = tf.RunMetadata()
                                summary, grad, *newlosses = self.lossandgrad(batch["ob"], batch["ob"], batch["ac"],
                                                                             batch["atarg"], batch["vtarg"],
                                                                             cur_lrmult, sess=self.sess,
                                                                             options=run_options,
                                                                             run_metadata=run_metadata)
                                writer.add_run_metadata(run_metadata, 'step%d' % steps)
                            else:
                                summary, grad, *newlosses = self.lossandgrad(batch["ob"], batch["ob"], batch["ac"],
                                                                             batch["atarg"], batch["vtarg"],
                                                                             cur_lrmult, sess=self.sess)
                            writer.add_summary(summary, steps)
                        else:
                            _, grad, *newlosses = self.lossandgrad(batch["ob"], batch["ob"], batch["ac"],
                                                                   batch["atarg"], batch["vtarg"], cur_lrmult,
                                                                   sess=self.sess)

                        self.adam.update(grad, self.optim_stepsize * cur_lrmult)
                        losses.append(newlosses)
                    logger.log(fmt_row(13, np.mean(losses, axis=0)))

                logger.log("Evaluating losses...")
                losses = []
                for batch in dataset.iterate_once(optim_batchsize):
                    newlosses = self.compute_losses(batch["ob"], batch["ob"], batch["ac"], batch["atarg"],
                                                    batch["vtarg"], cur_lrmult, sess=self.sess)
                    losses.append(newlosses)
                mean_losses, _, _ = mpi_moments(losses, axis=0)
                logger.log(fmt_row(13, mean_losses))
                for (loss_val, name) in zipsame(mean_losses, self.loss_names):
                    logger.record_tabular("loss_" + name, loss_val)
                logger.record_tabular("ev_tdlam_before", explained_variance(vpredbefore, tdlamret))

                # local values
                lrlocal = (seg["ep_lens"], seg["ep_rets"])

                # list of tuples
                listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)
                lens, rews = map(flatten_lists, zip(*listoflrpairs))
                len_buffer.extend(lens)
                reward_buffer.extend(rews)
                if len(len_buffer) > 0:
                    logger.record_tabular("EpLenMean", np.mean(len_buffer))  # mean number of steps per episode
                    logger.record_tabular("EpRewMean", np.mean(reward_buffer))  # mean final return per episode
                logger.record_tabular("EpThisIter", len(lens))  # number of episodes finished in this iteration
                episodes_so_far += len(lens)
                # timesteps are summed over all MPI workers
                current_it_timesteps = MPI.COMM_WORLD.allreduce(seg["total_timestep"])
                timesteps_so_far += current_it_timesteps
                self.num_timesteps += current_it_timesteps
                iters_so_far += 1
                logger.record_tabular("EpisodesSoFar", episodes_so_far)
                logger.record_tabular("TimestepsSoFar", self.num_timesteps)
                logger.record_tabular("TimeElapsed", time.time() - t_start)
                # only rank 0 dumps the table
                if self.verbose >= 1 and MPI.COMM_WORLD.Get_rank() == 0:
                    logger.dump_tabular()
    callback.on_training_end()
    return self
def run_inference_for_single_image(image, graph,
                                   trace_path='timeline_01.json'):
    """Run one detection forward pass on ``image`` and optionally profile it.

    Args:
        image: Image array. A batch dimension is prepended before it is fed
            to the ``image_tensor:0`` placeholder; ``image.shape[0]`` and
            ``image.shape[1]`` are passed as the target size when masks are
            reframed.
        graph: ``tf.Graph`` that contains the detection model.
        trace_path: File that receives the Chrome trace of the run. The
            default keeps the previous hard-coded location. Pass ``None`` or
            an empty string to run without full tracing and without writing
            a trace file.

    Returns:
        dict with the batch dimension removed: ``num_detections`` (int),
        ``detection_classes`` (uint8 array), ``detection_boxes``,
        ``detection_scores`` and, when the graph provides it,
        ``detection_masks``.
    """
    profiling = bool(trace_path)
    with graph.as_default():
        with tf.Session() as sess:
            # Options for profiling; None makes sess.run behave as a plain run.
            if profiling:
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
            else:
                options = None
                run_metadata = None

            # Get handles to input and output tensors
            default_graph = tf.get_default_graph()
            all_tensor_names = {
                output.name
                for op in default_graph.get_operations()
                for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = default_graph.get_tensor_by_name(
                        tensor_name)

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates
                # to image coordinates and fit the image size.
                real_num_detection = tf.cast(
                    tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = (
                    utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, image.shape[0],
                        image.shape[1]))
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)

            image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(
                tensor_dict,
                options=options,
                run_metadata=run_metadata,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            if profiling:
                # Store the profile data as a Chrome trace.
                fetched_timeline = timeline.Timeline(run_metadata.step_stats)
                chrome_trace = fetched_timeline.generate_chrome_trace_format()
                with open(trace_path, 'w') as f:
                    f.write(chrome_trace)

            # all outputs are float32 numpy arrays, so convert types as
            # appropriate
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
                0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]
    return output_dict
def train():
    """Train the sequence-to-sequence model with periodic dev evaluation.

    Reads and buckets the training/dev data described by ``FLAGS``, builds
    the model inside a ``tf.Session`` and runs ``steps_per_epoch * n_epoch``
    training steps. Every ``steps_per_report`` steps it logs throughput;
    every ``steps_per_checkpoint`` steps it evaluates dev perplexity,
    optionally writes summaries and checkpoints, saves the best model so far
    and applies patience-based early stopping.

    When ``FLAGS.profile`` is set, a Chrome trace is written to
    ``timeline.json`` at the first report and the process exits.
    """
    # Read Data
    mylog_section("READ DATA")

    from_train, to_train, from_dev, to_dev, _, _ = data_utils.prepare_data(
        FLAGS.data_cache_dir, FLAGS.train_path_from, FLAGS.train_path_to,
        FLAGS.dev_path_from, FLAGS.dev_path_to, FLAGS.from_vocab_size,
        FLAGS.to_vocab_size)

    train_data_bucket = read_data(from_train, to_train)
    dev_data_bucket = read_data(from_dev, to_dev)
    _, _, real_vocab_size_from, real_vocab_size_to = data_utils.get_vocab_info(
        FLAGS.data_cache_dir)

    FLAGS._buckets = _buckets
    FLAGS.real_vocab_size_from = real_vocab_size_from
    FLAGS.real_vocab_size_to = real_vocab_size_to

    # train_n_tokens = total training target size
    train_n_tokens = np.sum(
        [np.sum([len(items[1]) for items in x]) for x in train_data_bucket])

    train_bucket_sizes = [
        len(train_data_bucket[b]) for b in range(len(_buckets))
    ]
    train_total_size = float(sum(train_bucket_sizes))
    # Cumulative share of the data held by buckets 0..i.
    train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in range(len(train_bucket_sizes))
    ]
    dev_bucket_sizes = [len(dev_data_bucket[b]) for b in range(len(_buckets))]
    dev_total_size = int(sum(dev_bucket_sizes))

    mylog_section("REPORT")
    # steps
    batch_size = FLAGS.batch_size
    n_epoch = FLAGS.n_epoch
    steps_per_epoch = int(train_total_size / batch_size)
    # At least 1: with fewer than two batches of data the half-epoch interval
    # would be 0 and the modulo in the training loop would divide by zero.
    steps_per_checkpoint = max(1, int(steps_per_epoch / 2))
    total_steps = steps_per_epoch * n_epoch

    # reports
    mylog("from_vocab_size: {}".format(FLAGS.from_vocab_size))
    mylog("to_vocab_size: {}".format(FLAGS.to_vocab_size))
    mylog("_buckets: {}".format(FLAGS._buckets))
    mylog("Train:")
    mylog("total: {}".format(train_total_size))
    mylog("bucket sizes: {}".format(train_bucket_sizes))
    mylog("Dev:")
    mylog("total: {}".format(dev_total_size))
    mylog("bucket sizes: {}".format(dev_bucket_sizes))
    mylog("Steps_per_epoch: {}".format(steps_per_epoch))
    mylog("Total_steps:{}".format(total_steps))
    mylog("Steps_per_checkpoint: {}".format(steps_per_checkpoint))

    mylog_section("IN TENSORFLOW")

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = FLAGS.allow_growth

    with tf.Session(config=config) as sess:

        # runtime profile
        if FLAGS.profile:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        else:
            run_options = None
            run_metadata = None

        mylog_section("MODEL/SUMMARY/WRITER")

        mylog("Creating Model.. (this can take a few minutes)")
        model = create_model(sess, run_options, run_metadata)

        if FLAGS.with_summary:
            mylog("Creating ModelSummary")
            modelSummary = ModelSummary()

            mylog("Creating tf.summary.FileWriter")
            summaryWriter = tf.summary.FileWriter(
                os.path.join(FLAGS.summary_dir, "train.summary"), sess.graph)

        mylog_section("All Variables")
        show_all_variables()

        # Data Iterators
        mylog_section("Data Iterators")

        dite = DataIterator(model, train_data_bucket,
                            len(train_buckets_scale), batch_size,
                            train_buckets_scale)

        iteType = 0
        if iteType == 0:
            mylog("Itetype: withRandom")
            ite = dite.next_random()
        elif iteType == 1:
            mylog("Itetype: withSequence")
            ite = dite.next_sequence()

        # statistics during training
        loss = 0.0
        current_step = 0
        low_ppx = float("inf")
        steps_per_report = 30
        n_targets_report = 0
        report_time = 0
        n_valid_words = 0
        patience = FLAGS.patience

        mylog_section("TRAIN")

        while current_step < total_steps:

            # start
            start_time = time.time()

            # data and train (next() works on both Python 2 and 3 iterators)
            (source_inputs, target_inputs, target_outputs, target_weights,
             bucket_id) = next(ite)

            L = model.step(sess, source_inputs, target_inputs, target_outputs,
                           target_weights, bucket_id)

            # loss
            loss += L
            current_step += 1
            n_valid_words += np.sum(target_weights)

            # for report
            report_time += (time.time() - start_time)
            n_targets_report += np.sum(target_weights)

            if current_step % steps_per_report == 0:
                sect_name = "STEP {}".format(current_step)
                msg = "StepTime: {:.2f} sec Speed: {:.2f} targets/s Total_targets: {}".format(
                    report_time / steps_per_report,
                    n_targets_report * 1.0 / report_time, train_n_tokens)
                mylog_line(sect_name, msg)

                report_time = 0
                n_targets_report = 0

                # Create the Timeline object, and write it to a json
                if FLAGS.profile:
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open('timeline.json', 'w') as f:
                        f.write(ctf)
                    # Profiling runs stop after the first trace is written.
                    raise SystemExit

            if current_step % steps_per_checkpoint == 0:

                i_checkpoint = current_step // steps_per_checkpoint

                # train_ppx
                loss = loss / n_valid_words
                train_ppx = math.exp(
                    float(loss)) if loss < 300 else float("inf")
                learning_rate = model.learning_rate.eval()

                # dev_ppx
                _, dev_ppx = evaluate(sess, model, dev_data_bucket)

                # report
                sect_name = "CHECKPOINT {} STEP {}".format(
                    i_checkpoint, current_step)
                msg = "Learning_rate: {:.4f} Dev_ppx: {:.2f} Train_ppx: {:.2f}".format(
                    learning_rate, dev_ppx, train_ppx)
                mylog_line(sect_name, msg)

                if FLAGS.with_summary:
                    # save summary
                    _summaries = modelSummary.step_record(
                        sess, train_ppx, dev_ppx)
                    for _summary in _summaries:
                        summaryWriter.add_summary(_summary, i_checkpoint)

                # save model per checkpoint
                if FLAGS.saveCheckpoint:
                    checkpoint_path = os.path.join(FLAGS.saved_model_dir,
                                                   "model")
                    s = time.time()
                    model.saver.save(sess,
                                     checkpoint_path,
                                     global_step=i_checkpoint,
                                     write_meta_graph=False)
                    msg = "Model saved using {:.2f} sec at {}".format(
                        time.time() - s, checkpoint_path)
                    mylog_line(sect_name, msg)

                # save best model
                if dev_ppx < low_ppx:
                    patience = FLAGS.patience
                    low_ppx = dev_ppx
                    checkpoint_path = os.path.join(FLAGS.saved_model_dir,
                                                   "best")
                    s = time.time()
                    model.best_saver.save(sess,
                                          checkpoint_path,
                                          global_step=0,
                                          write_meta_graph=False)
                    msg = "Model saved using {:.2f} sec at {}".format(
                        time.time() - s, checkpoint_path)
                    mylog_line(sect_name, msg)
                else:
                    patience -= 1

                if patience <= 0:
                    mylog("Training finished. Running out of patience.")
                    break

                # Zero the accumulated loss for the next checkpoint window.
                loss, n_valid_words = 0.0, 0
merged = tf.summary.merge_all() projector_writer = tf.summary.FileWriter(DTR + 'projector', sess.graph) save = tf.train.sever() config = projector.ProjectorConfig() embed = config.embedding.add() embed.tensor_name = embedding.name embed.metadata_path = DTR + 'projector/metadata.tsv' embed.sprite.image_path = DTR + 'projector/data/mnist_10k_sprite.png' embed.sprite.single_image_dim.extend([28, 28]) projector.visualize_embeddings(projector_writer, config) for i in range(max_step): #每个批次100个样本 batch_xs, batch_ys = mnist.train.next_batch(100) run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() summary, _ = sess.run([merged, train_step], feed_dict={ x: batch_xs, y: batch_ys }, options=run_options, run_metadata=run_metadata) projector_writer.add_run_metadata(run_metadata, 'step%03d' % i) projector_writer.add_summary(summary, i) if i % 100 == 0: acc = sess.run(accuracy, feed_dict={ x: mnist.test.images,
def beam_decode():
    """Beam-search decode the test set and write the best hypotheses.

    Loads vocabulary information and the bucketed test data, restores the
    model in a ``tf.Session`` and, for every source sentence, grows
    ``FLAGS.beam_size`` hypotheses step by step with ``model.beam_step``.
    Hypotheses that emit EOS after reaching the minimum target length are
    collected; the highest-scoring one per sentence is kept and all of them
    are finally written with ``data_utils.ids_to_tokens``.
    """
    mylog("Reading Data...")

    vocab_info = data_utils.get_vocab_info(FLAGS.data_cache_dir)
    (from_vocab_path, to_vocab_path, real_vocab_size_from,
     real_vocab_size_to) = vocab_info

    FLAGS._buckets = _buckets
    FLAGS._beam_buckets = _beam_buckets
    FLAGS.real_vocab_size_from = real_vocab_size_from
    FLAGS.real_vocab_size_to = real_vocab_size_to

    from_test = data_utils.prepare_test_data(FLAGS.data_cache_dir,
                                             FLAGS.test_path_from,
                                             from_vocab_path)

    test_data_bucket, test_data_order = read_data_test(from_test)

    test_bucket_sizes = []
    for b in xrange(len(_beam_buckets)):
        test_bucket_sizes.append(len(test_data_bucket[b]))
    test_total_size = int(sum(test_bucket_sizes))

    # reports
    mylog("from_vocab_size: {}".format(FLAGS.from_vocab_size))
    mylog("to_vocab_size: {}".format(FLAGS.to_vocab_size))
    mylog("_beam_buckets: {}".format(FLAGS._beam_buckets))
    mylog("BEAM_DECODE:")
    mylog("total: {}".format(test_total_size))
    mylog("buckets: {}".format(test_bucket_sizes))

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = FLAGS.allow_growth

    with tf.Session(config=config) as sess:

        # runtime profile
        run_options = None
        run_metadata = None
        if FLAGS.profile:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        mylog("Creating Model")
        model = create_model(sess, run_options, run_metadata)
        show_all_variables()

        sess.run(model.dropoutRate.assign(1.0))

        start_id = 0
        n_steps = 0
        batch_size = FLAGS.batch_size

        dite = DataIterator(model,
                            test_data_bucket,
                            len(_beam_buckets),
                            batch_size,
                            None,
                            data_order=test_data_order)
        ite = dite.next_original()

        targets = []

        for i_sent, (source_inputs, bucket_id, length) in enumerate(ite):

            print("--- decoding {}/{} sent ---".format(i_sent,
                                                       test_total_size))

            results = []  # finished hypotheses as (sentence, score)

            # Per-beam state carried from one decoding step to the next.
            scores = [0.0] * FLAGS.beam_size
            sentences = [[] for _ in xrange(FLAGS.beam_size)]
            beam_parent = range(FLAGS.beam_size)
            target_inputs = [data_utils.GO_ID] * FLAGS.beam_size

            min_target_length = int(length * FLAGS.min_ratio) + 1
            max_target_length = int(
                length * FLAGS.max_ratio) + 1  # include EOS
            last_step = max_target_length - 1

            for i in xrange(max_target_length):
                # The first step feeds the source; later steps feed the
                # parent beam of each hypothesis instead.
                step_kwargs = {'index': i, 'target_inputs': target_inputs}
                if i == 0:
                    step_kwargs['sources'] = source_inputs
                else:
                    step_kwargs['beam_parent'] = beam_parent
                top_value, top_index, eos_value = model.beam_step(
                    sess, bucket_id, **step_kwargs)

                # top_value = [array[batch_size, batch_size]]
                # top_index = [array[batch_size, batch_size]]
                # eos_value = [array[batch_size, 1] ]

                # expand: only one live row exists before the first step
                nrow = 1 if i == 0 else FLAGS.beam_size

                if i == last_step:
                    # last_step: every hypothesis is forced to end with EOS
                    global_queue = [
                        (scores[row] + np.log(eos_value[0][row, 0]), row,
                         data_utils.EOS_ID) for row in xrange(nrow)
                    ]
                else:
                    global_queue = [
                        (scores[row] + np.log(top_value[0][row, col]), row,
                         top_index[0][row, col]) for row in xrange(nrow)
                        for col in xrange(top_index[0].shape[1])
                    ]

                # Best score first.
                global_queue.sort(key=lambda x: -x[0])

                if FLAGS.print_beam:
                    print("--------- Step {} --------".format(i))

                target_inputs = []
                beam_parent = []
                scores = []
                temp_sentences = []

                for j, (score, beam_index,
                        word_index) in enumerate(global_queue):
                    if word_index == data_utils.EOS_ID:
                        # Finished hypotheses never stay in the beam; they
                        # are recorded only if they are long enough.
                        long_enough = (len(sentences[beam_index]) + 1 >=
                                       min_target_length)
                        if long_enough:
                            results.append(
                                (sentences[beam_index] + [word_index], score))
                            if FLAGS.print_beam:
                                print("*Beam:{} Father:{} word:{} score:{}".
                                      format(j, beam_index, word_index,
                                             score))
                        continue

                    if FLAGS.print_beam:
                        print("Beam:{} Father:{} word:{} score:{}".format(
                            j, beam_index, word_index, score))
                    beam_parent.append(beam_index)
                    target_inputs.append(word_index)
                    scores.append(score)
                    temp_sentences.append(sentences[beam_index] +
                                          [word_index])

                    if len(scores) >= FLAGS.beam_size:
                        break

                # can not fill beam_size, just repeat the last one
                while len(scores) < FLAGS.beam_size and i < last_step:
                    beam_parent.append(beam_parent[-1])
                    target_inputs.append(target_inputs[-1])
                    scores.append(scores[-1])
                    temp_sentences.append(temp_sentences[-1])

                sentences = temp_sentences

            # keep the 1 best
            results.sort(key=lambda x: -x[1])
            targets.append(results[0][0])

        data_utils.ids_to_tokens(targets, to_vocab_path, FLAGS.decode_output)
def timeGraph(gdef,
              batch_size=128,
              image_folder='images',
              nvidiasmi='output.out',
              latencyF='latency.txt',
              StopTime=100):
    """Time batched inference of ``gdef`` over the JPEG files in a folder.

    Builds a ``tf.data`` pipeline over ``image_folder/*.JPEG``, imports
    ``gdef`` with its ``input`` mapped to the pipeline, and runs the
    ``InceptionV3/Predictions/Softmax`` output once per batch while
    ``nvidia-smi`` logs GPU power draw in the background.

    Args:
        gdef: GraphDef to import and time.
        batch_size: Number of images per ``sess.run`` call.
        image_folder: Directory searched for ``*.JPEG`` files.
        nvidiasmi: Output file handed to ``nvidia-smi -f``.
        latencyF: File that receives one latency (seconds) per line; it is
            appended to when it already exists.
        StopTime: Stop iterating once this many seconds have elapsed since
            the loop started.

    Returns:
        Tuple ``(timings, True, last_output, None)`` where ``timings`` holds
        the total wall time of the loop and ``last_output`` is the first
        fetched value of the final run, or ``None`` when no run happened.
    """
    tf.logging.info("Starting execution")
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    tf.reset_default_graph()
    g = tf.Graph()
    outlist = []
    with g.as_default():
        imageString = sorted(glob.glob(image_folder + '/*.JPEG'))
        imageCounter = len(imageString)
        # Keep a string-typed placeholder value when the folder is empty so
        # that the input pipeline can still be constructed.
        if imageString:
            imagenstack = tf.stack(imageString)
        else:
            imagenstack = tf.constant([""])
        dataset = tf.data.Dataset.from_tensor_slices(imagenstack)
        dataset = dataset.map(_parse_function)
        dataset = dataset.batch(batch_size)
        dataset = dataset.repeat()
        iterator = dataset.make_one_shot_iterator()
        next_element = iterator.get_next()
        out = tf.import_graph_def(
            graph_def=gdef,
            input_map={"input": next_element},
            return_elements=["InceptionV3/Predictions/Softmax"])
        out = out[0].outputs[0]
        print("\n\n image out", out, "\n\n")
        outlist.append(out)
        print("\n\n image out", outlist[-1], "\n\n")

    timings = []
    # Stays None when the loop body never runs (e.g. no images found).
    val = None
    with tf.Session(graph=g,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        num_iters = int(math.ceil(imageCounter / batch_size))
        print("\n\n\nNumber of Iterations = ", num_iters)

        nvidiasmiCommand = "nohup nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -l 1 -f " + nvidiasmi + " &"
        os.system(nvidiasmiCommand)
        try:
            tstart = time.time()
            # Mode 'a' appends to an existing file and creates a missing one,
            # and the with-block closes the file even if a run fails.
            with open(latencyF, 'a') as runtimeResults:
                start_process = time.time()
                for k in range(num_iters):
                    tic = time.time()
                    val = sess.run(outlist)
                    tac = time.time()
                    runtimeResults.write(str(tac - tic))
                    runtimeResults.write("\n")
                    if (tac - start_process) > StopTime:
                        break

                # printing labels (disabled by default)
                printLables = 0
                if printLables == 1 and val is not None:
                    with open('resultLables_InceptionV3.txt',
                              'a') as highscore:
                        for index1 in range(0,
                                            len(topX(val[0], f.topN)[1])):
                            highscore.write(
                                str(
                                    getLabels(
                                        labels,
                                        topX(val[0], f.topN)[1][index1])))
                            highscore.write("\n")

                timings.append(time.time() - tstart)
        finally:
            # Always stop the background power logger, even on failure.
            os.system("pkill nvidia-smi")

    tf.logging.info("Timing loop done!")
    return timings, True, (val[0] if val is not None else None), None
def testing(self, sess, test_writer):
    """Run the test loop of the network and optionally visualise results.

    Restores weights from ``self.args.weights`` (must end with ``.ckpt``),
    then for each of ``self.epoch`` minibatches runs the classification
    score, the 3D proposals and the merged summaries with full tracing
    enabled.

    Args:
        sess: ``tf.Session`` used to restore the weights and run the graph.
        test_writer: Summary writer that receives the merged summaries when
            ``cfg.TEST.TENSORBOARD`` is set.

    Returns:
        ``0`` when the weights file is not a ``.ckpt`` checkpoint, otherwise
        ``None`` after all minibatches were processed.
    """
    with tf.name_scope('view_cubic_rpn'):
        roi_bv = self.net.get_output('rpn_rois')[0]
        data_bv = self.net.lidar_bv_data
        image_rpn = tf.reshape(test_show_rpn_tf(data_bv, roi_bv),
                               (1, 601, 601, -1))
        tf.summary.image('lidar_bv_test', image_rpn)
        merged = tf.summary.merge_all()

    with tf.name_scope('load_weights'):
        weights = self.args.weights
        if weights.endswith('.ckpt'):
            print('Loading test model weights from {:s}'.format(
                self.args.weights))
            self.saver.restore(sess, weights)
        else:
            print(
                "error: Function [combinet_test.testing] can not load weights {:s}!"
                .format(self.args.weights))
            return 0

    cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2])
    rpn_3d = tf.reshape(self.net.get_output('rpn_rois')[1], [-1, 8])

    # TODO: Essential step (before sess.run) for using vispy because of the
    # bug of opengl or tensorflow
    vispy_init()
    timer = Timer()

    for idx in range(self.epoch):
        blobs = self.dataset.get_minibatch(idx)
        feed_dict = {
            self.net.lidar3d_data: blobs['lidar3d_data'],
            self.net.lidar_bv_data: blobs['lidar_bv_data'],
            self.net.im_info: blobs['im_info'],
            self.net.calib: blobs['calib']
        }
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        timer.tic()
        cubic_cls_score_, rpn_3d_, summary = sess.run(
            [cubic_cls_score, rpn_3d, merged],
            feed_dict=feed_dict,
            options=run_options,
            run_metadata=run_metadata)
        timer.toc()
        cubic_result = cubic_cls_score_.argmax(axis=1)

        if idx % 3 == 0 and cfg.TEST.DEBUG_TIMELINE:
            # Viewable in chrome://tracing
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_path = (cfg.LOG_DIR + '/' + 'testing-step-' +
                          str(idx).zfill(7) + '.ctf.json')
            # The with-block closes the file even if the write fails.
            with open(trace_path, 'w') as trace_file:
                trace_file.write(
                    trace.generate_chrome_trace_format(show_memory=False))

        if idx % cfg.TEST.ITER_DISPLAY == 0:
            print('Test: %06d/%06d speed: %.4f s / iter' %
                  (idx + 1, self.epoch, timer.average_time))

        if VISION_DEBUG:
            scan = blobs['lidar3d_data']
            img = blobs['image_data']
            pred_boxes = np.hstack(
                (rpn_3d_, cubic_result.reshape(-1, 1) * 2))
            pcd_vispy(scan,
                      img,
                      pred_boxes,
                      no_gt=True,
                      index=idx,
                      save_img=cfg.TEST.SAVE_IMAGE,
                      visible=True,
                      name='CubicNet testing')

        if cfg.TEST.TENSORBOARD:
            test_writer.add_summary(summary, idx)

    print('Testing process has done, happy every day !')
def main():
    """Parse command-line options and run WaveNet vocoder training.

    Builds the training network and a batch-size-1 test network, restores a
    checkpoint from the resolved ``restore_from`` directory, then loops:
    one optimisation step per iteration, a checkpoint every
    ``--checkpoint_every`` steps and an evaluation every ``--eval_every``
    steps. The loop is ended by raising an exception once
    ``hparams.num_steps`` is reached; the handler logs it and asks the
    coordinator to stop.

    NOTE(review): the statement order below (graph construction, session
    creation, restore, queue start, test-network creation) looks
    load-bearing; confirm before reordering anything.
    """

    def _str_to_bool(s):
        """Convert string to bool (in argparse context)."""
        if s.lower() not in ['true', 'false']:
            raise ValueError(
                'Argument needs to be a boolean, got {}'.format(s))
        return {'true': True, 'false': False}[s.lower()]

    parser = argparse.ArgumentParser(description='WaveNet example network')

    # Comma-separated list of data directories; one entry per speaker.
    DATA_DIRECTORY = r'D:\GIT\Tacotron2-Wavenet-Korean-TTS\data\moon,D:\GIT\Tacotron2-Wavenet-Korean-TTS\data\son'
    #DATA_DIRECTORY = 'D:\\hccho\\Tacotron-Wavenet-Vocoder-hccho\\data\\moon'
    parser.add_argument('--data_dir',
                        type=str,
                        default=DATA_DIRECTORY,
                        help='The directory containing the VCTK corpus.')

    LOGDIR = None
    # LOGDIR = './/logdir-wavenet//train//2019-03-27T20-27-18'

    parser.add_argument(
        '--logdir',
        type=str,
        default=LOGDIR,
        help=
        'Directory in which to store the logging information for TensorBoard. If the model already exists, it will restore the state and will continue training. Cannot use with --logdir_root and --restore_from.'
    )

    parser.add_argument(
        '--logdir_root',
        type=str,
        default=None,
        help=
        'Root directory to place the logging output and generated model. These are stored under the dated subdirectory of --logdir_root. Cannot use with --logdir.'
    )
    parser.add_argument(
        '--restore_from',
        type=str,
        default=None,
        help=
        'Directory in which to restore the model from. This creates the new model under the dated directory in --logdir_root. Cannot use with --logdir.'
    )

    CHECKPOINT_EVERY = 1000  # checkpoint save interval (in steps)
    parser.add_argument(
        '--checkpoint_every',
        type=int,
        default=CHECKPOINT_EVERY,
        help='How many steps to save each checkpoint after. Default: ' +
        str(CHECKPOINT_EVERY) + '.')

    parser.add_argument('--eval_every',
                        type=int,
                        default=2,
                        help='Steps between eval on test data')

    config = parser.parse_args()  # options that can be given on the command line
    config.data_dir = config.data_dir.split(",")

    try:
        directories = validate_directories(config, hparams)
    except ValueError as e:
        print("Some arguments are wrong:")
        print(str(e))
        return

    logdir = directories['logdir']
    restore_from = directories['restore_from']

    # Even if we restored the model, we will treat it as new training
    # if the trained model is written into an arbitrary location.
    is_overwritten_training = logdir != restore_from

    log_path = os.path.join(logdir, 'train.log')
    infolog.init(log_path, logdir)

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # A strength of exactly 0 is replaced by None before it is passed on.
    if hparams.l2_regularization_strength == 0:
        hparams.l2_regularization_strength = None

    # Create coordinator.
    coord = tf.train.Coordinator()
    num_speakers = len(config.data_dir)
    # Load raw waveform from VCTK corpus.
    with tf.name_scope('create_inputs'):
        # Allow silence trimming to be skipped by specifying a threshold near
        # zero.
        silence_threshold = hparams.silence_threshold if hparams.silence_threshold > EPSILON else None
        gc_enable = True  # Before: num_speakers > 1  After: always True

        # The AudioReader cuts the wav files to build the inputs. The front
        # is padded by receptive_field samples or filled from the previous
        # piece, and pieces of size (receptive_field + sample_size) are cut.
        reader = DataFeederWavenet(coord,
                                   config.data_dir,
                                   batch_size=hparams.wavenet_batch_size,
                                   gc_enable=gc_enable,
                                   test_mode=False)

        # Create a second DataFeederWavenet for testing. It fetches exactly
        # one file.
        reader_test = DataFeederWavenet(coord,
                                        config.data_dir,
                                        batch_size=1,
                                        gc_enable=gc_enable,
                                        test_mode=True,
                                        queue_size=1)

        audio_batch, lc_batch, gc_id_batch = reader.inputs_wav, reader.local_condition, reader.speaker_id

    # Create train network.
    net = create_network(hparams,
                         hparams.wavenet_batch_size,
                         num_speakers,
                         is_training=True)
    net.add_loss(input_batch=audio_batch,
                 local_condition=lc_batch,
                 global_condition_batch=gc_id_batch,
                 l2_regularization_strength=hparams.l2_regularization_strength,
                 upsample_type=hparams.upsample_type)
    net.add_optimizer(hparams, global_step)

    # Filled in by the traced runs in the training loop below.
    run_metadata = tf.RunMetadata()

    # Set up session
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)
                      )  # log_device_placement=False --> automatic cpu/gpu placement.
    init = tf.global_variables_initializer()
    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(
        var_list=tf.global_variables(),
        max_to_keep=hparams.max_checkpoints)  # maximum number of checkpoints to keep

    try:
        start_step = load(saver, sess, restore_from)  # load the checkpoint
        if is_overwritten_training or start_step is None:
            # The first training step will be saved_global_step + 1,
            # therefore we put -1 here for new or overwritten trainings.
            zero_step_assign = tf.assign(global_step, 0)
            sess.run(zero_step_assign)
            start_step = 0
    except:
        print(
            "Something went wrong while restoring checkpoint. We will terminate training to avoid accidentally overwriting the previous model."
        )
        raise

    ###########

    reader.start_in_session(sess, start_step)
    reader_test.start_in_session(sess, start_step)

    ################### Create test network. <---- because of queue creation, the test network is created after the session restore
    net_test = create_network(hparams, 1, num_speakers, is_training=False)

    if hparams.scalar_input:
        samples = tf.placeholder(tf.float32,
                                 shape=[net_test.batch_size, None])
        # Random start values in [-1, 1), one per batch element.
        waveform = 2 * np.random.rand(net_test.batch_size).reshape(
            net_test.batch_size, -1) - 1

    else:
        samples = tf.placeholder(tf.int32, shape=[
            net_test.batch_size, None
        ])  # samples: mu_law_encode output, before one-hot conversion. (batch_size, length)
        waveform = np.random.randint(hparams.quantization_channels,
                                     size=net_test.batch_size).reshape(
                                         net_test.batch_size, -1)
    upsampled_local_condition = tf.placeholder(
        tf.float32, shape=[net_test.batch_size, hparams.num_mels])

    speaker_id = tf.placeholder(tf.int32, shape=[net_test.batch_size])
    next_sample = net_test.predict_proba_incremental(
        samples, upsampled_local_condition, speaker_id
    )  # applies the Fast Wavenet Generation Algorithm (arXiv 1611.09482)

    sess.run(net_test.queue_initializer)

    # There are three placeholders for testing: samples, speaker_id,
    # upsampled_local_condition.
    # Pull one mel-spectrogram for testing. If it is not fixed here, the
    # reader thread keeps running and reading data. The role of reader_test
    # ends here.
    mel_input_test, speaker_id_test = sess.run(
        [reader_test.local_condition, reader_test.speaker_id])

    with tf.variable_scope('wavenet', reuse=tf.AUTO_REUSE):
        upsampled_local_condition_data = net_test.create_upsample(
            mel_input_test, upsample_type=hparams.upsample_type)
        upsampled_local_condition_data_ = sess.run(
            upsampled_local_condition_data
        )  # upsampled_local_condition_data_ is fed through feed_dict into the placeholder upsampled_local_condition.

    ######################################################

    start_step = sess.run(global_step)
    step = last_saved_step = start_step
    try:
        while not coord.should_stop():

            start_time = time.time()
            if hparams.store_metadata and step % 50 == 0:
                # Slow run that stores extra information for debugging.
                log('Storing metadata')
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize],
                    options=run_options,
                    run_metadata=run_metadata)

                tl = timeline.Timeline(run_metadata.step_stats)
                # The same trace file is overwritten on every traced step.
                timeline_path = os.path.join(logdir, 'timeline.trace')
                with open(timeline_path, 'w') as f:
                    f.write(tl.generate_chrome_trace_format(show_memory=True))
            else:
                step, loss_value, _ = sess.run(
                    [global_step, net.loss, net.optimize])

            duration = time.time() - start_time
            log('step {:d} - loss = {:.3f}, ({:.3f} sec/step)'.format(
                step, loss_value, duration))

            if step % config.checkpoint_every == 0:
                save(saver, sess, logdir, step)
                last_saved_step = step

            if step % config.eval_every == 0:  # config.eval_every
                eval_step(sess, logdir, step, waveform,
                          upsampled_local_condition_data_, speaker_id_test,
                          mel_input_test, samples, speaker_id,
                          upsampled_local_condition, next_sample)

            if step >= hparams.num_steps:
                # An error message is printed, but stopping here is intended:
                # the exception is how the loop is terminated.
                raise Exception('End xxx~~~yyy')

    except Exception as e:
        print('finally')
        log('Exiting due to exception: %s' % e, slack=True)
        #if step > last_saved_step:
        #    save(saver, sess, logdir, step)
        traceback.print_exc()
        coord.request_stop(e)
def main(unused_argv):
    """Annotate a CoNLL corpus with a trained DRAGNN model and score it.

    Reads the master spec, builds the annotation graph on the CPU, restores
    the checkpoint named by ``FLAGS.checkpoint_file`` and annotates the
    corpus in batches of ``FLAGS.max_batch_size``. The final batch is traced
    to ``FLAGS.timeline_output_file`` when that flag is set. Parse metrics
    go to ``FLAGS.log_file`` and the annotated sentences to
    ``FLAGS.output_file`` when those flags are set.

    Args:
        unused_argv: Ignored.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # List-valued flags are parsed with regular expressions:
    # "name=size" pairs for the beam sizes ...
    beam_size_overrides = re.findall(r'([^=,]+)=(\d+)',
                                     FLAGS.inference_beam_size)
    # ... and non-empty comma-separated names for local normalisation.
    locally_normalized = re.findall(r'[^,]+', FLAGS.locally_normalize)

    # Reads master spec.
    master_spec = spec_pb2.MasterSpec()
    with gfile.FastGFile(FLAGS.master_spec) as spec_file:
        text_format.Parse(spec_file.read(), master_spec)

    # Rewrite resource locations.
    if FLAGS.resource_dir:
        all_parts = (part for component in master_spec.component
                     for resource in component.resource
                     for part in resource.part)
        for part in all_parts:
            part.file_pattern = os.path.join(FLAGS.resource_dir,
                                             part.file_pattern)

    if FLAGS.complete_master_spec:
        spec_builder.complete_master_spec(master_spec, None,
                                          FLAGS.resource_dir)

    # Graph building.
    tf.logging.info('Building the graph')
    g = tf.Graph()
    with g.as_default(), tf.device('/device:CPU:0'):
        hyperparam_config = spec_pb2.GridPoint()
        hyperparam_config.use_moving_average = True
        builder = graph_builder.MasterBuilder(master_spec, hyperparam_config)
        annotator = builder.add_annotation()
        builder.add_saver()

    tf.logging.info('Reading documents...')
    input_corpus = sentence_io.ConllSentenceReader(FLAGS.input_file).corpus()
    corpus_size = len(input_corpus)

    session_config = tf.ConfigProto(
        log_device_placement=False,
        intra_op_parallelism_threads=FLAGS.threads,
        inter_op_parallelism_threads=FLAGS.threads)

    # Feeds that are identical for every batch.
    constant_feeds = {}
    for comp, beam_size in beam_size_overrides:
        constant_feeds['%s/InferenceBeamSize:0' % comp] = beam_size
    for comp in locally_normalized:
        constant_feeds['%s/LocallyNormalize:0' % comp] = True

    with tf.Session(graph=g, config=session_config) as sess:
        tf.logging.info('Initializing variables...')
        sess.run(tf.global_variables_initializer())

        tf.logging.info('Loading from checkpoint...')
        sess.run('save/restore_all', {'save/Const:0': FLAGS.checkpoint_file})

        tf.logging.info('Processing sentences...')

        processed = []
        start_time = time.time()
        run_metadata = tf.RunMetadata()
        for start in range(0, corpus_size, FLAGS.max_batch_size):
            end = min(start + FLAGS.max_batch_size, corpus_size)
            feed_dict = {annotator['input_batch']: input_corpus[start:end]}
            feed_dict.update(constant_feeds)

            # Only the final batch is traced, and only on request.
            trace_this_batch = bool(FLAGS.timeline_output_file
                                    and end == corpus_size)
            run_kwargs = {'feed_dict': feed_dict}
            if trace_this_batch:
                run_kwargs['options'] = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE)
                run_kwargs['run_metadata'] = run_metadata

            serialized_annotations = sess.run(annotator['annotations'],
                                              **run_kwargs)

            if trace_this_batch:
                trace = timeline.Timeline(step_stats=run_metadata.step_stats)
                with open(FLAGS.timeline_output_file, 'w') as trace_file:
                    trace_file.write(trace.generate_chrome_trace_format())

            processed.extend(serialized_annotations)

        tf.logging.info('Processed %d documents in %.2f seconds.',
                        len(input_corpus),
                        time.time() - start_time)
        pos, uas, las = evaluation.calculate_parse_metrics(
            input_corpus, processed)

        if FLAGS.log_file:
            with gfile.GFile(FLAGS.log_file, 'w') as f:
                f.write('%s\t%f\t%f\t%f\n' %
                        (FLAGS.language_name, pos, uas, las))

        if FLAGS.output_file:
            with gfile.GFile(FLAGS.output_file, 'w') as f:
                for serialized_sentence in processed:
                    sentence = sentence_pb2.Sentence()
                    sentence.ParseFromString(serialized_sentence)
                    f.write(text_format.MessageToString(sentence) + '\n\n')