def compile_saliency_function(model, activation_layer=layer_name):
    input_img = model.input
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layer_output = layer_dict[activation_layer].output
    max_output = k.max(layer_output, axis=3)
    saliency = k.gradients(k.sum(max_output), input_img)[0]
    return k.function([input_img, k.learning_phase()], [saliency])
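A minimal usage sketch of the function above, assuming a standard Keras model and an existing convolutional layer name ('block5_conv3' is only an illustration; the layer_name default in the definition comes from its original script). The learning phase is fed as 0 so dropout and batch norm run in inference mode.

# Hypothetical usage sketch: model, layer name and input batch are assumptions.
from keras import backend as k
from keras.applications.vgg16 import VGG16
import numpy as np

model = VGG16(weights=None)                      # any Keras model with named conv layers
saliency_fn = compile_saliency_function(model, activation_layer='block5_conv3')
img_batch = np.random.rand(1, 224, 224, 3).astype('float32')
saliency = saliency_fn([img_batch, 0])[0]        # 0 -> test phase (dropout off)
print(saliency.shape)                            # same shape as the input batch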
def on_epoch_end(self, epoch, logs=None):
    self.epoch = epoch
    self.batch = self.n_batches - 1
    if not self.validation_data and self.histogram_freq:
        raise ValueError('If printing histograms, validation_data must be '
                         'provided, and cannot be a generator.')
    if self.validation_data and self.histogram_freq:
        if self.epoch % self.histogram_freq == 0:
            val_data = self.validation_data
            tensors = (self.model.inputs +
                       self.model.targets +
                       self.model.sample_weights)
            if self.model.uses_learning_phase:
                tensors += [K.learning_phase()]
            assert len(val_data) == len(tensors)
            val_size = val_data[0].shape[0]
            i = 0
            while i < val_size:
                step = min(self.batch_size, val_size - i)
                if self.model.uses_learning_phase:
                    # do not slice the learning phase
                    batch_val = [x[i:i + step] for x in val_data[:-1]]
                    batch_val.append(val_data[-1])
                else:
                    batch_val = [x[i:i + step] for x in val_data]
                feed_dict = dict(zip(tensors, batch_val))
                result = self.sess.run([self.merged], feed_dict=feed_dict)
                summary_str = result[0]
                self.writer.add_summary(summary_str, self.epoch * self.batch_size)
                i += self.batch_size
    self._save_logs(logs)
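For context, a sketch of how a histogram-enabled TensorBoard-style callback is typically attached in Keras; the model, data, and log directory are placeholders, and validation_data is passed as arrays (not a generator), which is what the check above requires.

# Hypothetical setup; model and data are placeholders, not from the original code.
from keras.callbacks import TensorBoard

tb_cb = TensorBoard(log_dir='./logs', histogram_freq=1, batch_size=32,
                    write_graph=False)
model.fit(x_train, y_train,
          validation_data=(x_val, y_val),   # arrays, not a generator
          epochs=10, batch_size=32,
          callbacks=[tb_cb])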
def run(train_iterator, train_iters, P, experiment_dir='./', log_steps=30, save_steps=1000,
        train_mode=True, segmentation_free=False, seed=128, num_producer_threads=1):
    np.random.seed(seed)
    tf.set_random_seed(seed)
    build_phocs = P.phoc_dim > 0
    experiment_dir, logger = settings.get_exp_dir_and_logger(experiment_dir)
    train_pipe = settings.get_pipeline(P, train_iterator, num_producer_threads,
                                       augmentations=train_mode, crop_words=P.crop_words)
    network = NetworkStructure(P, experiment_dir)
    summary_op = tf.summary.merge_all()
    sess = settings.get_session(P)
    with sess.as_default():
        tb_writer = utils.TensorBoardFiles(experiment_dir, P.log_prefix, sess)
        # Init all variables
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        logger('Initialized vars')
        network.models.load(sess)
        # Global Step reset
        if P.reset_gs:
            gss = []
            if hasattr(network, 'gs_reg'):
                gss.append(network.gs_reg)
            if hasattr(network, 'gs_hmap'):
                gss.append(network.gs_hmap)
            sess.run(tf.variables_initializer(gss))
        save_path = experiment_dir / ('eval' if P.stat_prefix is None else P.stat_prefix if P.eval_run else 'train')
        tf_op_timer = utils.Timer()
        pipe_timer = utils.Timer()
        stats_timer = utils.Timer()
        av_losses = utils.RunningAverages(num_of_averages=len(my_losses()), max_length=log_steps)
        runners = []
        NORMALIZE = P.image_normalize_const
        if train_mode:
            if P.train_hmap:
                runners += [(network.train_hmap, 'hmap', network.gs_hmap, sess.run(network.gs_hmap))]
            if P.train_regression and train_mode:
                runners += [(network.train_boxes, 'regression', network.gs_reg, sess.run(network.gs_reg))]
        else:
            runners += [(network.train_hmap, 'eval', network.gs_hmap, sess.run(network.gs_hmap))]
        print('Going for %d runners' % len(runners))
        # NOTE: train_mode is rebound below to each runner's name string.
        for train_op, train_mode, global_step, strt_iter in runners:
            logger('Starting %s from: %d to: %d' % (train_mode, strt_iter, train_iters + 1))
            strt_iter = 0 if P.eval_run else strt_iter
            train_type = train_mode if train_mode else 'Eval'
            # Setting-up tf output ops
            execution = {'gs': global_step, 'losses': my_losses(), 'good_boxes': network.good_boxes}
            if train_mode:
                execution['train_op'] = train_op
                execution['random_boxes'] = network.rnd_boxes if segmentation_free else network.pool_boxes
                if segmentation_free:
                    execution['random_iou_labels'] = network.rnd_iou_labels
            else:
                execution['update_os'] = network.update_ops
                if build_phocs:
                    execution['good_phocs'] = network.good_phocs
            for i in range(strt_iter, train_iters + 1):
                # Pull data
                pipe_timer.tic()
                batch = train_pipe.pull_data()
                if batch is None:
                    break
                # Normalize image
                original_image = batch['image'].copy()
                batch['image'] = batch['image'].astype(np.float32) / NORMALIZE
                feed_dict = settings.feed_dict_from_dict(network.inputs, batch, train_pipe, P, train_mode=True)
                feed_dict.update({Kb.learning_phase(): 1 * (train_mode)})
                pipe_timer.toc()
                # Train
                tf_op_timer.tic()
                res = sess.run(execution, feed_dict)
                tf_op_timer.toc()
                gs = res['gs']
                # Update Running averages
                av_losses.update(res['losses'])
                # Log steps
                stats_timer.tic()
                if i % log_steps == 0 or not train_mode:
                    logger('-%6d / %6d- GS [%6d] DataTime [%4.2fs] GPUTime [%4.2fs] StatsTime [%4.2fs]-%s [%s]-' %
                           (i, train_iters, gs, pipe_timer.average(), tf_op_timer.average(),
                            stats_timer.average(), train_type, P.name))
                    # Print out loss names and average values
                    logger(' '.join(['%s [%5.4f]' % (v, w()) for v, w in
                                     zip([x.name.split('/')[0] for x in my_losses()], av_losses())]))
                    # Evaluation Run statistics
                    if not train_mode:
                        # get boxes with their scores
                        good_boxes_pred = res['good_boxes']
                        abs_good_boxes_pred = tf_format_to_abs(good_boxes_pred, P.target_size)
                        # filter boxes
                        logger('-%6d- BOXES [%4d] DataTime [%4.2fs] GPUTime [%4.2fs] StatsTime [%4.2fs] -EVAL-' %
                               (i, good_boxes_pred.shape[0], pipe_timer.average(), tf_op_timer.average(),
                                stats_timer.average()))
                        if build_phocs:
                            # NOTICE: For PHOCs, only single batch eval is supported
                            box_viz_img = st.update_phoc_stats(meta_images=batch['meta_image'],
                                                               doc_images=original_image,
                                                               pred_boxes=abs_good_boxes_pred,
                                                               pred_phocs=res['good_phocs'],
                                                               gt_boxes=batch['gt_boxes'],
                                                               save_path=save_path)
                        else:
                            box_viz_img = st.update_segmentation_stats(meta_images=batch['meta_image'],
                                                                       doc_images=original_image,
                                                                       gt_boxes=batch['gt_boxes'],
                                                                       pred_boxes=abs_good_boxes_pred,
                                                                       params=P,
                                                                       save_path=save_path,
                                                                       test_phase=not train_mode,
                                                                       viz=True)
                        if box_viz_img is not None:
                            feed_dict.update({network.inputs.box_viz_images: box_viz_img})
                    else:
                        rboxes = res.get('random_boxes')
                        rlabels = res.get('random_iou_labels',
                                          np.array([P.box_filter_num_clsses - 1] * rboxes.shape[0]))
                        rboxes = tf_format_to_abs(rboxes, P.target_size)
                        box_viz_img_tensor = st.train_viz(batch, rboxes, rlabels,
                                                          phoc_lab_thresh=3, unnormalize=NORMALIZE)
                        if box_viz_img_tensor is not None:
                            feed_dict.update({network.inputs.box_viz_images: box_viz_img_tensor})
                    # Do another pass to log newly created visualizations to TensorBoard
                    summary_protobuf, gs = sess.run([summary_op, global_step], feed_dict)
                    tb_writer.real.add_summary(summary_protobuf, global_step=gs)
                # Save steps
                if i % save_steps == 0 and train_mode:
                    network.models.save(sess, global_step)
                stats_timer.toc()
        # Won't be prefixed, saved as 'model'
        if train_mode and len(runners) > 0:
            network.models.save(sess, global_step)
        # statistics.final_stats()
        logger.close()
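tf_format_to_abs is not defined in this snippet; a plausible sketch is given below, assuming the boxes come out in the normalized [y1, x1, y2, x2] order used by tf.image.crop_and_resize and that target_size is (height, width). The real helper may order or convert coordinates differently.

# Hypothetical reimplementation of tf_format_to_abs; the actual helper is not shown above.
import numpy as np

def tf_format_to_abs(boxes, target_size):
    """Scale normalized [y1, x1, y2, x2] boxes to absolute pixel coordinates."""
    h, w = target_size
    scale = np.array([h, w, h, w], dtype=np.float32)
    return boxes * scale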
loss = cls_loss + reg_loss
optimizer = tf.train.AdamOptimizer(learning_rate=0.009).minimize(loss)
# cls_op = tf.train.AdamOptimizer(learning_rate=0.009).minimize(cls_loss)
# reg_op = tf.train.AdamOptimizer(learning_rate=0.009).minimize(reg_loss)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

total_loss = 0
Accuracy = 0
classifier_loss = 0
regressor_loss = 0
for i in range(1, num_epochs):
    bbox, anchor_labels, img = next(train_gen)
    _, current_loss, reg_trainloss, cls_trainloss, train_acc = sess.run(
        [optimizer, loss, reg_loss, cls_loss, cls_accuracy],
        feed_dict={anchor_boxes: bbox, labels: anchor_labels, input1: img, K.learning_phase(): 1})
    total_loss += current_loss
    classifier_loss += cls_trainloss
    Accuracy += train_acc
    regressor_loss += reg_trainloss
    if i % 100 == 0:
        print("Total loss: {0}, rpn_cls_loss: {1}, rpn_reg_loss: {2}, Accuracy {3}".format(
            total_loss / 100, classifier_loss / 100, regressor_loss / 100, Accuracy / 100))
        logfile.write("Total loss: {0}, rpn_cls_loss: {1}, rpn_reg_loss: {2}, Accuracy {3}\n".format(
            total_loss / 100, classifier_loss / 100, regressor_loss / 100, Accuracy / 100))
        classifier_loss = 0
        regressor_loss = 0
        Accuracy = 0
        total_loss = 0
    if i > 1 and i % 10000 == 0:
        for j in range(1000):
            bbox, anchor_labels, img = next(val_gen)
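The validation loop above is cut off. As a hedged sketch (not the original's elided code), one evaluation step with this graph would typically run only the loss/accuracy ops and feed K.learning_phase() as 0 so dropout and batch norm switch to inference behavior:

# Hypothetical evaluation step: no optimizer op, learning phase fed as 0.
val_loss, val_acc = sess.run(
    [loss, cls_accuracy],
    feed_dict={anchor_boxes: bbox, labels: anchor_labels, input1: img,
               K.learning_phase(): 0})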
def _net(self, inputs, blocks):
    reuse = True if self._calls > 0 else None
    net, end_points = resnet_from_blocks(inputs, blocks, scope=self.scope, reuse=reuse)
    net = slim.dropout(net, keep_prob=(1 - self.args.dropout),
                       scope='%s_dropout' % self.scope,
                       is_training=Kb.learning_phase())
    return net
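Because is_training is wired to Kb.learning_phase(), dropout in this network is toggled at session.run time rather than at graph-construction time. A minimal sketch, where 'net_output', 'images' and 'batch' are placeholder names for the real tensors and data:

# Minimal sketch; tensor names are placeholders, not from the original code.
from keras import backend as Kb

train_out = sess.run(net_output, feed_dict={images: batch, Kb.learning_phase(): 1})  # dropout active
eval_out = sess.run(net_output, feed_dict={images: batch, Kb.learning_phase(): 0})   # dropout disabled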
def base_pooling(self, x, b):
    reuse = self.get_reuse(self._roi_pool_call)
    self._roi_pool_call += 1
    scope = self.scope
    L2_reg = self.args.box_filter_L2_reg
    dropout = self.args.dropout

    def _args_scope():
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=self.act_func,
                weights_regularizer=slim.l2_regularizer(L2_reg)):
            with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
                return arg_sc

    with slim.arg_scope(_args_scope()):
        with tf.variable_scope(scope, scope, [x, b], reuse=reuse) as sc:
            end_points_collection = sc.name + '_end_points'
            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                    outputs_collections=end_points_collection):
                boxes_input = tf.identity(b[:, 1:], name='boxes')
                batch_idx = tf.cast(b[:, 0], dtype=tf.int32, name='batch_idx')
                pooled_features = tf.image.crop_and_resize(
                    x, boxes_input, batch_idx, crop_size=self.output_shape)
                net = slim.conv2d(pooled_features, 1024, self.output_shape,
                                  stride=[1, 1], padding='VALID', scope='conv1_phoc')
                net = slim.conv2d(net, 1024, [1, 1], stride=[1, 1],
                                  padding='VALID', scope='conv2_phoc')
                # TODO: remove the flags
                if not self.args.tiny_phoc:
                    net = slim.dropout(net, keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(), scope='dropout_phoc1')
                    net = slim.conv2d(net, 1024, [1, 1], stride=[1, 1],
                                      padding='VALID', scope='conv3_phoc')
                if not self.args.tiny_phoc and not self.args.bigger_phoc:
                    net = slim.dropout(net, keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(), scope='dropout_phoc2')
                    net = slim.conv2d(net, 1024, [1, 1], stride=[1, 1],
                                      padding='VALID', scope='conv4_phoc')
                    net = slim.dropout(net, keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(), scope='dropout_phoc3')
                net = slim.conv2d(net, 1024, [1, 1], stride=1, scope='phoc_feature')
                return net
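The b argument above packs the batch index and the box into one row; a sketch of how such a tensor is typically assembled for tf.image.crop_and_resize, which expects normalized [y1, x1, y2, x2] coordinates, follows with made-up example values.

# Illustrative only: two boxes for image 0 of the batch, normalized [y1, x1, y2, x2].
import numpy as np

b_example = np.array([
    [0, 0.10, 0.20, 0.35, 0.60],   # [batch_idx, y1, x1, y2, x2]
    [0, 0.40, 0.05, 0.55, 0.50],
], dtype=np.float32)
# base_pooling then splits this into batch_idx = b[:, 0] and boxes = b[:, 1:]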
def phoc_prediction(features, phoc_dim, scope, reuse=None, L2_reg=0.0,
                    act_func=tf.nn.relu, large_topology=False, dropout=0.0):
    with slim.arg_scope(_args_scope(act_func, L2_reg)):
        with tf.variable_scope(scope, scope, [features], reuse=reuse) as sc:
            end_points_collection = sc.name + '_end_points'
            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                    outputs_collections=end_points_collection):
                if large_topology:
                    phoc = slim.conv2d(features, 1024, [1, 1], stride=1, activation_fn=act_func,
                                       padding='VALID', scope='fc4_phoc')
                    phoc = slim.conv2d(phoc, 1024, [1, 1], stride=1, activation_fn=act_func,
                                       padding='VALID', scope='fc5_phoc')
                    phoc = slim.conv2d(phoc, 1024, [1, 1], stride=1, activation_fn=act_func,
                                       padding='VALID', scope='fc6_phoc')
                    phoc = slim.conv2d(phoc, phoc_dim, [1, 1], stride=1, activation_fn=None,
                                       padding='VALID', scope='fc7_phoc')
                else:
                    phoc = slim.conv2d(features, 1024, [1, 1], stride=1, activation_fn=act_func,
                                       padding='VALID', scope='fc1')
                    phoc = slim.dropout(phoc, keep_prob=1 - dropout,
                                        is_training=Kb.learning_phase(), scope='dropout_phoc1')
                    phoc = slim.conv2d(phoc, 1024, [1, 1], stride=1, activation_fn=act_func,
                                       padding='VALID', scope='fc2')
                    phoc = slim.dropout(phoc, keep_prob=1 - dropout,
                                        is_training=Kb.learning_phase(), scope='dropout_phoc2')
                    phoc = slim.conv2d(phoc, phoc_dim, [1, 1], stride=1, activation_fn=None,
                                       padding='VALID', scope='linear')
                phoc = tf.squeeze(phoc, name='phoc_embd')
                return phoc
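A hedged usage sketch of phoc_prediction: the pooled feature tensor, the ground-truth placeholder and phoc_dim=604 below are illustrative assumptions, and reading the linear output through a sigmoid (PHOC bins are usually treated as independent binary attributes) is standard practice rather than something shown in this snippet.

# Hypothetical wiring; 'pooled_features', 'gt_phocs' and phoc_dim=604 are illustrative.
phoc_logits = phoc_prediction(pooled_features, phoc_dim=604, scope='phoc_head',
                              act_func=tf.nn.relu, dropout=0.5)
phoc_probs = tf.nn.sigmoid(phoc_logits)          # per-attribute probabilities
phoc_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=gt_phocs, logits=phoc_logits))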