Example #1
from keras import backend as K

def compile_saliency_function(model, activation_layer):
    # Build a function mapping (input image, learning phase) -> saliency map
    input_img = model.input
    # Map layer names to layers, skipping the input layer
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layer_output = layer_dict[activation_layer].output
    # Strongest activation across the channel axis (assumes channels_last)
    max_output = K.max(layer_output, axis=3)
    saliency = K.gradients(K.sum(max_output), input_img)[0]
    return K.function([input_img, K.learning_phase()], [saliency])
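A minimal usage sketch; the VGG16 model and layer name here are illustrative assumptions, and any channels_last Keras model works the same way:

import numpy as np
from keras.applications.vgg16 import VGG16

model = VGG16(weights=None)  # hypothetical model, random weights
saliency_fn = compile_saliency_function(model, activation_layer='block5_conv3')
img = np.random.rand(1, 224, 224, 3).astype('float32')
# The trailing 0 feeds K.learning_phase() = test-time behavior
saliency_map = saliency_fn([img, 0])[0]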
Example #2
    def on_epoch_end(self, epoch, logs=None):
        self.epoch = epoch
        self.batch = self.n_batches - 1

        if not self.validation_data and self.histogram_freq:
            raise ValueError('If printing histograms, validation_data must be '
                             'provided, and cannot be a generator.')
        if self.validation_data and self.histogram_freq:
            if self.epoch % self.histogram_freq == 0:

                val_data = self.validation_data
                tensors = (
                        self.model.inputs + self.model.targets + self.model.sample_weights)

                if self.model.uses_learning_phase:
                    tensors += [K.learning_phase()]

                assert len(val_data) == len(tensors)
                val_size = val_data[0].shape[0]
                i = 0
                while i < val_size:
                    step = min(self.batch_size, val_size - i)
                    # Slice one validation batch; the learning-phase scalar
                    # (last element) must not be sliced.
                    if self.model.uses_learning_phase:
                        batch_val = [x[i:i + step] for x in val_data[:-1]]
                        batch_val.append(val_data[-1])
                    else:
                        batch_val = [x[i:i + step] for x in val_data]
                    feed_dict = dict(zip(tensors, batch_val))
                    result = self.sess.run([self.merged], feed_dict=feed_dict)
                    summary_str = result[0]
                    self.writer.add_summary(summary_str, self.epoch * self.batch_size)
                    i += self.batch_size

        self._save_logs(logs)
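The slicing logic above relies on `validation_data` mirroring `model.inputs + model.targets + model.sample_weights`, with an unsliced learning-phase scalar appended last when the model uses one. A minimal sketch of that layout (shapes are illustrative assumptions):

import numpy as np

x_val = np.zeros((256, 32, 32, 3), dtype='float32')  # model input
y_val = np.zeros((256, 10), dtype='float32')         # target
w_val = np.ones((256,), dtype='float32')             # sample weights
validation_data = [x_val, y_val, w_val, 0.0]         # trailing 0.0 = test phase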
Example #3

def run(train_iterator, train_iters, P, experiment_dir='./', log_steps=30, save_steps=1000, train_mode=True, segmentation_free=False,
        seed=128, num_producer_threads=1):

    np.random.seed(seed)
    tf.set_random_seed(seed)
    build_phocs = P.phoc_dim > 0

    experiment_dir, logger = settings.get_exp_dir_and_logger(experiment_dir)
    train_pipe = settings.get_pipeline(P, train_iterator, num_producer_threads, augmentations=train_mode, crop_words=P.crop_words)

    network = NetworkStructure(P, experiment_dir)

    summary_op = tf.summary.merge_all()
    sess = settings.get_session(P)

    with sess.as_default():
        tb_writer = utils.TensorBoardFiles(experiment_dir, P.log_prefix, sess)

        # Init all variables
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        logger('Initialized vars')

        network.models.load(sess)

        # Global Step reset
        if P.reset_gs:
            gss = []
            if hasattr(network, 'gs_reg'):
                gss.append(network.gs_reg)
            if hasattr(network, 'gs_hmap'):
                gss.append(network.gs_hmap)
            sess.run(tf.variables_initializer(gss))

        save_path = experiment_dir / ('eval' if P.stat_prefix is None else (P.stat_prefix if P.eval_run else 'train'))

        tf_op_timer = utils.Timer()
        pipe_timer = utils.Timer()
        stats_timer = utils.Timer()
        av_losses = utils.RunningAverages(num_of_averages=len(my_losses()), max_length=log_steps)
        runners = []
        NORMALIZE = P.image_normalize_const

        if train_mode:
            if P.train_hmap:
                runners += [(network.train_hmap, 'hmap', network.gs_hmap, sess.run(network.gs_hmap))]
            if P.train_regression:
                runners += [(network.train_boxes, 'regression', network.gs_reg, sess.run(network.gs_reg))]
        else:
            runners += [(network.train_hmap, 'eval', network.gs_hmap, sess.run(network.gs_hmap))]

        print('Going for %d runners' % len(runners))
        # NOTE: the loop variable below shadows the `train_mode` argument;
        # inside this loop, `train_mode` is the runner's label string.
        for train_op, train_mode, global_step, strt_iter in runners:

            logger('Starting %s from: %d to: %d' % (train_mode, strt_iter, train_iters + 1))
            strt_iter = 0 if P.eval_run else strt_iter
            train_type = train_mode if train_mode else 'Eval'

            # Setting-up tf output ops
            execution = {'gs': global_step, 'losses': my_losses(), 'good_boxes': network.good_boxes}
            if train_mode:
                execution['train_op'] = train_op
                execution['random_boxes'] = network.rnd_boxes if segmentation_free else network.pool_boxes
                if segmentation_free:
                    execution['random_iou_labels'] = network.rnd_iou_labels
            else:
                execution['update_os'] = network.update_ops
            if build_phocs:
                execution['good_phocs'] = network.good_phocs

            for i in range(strt_iter, train_iters + 1):
                # Pull data
                pipe_timer.tic()
                batch = train_pipe.pull_data()
                if batch is None:
                    break

                # Normalize image
                original_image = batch['image'].copy()
                batch['image'] = batch['image'].astype(np.float32) / NORMALIZE

                feed_dict = settings.feed_dict_from_dict(network.inputs, batch, train_pipe, P, train_mode=True)
                # Feed the learning phase as an int (1*(train_mode) would repeat the label string)
                feed_dict.update({Kb.learning_phase(): int(bool(train_mode))})

                pipe_timer.toc()

                # Train
                tf_op_timer.tic()
                res = sess.run(execution, feed_dict)
                tf_op_timer.toc()

                gs = res['gs']
                # Update Running averages
                av_losses.update(res['losses'])

                # Log steps
                stats_timer.tic()
                if i % log_steps == 0 or not train_mode:

                    logger('-%6d / %6d- GS [%6d] DataTime [%4.2fs] GPUTime [%4.2fs] StatsTime [%4.2fs]-%s [%s]-' %
                           (i, train_iters, gs, pipe_timer.average(), tf_op_timer.average(), stats_timer.average(), train_type, P.name))
                    # Print out loss names and average values
                    logger(' '.join(['%s [%5.4f]' % (v, w()) for v, w in zip([x.name.split('/')[0] for x in my_losses()], av_losses())]))

                    # Evaluation Run statistics
                    if not train_mode:
                        # get boxes with their scores
                        good_boxes_pred = res['good_boxes']
                        abs_good_boxes_pred = tf_format_to_abs(good_boxes_pred, P.target_size)

                        # filter boxes
                        logger('-%6d- BOXES [%4d] DataTime [%4.2fs] GPUTime [%4.2fs] StatsTime [%4.2fs] -EVAL-' %
                               (i, good_boxes_pred.shape[0], pipe_timer.average(), tf_op_timer.average(), stats_timer.average()))

                        if build_phocs:
                            # NOTICE: For PHOCs, only single batch eval is supported
                            box_viz_img = st.update_phoc_stats(meta_images=batch['meta_image'], doc_images=original_image, pred_boxes=abs_good_boxes_pred,
                                                               pred_phocs=res['good_phocs'], gt_boxes=batch['gt_boxes'], save_path=save_path)
                        else:
                            box_viz_img = st.update_segmentation_stats(meta_images=batch['meta_image'], doc_images=original_image, gt_boxes=batch['gt_boxes'],
                                                                       pred_boxes=abs_good_boxes_pred, params=P, save_path=save_path,
                                                                       test_phase=not train_mode, viz=True)
                        if box_viz_img is not None:
                            feed_dict.update({network.inputs.box_viz_images: box_viz_img})

                    else:
                        rboxes = res.get('random_boxes')
                        rlabels = res.get('random_iou_labels', np.array([P.box_filter_num_clsses - 1]*rboxes.shape[0]))
                        rboxes = tf_format_to_abs(rboxes, P.target_size)
                        box_viz_img_tensor = st.train_viz(batch, rboxes, rlabels, phoc_lab_thresh=3, unnormalize=NORMALIZE)

                        if box_viz_img_tensor is not None:
                            feed_dict.update({network.inputs.box_viz_images: box_viz_img_tensor})

                    # Do another pass to log newly created visualizations to TensorBoard
                    summary_protobuf, gs = sess.run([summary_op, global_step], feed_dict)
                    tb_writer.real.add_summary(summary_protobuf, global_step=gs)

                # Save steps
                if i % save_steps == 0 and train_mode:
                    network.models.save(sess, global_step)
                stats_timer.toc()

        # Won't be prefixed, saved as 'model'
        if train_mode and len(runners) > 0:
            network.models.save(sess, global_step)

        # statistics.final_stats()
        logger.close()
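The core pattern in this loop is injecting the Keras learning phase into a plain TensorFlow `sess.run` feed_dict. A standalone sketch of that pattern (TF 1.x with standalone Keras; the placeholder and dropout layer are illustrative):

import numpy as np
import tensorflow as tf
from keras import backend as Kb
from keras.layers import Dropout

x = tf.placeholder(tf.float32, shape=(None, 128))
y = Dropout(0.5)(x)  # behavior depends on the learning phase

with tf.Session() as sess:
    data = np.ones((4, 128), dtype='float32')
    train_out = sess.run(y, {x: data, Kb.learning_phase(): 1})  # dropout active
    test_out = sess.run(y, {x: data, Kb.learning_phase(): 0})   # identity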
Example #4
    loss = cls_loss + reg_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=0.009).minimize(loss)
    # cls_op = tf.train.AdamOptimizer(learning_rate=0.009).minimize(cls_loss)
    # reg_op = tf.train.AdamOptimizer(learning_rate=0.009).minimize(reg_loss)

    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    total_loss = 0
    Accuracy = 0
    classifier_loss = 0
    regressor_loss = 0
    for i in range(1, num_epochs):
        bbox, anchor_labels, img = next(train_gen)
        _, current_loss, reg_trainloss, cls_trainloss, train_acc = sess.run(
            [optimizer, loss, reg_loss, cls_loss, cls_accuracy],
            feed_dict={anchor_boxes: bbox, labels: anchor_labels, input1: img, K.learning_phase(): 1})
        total_loss += current_loss
        classifier_loss += cls_trainloss
        Accuracy += train_acc
        regressor_loss += reg_trainloss
        if i % 100 == 0:
            print("Total loss: {0}, rpn_cls_loss: {1}, rpn_reg_loss: {2}, Accuracy {3}".format(total_loss/100, classifier_loss/100, regressor_loss/100, Accuracy/100))
            logfile.write("Total loss: {0}, rpn_cls_loss: {1}, rpn_reg_loss: {2}, Accuracy {3}\n".format(total_loss/100, classifier_loss/100, regressor_loss/100, Accuracy/100))
            classifier_loss = 0
            regressor_loss = 0
            Accuracy = 0
            total_loss = 0
            if i > 1 and i % 10000 == 0:
                for j in range(1000):
                    bbox, anchor_labels, img = next(val_gen)
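The validation loop is truncated here; a plausible body, mirroring the training call above but with the learning phase set to 0 so dropout and batch norm run in inference mode (the exact ops evaluated are assumptions):

                    # Hypothetical evaluation step matching the training call
                    val_loss, val_acc = sess.run(
                        [loss, cls_accuracy],
                        feed_dict={anchor_boxes: bbox, labels: anchor_labels,
                                   input1: img, K.learning_phase(): 0})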
Example #5
    def _net(self, inputs, blocks):
        # Reuse variables from the second call on, so repeated towers share weights
        reuse = True if self._calls > 0 else None

        net, end_points = resnet_from_blocks(inputs, blocks, scope=self.scope, reuse=reuse)
        # Tie slim dropout to the Keras learning phase
        net = slim.dropout(net, keep_prob=(1 - self.args.dropout), scope='%s_dropout' % self.scope, is_training=Kb.learning_phase())
        return net
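Passing `Kb.learning_phase()` as `is_training` works because slim's dropout accepts a boolean tensor, so a single Keras flag toggles TF-Slim layers as well. A minimal sketch (shape is illustrative):

import tensorflow as tf
import tensorflow.contrib.slim as slim
from keras import backend as Kb

x = tf.placeholder(tf.float32, (None, 7, 7, 256))
# One Keras flag drives the slim layer's train/test behavior
net = slim.dropout(x, keep_prob=0.8, is_training=Kb.learning_phase())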
Example #6
    def base_pooling(self, x, b):
        reuse = self.get_reuse(self._roi_pool_call)
        self._roi_pool_call += 1
        scope = self.scope
        L2_reg = self.args.box_filter_L2_reg
        dropout = self.args.dropout

        def _args_scope():
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                    activation_fn=self.act_func,
                    weights_regularizer=slim.l2_regularizer(L2_reg)):
                with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
                    return arg_sc

        with slim.arg_scope(_args_scope()):
            with tf.variable_scope(scope, scope, [x, b], reuse=reuse) as sc:
                end_points_collection = sc.name + '_end_points'
                # Collect outputs for conv2d, fully_connected and max_pool2d.
                with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
                    boxes_input = tf.identity(b[:, 1:], name='boxes')
                    batch_idx = tf.cast(b[:, 0],
                                        dtype=tf.int32,
                                        name='batch_idx')
                    pooled_features = tf.image.crop_and_resize(
                        x, boxes_input, batch_idx, crop_size=self.output_shape)
                    net = slim.conv2d(pooled_features,
                                      1024,
                                      self.output_shape,
                                      stride=[1, 1],
                                      padding='VALID',
                                      scope='conv1_phoc')
                    net = slim.conv2d(net,
                                      1024, [1, 1],
                                      stride=[1, 1],
                                      padding='VALID',
                                      scope='conv2_phoc')
                    # TODO: remove the flags
                    if not self.args.tiny_phoc:
                        net = slim.dropout(net,
                                           keep_prob=1 - dropout,
                                           is_training=Kb.learning_phase(),
                                           scope='dropout_phoc1')
                        net = slim.conv2d(net,
                                          1024, [1, 1],
                                          stride=[1, 1],
                                          padding='VALID',
                                          scope='conv3_phoc')
                    if not self.args.tiny_phoc and not self.args.bigger_phoc:
                        net = slim.dropout(net,
                                           keep_prob=1 - dropout,
                                           is_training=Kb.learning_phase(),
                                           scope='dropout_phoc2')
                        net = slim.conv2d(net,
                                          1024, [1, 1],
                                          stride=[1, 1],
                                          padding='VALID',
                                          scope='conv4_phoc')
                    net = slim.dropout(net,
                                       keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(),
                                       scope='dropout_phoc3')
                    net = slim.conv2d(net,
                                      1024, [1, 1],
                                      stride=1,
                                      scope='phoc_feature')
        return net
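For reference, `tf.image.crop_and_resize` expects boxes normalized to [0, 1] in [y1, x1, y2, x2] order plus a per-box batch index, which is why the code splits `b` into `b[:, 1:]` (coordinates) and `b[:, 0]` (batch index). A minimal sketch:

import tensorflow as tf

img = tf.placeholder(tf.float32, (None, 64, 64, 3))
boxes = tf.constant([[0.1, 0.1, 0.6, 0.6]], dtype=tf.float32)  # [y1, x1, y2, x2]
box_idx = tf.constant([0], dtype=tf.int32)  # which image each box crops from
crops = tf.image.crop_and_resize(img, boxes, box_idx, crop_size=(7, 7))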
Example #7
def phoc_prediction(features,
                    phoc_dim,
                    scope,
                    reuse=None,
                    L2_reg=0.0,
                    act_func=tf.nn.relu,
                    large_topology=False,
                    dropout=0.0):

    with slim.arg_scope(_args_scope(act_func, L2_reg)):
        with tf.variable_scope(scope, scope, [features], reuse=reuse) as sc:
            end_points_collection = sc.name + '_end_points'
            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope(
                [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                    outputs_collections=end_points_collection):
                if large_topology:
                    phoc = slim.conv2d(features,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc4_phoc')
                    phoc = slim.conv2d(phoc,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc5_phoc')
                    phoc = slim.conv2d(phoc,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc6_phoc')
                    phoc = slim.conv2d(phoc,
                                       phoc_dim, [1, 1],
                                       stride=1,
                                       activation_fn=None,
                                       padding='VALID',
                                       scope='fc7_phoc')
                else:
                    phoc = slim.conv2d(features,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc1')
                    phoc = slim.dropout(phoc,
                                        keep_prob=1 - dropout,
                                        is_training=Kb.learning_phase(),
                                        scope='dropout_phoc1')
                    phoc = slim.conv2d(phoc,
                                       1024, [1, 1],
                                       stride=1,
                                       activation_fn=act_func,
                                       padding='VALID',
                                       scope='fc2')
                    phoc = slim.dropout(phoc,
                                        keep_prob=1 - dropout,
                                        is_training=Kb.learning_phase(),
                                        scope='dropout_phoc2')
                    phoc = slim.conv2d(phoc,
                                       phoc_dim, [1, 1],
                                       stride=1,
                                       activation_fn=None,
                                       padding='VALID',
                                       scope='linear')
                phoc = tf.squeeze(phoc, name='phoc_embd')

    return phoc
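A minimal call sketch; the 1×1 feature-map shape and the 604-dimensional PHOC (a common size in word-spotting work) are assumptions:

import tensorflow as tf

features = tf.placeholder(tf.float32, (None, 1, 1, 1024))
phoc_logits = phoc_prediction(features, phoc_dim=604, scope='phoc', dropout=0.3)
phoc_probs = tf.nn.sigmoid(phoc_logits)  # PHOC is a multi-label target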