Ejemplo n.º 1
0
def _pixel_selector_grad(op, grad):
    """Numerically estimate the gradient of 'pixel_selector' w.r.t. its coordinates.

    Every entry of the (NUM_POINTS, 3) coordinate input is perturbed by +1 and
    by -1, 'pixel_selector' is re-evaluated for both perturbations, and the
    central difference ``(f(c + 1) - f(c - 1)) / 2`` is multiplied by the
    incoming gradient and summed to give the gradient of that entry.

    Args:
        op: The 'pixel_selector' operation we want to differentiate. Its three
            inputs are read as (input, coordinates, strides).
        grad: Gradient with respect to the output of the 'pixel_selector' op.

    Returns:
        A three-element list ``[None, coord_grad, None]``: only the
        coordinates input receives a gradient.
    """
    source = op.inputs[0]
    coord = op.inputs[1]
    strides = op.inputs[2]
    back_grad = ops.reshape(grad, [-1])

    # Accumulator for the per-entry gradients; becomes a tensor after the
    # first addition of a tensor-valued term.
    coord_grad = np.zeros((NUM_POINTS, 3), np.float32)
    for i in range(NUM_POINTS):
        for j in range(3):
            # One-hot perturbation selecting coordinate entry (i, j). It is
            # reused for the +1 step, the -1 step and for scattering the
            # resulting scalar back into position (i, j).
            one_hot = np.zeros((NUM_POINTS, 3), np.float32)
            one_hot[i, j] = 1.0

            forward = ops.reshape(
                select_module.pixel_selector(source, coord + one_hot, strides),
                [-1])
            backward = ops.reshape(
                select_module.pixel_selector(source, coord - one_hot, strides),
                [-1])

            # Central difference with a step of 1, chained with the upstream
            # gradient.
            diff = ops.subtract(forward, backward)
            diff = ops.divide(diff, 2)
            diff = ops.multiply(diff, back_grad)

            coord_grad = coord_grad + one_hot * ops.reduce_sum(diff)

    return [None, coord_grad, None]
Ejemplo n.º 2
0
def calc_loss(logits: tf.Tensor, caps_out: tf.Tensor, x: tf.Tensor, y: tf.Tensor, decoded: tf.Tensor):
    """Build the total loss: margin loss plus a down-weighted reconstruction loss.

    Args:
        logits: Per-class scores that are compared against the two margins.
        caps_out: Not used by this function.
        x: Input batch; flattened per sample (first dimension kept) before it
            is compared with `decoded`.
        y: Labels that weight the two margin terms (presumably one-hot;
            confirm against the caller).
        decoded: Reconstruction compared element-wise with the flattened `x`.

    Returns:
        Scalar tensor: margin loss + 0.0005 * mean squared reconstruction error.
    """
    with tf.variable_scope('calc_loss'):
        # Weight that balances the upper-margin and lower-margin terms of the
        # margin loss.
        down_weight = 0.5
        # Values of the upper and lower margins.
        upper_margin = 0.95
        lower_margin = 0.05
        present_penalty = tf.square(tf.maximum(0., upper_margin - logits))
        absent_penalty = tf.square(tf.maximum(0., logits - lower_margin))

        present_term = y * present_penalty
        absent_term = down_weight * (1. - y) * absent_penalty
        per_sample = tf.reduce_sum(present_term + absent_term, axis=-1)
        margin_loss = tf.reduce_mean(per_sample)

        flat_x = tf.reshape(x, (x.shape[0], -1))
        squared_error = tf.square(flat_x - decoded)
        reconstruct_loss = 0.0005 * tf.reduce_mean(squared_error)
        combined = margin_loss + reconstruct_loss
    return combined
Ejemplo n.º 3
0
def main(ckpt_weights, image_size, output_size, model_def, class_num,
         depth_multiplier, obj_thresh, iou_thresh, train_set, test_image):
    """Run a YOLO model on one image, apply per-class NMS and display the boxes.

    Args:
        ckpt_weights: Path of the weights loaded into the model wrapper.
        image_size: Network input size; ``image_size[0]`` and
            ``image_size[1]`` are used as the first two input dimensions.
        output_size: Output grid sizes, reshaped to (-1, 2) for the helper.
        model_def: Python expression (string) that is evaluated to obtain the
            network-building callable.
        class_num: Number of classes.
        depth_multiplier: Passed to the network builder as ``alpha``.
        obj_thresh: Minimum box score (class score * confidence) to keep a box.
        iou_thresh: IoU threshold used by non-max suppression.
        train_set: Dataset name; selects ``data/{train_set}_anchor.npy``.
        test_image: Path of the image to run detection on.
    """
    h = Helper(None, class_num, f'data/{train_set}_anchor.npy',
               np.reshape(np.array(image_size), (-1, 2)),
               np.reshape(np.array(output_size), (-1, 2)))
    # SECURITY: eval() executes arbitrary code. `model_def` must only ever come
    # from a trusted source (e.g. a fixed set of model names), never from
    # untrusted input.
    network = eval(model_def)  # type :yolo_mobilev2
    yolo_model, yolo_model_warpper = network([image_size[0], image_size[1], 3],
                                             len(h.anchors[0]),
                                             class_num,
                                             alpha=depth_multiplier)

    yolo_model_warpper.load_weights(str(ckpt_weights))
    print(INFO, f' Load CKPT {str(ckpt_weights)}')
    orig_img = h._read_img(str(test_image))
    image_shape = orig_img.shape[0:2]
    img, _ = h._process_img(orig_img,
                            true_box=None,
                            is_training=False,
                            is_resize=True)
    # load images
    img = tf.expand_dims(img, 0)
    y_pred = yolo_model_warpper.predict(img)
    # box list
    _yxyx_box = []
    _yxyx_box_scores = []
    # preprocess label
    for l, pred_label in enumerate(y_pred):
        # split the label
        pred_xy = pred_label[..., 0:2]
        pred_wh = pred_label[..., 2:4]
        pred_confidence = pred_label[..., 4:5]
        pred_cls = pred_label[..., 5:]
        # box_scores = obj_score * class_score
        box_scores = tf.sigmoid(pred_cls) * tf.sigmoid(pred_confidence)
        # reshape box
        # NOTE tf_xywh_to_all will auto use sigmoid function
        pred_xy_A, pred_wh_A = tf_xywh_to_all(pred_xy, pred_wh, l, h)
        boxes = correct_box(pred_xy_A, pred_wh_A, image_size, image_shape)
        boxes = tf.reshape(boxes, (-1, 4))
        box_scores = tf.reshape(box_scores, (-1, class_num))
        # append box and scores to global list
        _yxyx_box.append(boxes)
        _yxyx_box_scores.append(box_scores)

    yxyx_box = tf.concat(_yxyx_box, axis=0)
    yxyx_box_scores = tf.concat(_yxyx_box_scores, axis=0)

    mask = yxyx_box_scores >= obj_thresh
    # do nms for every class
    _boxes = []
    _scores = []
    _classes = []
    for c in range(class_num):
        class_boxes = tf.boolean_mask(yxyx_box, mask[:, c])
        class_box_scores = tf.boolean_mask(yxyx_box_scores[:, c], mask[:, c])
        select = tf.image.non_max_suppression(class_boxes,
                                              scores=class_box_scores,
                                              max_output_size=30,
                                              iou_threshold=iou_thresh)
        class_boxes = tf.gather(class_boxes, select)
        class_box_scores = tf.gather(class_box_scores, select)
        _boxes.append(class_boxes)
        _scores.append(class_box_scores)
        # Class ids are stored with the dtype of the scores (float), so they
        # must be converted back to int before being used as an index.
        _classes.append(tf.ones_like(class_box_scores) * c)

    boxes = tf.concat(_boxes, axis=0)
    classes = tf.concat(_classes, axis=0)
    scores = tf.concat(_scores, axis=0)
    # draw box
    font = ImageFont.truetype(font='asset/FiraMono-Medium.otf',
                              size=tf.cast(
                                  tf.floor(3e-2 * image_shape[0] + 0.5),
                                  tf.int32).numpy())

    thickness = (image_shape[0] + image_shape[1]) // 300
    # show result
    if len(classes) > 0:
        pil_img = Image.fromarray(orig_img)
        # One drawing context is enough for all boxes of the image.
        draw = ImageDraw.Draw(pil_img)
        print('[top\tleft\tbottom\tright\tscore\tclass]')
        for box, score, c in zip(boxes, scores, classes):
            class_id = int(c.numpy())
            label = '{:2d} {:.2f}'.format(class_id, score.numpy())
            label_size = draw.textsize(label, font)
            top, left, bottom, right = box
            print(
                f'[{top:.1f}\t{left:.1f}\t{bottom:.1f}\t{right:.1f}\t{score:.2f}\t{class_id:2d}]'
            )
            # Round to the nearest pixel and clamp the box to the image.
            top = max(0, tf.cast(tf.floor(top + 0.5), tf.int32))
            left = max(0, tf.cast(tf.floor(left + 0.5), tf.int32))
            bottom = min(image_shape[0],
                         tf.cast(tf.floor(bottom + 0.5), tf.int32))
            right = min(image_shape[1], tf.cast(tf.floor(right + 0.5),
                                                tf.int32))

            # Put the label above the box when there is room for its height,
            # otherwise just inside the top edge of the box.
            if top - label_size[1] >= 0:
                text_origin = tf.convert_to_tensor([left, top - label_size[1]])
            else:
                text_origin = tf.convert_to_tensor([left, top + 1])

            # Draw nested 1-pixel rectangles to get the requested thickness.
            for j in range(thickness):
                draw.rectangle([left + j, top + j, right - j, bottom - j],
                               outline=h.colormap[class_id])
            draw.rectangle(
                [tuple(text_origin),
                 tuple(text_origin + label_size)],
                fill=h.colormap[class_id])
            draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw
        pil_img.show()
    else:
        print(NOTE, ' no boxes detected')
Ejemplo n.º 4
0
if __name__ == "__main__":
    # Training script: builds the capsule network graph on MNIST batches of
    # 100, then runs 10 passes of 600 steps each while logging summaries.
    g = tf.get_default_graph()
    ds, ds_val = mnist_dataset()
    iterator = ds.make_one_shot_iterator()
    next_x, next_y = iterator.get_next()
    batch_x = tf.placeholder_with_default(next_x, shape=[100, 28, 28, 1])
    batch_y = tf.placeholder_with_default(next_y, shape=[100, 10])
    logits, caps_out = capsnet(batch_x)
    decoded = decoder(caps_out, batch_y)
    # define loss
    loss = calc_loss(logits, caps_out, batch_x, batch_y, decoded)
    # define summary
    # tf.metrics.accuracy returns (accuracy_value, update_op). The update op is
    # the one fetched below: running it both accumulates the batch and yields
    # the running accuracy.
    _acc_value, acc_update = tf.metrics.accuracy(tf.argmax(batch_y, -1), tf.argmax(logits, -1))
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('acc', acc_update)
    tf.summary.image('reconstruction_img', tf.reshape(decoded, (100, 28, 28, 1)))
    summ = tf.summary.merge_all()
    # define train op
    steps = tf.train.get_or_create_global_step(g)
    train_op = tf.train.AdamOptimizer().minimize(loss, global_step=steps)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        writer = tf.summary.FileWriter('log', g)
        # Local variables hold the accumulators of tf.metrics.accuracy.
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
        for i in range(10):
            with tqdm(total=60000//100, bar_format='{n_fmt}/{total_fmt} |{bar}| {rate_fmt}{postfix}', unit=' batch', dynamic_ncols=True) as t:
                for j in range(60000//100):
                    _, summ_, steps_, loss_, acc_ = sess.run([train_op, summ, steps, loss, acc_update])
                    # Persist the merged summaries; without this the log only
                    # ever contains the graph.
                    writer.add_summary(summ_, steps_)
                    t.set_postfix(loss='{:<5.3f}'.format(loss_), acc='{:<4.2f}%'.format(acc_*100))
                    # Advance the progress bar by one batch.
                    t.update()
        # Flush pending events to disk.
        writer.close()
Ejemplo n.º 5
0
def flatten(previous_layer):
    """Reshape a rank-4 tensor to 2-D, keeping the first dimension.

    Args:
        previous_layer: Tensor whose static dimensions 1, 2 and 3 are
            multiplied together to form the size of the second output axis.

    Returns:
        Tensor of shape ``[-1, dim1 * dim2 * dim3]``.
    """
    dims = previous_layer.get_shape()
    feature_count = dims[1] * dims[2] * dims[3]
    return tf.reshape(previous_layer, shape=[-1, feature_count])