def tf_xywh_to_all(grid_pred_xy: tf.Tensor, grid_pred_wh: tf.Tensor,
                   layer: int, h: Helper) -> [tf.Tensor, tf.Tensor]:
    """Rescale the raw grid predictions of one output layer to the [0~1] range.

    The centre is ``(sigmoid(xy) + h.xy_offset[layer]) / h.out_hw[layer][::-1]``
    and the size is ``exp(wh) * h.anchors[layer]``.

    Parameters
    ----------
    grid_pred_xy : tf.Tensor
        Raw centre predictions; the sigmoid is applied here.
    grid_pred_wh : tf.Tensor
        Raw size predictions; the exponential is applied here.
    layer : int
        Index of the output layer; selects the per-layer entries of
        ``h.xy_offset``, ``h.out_hw`` and ``h.anchors``.
    h : Helper
        Provides ``xy_offset``, ``out_hw`` and ``anchors``.

    Returns
    -------
    tuple
        ``(all_pred_xy, all_pred_wh)`` after processing.
    """
    scope = 'xywh_to_all_%d' % layer
    with tf.name_scope(scope):
        # Centre: position inside the cell, shifted by the cell offset, then
        # divided by the reversed out_hw entry (presumably (w, h) order to
        # match (x, y) -- confirm against Helper).
        xy_in_cell = tf.sigmoid(grid_pred_xy[..., :])
        xy_on_grid = xy_in_cell + h.xy_offset[layer]
        all_pred_xy = xy_on_grid / h.out_hw[layer][::-1]

        # Size: exponential of the raw value, scaled by this layer's anchors.
        wh_factor = tf.exp(grid_pred_wh[..., :])
        all_pred_wh = wh_factor * h.anchors[layer]
    return all_pred_xy, all_pred_wh
Example #2
0
    def train(self):
        """Fit a binary logistic-regression model with plain gradient descent.

        Step 1 only *defines* the TensorFlow graph (variables, model, loss,
        optimizer); nothing is computed there. Step 2 runs the graph in a
        session for 1000 gradient-descent steps, printing progress every
        100 steps.

        Reads ``self.__x_train`` (``shape[0]`` samples, ``shape[1]`` features)
        and ``self.__y_train``; stores the fitted parameters in
        ``self.__weight`` and ``self.__bias``.
        """
        # ---- 1. Build the data-flow graph ----
        graph = tf.Graph()
        with graph.as_default():
            # NOTE: feeding the data through placeholders ('x' of shape
            # [None, None], 'y' of shape [None]) was tried by the original
            # author but kept failing at run time with "You must feed a value
            # for placeholder tensor 'x' with dtype float and shape [?,?]",
            # so the training data is referenced directly instead.

            # Trainable parameters: weight starts uniformly random in
            # [-1, 1), bias starts at 0.
            weight = tf.Variable(tf.random_uniform([1, self.__x_train.shape[1]], -1.0, 1.0))
            bias = tf.Variable(tf.zeros([1]))

            # Binary sigmoid model y = 1 / (1 + exp(-(w*x + b))), written
            # with tf.sigmoid instead of spelling the formula out by hand.
            y_pre = tf.sigmoid(tf.reduce_sum(tf.multiply(weight, self.__x_train), 1) + bias)

            # Loss: negative log-likelihood
            #   (-y*log(y_pre) - (1-y)*log(1-y_pre)) / number_of_samples.
            # Least squares (linear regression) minimises the error between
            # prediction and truth; maximum likelihood (logistic regression)
            # maximises the probability of the observed training samples.
            # Negating the log-likelihood turns that into a minimisation
            # problem suitable for gradient descent.
            loss0 = self.__y_train * tf.log(y_pre)
            loss1 = (1 - self.__y_train) * tf.log(1 - y_pre)
            loss = tf.reduce_sum(- loss0 - loss1) / self.__x_train.shape[0]

            # Optimizer (gradient descent) whose goal is to minimise the loss.
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
            train_op = optimizer.minimize(loss)
            # Variable initialiser.
            init = tf.global_variables_initializer()

        # ---- 2. Actual training ----
        with tf.Session(graph=graph) as sess:
            # Computation only starts here.
            sess.run(init)
            print('初始化参数:weight=', sess.run(weight), ', bias=', sess.run(bias))
            # Run 1000 gradient-descent steps to fit w and b.
            for step in range(1000):
                sess.run(train_op)
                if step % 100 == 0:
                    # Fetch all reported values in a single session call.
                    weight_val, bias_val, loss_val = sess.run([weight, bias, loss])
                    print("第%u步:权重:%s,偏置:%f,损失:%f" %
                          (step, weight_val, bias_val, loss_val))
            # Only the final parameters matter, so read them back once after
            # the loop instead of re-evaluating them on every step.
            self.__weight, self.__bias = sess.run([weight, bias])
Example #3
0
def main(ckpt_weights, image_size, output_size, model_def, class_num,
         depth_multiplier, obj_thresh, iou_thresh, train_set, test_image):
    """Run the detector on one image, apply per-class NMS and show the result.

    Parameters
    ----------
    ckpt_weights :
        Checkpoint to load; converted with ``str()`` and passed to
        ``load_weights``.
    image_size :
        Network input size. ``image_size[0]`` and ``image_size[1]`` are the
        first two dimensions of the model input shape; the value is also
        reshaped to ``(-1, 2)`` for ``Helper``.
    output_size :
        Output grid sizes, reshaped to ``(-1, 2)`` for ``Helper``.
    model_def : str
        Python expression evaluated with ``eval`` to obtain the network
        builder. Must come from a trusted source.
    class_num : int
        Number of classes.
    depth_multiplier :
        Forwarded to the network builder as ``alpha``.
    obj_thresh :
        Minimum ``class_score * objectness`` for a box to be kept.
    iou_thresh :
        IoU threshold used by non-max suppression.
    train_set : str
        Selects the anchor file ``data/{train_set}_anchor.npy``.
    test_image :
        Path of the image to run on (converted with ``str()``).

    Returns
    -------
    None
        Detections are printed and the annotated image is shown.
    """

    def _to_pixel(value):
        # Round half up to the nearest integer and return a plain Python int.
        return int(tf.cast(tf.floor(value + 0.5), tf.int32))

    h = Helper(None, class_num, f'data/{train_set}_anchor.npy',
               np.reshape(np.array(image_size), (-1, 2)),
               np.reshape(np.array(output_size), (-1, 2)))
    # SECURITY: eval() executes arbitrary code. model_def must never come
    # from untrusted input; prefer an explicit name -> builder mapping.
    network = eval(model_def)  # type :yolo_mobilev2
    _, yolo_model_warpper = network([image_size[0], image_size[1], 3],
                                    len(h.anchors[0]),
                                    class_num,
                                    alpha=depth_multiplier)

    yolo_model_warpper.load_weights(str(ckpt_weights))
    print(INFO, f' Load CKPT {str(ckpt_weights)}')

    # Load and preprocess the image.
    orig_img = h._read_img(str(test_image))
    image_shape = orig_img.shape[0:2]
    img, _ = h._process_img(orig_img,
                            true_box=None,
                            is_training=False,
                            is_resize=True)
    img = tf.expand_dims(img, 0)
    y_pred = yolo_model_warpper.predict(img)

    # Decode every output layer into flat box / score lists.
    _yxyx_box = []
    _yxyx_box_scores = []
    for layer, pred_label in enumerate(y_pred):
        # Split the prediction: xy, wh, objectness, class logits.
        pred_xy = pred_label[..., 0:2]
        pred_wh = pred_label[..., 2:4]
        pred_confidence = pred_label[..., 4:5]
        pred_cls = pred_label[..., 5:]
        # box_scores = obj_score * class_score
        box_scores = tf.sigmoid(pred_cls) * tf.sigmoid(pred_confidence)
        # NOTE tf_xywh_to_all applies the sigmoid / exp itself.
        pred_xy_A, pred_wh_A = tf_xywh_to_all(pred_xy, pred_wh, layer, h)
        boxes = correct_box(pred_xy_A, pred_wh_A, image_size, image_shape)
        _yxyx_box.append(tf.reshape(boxes, (-1, 4)))
        _yxyx_box_scores.append(tf.reshape(box_scores, (-1, class_num)))

    yxyx_box = tf.concat(_yxyx_box, axis=0)
    yxyx_box_scores = tf.concat(_yxyx_box_scores, axis=0)

    mask = yxyx_box_scores >= obj_thresh

    # Non-max suppression, one class at a time.
    _boxes = []
    _scores = []
    _classes = []
    for c in range(class_num):
        class_boxes = tf.boolean_mask(yxyx_box, mask[:, c])
        class_box_scores = tf.boolean_mask(yxyx_box_scores[:, c], mask[:, c])
        select = tf.image.non_max_suppression(class_boxes,
                                              scores=class_box_scores,
                                              max_output_size=30,
                                              iou_threshold=iou_thresh)
        class_boxes = tf.gather(class_boxes, select)
        class_box_scores = tf.gather(class_box_scores, select)
        _boxes.append(class_boxes)
        _scores.append(class_box_scores)
        _classes.append(tf.ones_like(class_box_scores) * c)

    boxes = tf.concat(_boxes, axis=0)
    classes = tf.concat(_classes, axis=0)
    scores = tf.concat(_scores, axis=0)

    if len(classes) == 0:
        print(NOTE, ' no boxes detected')
        return

    # Drawing setup: font size and line thickness scale with the image.
    font = ImageFont.truetype(font='asset/FiraMono-Medium.otf',
                              size=_to_pixel(3e-2 * image_shape[0]))
    thickness = (image_shape[0] + image_shape[1]) // 300

    pil_img = Image.fromarray(orig_img)
    draw = ImageDraw.Draw(pil_img)
    print('[top\tleft\tbottom\tright\tscore\tclass]')
    for box, score, c in zip(boxes, scores, classes):
        # The class id is stored as a float tensor; use a plain int for
        # formatting and for indexing the colour map.
        cls_id = int(c.numpy())
        color = h.colormap[cls_id]
        label = '{:2d} {:.2f}'.format(cls_id, score.numpy())
        label_size = draw.textsize(label, font)
        top, left, bottom, right = box
        print(
            f'[{top:.1f}\t{left:.1f}\t{bottom:.1f}\t{right:.1f}\t{score:.2f}\t{cls_id:2d}]'
        )
        # Round to pixels and clip to the image; plain ints are what PIL's
        # drawing functions expect.
        top = max(0, _to_pixel(top))
        left = max(0, _to_pixel(left))
        bottom = min(image_shape[0], _to_pixel(bottom))
        right = min(image_shape[1], _to_pixel(right))

        # Put the label above the box when it fits there, otherwise just
        # inside the top edge. The fit test must use the label height; the
        # previous comparison against the image height was essentially
        # never true, so the label always landed inside the box.
        if top - label_size[1] >= 0:
            text_top = top - label_size[1]
        else:
            text_top = top + 1
        text_origin = (left, text_top)
        text_corner = (left + label_size[0], text_top + label_size[1])

        # Thick outline drawn as `thickness` nested 1-pixel rectangles.
        for j in range(thickness):
            draw.rectangle([left + j, top + j, right - j, bottom - j],
                           outline=color)
        draw.rectangle([text_origin, text_corner], fill=color)
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
    del draw
    pil_img.show()