Example #1
    def __init__(self, IP, cameraId=vd.kTopCamera, resolution=vd.kVGA):
        super(ObjectDetection, self).__init__(IP, cameraId, resolution)
        self._boundRect = []
        self._cropKeep = 1
        self._stickAngle = None  # rad
        #self._classes_name = ["stick"]
        self._common_params = {'image_size': 448, 'num_classes': 1,
                               'batch_size': 1}
        self._net_params = {'cell_size': 7, 'boxes_per_cell': 2, 'weight_decay': 0.0005}
        self._net = YoloTinyNet(self._common_params, self._net_params, test=True)
        #self._modelFile = "/home/meringue/Documents/python-nao-golf/yoloNet/models/train/model.ckpt-95000"
        #self._objectRect = [0, 0, 0, 0]
        self._objectName = None
Example #2
    def __init__(self,
                 IP,
                 classes_name,
                 cameraId=vd.kTopCamera,
                 resolution=vd.kVGA):
        super(MultiObjectDetection, self).__init__(IP, cameraId, resolution)
        self._classes_name = classes_name
        self._num_classes = len(classes_name)

        self._common_params = {
            'image_size': 448,
            'num_classes': self._num_classes,
            'batch_size': 1
        }
        self._net_params = {
            'cell_size': 7,
            'boxes_per_cell': 2,
            'weight_decay': 0.0005
        }
        self._net = YoloTinyNet(self._common_params,
                                self._net_params,
                                test=True)
Example #3
def main():

    net = YoloTinyNet(common_params, net_params, test=True)
    # Declare the TensorFlow placeholder `image`; when the graph is run later,
    # feed_dict maps this placeholder to its concrete value, i.e. it is the
    # entry point for the input data.
    image = tf.placeholder(tf.float32, (1, 448, 448, 3))
    predicts = net.inference(image)

    sess = tf.Session()

    # convert the image into the network's input format
    np_img = cv2.imread('cat.jpg')
    resized_img = cv2.resize(np_img, (448, 448))
    np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)

    np_img = np_img.astype(np.float32)
    # whiten the input data (scale to [-1, 1])
    np_img = np_img / 255.0 * 2 - 1
    np_img = np.reshape(np_img, (1, 448, 448, 3))

    saver = tf.train.Saver(net.trainable_collection)

    saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')
    # The optional feed_dict argument allows the caller to override
    # the value of tensors in the graph.
    np_predict = sess.run(predicts, feed_dict={image: np_img})

    xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict)
    class_name = classes_name[class_num]
    # draw the predicted box and label it with the predicted class
    cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                  (0, 0, 255))
    cv2.putText(resized_img, class_name, (int(xmin), int(ymin)), 2, 1.5,
                (0, 0, 255))
    cv2.imwrite('cat_out.jpg', resized_img)
    sess.close()
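
Example #3 calls a `process_predicts` helper that the snippet does not define. A minimal sketch of what it could look like, assuming the single-best-box decoding used in Examples #6 and #9 with 20 VOC classes, a 7x7 grid, and 2 boxes per cell:

import numpy as np

def process_predicts(predicts):
    # Split the (1, 7, 7, 30) output: 20 class scores, 2 box confidences,
    # then 2 boxes x 4 coordinates per cell.
    p_classes = np.reshape(predicts[0, :, :, 0:20], (7, 7, 1, 20))
    C = np.reshape(predicts[0, :, :, 20:22], (7, 7, 2, 1))
    coordinate = np.reshape(predicts[0, :, :, 22:], (7, 7, 2, 4))

    P = C * p_classes  # class-specific confidence per box
    index = np.unravel_index(np.argmax(P), P.shape)
    class_num = index[3]

    xcenter, ycenter, w, h = coordinate[index[0], index[1], index[2], :]
    # centers are relative to their cell, sizes relative to the whole image
    xcenter = (index[1] + xcenter) * (448 / 7.0)
    ycenter = (index[0] + ycenter) * (448 / 7.0)
    w, h = w * 448, h * 448

    xmin = xcenter - w / 2.0
    ymin = ycenter - h / 2.0
    return xmin, ymin, xmin + w, ymin + h, class_num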
Example #4
            src_xmax = xmax * width_ratio
            src_ymax = ymax * height_ratio
            score = float("%.3f" % score)

            cv2.rectangle(src_img, (int(src_xmin), int(src_ymin)), (int(src_xmax), int(src_ymax)), (0, 0, 255))
            cv2.putText(src_img, object_name + str(score), (int(src_xmin), int(src_ymin)), 1, 2, (0, 0, 255))

    #cv2.imshow("result", src_img)
    cv2.imwrite("result.jpg", src_img)
  

if __name__ == '__main__':
    common_params = {'image_size': 448, 'num_classes': 20, 'batch_size': 1}
    net_params = {'cell_size': 7, 'boxes_per_cell': 2, 'weight_decay': 0.0005}

    net = YoloTinyNet(common_params, net_params, test=True)

    image = tf.placeholder(tf.float32, (1, 448, 448, 3))
    predicts = net.yoloTinyModel(image)
    
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True 

    sess = tf.Session(config=config)
    src_img = cv2.imread("./test2.jpg")
    #src_img = cv2.imread("./data/VOCdevkit2007/VOC2007/JPEGImages/000058.jpg")
    resized_img = cv2.resize(src_img, (448, 448))
    #height_ratio = src_img.shape[0]/448.0
    #width_ratio = src_img.shape[1]/448.0
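
The ratio lines above are commented out and the fragment stops before the checkpoint is restored. A plausible continuation, assuming the same restore-and-run flow as Example #3 (the checkpoint path and the `trainable_collection` attribute are assumptions for this fork, which exposes `yoloTinyModel` instead of `inference`):

height_ratio = src_img.shape[0] / 448.0
width_ratio = src_img.shape[1] / 448.0

np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
np_img = np.reshape(np_img / 255.0 * 2 - 1, (1, 448, 448, 3))  # normalize to [-1, 1]

saver = tf.train.Saver(net.trainable_collection)  # assumed attribute
saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')  # illustrative path
np_predict = sess.run(predicts, feed_dict={image: np_img})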
Example #5
    w = w * x_size
    h = h * y_size

    xmin = xcenter - w / 2.0
    ymin = ycenter - h / 2.0

    xmax = xmin + w
    ymax = ymin + h

    return xmin, ymin, xmax, ymax, class_num


common_params = {'image_size': x_size, 'num_classes': 20, 'batch_size': 1}
net_params = {'cell_size': 7, 'boxes_per_cell': 2, 'weight_decay': 0.0005}

net = YoloTinyNet(common_params, net_params, test=True)

image = tf.placeholder(tf.float32, (1, x_size, y_size, channel))
predicts = net.inference(image)
saver = tf.train.Saver(net.trainable_collection)

with tf.Session() as sess:
    saver.restore(sess, model_path)

    forderlist = os.listdir(directory)
    filecnt = 0
    for forder in forderlist:
        filelist = os.listdir(directory + '/' + forder)
        for filename in filelist:
            # PNG -> JPEG
            img = Image.open(directory + '/' + forder + '/' + filename)
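
The loop body is truncated at this point; one way it might continue, converting each image to RGB, resizing, and running the network (everything beyond the names already shown is an assumption):

            img = img.convert('RGB').resize((x_size, y_size))
            np_img = np.array(img, dtype=np.float32)
            np_img = np.reshape(np_img / 255.0 * 2 - 1,
                                (1, x_size, y_size, channel))  # normalize to [-1, 1]

            np_predict = sess.run(predicts, feed_dict={image: np_img})
            xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict)
            filecnt += 1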
Example #6
  w = w * 448
  h = h * 448

  xmin = xcenter - w/2.0
  ymin = ycenter - h/2.0

  xmax = xmin + w
  ymax = ymin + h

  return xmin, ymin, xmax, ymax, class_num

common_params = {'image_size': 448, 'num_classes': 20, 
                'batch_size':1}
net_params = {'cell_size': 7, 'boxes_per_cell':2, 'weight_decay': 0.0005}

net = YoloTinyNet(common_params, net_params, test=True)

image = tf.placeholder(tf.float32, (1, 448, 448, 3))
predicts = net.inference(image)

sess = tf.Session()

np_img = cv2.imread('dining_table.jpg')
resized_img = cv2.resize(np_img, (448, 448))
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)


np_img = np_img.astype(np.float32)

np_img = np_img / 255.0 * 2 - 1
np_img = np.reshape(np_img, (1, 448, 448, 3))
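
Example #6 stops after preprocessing; the remaining steps would mirror Example #3 (checkpoint path and output filename illustrative):

saver = tf.train.Saver(net.trainable_collection)
saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')  # illustrative path

np_predict = sess.run(predicts, feed_dict={image: np_img})
xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict)

class_name = classes_name[class_num]  # assumes a VOC class-name list
cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255))
cv2.putText(resized_img, class_name, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255))
cv2.imwrite('dining_table_out.jpg', resized_img)
sess.close()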
Example #7
class MultiObjectDetection(VisualBasis):
    def __init__(self,
                 IP,
                 classes_name,
                 cameraId=vd.kTopCamera,
                 resolution=vd.kVGA):
        super(MultiObjectDetection, self).__init__(IP, cameraId, resolution)
        self._classes_name = classes_name
        self._num_classes = len(classes_name)

        self._common_params = {
            'image_size': 448,
            'num_classes': self._num_classes,
            'batch_size': 1
        }
        self._net_params = {
            'cell_size': 7,
            'boxes_per_cell': 2,
            'weight_decay': 0.0005
        }
        self._net = YoloTinyNet(self._common_params,
                                self._net_params,
                                test=True)

    def predict_object(self, image):
        predicts = self._net.inference(image)
        return predicts

    def process_predicts(self, resized_img, predicts, thresh=0.2):
        """
        Process the raw object-detection predictions for a single input image.

        Args:
            resized_img: resized source image.
            predicts: output tensor of the model.
            thresh: confidence threshold for keeping a bounding box.
        Return:
            predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
        """
        cls_num = self._num_classes
        bbx_per_cell = self._net_params["boxes_per_cell"]
        cell_size = self._net_params["cell_size"]
        img_size = self._common_params["image_size"]
        p_classes = predicts[0, :, :, 0:cls_num]
        C = predicts[0, :, :, cls_num:cls_num +
                     bbx_per_cell]  # two bounding boxes per cell.
        coordinate = predicts[0, :, :, cls_num +
                              bbx_per_cell:]  # all bounding box positions.

        p_classes = np.reshape(p_classes, (cell_size, cell_size, 1, cls_num))
        C = np.reshape(C, (cell_size, cell_size, bbx_per_cell, 1))

        P = C * p_classes  # confidence for every class of every bounding box, shape (cell_size, cell_size, boxes_per_cell, num_classes).

        predicts_dict = {}
        for i in range(cell_size):
            for j in range(cell_size):
                temp_data = np.zeros_like(P, np.float32)
                temp_data[i, j, :, :] = P[i, j, :, :]
                position = np.argmax(
                    temp_data
                )  # flat index of the highest-confidence box/class in cell (i, j).
                index = np.unravel_index(position, P.shape)

                if P[index] > thresh:
                    class_num = index[-1]
                    coordinate = np.reshape(
                        coordinate, (cell_size, cell_size, bbx_per_cell, 4)
                    )  # (cell_size, cell_size, bbox_num_per_cell, coordinate)[xmin, ymin, xmax, ymax]
                    max_coordinate = coordinate[index[0], index[1],
                                                index[2], :]

                    xcenter = max_coordinate[0]
                    ycenter = max_coordinate[1]
                    w = max_coordinate[2]
                    h = max_coordinate[3]

                    xcenter = (index[1] + xcenter) * (1.0 * img_size /
                                                      cell_size)
                    ycenter = (index[0] + ycenter) * (1.0 * img_size /
                                                      cell_size)

                    w = w * img_size
                    h = h * img_size
                    xmin = 0 if (xcenter - w / 2.0 < 0) else (xcenter -
                                                              w / 2.0)
                    ymin = 0 if (ycenter - h / 2.0 < 0) else (ycenter -
                                                              h / 2.0)
                    # shape is (height, width, channels): clip x against
                    # shape[1] and y against shape[0].
                    xmax = resized_img.shape[1] if (
                        xmin + w) > resized_img.shape[1] else (xmin + w)
                    ymax = resized_img.shape[0] if (
                        ymin + h) > resized_img.shape[0] else (ymin + h)

                    class_name = self._classes_name[class_num]
                    predicts_dict.setdefault(class_name, [])
                    predicts_dict[class_name].append(
                        [int(xmin),
                         int(ymin),
                         int(xmax),
                         int(ymax), P[index]])

        return predicts_dict

    def non_max_suppress(self, predicts_dict, threshold=0.5):
        """
        Apply non-maximum suppression to the predicted bounding boxes.
        Args:
            predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
            threshold: IoU threshold.
        Return:
            predicts_dict processed by non-maximum suppression
        """
        for object_name, bbox in predicts_dict.items():
            bbox_array = np.array(bbox, dtype=float)  # np.float is deprecated
            x1, y1, x2, y2 = (bbox_array[:, 0], bbox_array[:, 1],
                              bbox_array[:, 2], bbox_array[:, 3])
            scores = bbox_array[:, 4]
            areas = (x2 - x1 + 1) * (y2 - y1 + 1)
            order = scores.argsort()[::-1]
            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                xx1 = np.maximum(x1[i], x1[order[1:]])
                yy1 = np.maximum(y1[i], y1[order[1:]])
                xx2 = np.minimum(x2[i], x2[order[1:]])
                yy2 = np.minimum(y2[i], y2[order[1:]])
                inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(
                    0.0, yy2 - yy1 + 1)
                iou = inter / (areas[i] + areas[order[1:]] - inter)
                indexs = np.where(iou <= threshold)[0]
                order = order[indexs + 1]
            bbox = bbox_array[keep]
            predicts_dict[object_name] = bbox.tolist()
        return predicts_dict

    def plot_result(self, src_img, predicts_dict, save_name=None):
        """
        plot bounding boxes on source image.
        Args:
            src_img: source image
            predicts_dict: {"stick": [[x1, y1, x2, y2, scores1], [...]]}.
        """
        height_ratio = 1.0 * src_img.shape[0] / self._common_params[
            "image_size"]
        width_ratio = 1.0 * src_img.shape[1] / self._common_params["image_size"]
        for object_name, bbox in predicts_dict.items():
            for box in bbox:
                xmin, ymin, xmax, ymax, score = box
                src_xmin = xmin * width_ratio
                src_ymin = ymin * height_ratio
                src_xmax = xmax * width_ratio
                src_ymax = ymax * height_ratio
                score = float("%.3f" % score)

                cv2.rectangle(src_img, (int(src_xmin), int(src_ymin)),
                              (int(src_xmax), int(src_ymax)), (0, 0, 255))
                cv2.putText(src_img, object_name + str(score),
                            (int(src_xmin), int(src_ymin)), 1, 2, (0, 0, 255))

        cv2.imshow("result", src_img)
        if save_name is not None:
            cv2.imwrite(save_name, src_img)

    def object_track(self, predicts_dict, object_name="cup"):
        """track the specified object with maximum confidence.
        Args:
            object_name: object name.
        """
        if self._motionProxy.getStiffnesses("Head") < 1.0:
            self._motionProxy.setStiffnesses("Head", 1.0)

        if self._motionProxy.getStiffnesses("LArm") < 1.0:
            self._motionProxy.setStiffnesses("LArm", 1.0)
        img_size = self._common_params["image_size"]
        img_center_x = img_size / 2
        img_center_y = img_size / 2

        if object_name in predicts_dict:  # dict.has_key() is Python 2 only
            predict_coords = predicts_dict[object_name]
            predict_coords.sort(key=lambda coord: coord[-1], reverse=True)
            predict_coord = predict_coords[0]
            xmin, ymin, xmax, ymax, _ = predict_coord
            center_x = (xmin + xmax) / 2
            center_y = (ymin + ymax) / 2

            angle_yaw = (center_x -
                         img_center_x) / (img_size) * self._cameraYawRange
            angle_pitch = (center_y -
                           img_center_y) / (img_size) * self._cameraPitchRange
            self._motionProxy.angleInterpolation(
                ["HeadPitch", "HeadYaw"],
                [0.8 * angle_pitch, -0.8 * angle_yaw], 0.5, False)
            head_pitch, head_yaw = self._motionProxy.getAngles("Head", False)
            arm_angle = [
                head_yaw - 7.0 / 180 * np.pi,  # 7/180 would be 0 under Python 2 integer division
                head_pitch, -1.15, -0.035, -1.54, 0.01
            ]
            self._motionProxy.setAngles("LArm", arm_angle, 0.2)
            self._motionProxy.openHand("LHand")
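
A minimal end-to-end sketch of driving the class above; the IP, class list, checkpoint path, and test image are placeholders, and constructing the object assumes a reachable NAO robot:

classes_name = ["stick", "cup"]  # illustrative class list
detector = MultiObjectDetection("192.168.1.100", classes_name)  # placeholder IP

image = tf.placeholder(tf.float32, (1, 448, 448, 3))
predicts = detector.predict_object(image)
saver = tf.train.Saver(detector._net.trainable_collection)

with tf.Session() as sess:
    saver.restore(sess, "models/train/model.ckpt")  # illustrative path

    src_img = cv2.imread("test.jpg")
    resized_img = cv2.resize(src_img, (448, 448))
    np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB).astype(np.float32)
    np_img = np.reshape(np_img / 255.0 * 2 - 1, (1, 448, 448, 3))

    np_predict = sess.run(predicts, feed_dict={image: np_img})
    predicts_dict = detector.process_predicts(resized_img, np_predict)
    predicts_dict = detector.non_max_suppress(predicts_dict)
    detector.plot_result(src_img, predicts_dict, save_name="result.jpg")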
Example #8
    xmax = xmin + w
    ymax = ymin + h
    # return the top-left and bottom-right corner coordinates
    return xmin, ymin, xmax, ymax, class_num, confidence


common_params = {'image_size': 448, 'num_classes': 20, 'batch_size': 1}

net_params = {
    'cell_size': 7,
    'boxes_per_cell': 2,
    'weight_decay': 0.0005
}  # network architecture parameters

net = YoloTinyNet(common_params, net_params, test=True)  # build the network from the param dicts

image = tf.placeholder(tf.float32, (1, 448, 448, 3))  # input image size
predicts = net.inference(image)
# print(predicts.shape)
sess = tf.Session()

np_img = cv2.imread('dog.jpg')
resized_img = cv2.resize(np_img, (448, 448))
np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB)  # convert BGR -> RGB

np_img = np_img.astype(np.float32)

np_img = np_img / 255.0 * 2 - 1  # normalize to [-1, 1]
np_img = np.reshape(np_img, (1, 448, 448, 3))
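
Like the other fragments, Example #8 ends before inference; the remaining steps follow Example #3, keeping the extra `confidence` value this variant's `process_predicts` returns (checkpoint path and output filename illustrative):

saver = tf.train.Saver(net.trainable_collection)
saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt')  # illustrative path

np_predict = sess.run(predicts, feed_dict={image: np_img})
xmin, ymin, xmax, ymax, class_num, confidence = process_predicts(np_predict)

class_name = classes_name[class_num]  # assumes a VOC class-name list
label = "%s %.3f" % (class_name, confidence)
cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255))
cv2.putText(resized_img, label, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255))
cv2.imwrite('dog_out.jpg', resized_img)
sess.close()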
Example #9
class ObjectDetection(VisualBasis):

    def __init__(self, IP, cameraId=vd.kTopCamera, resolution=vd.kVGA):
        super(ObjectDetection, self).__init__(IP, cameraId, resolution)
        self._boundRect = []
        self._cropKeep = 1
        self._stickAngle = None # rad
        #self._classes_name = ["stick"]
        self._common_params = {'image_size': 448, 'num_classes': 1, 
                'batch_size':1}
        self._net_params = {'cell_size': 7, 'boxes_per_cell':2, 'weight_decay': 0.0005}
        self._net = YoloTinyNet(self._common_params, self._net_params, test=True)
        #self._modelFile = "/home/meringue/Documents/python-nao-golf/yoloNet/models/train/model.ckpt-95000"
        #self._objectRect = [0, 0, 0, 0]
        self._objectName = None

    def predict_single_object(self, image):
        predicts = self._net.inference(image)
        return predicts


    def process_predicts(self, predicts):
        p_classes = predicts[0, :, :, 0:1]
        C = predicts[0, :, :, 1:3]
        coordinate = predicts[0, :, :, 3:]

        p_classes = np.reshape(p_classes, (7, 7, 1, 1))
        C = np.reshape(C, (7, 7, 2, 1))

        P = C * p_classes

        index = np.argmax(P)
        print("confidence = ", np.max(P))
        index = np.unravel_index(index, P.shape)

        class_num = index[3]
        coordinate = np.reshape(coordinate, (7, 7, 2, 4))
        max_coordinate = coordinate[index[0], index[1], index[2], :]
        xcenter = max_coordinate[0]
        ycenter = max_coordinate[1]
        w = max_coordinate[2]
        h = max_coordinate[3]

        xcenter = (index[1] + xcenter) * (448/7.0)
        ycenter = (index[0] + ycenter) * (448/7.0)

        w = w * 448
        h = h * 448

        xmin = xcenter - w/2.0
        ymin = ycenter - h/2.0

        xmax = xmin + w
        ymax = ymin + h

        return [xmin, ymin, xmax, ymax], class_num

    def showDetectResult(self, frame, rect, object_name):
        object_min_xy = (int(rect[0]), int(rect[1]))
        object_max_xy = (int(rect[2]), int(rect[3]))
        cv2.rectangle(frame, object_min_xy, object_max_xy, (0, 0, 255))
        cv2.putText(frame, object_name, object_min_xy, 2, 2, (0, 0, 255))
        cv2.imshow("detect result", frame)
        #cv2.waitKey(10)
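
A sketch of driving ObjectDetection end to end; the robot IP and test image are placeholders, and the checkpoint path comes from the commented-out _modelFile above:

detector = ObjectDetection("192.168.1.100")  # placeholder IP, assumes a reachable NAO
image = tf.placeholder(tf.float32, (1, 448, 448, 3))
predicts = detector.predict_single_object(image)
saver = tf.train.Saver(detector._net.trainable_collection)

with tf.Session() as sess:
    saver.restore(sess, "models/train/model.ckpt-95000")  # path from the _modelFile comment

    frame = cv2.imread("stick.jpg")  # placeholder image
    resized = cv2.resize(frame, (448, 448))
    np_img = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32)
    np_img = np.reshape(np_img / 255.0 * 2 - 1, (1, 448, 448, 3))

    np_predict = sess.run(predicts, feed_dict={image: np_img})
    rect, class_num = detector.process_predicts(np_predict)
    detector.showDetectResult(resized, rect, "stick")  # "stick" from the commented class list
    cv2.waitKey(0)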