Example #1
    def getitem_yolo(self):
        for i in range(self.num_batches):
            img = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                inp_dim = int(opt.inp_dim)
                im_name_k = self.imglist[k].rstrip('\n').rstrip('\r')
                im_name_k = os.path.join(self.img_dir, im_name_k)
                img_k, orig_img_k, im_dim_list_k = prep_image(
                    im_name_k, inp_dim)
                # The prep_image tensor is replaced by the torchvision transform of the same image
                img_k = self.transform(Image.open(im_name_k)).unsqueeze(0)

                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list_ = im_dim_list

            while self.Q.full():
                time.sleep(2)

            self.Q.put((img, orig_img, im_name, im_dim_list))
Example #2
    def getitem_mtcnn(self):
        """Same as getitem_yolo()"""
        for i in range(self.num_batches):
            img = []
            orig_img = []
            im_name = []
            im_dim_list = []
            for k in range(i * self.batchSize,
                           min((i + 1) * self.batchSize, self.datalen)):
                inp_dim = int(opt.inp_dim)
                im_name_k = self.imglist[k].rstrip('\n').rstrip('\r')
                im_name_k = os.path.join(self.img_dir, im_name_k)

                try:
                    img_k, orig_img_k, im_dim_list_k = prep_image(
                        im_name_k, inp_dim)
                except Exception as e:
                    print(im_name_k, e)
                    continue

                img.append(img_k)
                orig_img.append(orig_img_k)
                im_name.append(im_name_k)
                im_dim_list.append(im_dim_list_k)

            with torch.no_grad():
                # Human Detection
                img = torch.cat(img)
                im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                im_dim_list_ = im_dim_list

            while self.Q.full():
                time.sleep(2)
            self.Q.put((img, orig_img, im_name, im_dim_list))
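Examples #1 and #2 are queue producers: each iteration builds one batch and blocks while self.Q is full. A minimal, self-contained sketch of the surrounding producer/consumer pattern they assume (the class, queue, and batch contents here are placeholders, not taken from the snippets above):

import queue
import threading
import time

class QueuedLoader:
    # Hypothetical wrapper; getitem_yolo()/getitem_mtcnn() above play the role of _produce().
    def __init__(self, num_batches, maxsize=50):
        self.num_batches = num_batches
        self.Q = queue.Queue(maxsize=maxsize)

    def _produce(self):
        for i in range(self.num_batches):
            batch = ('img', 'orig_img', 'im_name', 'im_dim_list')  # stand-in for real tensors
            while self.Q.full():
                time.sleep(2)
            self.Q.put(batch)

    def start(self):
        threading.Thread(target=self._produce, daemon=True).start()
        return self

    def read(self):
        return self.Q.get()  # consumer side: blocks until the next batch is ready

loader = QueuedLoader(num_batches=3).start()
for _ in range(3):
    print(loader.read())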
Example #3
def detect_person(frame, model, inp_dim, confidence, num_classes, nms_thesh):
    img, orig_im, dim = prep_image(frame, inp_dim)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)
    im_dim = im_dim.cuda()
    img = img.cuda()
    with torch.no_grad():
        output = model(Variable(img), True)
    output = write_results(output,
                           confidence,
                           num_classes,
                           nms=True,
                           nms_conf=nms_thesh)
    if type(output) == int:
        curr = []
        return curr
    im_dim = im_dim.repeat(output.size(0), 1)
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])
    curr = list(map(lambda x: write(x), output))
    return curr
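The box arithmetic in detect_person() (and in Examples #5 and #10) undoes the letterbox transform: predictions live on the padded inp_dim x inp_dim network input and are shifted and scaled back to original image coordinates. A standalone sketch with made-up numbers (the snippets index columns 1..4 because their column 0 holds the batch index; here the boxes are plain (x1, y1, x2, y2)):

import torch

inp_dim = 416                                        # square network input size
im_dim = torch.tensor([[640.0, 480.0]])              # original (w, h) of one image
boxes = torch.tensor([[100.0, 80.0, 300.0, 360.0]])  # (x1, y1, x2, y2) in network coords

# scale used when the image was letterboxed into the inp_dim x inp_dim canvas
scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

# subtract the horizontal / vertical padding, then undo the scaling
boxes[:, [0, 2]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
boxes[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
boxes /= scaling_factor

# clamp to the original image boundaries
boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(0.0, im_dim[0, 0])
boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0.0, im_dim[0, 1])
print(boxes)  # boxes now in original-image pixel coordinates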
Example #4
    def getitem_yolo(self, index):
        inp_dim = int(opt.inp_dim)
        im_name = self.imglist[index].rstrip('\n').rstrip('\r')
        im_name = os.path.join(self.img_dir, im_name)
        im, orig_img, im_dim = prep_image(im_name, inp_dim)
        #im_dim = torch.FloatTensor([im_dim]).repeat(1, 2)

        inp = load_image(im_name)
        return im, inp, orig_img, im_name, im_dim
Example #5
    def detect_one_img(self, img_name):
        """
        Detect bboxs in one image
        Input: 'str', full path of image
        Output: '[{"category_id":1,"score":float,"bbox":[x,y,w,h],"image_id":str},...]',
        The output results are similar with coco results type, except that image_id uses full path str
        instead of coco %012d id for generalization. 
        """
        args = self.detector_opt
        _CUDA = True
        if args:
            if args.gpus[0] < 0:
                _CUDA = False
        if not self.model:
            self.load_model()
        if isinstance(self.model, torch.nn.DataParallel):
            self.model = self.model.module
        dets_results = []
        # pre-process(scale, normalize, ...) the image
        img, orig_img, img_dim_list = prep_image(img_name, self.inp_dim)
        with torch.no_grad():
            img_dim_list = torch.FloatTensor([img_dim_list]).repeat(1, 2)
            img = img.to(args.device) if args else img.cuda()
            prediction = self.model(img, args=args)
            # do nms to the detection results, only human category is left
            dets = self.dynamic_write_results(prediction, self.confidence,
                                              self.num_classes, nms=True,
                                              nms_conf=self.nms_thres)
            if isinstance(dets, int) or dets.shape[0] == 0:
                return None
            dets = dets.cpu()

            img_dim_list = torch.index_select(img_dim_list, 0, dets[:, 0].long())
            scaling_factor = torch.min(self.inp_dim / img_dim_list, 1)[0].view(-1, 1)
            dets[:, [1, 3]] -= (self.inp_dim - scaling_factor * img_dim_list[:, 0].view(-1, 1)) / 2
            dets[:, [2, 4]] -= (self.inp_dim - scaling_factor * img_dim_list[:, 1].view(-1, 1)) / 2
            dets[:, 1:5] /= scaling_factor
            for i in range(dets.shape[0]):
                dets[i, [1, 3]] = torch.clamp(dets[i, [1, 3]], 0.0, img_dim_list[i, 0])
                dets[i, [2, 4]] = torch.clamp(dets[i, [2, 4]], 0.0, img_dim_list[i, 1])

                # write results
                det_dict = {}
                x = float(dets[i, 1])
                y = float(dets[i, 2])
                w = float(dets[i, 3] - dets[i, 1])
                h = float(dets[i, 4] - dets[i, 2])
                det_dict["category_id"] = 1
                det_dict["score"] = float(dets[i, 5])
                det_dict["bbox"] = [x, y, w, h]
                det_dict["image_id"] = int(os.path.basename(img_name).split('.')[0])
                dets_results.append(det_dict)

            return dets_results
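detect_one_img() returns a list of COCO-style detection dicts as sketched in its docstring. A made-up instance, just to show the shape of the data (note that the snippet itself writes image_id as an int parsed from the file name, while its docstring describes a full-path string):

import json

dets_results = [
    {"category_id": 1, "score": 0.97, "bbox": [103.2, 58.7, 85.0, 212.4],
     "image_id": "/data/images/000001.jpg"},
]
print(json.dumps(dets_results, indent=2))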
Example #6
    def getitem_yolo(self, index):
        inp_dim = int(opt.inp_dim)
        im_name = self.imglist[index].rstrip('\n').rstrip('\r')
        im_name = os.path.join(self.img_dir, im_name)

        # For data preprocessing

        im, orig_img, im_dim = prep_image(im_name, inp_dim)
        im_dim = torch.FloatTensor([im_dim]).repeat(1, 2)
        inp = self.transform(Image.open(im_name))

        # inp = load_image(im_name)
        return im, inp, orig_img, im_name, im_dim
Example #7
    def image_preprocess(self, img_source):
        """
        Pre-process the img before fed to the object detection network
        Input: image name(str) or raw image data(ndarray or torch.Tensor,channel GBR)
        Output: pre-processed image data(torch.FloatTensor,(1,3,h,w))
        """
        if isinstance(img_source, str):
            img, orig_img, im_dim_list = prep_image(img_source, self.inp_dim)
        elif isinstance(img_source, torch.Tensor) or isinstance(img_source, np.ndarray):
            img, orig_img, im_dim_list = prep_frame(img_source, self.inp_dim)
        else:
            raise IOError('Unknown image source type: {}'.format(type(img_source)))

        return img
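prep_image() / prep_frame() themselves are not shown in any of these examples. A minimal sketch of the kind of letterbox preprocessing they are used for here (an assumption about their behavior, not the actual implementation): resize with the aspect ratio preserved, pad to a square inp_dim canvas, and convert the BGR HWC uint8 frame into an RGB CHW float tensor with a batch dimension.

import cv2
import numpy as np
import torch

def letterbox_prep(frame, inp_dim):
    # hypothetical stand-in for prep_frame(): returns (network input, original frame, (w, h))
    h, w = frame.shape[:2]
    scale = min(inp_dim / w, inp_dim / h)
    new_w, new_h = int(w * scale), int(h * scale)
    resized = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    canvas = np.full((inp_dim, inp_dim, 3), 128, dtype=np.uint8)   # gray padding
    top, left = (inp_dim - new_h) // 2, (inp_dim - new_w) // 2
    canvas[top:top + new_h, left:left + new_w] = resized
    img = canvas[:, :, ::-1].transpose(2, 0, 1).copy()             # BGR HWC -> RGB CHW
    img = torch.from_numpy(img).float().div(255.0).unsqueeze(0)    # (1, 3, inp_dim, inp_dim)
    return img, frame, (w, h)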
Example #8
    def cnv_img(Frame):
        img = []
        orig_img = []
        im_dim_list = []
#            for k in range(i*self.batchSize, min((i +  1)*self.batchSize, self.datalen)):
        inp_dim = int(opt.inp_dim)

        img_k, orig_img_k, im_dim_list_k = prep_image(Frame, inp_dim)
    
        img.append(img_k)
        orig_img.append(orig_img_k)
        im_dim_list.append(im_dim_list_k)
        with torch.no_grad():
            # Human Detection
            img = torch.cat(img)
            im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
        return img, orig_img, im_dim_list
Example #9
def main():
    global confidence, nms_thesh, num_classes, classes

    # Load configuration
    model = erfnet.Net(11)
    model = torch.nn.DataParallel(model.cuda(), [0])
    model.eval()

    detect_model = detect_net.DetectionNetwork()

    # Inference
    # Data loading
    matches = [
        '/home/vertensj/Documents/annotated_real_data/processed/images/track3_withCP/image_paths.txt'
    ]
    list_dataset_paths = []  # Stores pairs of consecutive image paths

    for f in matches:
        with open(f) as fh:
            content = fh.readlines()
        content = [x.strip() for x in content]

        for line in content:
            frames = line.split(" ")  # Current first, then previous
            list_dataset_paths.append(frames[0])  # Image, Seg, Dt

    for path in list_dataset_paths:

        # Data preparation #############################################################################################
        image_prepared, org_img, dim = prep_image(path, detect_model.inp_dim)
        image_prepared = image_prepared.cuda()
        im_dim_list = [dim]
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2).cuda()

        # Detection Inference ##########################################################################################
        detect_output = detect_model.detect(image_prepared, im_dim_list)
        if detect_output is not None:
            detect_image = detect_model.visualize_outputs(
                detect_output, org_img)

            cv2.imshow('detect_image', detect_image)
            cv2.waitKey()
Example #10
    def get_pose(self, img_names):
        if len(img_names) > 1:
            start_lc = 4000
            start_rc = 4000
            now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
            print('========START-Ten========')
            final_result = []
            vis_images = []
            height_difference = []
            for img_index in range(len(img_names)):
                print('--------------------')
                img_name = img_names[img_index]
                try:
                    img, orig_img, im_name, im_dim_list = [], [], [], []
                    inp_dim = int(self.args.inp_dim)
                    im_name_k = img_name
                    img_k, orig_img_k, im_dim_list_k = prep_image(
                        im_name_k, inp_dim)
                    img.append(img_k)
                    orig_img.append(orig_img_k)
                    im_name.append(im_name_k)
                    im_dim_list.append(im_dim_list_k)
                except Exception:
                    print('index-{}: image has a problem'.format(img_index))
                    final_result.append((None, None))
                    continue
                with torch.no_grad():
                    img = torch.cat(img)
                    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

                    img = img.cuda()
                    prediction = self.det_model(img, CUDA=True)
                    dets = dynamic_write_results(prediction,
                                                 self.args.confidence,
                                                 self.args.num_classes,
                                                 nms=True,
                                                 nms_conf=self.args.nms_thesh)
                    if isinstance(dets, int) or dets.shape[0] == 0:
                        print('index-{}: No person detected'.format(img_index))
                        final_result.append((None, None))
                        height_difference.append(None)
                        continue
                    dets = dets.cpu()
                    im_dim_list = torch.index_select(im_dim_list, 0,
                                                     dets[:, 0].long())
                    scaling_factor = torch.min(self.det_inp_dim / im_dim_list,
                                               1)[0].view(-1, 1)
                    dets[:, [1, 3]] -= (self.det_inp_dim - scaling_factor *
                                        im_dim_list[:, 0].view(-1, 1)) / 2
                    dets[:, [2, 4]] -= (self.det_inp_dim - scaling_factor *
                                        im_dim_list[:, 1].view(-1, 1)) / 2
                    dets[:, 1:5] /= scaling_factor
                    for j in range(dets.shape[0]):
                        dets[j, [1, 3]] = torch.clamp(dets[j, [1, 3]], 0.0,
                                                      im_dim_list[j, 0])
                        dets[j, [2, 4]] = torch.clamp(dets[j, [2, 4]], 0.0,
                                                      im_dim_list[j, 1])
                    boxes = dets[:, 1:5]
                    scores = dets[:, 5:6]
                    k = 0
                    boxes_k = boxes[dets[:, 0] == k]
                    inps = torch.zeros(boxes_k.size(0), 3, self.args.inputResH,
                                       self.args.inputResW)
                    pt1 = torch.zeros(boxes_k.size(0), 2)
                    pt2 = torch.zeros(boxes_k.size(0), 2)

                    orig_img = orig_img[k]
                    im_name = im_name[k]
                    boxes = boxes_k
                    scores = scores[dets[:, 0] == k]
                    inp = im_to_torch(cv2.cvtColor(orig_img,
                                                   cv2.COLOR_BGR2RGB))
                    inps, pt1, pt2 = crop_from_dets(inp, boxes, inps, pt1, pt2)

                    batchSize = self.args.posebatch
                    datalen = inps.size(0)
                    leftover = 0
                    if (datalen) % batchSize:
                        leftover = 1
                    num_batches = datalen // batchSize + leftover

                    hm = []
                    for j in range(num_batches):
                        inps_j = inps[j * batchSize:min(
                            (j + 1) * batchSize, datalen)].cuda()
                        hm_j = self.pose_model(inps_j)
                        hm.append(hm_j)
                    hm = torch.cat(hm)
                    hm_data = hm.cpu()
                    orig_img = np.array(orig_img, dtype=np.uint8)
                    im_name = im_name.split('/')[-1]
                    preds_hm, preds_img, preds_scores = getPrediction(
                        hm_data, pt1, pt2, self.args.inputResH,
                        self.args.inputResW, self.args.outputResH,
                        self.args.outputResW)
                    result = pose_nms(boxes, scores, preds_img, preds_scores)
                    result = {'imgname': im_name, 'result': result}
                    img = vis_frame(orig_img, result)
                    vis_images.append(img)
                    output_dir = os.path.join(self.args.outputpath, 'vis')
                    output_dir_raw = os.path.join(self.args.outputpath, 'raw')
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    if not os.path.exists(output_dir_raw):
                        os.makedirs(output_dir_raw)
                width = img.shape[1]
                keypoints = [res['keypoints'][0] for res in result['result']]
                distance = [xy[0] - width / 2 for xy in keypoints]
                distance = torch.tensor([torch.abs(m) for m in distance])
                indice = torch.argsort(distance)[0]
                pose_result = result['result'][indice]['keypoints']
                # left_arm = pose_result[[6, 8, 10]].numpy()
                # right_arm = pose_result[[5, 7, 9]].numpy()
                # ['Nose', 'LEye', 'REye', 'LEar', 'REar', 'LShoulder', 'RShoulder', 'LElbow', 'RElbow', 'LWrist', 'RWrist', 'LHip',
                # 'RHip', 'LKnee', 'RKnee', 'LAnkle', 'RAnkle']
                left_arm = pose_result[[10]].numpy().astype(int)
                right_arm = pose_result[[9]].numpy().astype(int)
                left_arm_c_y = np.mean(left_arm, axis=0)[1]
                right_arm_c_y = np.mean(right_arm, axis=0)[1]
                # left_arm_c = tuple(np.mean(left_arm, axis=0).astype(int))
                # right_arm_c = tuple(np.mean(right_arm, axis=0).astype(int))
                left_arm_c = tuple(left_arm[0])
                right_arm_c = tuple(right_arm[0])
                hd = np.abs(left_arm_c_y - right_arm_c_y)
                height_difference.append(hd)

                cv2.circle(img, left_arm_c, 10, (0, 255, 0), -1, 8)
                cv2.circle(img, right_arm_c, 10, (0, 255, 0), -1, 8)
                log_vis_name = now_time + '-' + im_name
                cv2.imwrite(os.path.join(output_dir_raw, log_vis_name), orig_img)
                cv2.imwrite(os.path.join(output_dir, log_vis_name), img)
                if start_lc == 4000 and start_rc == 4000:
                    start_lc = left_arm_c_y
                    start_rc = right_arm_c_y
                    left_move = 0
                    right_move = 0
                else:
                    left_move = left_arm_c_y - start_lc
                    right_move = right_arm_c_y - start_rc
                print('index-{}--{}: left_c {:0f},right_c {:0f}'.format(
                    img_index, im_name, left_arm_c_y, right_arm_c_y))
                print('index-{}--{}: start_lc {:0f},start_rc {:0f}'.format(
                    img_index, im_name, start_lc, start_rc))
                print('index-{}--{}: left_move {:0f},right_move {:0f}'.format(
                    img_index, im_name, left_move, right_move))
                print('index-{}--{}: height_difference {:0f}'.format(
                    img_index, im_name, hd))
                final_result.append((left_move, right_move))
            return final_result, vis_images, now_time, height_difference

        elif len(img_names) == 1:
            now_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
            print('========START-One========')
            final_result = []
            vis_images = []
            height_difference = []
            for img_index in range(len(img_names)):
                img_name = img_names[img_index]
                try:
                    img, orig_img, im_name, im_dim_list = [], [], [], []
                    inp_dim = int(self.args.inp_dim)
                    im_name_k = img_name
                    img_k, orig_img_k, im_dim_list_k = prep_image(
                        im_name_k, inp_dim)
                    img.append(img_k)
                    orig_img.append(orig_img_k)
                    im_name.append(im_name_k)
                    im_dim_list.append(im_dim_list_k)
                except Exception:
                    print('index-{}: image has a problem'.format(img_index))
                    final_result.append((None, None))
                    continue
                with torch.no_grad():
                    img = torch.cat(img)
                    vis_img = img.numpy()[0]
                    vis_img = np.transpose(vis_img, (1, 2, 0))
                    vis_img = vis_img[:, :, ::-1]
                    vis_images.append(vis_img)
                    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
                    img = img.cuda()
                    prediction = self.det_model(img, CUDA=True)
                    dets = dynamic_write_results(prediction,
                                                 self.args.confidence,
                                                 self.args.num_classes,
                                                 nms=True,
                                                 nms_conf=self.args.nms_thesh)
                    if isinstance(dets, int) or dets.shape[0] == 0:
                        print('index-{}: No person detected'.format(img_index))
                        final_result.append((None, None))
                    else:
                        print('index-{}: Person detected'.format(img_index))
                        final_result.append((4, 4))
            return final_result, vis_images, now_time, height_difference
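The pose step in Example #10 runs the cropped inputs through the pose model in chunks of args.posebatch and concatenates the resulting heatmaps. The chunking itself is just the following pattern (standalone sketch; the identity "model" is a placeholder):

import torch

def run_in_batches(inps, model, batch_size):
    # split along dim 0 into batch_size-sized chunks, run each, concatenate the outputs
    datalen = inps.size(0)
    num_batches = datalen // batch_size + (1 if datalen % batch_size else 0)
    outs = []
    for j in range(num_batches):
        chunk = inps[j * batch_size:min((j + 1) * batch_size, datalen)]
        outs.append(model(chunk))
    return torch.cat(outs)

hm = run_in_batches(torch.randn(10, 3, 64, 48), lambda x: x, batch_size=4)
print(hm.shape)  # torch.Size([10, 3, 64, 48])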
Example #11
def main():
    global args
    args = parser.parse_args()

    # Yolo
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    model = Darknet(args.config_file)
    model.load_weights(args.weights_file)

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])

    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    # Connect
    client = paho.Client()
    host, port = args.broker_url.split(':')
    client.connect(host, int(port))

    # subscribe a system messages
    client.message_callback_add("$SYS/#", system_message)
    client.subscribe("$SYS/#")

    # Open rtsp stream
    cap = cv2.VideoCapture(args.input_url)

    assert cap.isOpened(), 'Cannot capture source {}'.format(args.input_url)

    # Inspect input stream
    input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    input_fps = cap.get(cv2.CAP_PROP_FPS)
    print("[input stream] width: {}, height: {}, fps: {}".format(
        input_width, input_height, input_fps))

    # Open output stream
    output_command = stream_factory(args.output_url, input_width, input_height,
                                    input_fps)
    print(output_command)
    output_stream = sp.Popen(output_command, stdin=sp.PIPE, stderr=sp.PIPE)

    frames = 0
    start = time.time()

    while cap.isOpened():
        ret, frame = cap.read()  # frame size: 640x360x3(=691200)
        if ret:
            # Our detect operations on the frame come here

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim

            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('yolo/data/coco.names')
            colors = pkl.load(open("yolo/pallete", "rb"))

            # Overlay on screen
            list(map(lambda x: write(x, orig_im, classes, colors), output))
            # Send a BBoxes

            # Display the resulting frame
            cv2.imshow("frame", orig_im)
            frames += 1
            print("FPS of the video is {:5.2f}, size: {}".format(
                frames / (time.time() - start), orig_im.size))

            # Write rtmp stream
            output_stream.stdin.write(frame.tobytes())
        else:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Close
    cap.release()
    cv2.destroyAllWindows()
    client.disconnect()
Example #12
    if CUDA:
        model.cuda()

    model.eval()

    cap = cv2.VideoCapture(args.video)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(img)
            output = sift_results(output,
                                  confidence,
                                  num_classes,
                                  nms=True,
                                  nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(