Example #1
def test_TRAIN_FROM_WEIGHTS_CLI__LOAD_CHECKPOINT_RETURNPREDICT_YOLOv2():
    #Test training using pre-generated weights for tiny-yolo-voc
    #NOTE: This test verifies that the code executes properly, and that the expected checkpoint file (tiny-yolo-voc-20.meta in this case) is generated.
    #      In addition, predictions are generated using the checkpoint file to verify that training completed successfully.

    testString = "flow --model {0} --load {1} --train --dataset {2} --annotation {3} --epoch 20".format(tiny_yolo_voc_CfgPath, tiny_yolo_voc_WeightPath, os.path.join(buildPath, "test", "training", "images"), os.path.join(buildPath, "test", "training", "annotations"))
    with pytest.raises(SystemExit):
        executeCLI(testString)

    checkpointPath = os.path.join(buildPath, "ckpt", "tiny-yolo-voc-20.meta")
    assert os.path.exists(checkpointPath), "Expected output checkpoint file: {0} was not found.".format(checkpointPath)

    #Using trained weights
    options = {"model": tiny_yolo_voc_CfgPath, "load": 20, "config": generalConfigPath, "threshold": 0.1}
    tfnet = TFNet(options)

    #Make sure predictions very roughly match the expected values for image with bike and person
    imgcv = cv2.imread(trainImgBikePerson["path"])
    loadedPredictions = tfnet.return_predict(imgcv)
    assert compareObjectData(trainImgBikePerson["expected-objects"]["tiny-yolo-voc"], loadedPredictions, trainImgBikePerson["width"], trainImgBikePerson["height"], 0.7, 0.25), "Generated object predictions from training (for image with person on the bike) were not anywhere close to what they are expected to be.\nTraining may not have completed successfully."
    differentThanExpectedBike = compareObjectData(trainImgBikePerson["expected-objects"]["tiny-yolo-voc"], loadedPredictions, trainImgBikePerson["width"], trainImgBikePerson["height"], 0.01, 0.001)

    #Make sure predictions very roughly match the expected values for image with horse and person
    imgcv = cv2.imread(trainImgHorsePerson["path"])
    loadedPredictions = tfnet.return_predict(imgcv)
    assert compareObjectData(trainImgHorsePerson["expected-objects"]["tiny-yolo-voc"], loadedPredictions, trainImgHorsePerson["width"], trainImgHorsePerson["height"], 0.7, 0.25), "Generated object predictions from training (for image with person on the horse) were not anywhere close to what they are expected to be.\nTraining may not have completed successfully."
    differentThanExpectedHorse = compareObjectData(trainImgHorsePerson["expected-objects"]["tiny-yolo-voc"], loadedPredictions, trainImgHorsePerson["width"], trainImgHorsePerson["height"], 0.01, 0.001)

    assert not (differentThanExpectedBike and differentThanExpectedHorse), "The generated object predictions for both images appear to be exactly the same as the ones generated with the original weights.\nTraining may not have completed successfully.\n\nNOTE: It is possible this is a fluke error and training did complete properly (try running this build again to confirm) - but most likely something is wrong."
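
# A hypothetical sketch of what a compareObjectData()-style check could look like; the real
# helper belongs to the darkflow test suite and is not shown in this example. Two detection
# lists are treated as "roughly matching" when every expected object has a prediction with the
# same label, a confidence within confTolerance, and box corners within posTolerance of the
# image dimensions (the expected-object dicts are assumed to mirror return_predict() output).
def compare_object_data_sketch(expected, predicted, img_w, img_h, confTolerance, posTolerance):
    for exp in expected:
        matched = False
        for pred in predicted:
            if pred['label'] != exp['label']:
                continue
            if abs(pred['confidence'] - exp['confidence']) > confTolerance:
                continue
            dx1 = abs(pred['topleft']['x'] - exp['topleft']['x']) / float(img_w)
            dy1 = abs(pred['topleft']['y'] - exp['topleft']['y']) / float(img_h)
            dx2 = abs(pred['bottomright']['x'] - exp['bottomright']['x']) / float(img_w)
            dy2 = abs(pred['bottomright']['y'] - exp['bottomright']['y']) / float(img_h)
            if max(dx1, dy1, dx2, dy2) <= posTolerance:
                matched = True
                break
        if not matched:
            return False
    return True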
Example #2
def test_RETURNPREDICT_PBLOAD_YOLOv2():
    #Test the .pb and .meta files generated in the previous step
    #NOTE: This test verifies that the code executes properly, and the .pb and .meta files that were created are able to be loaded and used for inference.
    #      The predictions that are generated will be compared against expected predictions.

    options = {"pbLoad": pbPath, "metaLoad": metaPath, "threshold": 0.4}
    tfnet = TFNet(options)
    imgcv = cv2.imread(testImg["path"])
    loadedPredictions = tfnet.return_predict(imgcv)

    assert compareObjectData(testImg["expected-objects"]["yolo"], loadedPredictions, testImg["width"], testImg["height"], threshCompareThreshold, posCompareThreshold), "Generated object predictions from return_predict() were not within margin of error compared to expected values."
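
# The .pb/.meta pair loaded above through "pbLoad"/"metaLoad" is normally produced beforehand
# with darkflow's --savepb option. A minimal sketch (the cfg/weights paths below are
# placeholders, not taken from this test):
#
#   flow --model cfg/tiny-yolo-voc.cfg --load bin/tiny-yolo-voc.weights --savepb
#
# or, roughly equivalently, from Python:
from darkflow.net.build import TFNet

build_options = {"model": "cfg/tiny-yolo-voc.cfg",
                 "load": "bin/tiny-yolo-voc.weights",
                 "threshold": 0.4}
TFNet(build_options).savepb()  # should write built_graph/tiny-yolo-voc.pb and .meta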
Example #3
    def run(self):
        option = {
            #	'model': 'cfg/tiny-yolo-voc-1c.cfg',
            #	'load': 2375,
            'pbLoad': 'built_graph/tiny-yolo-voc-1c.pb',
            'metaLoad': 'built_graph/tiny-yolo-voc-1c.meta',
            'threshold': 0.5,
            'gpu': 0.7
        }

        tfnet = TFNet(option)
        avg = 0
        divider = 700  #horizontal line after which we count the bolls
        checkedbolls = 0  #initialized check boll index
        refinedunique_bolls = []
        colors = [tuple(np.random.randint(255, size=3)) for i in range(10000)
                  ]  #make multiple colors to display bolls and tracklets
        #out = cv.VideoWriter('video_'+self.videoname+'.mp4',fourcc, 12.0, (1280,720))
        bollcnt = 0
        while True:
            stime = time.time()
            _ret, frame = self.cam.read()
            if _ret == False:
                print("Bad frame, video ended")
                # out.release()
                exit(1)
            frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            vis = frame.copy()

            results = tfnet.return_predict(frame)
            cur_match = []
            print("Image # " + str(self.frame_idx))
            # print(len(results))
            ##    refinedBoxes = non_max_suppression_slow(results, 0.4) ## overlap
            #  refinedBoxes = self.non_max_suppression(results, 0.55)
            #print(len(refinedBoxes))
            check_mask = 0
            for i, (color, result) in enumerate(zip(colors, results)):
                #  if i in refinedBoxes:

                cur_match.append(
                    (result['topleft']['x'], result['topleft']['y'],
                     result['bottomright']['x'], result['bottomright']['y']))

            ##color segmentation to get undetected bolls
            #----------Bgr----------
            lower = np.array([190, 190, 150], dtype="uint16")
            upper = np.array([255, 255, 255], dtype="uint16")
            mask = cv.inRange(frame, lower, upper)

            kernel = np.ones((4, 4), np.uint8)
            mask = cv.erode(mask, kernel, iterations=2)
            mask = cv.dilate(mask, kernel, iterations=5)

            #check_mask = 1
            output = cv.bitwise_and(frame, frame, mask=mask)

            fg_mask = cv.cvtColor(output, cv.COLOR_BGR2GRAY)
            #if check_mask == 0:
            # continue
            # for (i, match) in enumerate(cur_match):
            #  xi, yi, xj, yj = match
            #   fg_mask_ = cv.rectangle(fg_mask, (xi, yi), (xj+1, yj+1), (0,0,0),-1,8)

            # handle the different findContours() return signatures across OpenCV versions
            if imutils.is_cv2():
                (contours, hier) = cv.findContours(fg_mask, cv.RETR_LIST,
                                                   cv.CHAIN_APPROX_SIMPLE)
            # check to see if we are using OpenCV 3
            elif imutils.is_cv3():
                (_, contours, hier) = cv.findContours(fg_mask, cv.RETR_LIST,
                                                      cv.CHAIN_APPROX_SIMPLE)
            else:
                # OpenCV 4 went back to returning (contours, hierarchy)
                (contours, hier) = cv.findContours(fg_mask, cv.RETR_LIST,
                                                   cv.CHAIN_APPROX_SIMPLE)

            matches_ = []
            for cnt in contours:
                if (80 < cv.contourArea(cnt)):
                    cv.drawContours(fg_mask, [cnt], 0, 255, -1)
                    (x, y, w, h) = cv.boundingRect(cnt)
                    centroid = self.get_centroid(x, y, w, h)
                    matches_.append(((x, y, x + w - 1, y + h - 1), centroid,
                                     cv.contourArea(cnt)))
            ##check the false positives of the YOLOv2

            refinedMatches_ = self.non_max_suppression_no_dict(matches_, 0.40)

            for (i, match) in enumerate(matches_):
                contour, centroid, contour_area = match
                x, y, xw, yh = contour
                w = xw - x
                h = yh - y
                cx = x + w / 2
                cy = y + h / 2
                yolocovered = False
                res = False
                for j, result in enumerate(results):
                    xi = result['topleft']['x']
                    yi = result['topleft']['y']
                    xj = result['bottomright']['x']
                    yj = result['bottomright']['y']
                    res = True
                    #    if j in refinedBoxes:
                    if (xi <= cx and yi <= cy and xj >= cx and yj >= cy):
                        yolocovered = True
                        break
                if not (yolocovered) and (res):
                    if i in refinedMatches_:
                        # frame = cv.rectangle(frame, (x, y), (xw-1, yh-1), (255,0,0), 3)
                        cur_match.append((x, y, xw - 1, yh - 1))
            vis = frame.copy()
            if len(self.tracks) > 0:
                img0, img1 = self.prev_gray, frame_gray
                p0 = np.float32([tr[-1]
                                 for tr in self.tracks]).reshape(-1, 1, 2)
                p1, _st, _err = cv.calcOpticalFlowPyrLK(
                    img0, img1, p0, None, **lk_params)
                p0r, _st, _err = cv.calcOpticalFlowPyrLK(
                    img1, img0, p1, None, **lk_params)
                d = abs(p0 - p0r).reshape(-1, 2).max(-1)
                good = d < 1
                new_tracks = []
                new_bolls = []
                new_counts = []
                pts_src = p0.reshape(-1, 2)
                pts_dst = p1.reshape(-1, 2)
                H, status = cv.findHomography(pts_src, pts_dst)
                present_bolls = []
                for boll_bbox in cur_match:
                    for k, (tr, bo, cnt, (x, y), good_flag) in enumerate(
                            zip(self.tracks, self.bolls, self.count_bolls,
                                p1.reshape(-1, 2), good)):
                        if not good_flag:
                            continue
                        xi, yi, xj, yj = boll_bbox
                        area = (xj - xi + 1) * (yj - yi + 1)
                        cx, cy = self.get_bbox_centre(xi, yi, xj, yj)
                        xi = cx - 50
                        yi = cy - 50
                        xj = cx + 50
                        yj = cy + 50
                        if (xi <= x and yi <= y and xj >= x and yj >= y):

                            xii, yii, xjj, yjj, area2 = bo[
                                -1]  # get previous bolls
                            x0, y0 = tr[-1]  #get previous tracklet head

                            if k not in present_bolls:
                                tr.append((x, y))
                                bo.append((xi, yi, xj, yj, area))
                                cnt.append((cnt[-1]))
                                if len(tr) > self.track_len:
                                    del tr[0]
                                    del bo[0]
                                    del cnt[0]

                                new_tracks.append(tr)
                                new_bolls.append(bo)
                                present_bolls.append(k)
                                new_counts.append(cnt)
                                cv.circle(vis, (x, y), 2, (0, 255, 0), -1)
                                break

                #restore the lost tracklets whose heads lie between 360 and 700 in vertical pixel position
                new_tracklets = []
                update_tracklets = []
                tracklets = []
                for k, (tr, bo, cnt) in enumerate(
                        zip(self.tracks, self.bolls, self.count_bolls)):
                    if len(tr) > 2:
                        xii, yii, xjj, yjj, area2 = bo[
                            -1]  # get previous boll bbox position
                        x0, y0 = tr[-1]  #get previous tracklet head position
                        for ki, (tri, boo, cntt) in enumerate(
                                zip(self.tracks, self.bolls,
                                    self.count_bolls)):
                            # if ki not in present_bolls:
                            xiii, yiii, xjjj, yjjj, area3 = boo[
                                -1]  # get previous boll bbox position
                            if len(tri) > 0:
                                x0i, y0i = tri[
                                    -1]  #get previous tracklet head position
                                #get newer points only

                                if (len(boo) <= 2 and xii <= x0i and yii <= y0i
                                        and xjj >= x0i and yjj >= y0i):
                                    tracklets.append((x0i, y0i))
                                    update_tracklets.append(k)
                                    new_tracklets.append(ki)
                                    break

                for k, (tr, bo, cnt) in enumerate(
                        zip(self.tracks, self.bolls, self.count_bolls)):
                    # if k in update_tracklets:
                    #  x0i, y0i = tracklets[update_tracklets.index(k)]

                    # self.tracks[k][-1] = (x0i, y0i)

                    if len(tr) > 0 and k not in present_bolls:
                        xii, yii, xjj, yjj, area2 = bo[
                            -1]  # get previous boll bbox position
                        x0, y0 = tr[-1]  #get previous tracklet head position

                        if (len(tr) > 7 and y0 >= 360 and y0 <= 700
                                and len(present_bolls) >
                                0):  ##check if the tracklet is old NOT new

                            dst_pts_1 = (x0, y0, 1)
                            dst_pts_2 = (xii, yii, 1)
                            dst_pts_3 = (xjj, yjj, 1)
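                            # map the stale track head and box corners forward into the
                            # current frame with the frame-to-frame homography H
                            # (homogeneous coordinates, divided through below)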
                            src_pts_1 = np.matmul(H, dst_pts_1)
                            src_pts_2 = np.matmul(H, dst_pts_2)
                            src_pts_3 = np.matmul(H, dst_pts_3)
                            x = np.int32(src_pts_1[0] / src_pts_1[2])
                            y = np.int32(src_pts_1[1] / src_pts_1[2])
                            xi = np.int32(src_pts_2[0] / src_pts_2[2])
                            yi = np.int32(src_pts_2[1] / src_pts_2[2])
                            xj = np.int32(src_pts_3[0] / src_pts_3[2])
                            yj = np.int32(src_pts_3[1] / src_pts_3[2])

                            tr.append((x, y))
                            bo.append((xi, yi, xj, yj, area))
                            cnt.append((cnt[-1]))
                            if (len(tr) > self.track_len):

                                del tr[0]
                                del bo[0]
                                del cnt[0]

                            new_tracks.append(tr)
                            new_bolls.append(bo)
                            new_counts.append(cnt)
                            cv.circle(vis, (x, y), 2, (0, 255, 0), -1)

                    if (k in new_tracklets):

                        del tr[-1]
                        del bo[-1]
                        del cnt[-1]

                self.tracks = new_tracks
                self.bolls = new_bolls
                self.count_bolls = new_counts

                for j, (tr, bo, cnt) in enumerate(
                        zip(self.tracks, self.bolls, self.count_bolls)):
                    k = cnt[-1]
                    b, g, r = colors[k]
                    b = int(b)
                    g = int(g)
                    r = int(r)
                    if (len(tr) > 1):
                        cv.polylines(vis, [np.int32(tr)], False, (b, g, r), 2)

                        xc1, yc1 = np.int32(tr[-1])
                        xc2, yc2 = np.int32(tr[-2])
                        if (yc2 <= divider and yc1 > divider and len(tr) > 7):
                            self.boll_count = self.boll_count + 1

                        xi, yi, xj, yj, area = np.int32(bo[-1])

                        cx, cy = self.get_bbox_centre(xi, yi, xj, yj)
                        #  text = '{}'.format(cnt[-1])
                        vis = cv.rectangle(vis, (xi, yi), (xj, yj), (b, g, r),
                                           2)
                    # vis = cv.putText(vis, text, (cx, cy), cv.FONT_HERSHEY_COMPLEX, 1, (160, 0, 200), 2)

            if self.frame_idx % self.detect_interval == 0:
                mask = np.zeros_like(frame_gray)
                mask[:] = 255
                for x, y in [np.int32(tr[-1]) for tr in self.tracks]:
                    cv.circle(mask, (x, y), 5, 0, -1)

                p = cv.goodFeaturesToTrack(frame_gray,
                                           mask=mask,
                                           **feature_params)
                print("--------------------------------------------------")
                if p is not None:
                    for boll_bbox in cur_match:
                        for x, y in np.float32(p).reshape(-1, 2):
                            xi, yi, xj, yj = boll_bbox
                            area = (xj - xi + 1) * (yj - yi + 1)
                            #if (x,y) not in self.tracks:
                            cx, cy = self.get_bbox_centre(xi, yi, xj, yj)
                            xi = cx - 50
                            yi = cy - 50
                            xj = cx + 50
                            yj = cy + 50
                            # if (x,y) not in self.tracks:
                            if (xi <= x and yi <= y and xj >= x and yj >= y):
                                self.count_bolls.append([(bollcnt)])
                                self.tracks.append([(x, y)])
                                self.bolls.append([(xi, yi, xj, yj, area)])
                                bollcnt = bollcnt + 1
                                break

            cv.line(vis, (0, 360), (1280, 360), (245, 8, 0),
                    thickness=2,
                    lineType=8)
            cv.line(vis, (0, 700), (1280, 700), (10, 255, 90),
                    thickness=2,
                    lineType=8)
            draw_str(vis, (20, 20), 'track count: %d' % self.boll_count)
            avg = (avg * (self.frame_idx) +
                   (1 / (time.time() - stime))) / (self.frame_idx + 1)
            fps = (1 / (time.time() - stime))
            vis = cv.putText(vis, 'FPS {:.1f} AFPS {:.1f}'.format(fps, avg),
                             (900, 20), cv.FONT_HERSHEY_COMPLEX, 1,
                             (25, 0, 200), 2)
            file_name_format = IMAGE_DIR_RES + "/proc_" + self.videoname + "_%04d.jpg"
            file_name = file_name_format % self.frame_idx
            print(file_name)
            cv.imwrite(file_name, vis)
            #cv.imshow('lk_track', vis)
            #Write the frame into the file 'output.avi'
            # out.write(vis)

            self.frame_idx += 1
            self.prev_gray = frame_gray
            ##-------------calculate FPS ----------------------------------------------
            # avg = (avg*(self.frame_idx-1) + (1 / (time.time() - stime)))/self.frame_idx
            # print('FPS {:.1f}'.format(avg))
            #fps =  (1 / (time.time() - stime))
            print('FPS {:.1f}'.format(fps))

            #if self.frame_idx > 80:
            #    exit(1)
            ch = cv.waitKey(0)
            if ch == 27:
                break
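
# A minimal standalone sketch of the forward-backward Lucas-Kanade consistency check used in
# run() above: points are tracked from the previous frame to the current one and back again,
# and only points that return close to where they started are kept. lk_params is assumed to be
# the same dict of cv.calcOpticalFlowPyrLK keyword arguments the tracker already uses.
import cv2 as cv
import numpy as np


def track_points_fb(img0, img1, points, lk_params, max_fb_error=1.0):
    p0 = np.float32(points).reshape(-1, 1, 2)
    p1, _st, _err = cv.calcOpticalFlowPyrLK(img0, img1, p0, None, **lk_params)
    p0r, _st, _err = cv.calcOpticalFlowPyrLK(img1, img0, p1, None, **lk_params)
    fb_error = abs(p0 - p0r).reshape(-1, 2).max(-1)
    good = fb_error < max_fb_error
    return p1.reshape(-1, 2), good
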
import pyttsx3

engine = pyttsx3.init()
option = {
    'model': 'cfg/yolo.cfg',
    'load': 'bin/yolo.weights',
    'threshold': 0.5,
    'gpu': 1.0
}

tfnet = TFNet(option)
choice = 'y'
while (choice == 'y'):
    path = input("Enter Path: ")
    img = cv2.imread(path)
    result = tfnet.return_predict(img)
    print(result)
    print(len(result))
    for detection in result:
        tl = (detection['topleft']['x'], detection['topleft']['y'])
        br = (detection['bottomright']['x'],
              detection['bottomright']['y'])
        label = detection['label']
        conf = detection['confidence']
        print("\n\ntl: ", tl, " br: ", br, " label: ", label, "Confidence: ",
              conf)
        if (conf > 0.25):
            img = cv2.rectangle(img, tl, br, (0, 255, 0), 7)
            img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 4,
                              (0, 255, 0), 2)  # colour and thickness are placeholder values
           'threshold': 0.15,
           'gpu': 0.7}

tfnet = TFNet(options)

C = []  # Center
R = []  # Radius
L = []  # Label

im_name = 'HRI001'
image = cv2.imread('data/' + im_name + '.jpg')

for h in range(0, 2592, 890):
    for w in range(0, 3872, 1290):
        im = image[h:h + 890, w:w + 1290]
        output = tfnet.return_predict(im)

        RBC = 0
        WBC = 0
        Platelets = 0

        for prediction in output:
            label = prediction['label']
            confidence = prediction['confidence']

            tl = (prediction['topleft']['x'], prediction['topleft']['y'])
            br = (prediction['bottomright']['x'], prediction['bottomright']['y'])

            height, width, _ = image.shape
            center_x = int((tl[0] + br[0]) / 2)
            center_y = int((tl[1] + br[1]) / 2)
options = {
    'model':
    'C:/Users/Dell/Desktop/Programming/YOLO/darkflow-master/cfg/yolo.cfg',
    'load':
    'C:/Users/Dell/Desktop/Programming/YOLO/darkflow-master/bin/yolov2.weights',
    'threshold': 0.15,
}

tfnet = TFNet(options)
capture = cv2.VideoCapture('vid1.mp4')
colors = [tuple(255 * np.random.rand(3)) for i in range(5)]
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
output = cv2.VideoWriter('labelled.mp4', fourcc, 20.0, (640, 480))
while (capture.isOpened()):
    ret, frame = capture.read()
    if ret:
        results = tfnet.return_predict(frame)
        for color, result in zip(colors, results):
            tl = (result['topleft']['x'], result['topleft']['y'])
            br = (result['bottomright']['x'], result['bottomright']['y'])
            label = result['label']
            frame = cv2.rectangle(frame, tl, br, (0, 255, 0), 4)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 0, 0), 2)
        output.write(frame)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
capture.release()
output.release()
cv2.destroyAllWindows()
Example #7
def main():
    xml_dir = os.path.join(os.getcwd(), 'train/Annotations')
    xml_df = xml_to_csv(xml_dir)
    xml_df.to_csv('455_labels.csv', index=None)

    options = {"model": "cfg/455.cfg", "load": 18375, "threshold": 0.4}

    tfnet = TFNet(options)

    img = cv2.imread('/home/dan/darkflow/train/Images/dolphin_batch600027.jpg',
                     cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # use YOLO to predict the image
    result = tfnet.return_predict(img)

    if os.path.exists("yolo.csv"):
        os.remove("yolo.csv")
    with open('yolo.csv', 'a') as writeFile:
        writer = csv.writer(writeFile)
        writer.writerow(
            ['id', 'label', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'])

    # Get the ground truth results in a list for the given image
    gtList = getListOfGroundTruthForImage('dolphin_batch600027.jpg')
    # Get the IOU value for each detection

    gtList.reverse()
    id = 0
    for i in range(0, len(result) - 1):
        tl = (result[i]['topleft']['x'], result[i]['topleft']['y'])
        br = (result[i]['bottomright']['x'], result[i]['bottomright']['y'])

        label = result[i]['label']
        boxA = {
            'x1': int(result[i]['topleft']['x']),
            'y1': int(result[i]['topleft']['y']),
            'x2': int(result[i]['bottomright']['x']),
            'y2': int(result[i]['bottomright']['y'])
        }

        boxB = {
            'x1': int(gtList[i]['xmin']),
            'y1': int(gtList[i]['ymin']),
            'x2': int(gtList[i]['xmax']),
            'y2': int(gtList[i]['ymax'])
        }
        print(" get iou = " + str(get_iou(boxA, boxB)))
        box_1 = [[
            int(result[i]['topleft']['x']),
            int(result[i]['topleft']['y'])
        ], [
            int(result[i]['bottomright']['x']),
            int(result[i]['topleft']['y'])
        ],
                 [
                     int(result[i]['bottomright']['x']),
                     int(result[i]['bottomright']['y'])
                 ],
                 [
                     int(result[i]['topleft']['x']),
                     int(result[i]['bottomright']['y'])
                 ]]
        box_2 = [[int(gtList[i]['xmin']),
                  int(gtList[i]['ymin'])],
                 [int(gtList[i]['xmax']),
                  int(gtList[i]['ymin'])],
                 [int(gtList[i]['xmax']),
                  int(gtList[i]['ymax'])],
                 [int(gtList[i]['xmin']),
                  int(gtList[i]['ymax'])]]
        iou = calculate_iou(box_1, box_2)
        print(str(iou))

        tl2 = (int(gtList[i]['xmin']), int(gtList[i]['ymin']))
        br2 = (int(gtList[i]['xmax']), int(gtList[i]['ymax']))
        id = id + 1
        img = cv2.rectangle(img, tl, br, (0, 255, 0),
                            7)  # draw a rectangle onto an image
        img = cv2.rectangle(img, tl2, br2, (0, 100, 0), 7)
        img = cv2.putText(img, "IOU {0:.2f}".format(iou) + " " + label, tl,
                          cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0),
                          2)  # add label name
    writeFile.close()
    plt.imshow(img)
    plt.show()
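
# A hypothetical sketch of an IoU helper in the spirit of the get_iou() call above (the real
# get_iou()/calculate_iou() helpers are defined elsewhere and not shown). Boxes are dicts with
# 'x1', 'y1', 'x2', 'y2' corners, as built for boxA/boxB in main().
def iou_sketch(boxA, boxB):
    ix1 = max(boxA['x1'], boxB['x1'])
    iy1 = max(boxA['y1'], boxB['y1'])
    ix2 = min(boxA['x2'], boxB['x2'])
    iy2 = min(boxA['y2'], boxB['y2'])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    areaA = (boxA['x2'] - boxA['x1']) * (boxA['y2'] - boxA['y1'])
    areaB = (boxB['x2'] - boxB['x1']) * (boxB['y2'] - boxB['y1'])
    union = areaA + areaB - inter
    return inter / union if union > 0 else 0.0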
Example #8
        cv2.namedWindow(windowName1)
        cv2.namedWindow(windowName2)
        cv2.setMouseCallback(windowName1,draw_shape)
        cv2.setMouseCallback(windowName2,draw_shape)

        if startPoint == True and endPoint == True:
            #print('inside of if')
            #frame = cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (255, 0, 255), -1)
            x = frame[rect[1]:rect[3],rect[0]:rect[2]]
            #print('x: ',x)
            #img_arr = np.array(bytearray(x.content),dtype = np.uint8)
            #x = cv2.imdecode(x, -1)
            #capture0 = cv2.resize(capture0, (640, 480))
            x = cv2.resize(x, (640, 480))
            results1 = tfnet.return_predict(x)

            count = 0
            for color1, result1 in zip(colors, results1):
                print('inside for of video cam')
                tl = (result1['topleft']['x'], result1['topleft']['y'])
                br = (result1['bottomright']['x'], result1['bottomright']['y'])
                label = result1['label']
                confidence = result1['confidence']
                if label == 'person' and xCount > 50:
                    print('inside if too..')
                    diff = cv2.absdiff(frame1, frame2)
                    gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
                    blur = cv2.GaussianBlur(gray, (5,5), 0)
                    _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
                    dilated = cv2.dilate(thresh, None, iterations=3)
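
# Sketch (assumption, not part of the original snippet): the dilated frame-difference mask
# built above is typically followed by findContours() to pull out the moving regions. The
# helper below also papers over the differing findContours() return values across OpenCV
# versions.
def motion_boxes(dilated_mask, min_area=500):
    found = cv2.findContours(dilated_mask, cv2.RETR_EXTERNAL,
                             cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]
    return [cv2.boundingRect(c) for c in contours if cv2.contourArea(c) > min_area]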
Example #9
inlane = "no"
# Define the codec and create a VideoWriter object. The output is stored in the 'eman.avi' file.
out = cv2.VideoWriter('eman.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                      30, (frame_width, frame_height))

while (cap.isOpened()):
    stime = time.time()
    ret, frame = cap.read()
    if not ret:
        break
    newframe = frame.copy()
    tls = []
    brs = []
    tock = time.time()
    if ret:
        detecteds = tfnet.return_predict(frame)
        for color, detected in zip(colors, detecteds):
            tl = (detected['topleft']['x'], detected['topleft']['y'])
            br = (detected['bottomright']['x'], detected['bottomright']['y'])
            label = detected['label']
            frame = cv2.rectangle(frame, tl, br, color, 7)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 0, 0), 2)
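            # blank out the detected vehicle's pixels in newframe so the lane-finding
            # pipeline below (binary -> perspective_view -> find_lane) ignores them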
            newframe[tl[1]:br[1], tl[0]:br[0], :] = 0
            tls.append(tl)
            brs.append(br)

        binary = b.binary(newframe)
        warped, m, im = p.perspective_view(binary)

        fl.find_lane(warped)
def rotateServo():
    pi = pigpio.pi()

    #initializing hw pwm pins
    pin_gpio_z = 12
    pin_gpio_x = 13
    pi.hardware_PWM(pin_gpio_z, 0, 0)
    pi.hardware_PWM(pin_gpio_x, 0, 0)
    time.sleep(0.1)

    options = {
        "model": "darkflow/cfg/tiny-yolo.cfg",
        "load": "darkflow/bin/tiny-yolo.weights",
        "threshold": 0.2,
        "config": "darkflow/cfg/",
        "gpu": 1.0
    }
    print("loading TFnet")
    tfnet = TFNet(options)

    camera = picamera.PiCamera()
    camera.resolution = (320, 240)
    rawCapture = picamera.array.PiRGBArray(camera, size=(320, 240))
    time.sleep(1)

    z, x = 75000, 75000
    #cycle through the stream of images from the camera
    for frame in camera.capture_continuous(rawCapture,
                                           format="bgr",
                                           use_video_port=True):
        image = frame.array  # the frame will be stored in the variable called image
        #  find the amount that is needed to rotate
        # #z,x = blackFollow(image,z,x)

        print("Predicting")
        results = tfnet.return_predict(image)
        dz, dx = 0, 0
        print("Found ", len(results), "objects")
        for result in results:
            #cv2.rectangle(image,
            #      (result["topleft"]["x"], result["topleft"]["y"]),
            #      (result["bottomright"]["x"],result["bottomright"]["y"]),
            #      (0, 255, 0), 4)
            #text_x, text_y = result["topleft"]["x"] - 10, result["topleft"]["y"] - 10
            #cv2.putText(image, result["label"], (text_x, text_y),cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)

            if (result["label"] == "person"):
                dz = result["topleft"]["x"] + result["bottomright"][
                    "x"] - image.shape[
                        1]  #how much should we rotate to left/right
                dx = result["topleft"]["y"] + result["bottomright"][
                    "y"] - image.shape[
                        0]  #how much should we rotate to up/down
                print(result)

        z -= int(10000 * (1.0 * dz / image.shape[1]))
        x += int(10000 * (1.0 * dx / image.shape[0]))
        z = max(z, 25000)
        z = min(z, 125000)
        x = max(x, 25000)
        x = min(x, 90000)
        print("dz", dz, "dx", dx, "z", z, "x", x)

        # end rotating logic
        if (dz < 10 and dx < 10):
            break

        pi.hardware_PWM(pin_gpio_z, 50, z)  #50Hz
        pi.hardware_PWM(pin_gpio_x, 50, x)  #50Hz
        time.sleep(0.1)
        rawCapture.truncate(0)
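
# A short note on the centring arithmetic in rotateServo() above (a reading of the code, not
# from the original author): for a detection with horizontal corners x1 and x2 in an image of
# width W, x1 + x2 - W equals 2 * (box_centre_x - W / 2), i.e. twice the horizontal offset of
# the box centre from the image centre, so dz and dx are signed "how far off centre" errors
# that nudge the pan/tilt PWM duty cycles.
def centring_error(corner_lo, corner_hi, frame_size):
    return corner_lo + corner_hi - frame_size  # positive -> target is past the frame centre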
Example #11
def main():
    """
    Interface with a ZED depth camera (via ROS), using YOLO for object detection, 
    and a custom CNN for target classification.
    """

    # Argument parser for command line arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("--skipInit",
                    help="Flag whether to skip initialization or not",
                    action='store_true')
    args = vars(ap.parse_args())

    # TF Options (either build from config/weights, or load pre-built from pb/meta)
    # # Build tiny-yolo from the config and weights
    # options = {"model": "darkflow/cfg/tiny-yolo-voc.cfg",
    #            "load":"darkflow/bin/tiny-yolo-voc.weights",
    #            "threshold":0.5, "gpu":0.5}
    # Load the pre-trained and pre-built model of tiny-yolo
    options = {
        "pbLoad": "darkflow/built_graph/tiny-yolo-voc.pb",
        "metaLoad": "darkflow/built_graph/tiny-yolo-voc.meta",
        "threshold": 0.5,
        "gpu": 0.4
    }

    # Initialize YOLO
    tfnet = TFNet(options)
    # Initialize CNN
    CNN = TargetCNN()

    # ROS Setup
    rospy.init_node('ZED_test', anonymous=True)
    # Subscribe to topics
    depth_sub = rospy.Subscriber("/zed/depth/depth_registered",
                                 Image,
                                 depth_cb,
                                 queue_size=1)
    left_sub = rospy.Subscriber("/zed/left/image_rect_color",
                                Image,
                                bgr_cb,
                                queue_size=1)
    # Publish depth, angle, and target lost
    depth_pub = rospy.Publisher("/ZED_Tracker/depth", Float32, queue_size=10)
    angle_pub = rospy.Publisher("/ZED_Tracker/angle", Float32, queue_size=10)
    lost_pub = rospy.Publisher("/ZED_Tracker/targetLost", Bool, queue_size=10)

    # Thickness of cv draw objects
    thick = int((IMG_H + IMG_W) // 300)

    # Initialize variables
    lastTime = time()
    frame_count = -1  # index video frame
    init_target_count = 0  # number of Target samples captured
    other_count = 0  # number of 'Other' samples (other people, not the Target) captured
    xc, yc = 0, 0  # Centroid of Bounding Box

    # Keep track of what 'state' the algorithm is in
    state = 'WAITING_1'  # First state, wait for user
    # Check if skipInit
    if (args["skipInit"]):
        state = "TRAIN"

    print("  State: %s" % state)

    #############################
    ###  Main Processing Loop ###
    #############################
    while (not rospy.is_shutdown()):
        frame_count += 1

        if (state == 'TRAIN'
            ):  # Training is a blocking function call, so visuals don't update
            print("[INFO] Starting training")
            CNN.train(PLOT_LEARNING=True, epochs=20, trainDir=trainDir)
            print("[INFO] Done Training")
            state = 'PREDICT'
            print("  State: %s" % state)

        # Use YOLO for Object Detection
        results = tfnet.return_predict(
            bgrImg)  # predict using bgrImg from ROS callback

        vis = bgrImg.copy()  # Create copy for visual

        # Initialize max values
        max_pred = 0  # Store max prediction value
        max_pred_coords = [0, 0, 0, 0]  # Store corresponding coords

        # Loop through all detections (from YOLO)
        for i in range(len(results)):
            # Skip any non-'person' detections
            if (results[i]['label'] != 'person'):
                continue
            # Bounding Box indexes
            x1, y1 = results[i]['topleft']['x'], results[i]['topleft']['y']
            x2, y2 = results[i]['bottomright']['x'], results[i]['bottomright'][
                'y']
            xc, yc = toCentroid(x1, y1, x2, y2)  # Get centroid

            # Draw Bounding Box
            cv2.rectangle(vis, (x1, y1), (x2, y2), (255, 255, 255), thick)
            cv2.circle(vis, (xc, yc), 3, (255, 255, 255),
                       thick)  # Center circle
            cv2.putText(vis, results[i]['label'],
                        (x1 + thick * 2, y1 + thick * 2), 0, 1e-3 * IMG_H,
                        (255, 255, 255), thick // 3)

            if (state == 'INIT_1'):
                # Only consider objects within middle window
                # (Center window extends full top to bottom, but only +/- INIT_WIN_W)
                if (abs(xc - MID_X) < INIT_WIN_W):
                    obj = bgrImg[y1:y2, x1:x2].copy()
                    obj_name = targetDir + "Target-%s.png" % (
                        str(init_target_count).zfill(4))
                    init_target_count += 1
                    cv2.imwrite(obj_name, obj)

            if (state == 'INIT_2'):
                # Save any person, no restriction on location
                other = bgrImg[y1:y2, x1:x2].copy()
                other_name = notTargetDir + "Other-%s.png" % (
                    str(other_count).zfill(4))
                other_count += 1
                cv2.imwrite(other_name, other)

            if (state == 'PREDICT'):
                # Use trained CNN to classify as Target/Not-Target
                label, prob, prediction = CNN.predict(bgrImg[y1:y2,
                                                             x1:x2].copy())
                # Keep track of highest classification
                if (prediction > max_pred):
                    max_pred = prediction
                    max_pred_coords = [x1, y1, x2, y2]

        # Wait 1ms (allows visuals to update), and take in user input if any
        charin = cv2.waitKey(1)

        ## Draw State specific graphics/instructions/info
        ## and transition states when appropriate

        if (state == 'WAITING_1'):
            # Draw initialization box area
            cv2.rectangle(vis, (MID_X - INIT_WIN_W, 0),
                          (MID_X + INIT_WIN_W, IMG_H), (0, 150, 0), thick // 3)
            # Show instructions on screen
            instr = [
                "Press 'i' to continue,",
                "then you will have %d seconds" % time_till_init,
                "before initialization starts"
            ]
            cv2.rectangle(vis, (0, 0), (300, 100),
                          color=(255, 255, 255),
                          thickness=-1)  # box for text
            cv2.putText(vis, instr[0], (20, 20), FONT, 0.5, (0, 0, 255),
                        2)  # Show instructions
            cv2.putText(vis, instr[1], (20, 40), FONT, 0.5, (0, 0, 255),
                        2)  # Show instructions
            cv2.putText(vis, instr[2], (20, 60), FONT, 0.5, (0, 0, 255),
                        2)  # Show instructions
            # User press 'i' to move to initialization
            if (charin == ord('i')):
                cdown_start = time()  # Start the time
                state = 'COUNTDOWN_1'
                print("  State: %s" % state)
        if (state == 'COUNTDOWN_1'):
            # Draw initialization box area
            cv2.rectangle(vis, (MID_X - INIT_WIN_W, 0),
                          (MID_X + INIT_WIN_W, IMG_H), (0, 150, 0), thick // 3)
            # Countdown time, wait for X seconds before capturing init data
            dur = time() - cdown_start
            cv2.putText(vis, "%d" % (time_till_init - (dur) + 1),
                        (IMG_W // 4, IMG_H // 4), FONT, 4, (0, 0, 255),
                        2)  # Show countdown
            if (dur > time_till_init):
                state = 'INIT_1'
                print("  State: %s" % state)
                print("     Saving Target samples as: %sTarget-####.png" %
                      targetDir)
        if (state == 'INIT_1'):
            # Draw initialization box area
            cv2.rectangle(vis, (MID_X - INIT_WIN_W, 0),
                          (MID_X + INIT_WIN_W, IMG_H), (0, 150, 0), thick // 3)
            # Display progress on image acquisition
            cv2.rectangle(vis, (0, 0), (400, 45),
                          color=(255, 255, 255),
                          thickness=-1)  # box for text
            cv2.putText(vis, "%d of %d images taken" %
                        (init_target_count, NUM_INIT_DATA), (10, 30), FONT, 1,
                        (0, 0, 255), 2)  # Show instructions
            # Continue once X samples are taken
            if (init_target_count >= NUM_INIT_DATA):
                state = 'WAITING_2'
                print("  State: %s" % state)
        if (state == 'WAITING_2'):
            # Show instructions on screen
            instr = [
                "Press 'b' to continue,",
                "then you will have %d seconds" % time_till_init_2,
                "before the background capture starts"
            ]
            cv2.rectangle(vis, (0, 0), (300, 100),
                          color=(255, 255, 255),
                          thickness=-1)  # box for text
            cv2.putText(vis, instr[0], (20, 20), FONT, 0.5, (0, 0, 255),
                        2)  # Show instructions
            cv2.putText(vis, instr[1], (20, 40), FONT, 0.5, (0, 0, 255),
                        2)  # Show instructions
            cv2.putText(vis, instr[2], (20, 60), FONT, 0.5, (0, 0, 255),
                        2)  # Show instructions
            # User press 'b' to move to background initialization
            if (charin == ord('b')):
                cdown_start = time()
                state = 'COUNTDOWN_2'
                print("  State: %s" % state)
        if (state == 'COUNTDOWN_2'):
            dur = time() - cdown_start
            cv2.putText(vis, "%d" % (time_till_init_2 - (dur) + 1),
                        (IMG_W // 4, IMG_H // 4), FONT, 4, (0, 0, 255),
                        2)  # Show countdown
            # Wait for X seconds before capturing init data
            if (dur > time_till_init_2):
                cdown_start = time()
                state = 'INIT_2'
                print("  State: %s" % state)
                print("     Saving Others samples as: %sOther-####.png" %
                      notTargetDir)
        if (state == 'INIT_2'):
            dur = time() - cdown_start
            # Wait for X seconds before capturing background
            if (dur > time_till_init_3):
                state = 'INIT_3'
                print("  State: %s" % state)
        if (state == 'INIT_3'):
            # Save 'background' patches to notTarget training directory
            grabBackground(bgrImg)
            msg = "Please wait while the CNN is trained..."
            cv2.rectangle(vis, (0, 0), (IMG_W, 100),
                          color=(255, 255, 255),
                          thickness=-1)  # box for text
            cv2.putText(vis, msg, (50, 50), FONT, 2, (0, 0, 255),
                        2)  # Show instructions
            cv2.imshow('ZED Tracking', vis)
            cv2.waitKey(1)
            state = 'TRAIN'
            print("  State: %s" % state)
        if (state == 'PREDICT'):
            # Display max classification
            x1, y1, x2, y2 = max_pred_coords
            cv2.rectangle(vis, (x1, y1), (x2, y2),
                          color=(0, 255, 0),
                          thickness=thick)
            cv2.rectangle(vis, (0, 0), (300, 160),
                          color=(255, 255, 255),
                          thickness=-1)
            cv2.putText(vis,
                        "Pred: %.3f" % max_pred, (10, 30),
                        FONT,
                        1,
                        color=(255, 0, 0),
                        thickness=2)
            xc, yc = toCentroid(x1, y1, x2, y2)

            # Display and publish Depth
            depth = getDepth(xc, yc, depthImg)
            if (not np.isnan(depth)):
                cv2.putText(vis,
                            "Depth: %.2f m" % depth, (10, 60),
                            FONT,
                            1,
                            color=(255, 0, 0),
                            thickness=2)
                # Publish depth
                depth_pub.publish(depth)
            else:
                cv2.putText(vis,
                            "Depth: ---- m", (10, 60),
                            FONT,
                            1,
                            color=(255, 0, 0),
                            thickness=2)
                ## Uncomment line below if you want constant publishing of depth data
                # depth_pub.publish(-1.0) # -1.0 is simply an 'error' value

            # Display and publish Angle
            angle = getAngle(xc)
            if (not angle == -45.0):
                cv2.putText(vis,
                            "Angle: %.1f deg" % angle, (10, 90),
                            FONT,
                            1,
                            color=(255, 0, 0),
                            thickness=2)
                angle_pub.publish(angle)
            else:
                cv2.putText(vis,
                            "Angle: ---- deg", (10, 90),
                            FONT,
                            1,
                            color=(255, 0, 0),
                            thickness=2)
                ## Uncomment line below if you want constant publishing of angle data
                # angle_pub.publish(-180.0) # -180.0 is simply an 'error' value

            # Target Lost
            LOST = max_pred < pred_thresh
            if (LOST):
                cv2.putText(vis,
                            "TARGET LOST", (10, 150),
                            FONT,
                            1.4,
                            color=(0, 0, 255),
                            thickness=2)
            lost_pub.publish(LOST)

        # Press 'q' to quit
        if (charin == ord('q')):
            print("Quitting program...")
            break

        # FPS calculation
        f = 1 / (time() - lastTime)
        fps = "FPS: %.2f" % f
        lastTime = time()
        # Display FPS data
        cv2.rectangle(vis, (IMG_W - 170, IMG_H - 50), (IMG_W, IMG_H),
                      (0, 0, 0), -1)
        cv2.putText(vis, fps, (IMG_W - 160, IMG_H - 15), FONT, 1, (0, 0, 255),
                    2)

        # Show visuals
        cv2.imshow('ZED Tracking', vis)

    # Cleanup
    cv2.destroyAllWindows()
    depth_sub.unregister()
    left_sub.unregister()
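
# Hypothetical sketches of the toCentroid()/getDepth() helpers referenced above; the real
# implementations live elsewhere in this project. toCentroid() clearly returns the
# bounding-box centre, and getDepth() is assumed to sample the ZED depth image at that pixel,
# which would explain the np.isnan() check in the PREDICT state.
def toCentroid_sketch(x1, y1, x2, y2):
    return (x1 + x2) // 2, (y1 + y2) // 2


def getDepth_sketch(xc, yc, depthImg):
    # depthImg assumed to be the float depth frame from /zed/depth/depth_registered,
    # in metres, with NaN where the camera has no estimate
    return float(depthImg[yc, xc])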
Example #12
while (True):
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        break
    # Our operations on the frame come here
    RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Display the resulting frame

    if x == ord('d') and mark <= 440:
        mark += 10
    if x == ord('a') and mark >= 60:
        mark -= 10
    if x == 27:
        break
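    # map the on-screen marker position to a confidence threshold: the key handlers above
    # keep mark between 50 and 450, so the threshold below sweeps linearly from 0.1 to 1.0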
    threshold = 0.00225 * (mark - 50) + 0.1

    result = tfnet.return_predict(RGB)

    for detection in result:
        label = detection['label']
        confidence = format(float(detection['confidence']), '.2f')
        tl = (int(detection['topleft']['x']), int(detection['topleft']['y']))
        br = (int(detection['bottomright']['x']),
              int(detection['bottomright']['y']))

        if float(confidence) >= threshold:
            cv2.rectangle(frame, tl, br, (0, 255, 0), 3)

            cv2.putText(frame, label + ' ' + confidence, tl,
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255), 1,
                        cv2.LINE_AA)
    licensePlate = "".join(sortedList)
    return licensePlate


cap = cv2.VideoCapture('vid1.MOV')
counter = 0

while (cap.isOpened()):
    ret, frame = cap.read()
    if not ret:
        break
    h, w, l = frame.shape
    frame = imutils.rotate(frame, 270)

    if counter % 6 == 0:
        licensePlate = []
        try:
            predictions = yoloPlate.return_predict(frame)
            firstCropImg = firstCrop(frame, predictions)
            secondCropImg = secondCrop(firstCropImg)
            cv2.imshow('Second crop plate', secondCropImg)
            secondCropImgCopy = secondCropImg.copy()
            licensePlate.append(opencvReadPlate(secondCropImg))
            print("OpenCV+CNN : " + licensePlate[0])
        except:
            pass
        try:
            predictions = yoloCharacter.return_predict(secondCropImg)
            licensePlate.append(
                yoloCharDetection(predictions, secondCropImgCopy))
            print("Yolo+CNN : " + licensePlate[1])
        except:
            pass
Example #14
class darkflow_prediction():

    def __init__(self):
        self.options = {"model": "cfg/yolo.cfg", "load": "bin/yolov2.weights", "threshold": 0.5}
        self.tfnet = TFNet(self.options)
        self.cluster = []
        self.FRAME_BUFFER = 5
    def image(self, image_file):
        self.image = cv2.imread(image_file, 1)
        self.result = self.tfnet.return_predict(self.image)
        print(self.result)
        self.print_box(self.result, self.image)
        cv2.waitKey(0)

    def print_box(self, frame_result, frame_image):
        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(frame_result)):
            coordtl = (frame_result[i]['topleft']['x'], frame_result[i]['topleft']['y'])
            coordbr = (frame_result[i]['bottomright']['x'], frame_result[i]['bottomright']['y'])
            cv2.rectangle(frame_image, coordtl, coordbr, (0,255,0), 2)
            s = str(frame_result[i]['label'] + ": " + str(frame_result[i]['confidence']))
            text_coord = (coordtl[0], coordtl[1]-10)
            cv2.putText(frame_image, s, text_coord, font, 1, (250,250,0))
        #cv2.imshow("memes", frame_image)

    def print_box_with_clusters(self, asd):
        font = cv2.FONT_HERSHEY_PLAIN
        for i in range(len(self.result)):
            coordtl = (self.result[i]['topleft']['x'], self.result[i]['topleft']['y'])
            coordbr = (self.result[i]['bottomright']['x'], self.result[i]['bottomright']['y'])
            cv2.rectangle(self.image,coordtl,coordbr,(0,255,0),2)
            s = str(self.result[i]['label'] + ": " + str(self.result[i]['confidence']))
            text_coord = (coordtl[0], coordtl[1]-10)
            cv2.putText(self.image, s, text_coord, font, 1, (250,250,0))
        for i, val in enumerate(asd):
            cv2.putText(self.image, str(val[1]), tuple(val[0]), font, 1, (250,250,0), 3)
        #cv2.imshow("memes", self.image)

    def video(self, video_file):
        self.video = cv2.VideoCapture(video_file)
        self.images, interm, count = [], [], 1

        # Results: list of lists of object dictionaries [frame1[{object1}, {object2}, {object3}], frame2[{}, {}, {}]]
        self.video_results_full = []

        # Same as above, but results are split into groups of 5
        self.video_results_split = []

        # Grand boxes for each group of 5 frames
        self.group_grand_boxes = []

        # Actual trajectories of objects through frames
        self.object_trajectories = {}

        try:
            while self.video.isOpened():
                # Read 1 frame of the video (as image), append to our list of images
                ret, frame_image = self.video.read()
                self.images.append(np.copy(frame_image))

                # Run YOLO to get the detected objects for this particular frame
                frame_result = self.tfnet.return_predict(frame_image)
                self.print_box(frame_result, frame_image)
                frame_result = self.refactor_result(frame_result)
                self.video_results_full.append(frame_result)
                interm.append(frame_result)

                # Use a sliding window approach to compute groups/grand boxes
                if count > self.FRAME_BUFFER:
                    interm.pop(0)
                if len(interm) == self.FRAME_BUFFER:
                    self.video_results_split.append(interm[:])
                    grand_boxes = self.get_clusters(self.video_results_split[-1])
                    if not grand_boxes:
                        continue
                    self.print_grand_box(grand_boxes)
                    self.group_grand_boxes.append(grand_boxes)
                    if len(self.group_grand_boxes) >= 2:
                        self.track_objects_between_frames(self.group_grand_boxes[-2], self.group_grand_boxes[-1])
                count += 1
                #cv2.waitKey(1)
        except AssertionError:
            pass
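            # most likely raised by return_predict() once the video ends and read()
            # hands it a None frame instead of a numpy array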

        #print("GROUP GRAND BOXES: ", self.group_grand_boxes)
        for group in self.group_grand_boxes: # group is a list with dictionaries
            for obj in group: # obj is a dictionary
                if "prev" not in obj: # prev is the string key in the dictionary
                    self.object_trajectories[self.hash_object(obj)] = [obj]
                else:
                    #print(self.object_trajectories[obj['prev']])
                    self.object_trajectories[obj['prev']].append(obj)
                    self.object_trajectories[self.hash_object(obj)] = self.object_trajectories[obj['prev']]
                    #self.object_trajectories[obj['prev']] = None
        self.object_trajectories = {k: v for k, v in self.object_trajectories.items() if v is not None}
        for i in self.object_trajectories:
            pass
            #print("\n\n")
            #print(self.object_trajectories[i])

        # trainingSize = int(round(0.75 * len(self.object_trajectories)))
        # trainingSet = [self.object_trajectories[i] for i in range(trainingSize)]
        # testSet = [self.object_trajectories[i] for i in range(trainingSize, len(self.object_trajectories))]
        dataIn, dataOut = self.getXYAll(self.object_trajectories)
        #trainX = []
        #trainY = []
        ''''testX = []
        testY = []
        for i in range(len(dataX)): #length is the number of objects we have in frame, same for both X and Y
            pathX = dataX[i]
            pathY = dataY[i]
            trainingSize = int(round(0.75 * len(pathX)))
            trainX.extend([[pathX[i]] for i in range(trainingSize)])
            testX.extend([[pathX[i]] for i in range(trainingSize, len(pathX))])
            [trainY.append(pathY[i]) for i in range(trainingSize)]
            [testY.append(pathY[i]) for i in range(trainingSize, len(pathX))]

        look_back = 1
        trainX = np.array(trainX)
        testX = np.array(testX)
        trainY = np.array(trainY)
        testY = np.array(testY)

        trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
        testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))'''

        # create and fit the LSTM network
        print("DATA IN (OBJECT DATA) SHAPE: ", dataIn.shape)
        print("DATA OUT (OBJECT OUTPUT) SHAPE: ", dataOut.shape)
        print("Model save name: ", video_file[3:])
        model = Sequential()
        model.add(LSTM(200, input_shape=(5, 2)))
        model.add(Dense(2))
        model.compile(loss='mean_squared_error', optimizer=Adam(decay=0.0001))
        try:
            model.fit(dataIn, dataOut, epochs=200, batch_size=1, verbose=2)
        except Exception:
            print("ERROR IN PROCESSING VID")
            return
        model.save(video_file[3:])
        # make predictions
        trainPredict = model.predict(dataIn)
        #testPredict = model.predict(testX)

        #print("TRAIN PREDICT: ", trainPredict)

    """
    def separateXandY(self, object_trajectories):
        #print("OBJECT TRAJECTORIES")
        #print(object_trajectories)
        objectDataX, objectDataY = [], []
        for object in object_trajectories:
            #print("OBJECT " + object)
            dataX, dataY = [], []
            for point in object_trajectories[object]:
                #print("POINT")
                #print(point)
                dataX.append(point["x"])
                dataY.append(point["y"])
            objectDataX.append(dataX)
            objectDataY.append(dataY)
        print(objectDataX)
        print(objectDataY)
        return np.array(objectDataX), np.array(objectDataY)
    """

    def getXYAll(self, object_trajectories):
        objectData = []
        objectOutput = []
        #print("OBJECT TRAJECTORIES: ", object_trajectories)
        for obj in object_trajectories:
            #print("LEN OBJECT TRAJ OF OBJ: ", len(object_trajectories[obj]))
            for i in range(0, len(object_trajectories[obj])-5):
                data = []
                for j in range(i, i+5):
                    data.append([object_trajectories[obj][j]["x"], object_trajectories[obj][j]["y"]])
                objectOutput.append([object_trajectories[obj][i+5]["x"], object_trajectories[obj][i+5]["y"]]) # next position after 5 frames
                objectData.append(data[:]) # append single sample, data = an instance of 5 time steps
        #print('OBJECT DATA: ', objectData) # [[[x, y], [x, y], [x, y], [x, y], [x, y]],
                                        # [[x, y], [x, y], [x, y], [x, y], [x, y]]]
        #print('OBJECT OUTPUT: ', objectOutput) # output = where is the next location

        #print('objectOutput', objectOutput) # output matches samples [[x, y], [x, y], [x, y], [x, y], [x, y]]
        return np.array(objectData), np.array(objectOutput)

    def hash_object(self, detected_object):
        return str(detected_object["x"]) + str(detected_object["y"]) + str(detected_object["class"]) + str(detected_object["confidence"])

    def refactor_result(self, result):
        new_result = []
        for elem in result:
            coordtl = (elem['topleft']['x'], elem['topleft']['y'])
            coordbr = (elem['bottomright']['x'], elem['bottomright']['y'])
            classif = elem['label']
            confidence = elem['confidence']
            width = coordbr[0] - coordtl[0]
            height = coordbr[1] - coordtl[1]
            x_center = coordtl[0] + width//2
            y_center = coordtl[1] + height//2
            new_elem = {'x': x_center, 'y': y_center, 'width': width,
                        'height': height, "class": classif, "confidence": confidence}
            new_result.append(new_elem)
        return new_result

    def get_clusters(self, group):
        cluster_points = [] #still need all the individual point data to get individual box data for box averaging
        for frame in group: #each frame object is 5 video frames
            for object_det in frame:
                cluster_points.append([object_det['x'], object_det['y']]) #extracts the coordinates of each object
        #print("cluster_points", cluster_points)
        if cluster_points == []:
            return None
        model = DBSCAN(eps=100, min_samples=2).fit(np.array(cluster_points))
        clusters = [(cluster_points[i], model.labels_[i]) for i in range(len(cluster_points))]
        clustered_points = {}
        for point in clusters:
            if point[1] not in clustered_points:
                clustered_points[point[1]] = []
            clustered_points[point[1]].append((point[0][0], point[0][1]))
        #print(clustered_points)
        grand_boxes = [] #creating each single grand box
        for cluster_val in clustered_points:
            avgd_x, avgd_y = 0, 0
            for point in clustered_points[cluster_val]:
                avgd_x += point[0]
                avgd_y += point[1]
            avgd_x /= len(clustered_points[cluster_val])
            avgd_y /= len(clustered_points[cluster_val])
            width, height = 0, 0
            classif, confidence = '', ''
            for point in clustered_points[cluster_val]:
                for frame in group:
                    for object_det in frame:
                        if object_det['x'] == point[0] and object_det['y'] == point[1]:
                            width += object_det['width']
                            height += object_det['height']
                            classif = object_det['class']
                            confidence = object_det['confidence']
            width /= len(clustered_points[cluster_val])
            height /= len(clustered_points[cluster_val])
            box = {'x': avgd_x, 'y': avgd_y, 'width': width,
                   'height': height, "class": classif, "confidence": confidence}
            grand_boxes.append(box) #creating a grand box for each object for every 5 frames
        #print("grand boxes", grand_boxes)
        return grand_boxes

    def print_grand_box(self, grand_boxes):
        x_points = [box['x'] for box in grand_boxes]
        y_points = [box['y'] for box in grand_boxes]
        image = self.images[-1].copy()
        for i in range(len(x_points)):
            cv2.circle(image, (int(x_points[i]), int(y_points[i])), 15, (255-int(255*i//len(x_points)),0,int(255*i//len(x_points))), -1)
        #cv2.imshow("centroids", image)
        #cv2.waitKey(2)

    def track_objects_between_frames(self, curr_frame, next_frame):
        for grand_object in curr_frame:
            closest_dist = 999999999999
            closest_obj, closest_obj_idx = None, None
            for next_object_idx in range(len(next_frame)):
                next_object = next_frame[next_object_idx]
                euclidean_dist = np.sqrt((next_object['x'] - grand_object['x'])**2 + (next_object['y'] - grand_object['y'])**2)
                if euclidean_dist < closest_dist:
                    closest_dist = euclidean_dist
                    closest_obj = next_object
                    closest_obj_idx = next_object_idx
            if closest_dist <= 25:
                grand_object['next'] = self.hash_object(closest_obj)
                next_frame[closest_obj_idx]['prev'] = self.hash_object(grand_object)

    def video_with_frame_drop(self, video_file, FPS=30):
        self.video = cv2.VideoCapture(video_file)
        skip_frames = 0
        t = time.time()
        try:
            while self.video.isOpened():
                for i in range(skip_frames):
                    _, _ = self.video.read()
                ret, self.image = self.video.read()
                self.result = self.tfnet.return_predict(self.image)
                self.print_box()
                cv2.waitKey(1)
                skip_frames = int((time.time()-t)*FPS)
                t = time.time()
        except AssertionError:
            pass
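# A hedged, standalone sketch of the frame-dropping idea used in
# video_with_frame_drop above: after each slow iteration, skip roughly
# elapsed_seconds * FPS frames so playback keeps pace with wall-clock time
# even when inference is slower than the source frame rate. The file name and
# FPS default below are assumptions, not taken from the original snippet.
import time

import cv2


def paced_playback(video_path="sample.mp4", fps=30):
    video = cv2.VideoCapture(video_path)
    skip_frames = 0
    t = time.time()
    while video.isOpened():
        for _ in range(skip_frames):
            video.read()  # discard the frames we have fallen behind on
        ret, frame = video.read()
        if not ret:
            break
        # run detection / drawing on `frame` here
        cv2.imshow("frame", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
        skip_frames = int((time.time() - t) * fps)
        t = time.time()
    video.release()
    cv2.destroyAllWindows()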
    "load": "4000",
    "gpu": 1.0,
    "threshold": 0.1
}
tfnet = TFNet(options)

video = cv2.VideoCapture(0)
video.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)   # request 1280x720 capture
video.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

colors = [tuple(np.random.rand(3) * 255) for _ in range(100)]

while True:
    start_time = time.time()
    ret, frame = video.read()

    if ret:
        returns = tfnet.return_predict(frame)
        for c, r in zip(colors, returns):
            tl = (r["topleft"]["x"], r["topleft"]["y"])
            br = (r["bottomright"]["x"], r["bottomright"]["y"])
            label = "{}: {:.0f}%".format(r["label"], r["confidence"] * 100)
            frame = cv2.rectangle(frame, tl, br, c, 4)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                                c, 2)

        cv2.imshow("frame", frame)
        print("FPS {:.1f}".format(1 / (time.time() - start_time)))
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    else:
        video.release()
        break
from io import BytesIO
import time
import requests
from PIL import Image, ImageDraw
import numpy as np
import cv2

from darkflow.net.build import TFNet

import glob

options = {"model": "cfg/tiny-yolo-voc.cfg", "load": "bin/tiny-yolo-voc.weights", "threshold": 0.15}

tfnet = TFNet(options)


counter = 0
for filename in glob.glob('birds/*.jpg'):
    curr_img = Image.open(filename).convert('RGB')
    curr_imgcv2 = cv2.cvtColor(np.array(curr_img), cv2.COLOR_RGB2BGR)

    result = tfnet.return_predict(curr_imgcv2)
    print(result)
    draw = ImageDraw.Draw(curr_img)
    for det in result:
        draw.rectangle([det['topleft']['x'], det['topleft']['y'], 
                        det['bottomright']['x'], det['bottomright']['y']],
                       outline=(255, 0, 0))
        draw.text([det['topleft']['x'], det['topleft']['y'] - 13], det['label'], fill=(255, 0, 0))
    curr_img.save('birds_labeled/%i.jpg' % counter)
    counter += 1

Example #17
0
def detection(inputImg):
    global size
    global predictions
    global tfnet2

    options = {
        "model": "cfg/tiny-yolo-voc-3c.cfg",
        "load": 8625,
        "gpu": 1.0,
        "threshold": 0.5
    }
    tfnet2 = TFNet(options)
    predictions = tfnet2.return_predict(inputImg)
    print(type(predictions))
    #    filter the results obtained from model

    newImage = np.copy(inputImg)

    for i in range(len(predictions)):
        top_x = predictions[i]['topleft']['x']
        top_y = predictions[i]['topleft']['y']

        btm_x = predictions[i]['bottomright']['x']
        btm_y = predictions[i]['bottomright']['y']

        newImage = cv2.rectangle(newImage, (top_x, top_y), (btm_x, btm_y),
                                 (255, 0, 0), 10)

    cv2.imwrite("detected.jpg", newImage)
    size = newImage.shape
    # sort detections by their top-left corner (sorted() returns a new list,
    # so it must be assigned back to predictions)
    predictions = sorted(predictions,
                         key=lambda i: (i['topleft']['x'], i['topleft']['y']))
    newImage = np.copy(inputImg)
    # drop the lower-confidence box of any overlapping pair with the same label;
    # iterate over copies of the list so removing elements is safe
    for result in predictions[:]:
        for r in predictions[:]:
            if result != r:
                #                print('distinct values')
                if result["label"] == r["label"] and Overlap(
                        result['topleft'], result['bottomright'], r['topleft'],
                        r['bottomright']):
                    print("match")
                    if result['confidence'] > r['confidence']:
                        if r in predictions:
                            predictions.remove(r)
                    else:
                        if result in predictions:
                            predictions.remove(result)
    print(predictions)
    for result in predictions:
        top_x = result['topleft']['x']
        top_y = result['topleft']['y']

        btm_x = result['bottomright']['x']
        btm_y = result['bottomright']['y']

        confidence = result['confidence']

        label = result['label'] + " " + str(round(confidence, 3))
        newImage = cv2.rectangle(newImage, (top_x, top_y), (btm_x, btm_y),
                                 (255, 0, 0), 1)
        newImage = cv2.putText(newImage, label, (top_x, top_y + 20),
                               cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), 1,
                               cv2.LINE_AA)
        size = newImage.shape
#        cv2.imwrite("out1.jpg", newImage)
    return newImage
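# The Overlap() helper called from detection() above is not part of this
# listing; a minimal hypothetical sketch of an axis-aligned box overlap test
# that would satisfy that call site (name and logic are assumptions):
def Overlap(tl1, br1, tl2, br2):
    # corners are darkflow-style dicts with 'x' and 'y' keys
    if tl1['x'] >= br2['x'] or tl2['x'] >= br1['x']:
        return False  # separated horizontally
    if tl1['y'] >= br2['y'] or tl2['y'] >= br1['y']:
        return False  # separated vertically
    return True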
Example #18
0
    'threshold': 0.4,
    'gpu': 0
}

print('Loading darkflow.')
tfnet = TFNet(options)


print('Read image')
# read the color image and convert it to RGB
img = cv2.imread('media/input/1395447642477-linzerstr.jpg', cv2.IMREAD_COLOR)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

print('Predict the image')
# use YOLO to predict the image
predictions = tfnet.return_predict(img)

# pull out some info from the results
print('Render result')

for prediction in predictions:
    tl = (prediction['topleft']['x'], prediction['topleft']['y'])
    br = (prediction['bottomright']['x'], prediction['bottomright']['y'])
    label = prediction['label']

    # add the box and label and display it
    img = cv2.rectangle(img, tl, br, (0, 255, 0), 4)
    img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
    
plt.imshow(img)
plt.show()
class stereo_vision:
    def __init__(self, threshold, gpu):
        options = {
            'model': 'cfg/yolo.cfg',
            'load': 'bin/yolo.weights',
            'threshold': threshold,
            'gpu': gpu
        }
        cal = StereoCalibration('')
        self.data, self.stereo, self.map = cal.read_data()
        self.tfnet = TFNet(options)

    def distance_measurement(self, resL, resR, frameL, frameR, rang):
        count = 0
        subtracted_mean_pre = 1000
        differeceL = abs(resL['bottomright']['x'] - resL['topleft']['x'])
        differece = abs(resL['bottomright']['y'] - resL['topleft']['y'])
        gray_croped_L = cv2.cvtColor(frameL, cv2.COLOR_RGB2GRAY)
        gray_croped_L = gray_croped_L[
            resL['topleft']['y'] +
            int(0.1 * differece):resL['bottomright']['y'] -
            int(0.1 * differece), resL['topleft']['x'] +
            int(0.1 * differeceL):resL['bottomright']['x'] -
            int(0.1 * differeceL)]
        gray_R = cv2.cvtColor(frameR, cv2.COLOR_RGB2GRAY)
        cv2.imshow("gray_croped", gray_croped_L)
        cv2.waitKey(0)
        for i in range(rang):
            gray_croped_R = gray_R[
                resL['topleft']['y']:resL['bottomright']['y'],
                resR['topleft']['x'] + i - 25:resR['topleft']['x'] + i - 25 +
                differeceL]
            cv2.imshow("CR", gray_croped_R)
            subtracted_image = np.abs(np.subtract(gray_croped_R,
                                                  gray_croped_L))
            subtracted_mean = np.sum(subtracted_image.flatten(), axis=0)
            #print("subtracted_mean"+str(subtracted_mean))

            if subtracted_mean < subtracted_mean_pre:
                count = i
                subtracted_mean_pre = subtracted_mean

        disparity1 = abs((resR['topleft']['x'] + count - 15) -
                         resL['topleft']['x'])
        disparity = abs(resL["middleL"] - resR["middleR"])
        x = frameL.shape[0]
        print("disparity1:" + str(disparity1) + "dis:" + str(disparity))

        distance = (13 * x) / (math.tan(math.radians(30)) * disparity)
        distance1 = (13 * x) / (math.tan(math.radians(30)) * disparity1)
        distance = distance - 3.3
        distance1 = distance1 - 1

        return distance, distance1

    def object_finding(self, frameL, frameR, search):
        Left = []
        Right = []
        imgU1 = cv2.remap(frameL, self.map["map1x"], self.map["map1y"],
                          cv2.INTER_LANCZOS4)
        imgU2 = cv2.remap(frameR, self.map["map2x"], self.map["map2y"],
                          cv2.INTER_LANCZOS4)
        resultL = self.tfnet.return_predict(imgU1)
        resultR = self.tfnet.return_predict(imgU2)
        distance = {}

        if not resultL == []:
            for res in resultL:
                if res['label'] == search:
                    res.update({
                        "AreaL":
                        (res['bottomright']['x'] - res['topleft']['x']) *
                        (res['bottomright']['y'] - res['topleft']['y'])
                    })
                    res.update({
                        "middleL":
                        res['topleft']['x'] +
                        (res['bottomright']['x'] - res['topleft']['x']) / 2
                    })
                    Left.append(res)

        if not resultR == []:
            for res in resultR:
                if res['label'] == search:
                    res.update({
                        "AreaR":
                        (res['bottomright']['x'] - res['topleft']['x']) *
                        (res['bottomright']['y'] - res['topleft']['y'])
                    })
                    res.update({
                        "middleR":
                        res['topleft']['x'] +
                        (res['bottomright']['x'] - res['topleft']['x']) / 2
                    })
                    Right.append(res)
        m = 0
        for left in Left:
            for right in Right:
                if abs(left["AreaL"] - right["AreaR"]) < 1100:
                    distance.update({
                        "dis" + str(m):
                        self.distance_measurement(left, right, imgU1, imgU2,
                                                  50)
                    })
                    m += 1

                print("Left->" + str(left) + " Right->" + str(right))

        return distance
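# A hedged sketch of the depth relation that the two distance lines in
# distance_measurement above appear to use: depth ~ baseline * focal_length_px
# / disparity, with the focal length in pixels approximated from the image
# size and the field of view. Reading 13 as the camera baseline (cm) and 30 as
# the half field-of-view (degrees) is an assumption, not stated in the snippet.
import math


def stereo_depth(image_size_px, disparity_px, baseline_cm=13, half_fov_deg=30):
    # focal_length_px is proportional to image_size_px / tan(half_fov_deg)
    return (baseline_cm * image_size_px) / (
        math.tan(math.radians(half_fov_deg)) * disparity_px)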
Example #20
0
import cv2
from darkflow.net.build import TFNet

# darkflow options
options = {"model": "cfg/my-tiny-yolo.cfg", "load": -1, "threshold": 0.5}
tfnet = TFNet(options)
curr_frame = cv2.imread("./test_pic1.jpg")
results = tfnet.return_predict(curr_frame)
for result in results:
    cv2.rectangle(curr_frame, (result["topleft"]["x"], result["topleft"]["y"]),
                  (result["bottomright"]["x"], result["bottomright"]["y"]),
                  (255, 0, 0), 4)
    text_x, text_y = result["topleft"]["x"] - 10, result["topleft"]["y"] - 10
    cv2.putText(curr_frame, result["label"], (text_x, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
cv2.imwrite("video_capture/test.jpg", curr_frame)
Example #21
0
    'load': 'bin/yolo.weights',
    'threshold': 0.3,
    'gpu': 0.7
}

tfnet = TFNet(option)

capture = cv2.VideoCapture('test.mp4')  # open the video
colors = [tuple(255 * np.random.rand(3)) for i in range(10)]  # create 10 random colors (RGB tuples)
# process frames while the video is open
while capture.isOpened():
    stime = time.time()  # record the start time
    # read one frame: ret is a bool telling whether a frame was read,
    # frame is the current frame image
    ret, frame = capture.read()
    if ret:
        results = tfnet.return_predict(frame)  # feed the frame to the network for prediction
        # pair up colors and results
        for color, result in zip(colors, results):
            tl = (result['topleft']['x'], result['topleft']['y'])
            br = (result['bottomright']['x'], result['bottomright']['y'])
            label = result['label']
            frame = cv2.rectangle(frame, tl, br, color, 3)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                                color, 1)
        cv2.imshow('frame', frame)  # show the current frame
        print('FPS {:.1f}'.format(1 / (time.time() - stime)))  # print the frame rate
        # press q to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        capture.release()
Example #22
0
from darkflow.net.build import TFNet
import argparse
import cv2

# the cfg file and weights locations; change these according to your model location
model = {"model": "cfg/yolov2.cfg", "load": "yolov2.weights", "threshold": 0.4}

# creating the object
tfnet = TFNet(model)

# to get the image path
parser = argparse.ArgumentParser()
parser.add_argument('--img', type=str, help='path of the image')
arg = parser.parse_args()

imgcv = cv2.imread(arg.img)  # read the image
result = tfnet.return_predict(
    imgcv)  # predict the classes and coordinates of the objects

# Draw the bounding boxes around the predicted classes
tl = []
br = []
labels = []
for i in range(len(result)):
    topleft = (
        result[i]['topleft']['x'], result[i]['topleft']['y']
    )  # each prediction is a dictionary holding the label and corner coordinates
    bottomright = (result[i]['bottomright']['x'],
                   result[i]['bottomright']['y'])
    label = (result[i]['label'])
    st = result[i]['topleft']['x']
    nd = result[i]['bottomright']['x']
    mid_x = (nd - st) // 2 + st  # mid point of the top box line
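    # Hypothetical continuation (the original loop is cut off in this listing,
    # so this is an illustrative sketch, not the author's code): collect the
    # corners and labels, then draw every box on the image and save it.
    tl.append(topleft)
    br.append(bottomright)
    labels.append(label)

for box_tl, box_br, box_label in zip(tl, br, labels):
    imgcv = cv2.rectangle(imgcv, box_tl, box_br, (0, 255, 0), 2)
    imgcv = cv2.putText(imgcv, box_label, box_tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                        (0, 0, 0), 2)
cv2.imwrite('prediction.jpg', imgcv)  # output file name is an assumption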
Example #23
0
        return dx * dy


options = {"model": "cfg\\yolo_custom.cfg", "load": -1, "gpu": 0.1}
tfnet2 = TFNet(options)
tfnet2.load_from_ckpt()
# Change the path according to the windows directory
# Don't forget to keep \\ as separator in windows path
# See whether all images are of jpg extension
# Keep images and annotations in separate folders
test_image_paths = glob(
    os.path.join('/home/falcon01/Desktop/test/images/', '*.jpg'))
for image_path in test_image_paths:
    original_img = cv2.imread(image_path)
    original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
    results = tfnet2.return_predict(original_img)
    image_name = os.path.basename(image_path)
    xml_file_name = image_name[:-4] + '.xml'
    # Change the path according to requirement
    xml_file = open(
        os.path.join(os.path.dirname(image_path),
                     '../annotations/{}'.format(xml_file_name)))

    pred_rectcoord = predicted_rectangle(results)
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for obj in root.iter('object'):
        xml_box = obj.find('bndbox')
        xmin = int(float(xml_box.find('xmin').text))
        ymin = int(float(xml_box.find('ymin').text))
        xmax = int(float(xml_box.find('xmax').text))
class ObjectCountingAPI:
    def __init__(self, options):
        self.options = options
        self.tfnet = TFNet(options)

    def _write_quantities(self, frame, labels_quantities_dic):
        for i, (label, quantity) in enumerate(labels_quantities_dic.items()):
            class_id = [
                i for i, x in enumerate(labels_quantities_dic.keys())
                if x == label
            ][0]
            color = [int(c) for c in COLORS[class_id % len(COLORS)]]

            cv2.putText(
                frame,
                f"{label}: {quantity}",
                (10, (i + 1) * 35),
                OBJECTS_ON_FRAME_COUNTER_FONT,
                OBJECTS_ON_FRAME_COUNTER_FONT_SIZE,
                color,
                2,
                cv2.LINE_AA,  # line type
            )

    def _draw_detection_results(self, frame, results, labels_quantities_dic):
        for start_point, end_point, label, confidence in results:
            x1, y1 = start_point

            class_id = [
                i for i, x in enumerate(labels_quantities_dic.keys())
                if x == label
            ][0]

            color = [int(c) for c in COLORS[class_id % len(COLORS)]]

            cv2.rectangle(frame, start_point, end_point, color,
                          DETECTION_FRAME_THICKNESS)

            cv2.putText(frame, label, (x1, y1 - 5),
                        OBJECTS_ON_FRAME_COUNTER_FONT,
                        OBJECTS_ON_FRAME_COUNTER_FONT_SIZE, color, 2)

    def _convert_detections_into_list_of_tuples_and_count_quantity_of_each_label(
            self, objects):
        labels_quantities_dic = {}
        results = []

        for object in objects:
            x1, y1 = object["topleft"]["x"], object["topleft"]["y"]
            x2, y2 = object["bottomright"]["x"], object["bottomright"]["y"]
            confidence = object["confidence"]
            label = object["label"]

            try:
                labels_quantities_dic[label] += 1
            except KeyError:
                labels_quantities_dic[label] = 1

            start_point = (x1, y1)
            end_point = (x2, y2)

            results.append((start_point, end_point, label, confidence))
        return results, labels_quantities_dic

    def count_objects_on_image(self,
                               frame,
                               targeted_classes=[],
                               output_path="count_people_output.jpg",
                               show=False):
        objects = self.tfnet.return_predict(frame)

        if targeted_classes:
            objects = list(
                filter(lambda res: res["label"] in targeted_classes, objects))

        results, labels_quantities_dic = self._convert_detections_into_list_of_tuples_and_count_quantity_of_each_label(
            objects)

        self._draw_detection_results(frame, results, labels_quantities_dic)

        self._write_quantities(frame, labels_quantities_dic)

        if show:
            cv2.imshow("frame", frame)
            cv2.waitKey()
            cv2.destroyAllWindows()

        cv2.imwrite(output_path, frame)

        # return frame, objects

    def count_objects_on_video(self,
                               cap,
                               targeted_classes=[],
                               output_path="the_output.avi",
                               show=False):
        ret, frame = cap.read()

        fps, height, width = get_output_fps_height_and_width(cap)

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_movie = cv2.VideoWriter(output_path, fourcc, fps,
                                       (width, height))

        while ret:
            objects = self.tfnet.return_predict(frame)

            if targeted_classes:
                objects = list(
                    filter(lambda res: res["label"] in targeted_classes,
                           objects))

            results, labels_quantities_dic = self._convert_detections_into_list_of_tuples_and_count_quantity_of_each_label(
                objects)

            self._draw_detection_results(frame, results, labels_quantities_dic)

            self._write_quantities(frame, labels_quantities_dic)

            output_movie.write(frame)

            if show:
                cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            ret, frame = cap.read()

        cap.release()
        cv2.destroyAllWindows()

    def count_objects_crossing_the_virtual_line(self,
                                                cap,
                                                line_begin,
                                                line_end,
                                                targeted_classes=[],
                                                output_path="the_output.avi",
                                                show=False):

        ret, frame = cap.read()

        fps, height, width = get_output_fps_height_and_width(cap)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        output_movie = cv2.VideoWriter(output_path, fourcc, fps,
                                       (width, height))

        tracker = Sort()
        memory = {}

        line = [line_begin, line_end]
        counter = 0

        while ret:

            objects = self.tfnet.return_predict(frame)

            if targeted_classes:
                objects = list(
                    filter(lambda res: res["label"] in targeted_classes,
                           objects))

            results, _ = self._convert_detections_into_list_of_tuples_and_count_quantity_of_each_label(
                objects)

            # convert to the format required for dets: [x1, y1, x2, y2, confidence]
            dets = [[*start_point, *end_point, confidence]
                    for (start_point, end_point, label, confidence) in results]

            np.set_printoptions(
                formatter={'float': lambda x: "{0:0.3f}".format(x)})
            dets = np.asarray(dets)
            tracks = tracker.update(dets)

            boxes = []
            indexIDs = []
            previous = memory.copy()
            memory = {}

            for track in tracks:
                boxes.append([track[0], track[1], track[2], track[3]])
                indexIDs.append(int(track[4]))
                memory[indexIDs[-1]] = boxes[-1]

            if len(boxes) > 0:
                i = int(0)
                for box in boxes:
                    (x, y) = (int(box[0]), int(box[1]))
                    (w, h) = (int(box[2]), int(box[3]))

                    color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
                    cv2.rectangle(frame, (x, y), (w, h), color,
                                  DETECTION_FRAME_THICKNESS)

                    if indexIDs[i] in previous:
                        previous_box = previous[indexIDs[i]]
                        (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                        (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                        p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2))
                        p1 = (int(x2 + (w2 - x2) / 2), int(y2 + (h2 - y2) / 2))
                        cv2.line(frame, p0, p1, color, 3)

                        if intersect(p0, p1, line[0], line[1]):
                            counter += 1

                    text = "{}".format(indexIDs[i])
                    cv2.putText(frame, text, (x, y - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    i += 1

            cv2.line(frame, line[0], line[1], LINE_COLOR, LINE_THICKNESS)

            cv2.putText(frame, str(counter), LINE_COUNTER_POSITION,
                        LINE_COUNTER_FONT, LINE_COUNTER_FONT_SIZE, LINE_COLOR,
                        2)

            output_movie.write(frame)

            if show:
                cv2.imshow('frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            ret, frame = cap.read()

        cap.release()
        cv2.destroyAllWindows()
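# Two helpers referenced by the class above are not part of this listing;
# minimal hypothetical sketches that would satisfy the call sites (the real
# project may define them differently):
import cv2


def get_output_fps_height_and_width(cap):
    # read the writer parameters straight from the opened capture
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    return fps, height, width


def _ccw(a, b, c):
    # True when the points a, b, c are in counter-clockwise order
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])


def intersect(p0, p1, q0, q1):
    # True when segment p0-p1 crosses segment q0-q1 (the virtual counting line)
    return _ccw(p0, q0, q1) != _ccw(p1, q0, q1) and _ccw(p0, p1, q0) != _ccw(p0, p1, q1)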
Example #25
0
    'threshold': 0.6,
    'gpu': 1.0
}

tfnet = TFNet(option)

capture = cv2.VideoCapture(
    'aot-id-card.mp4'
)  # read the video from the current directory (where this .py file lives)
color = [tuple(255 * np.random.rand(3)) for i in range(5)]

while capture.isOpened():  # while the capture is open
    stime = time.time()  # record the start time for the FPS calculation
    # read a frame from the capture; ret is True if a frame was read, else False
    ret, frame = capture.read()
    if ret:  # only run prediction when a frame was actually read
        result = tfnet.return_predict(frame)  # predict on the frame
        # everything below is the same as for single-image processing
        for colr, reslt in zip(color, result):
            t1 = (reslt['topleft']['x'], reslt['topleft']['y'])
            t2 = (reslt['bottomright']['x'], reslt['bottomright']['y'])

            label = reslt['label']
            confidence = reslt['confidence']

            label = '{} : {:0.2f}%'.format(label, confidence * 100)

            frame = cv2.rectangle(frame, t1, t2, colr, 3)
            frame = cv2.putText(frame, label, t1, cv2.FONT_HERSHEY_COMPLEX, 1,
                                colr, 2)
        cv2.imshow('frame', frame)
def yolo_in_R(myDir, imageName, threshold):

    import os
    os.chdir(myDir)

    # import libraries
    from darkflow.net.build import TFNet
    import cv2
    import matplotlib.pyplot as plt
    import matplotlib.image as mpimg

    # options for tiny-yolo-voc-1class (trained model)
    options = {
        'model': 'cfg/tiny-yolo-voc-1class_toh.cfg',
        'load': 1000,
        'threshold': threshold,
        'gpu': 1.0
    }

    tfnet = TFNet(options)

    # create image object
    # global img
    # img = None
    img = cv2.imread("Downloaded_images/" + imageName + ".jpg",
                     cv2.IMREAD_COLOR)
    # img = cv2.imread("sample_img/sample_toh.jpg", cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # plt.imshow(img)
    # plt.show()

    # predict objects in image
    # result = None
    result = tfnet.return_predict(img)

    # set up bounding boxes
    tl = (int(result[0]['topleft']['x']), int(result[0]['topleft']['y']))
    br = (int(result[0]['bottomright']['x']),
          int(result[0]['bottomright']['y']))
    label = result[0]['label']

    # open CV commands
    img = cv2.rectangle(img, tl, br, (0, 255, 0), 5)
    # add label to bounding box
    img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 253, 0),
                      2)

    # display image with bounding box
    # plt.imshow(img)
    # plt.show()

    # save image
    mpimg.imsave("Labeled_images/" + imageName + "_labeled.jpg", img)
options = {
    'model' : 'cfg/yolov2-tiny-voc-8c.cfg',
    'load' : 21750,
    'threshold': 0.2,
    'gpu' : 0.8
}

tf = TFNet(options)
img_dir = 'check_img/'
img_name = 'strawberry0'
img_type = '.jpg'
img = cv2.imread(img_dir+img_name+img_type,cv2.IMREAD_COLOR)



result = tf.return_predict(img)

res_len = len(result)

for i in range(res_len):
	tl = (result[i]['topleft']['x'], result[i]['topleft']['y'])
	br = (result[i]['bottomright']['x'], result[i]['bottomright']['y'])
	label = result[i]['label']

	img = cv2.rectangle(img, tl, br, (0, 255, 0), 4)
	img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

cv2.imwrite(img_dir+img_name+'_detected'+img_type,img)

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
Example #28
0
while (cap1.isOpened() and cap2.isOpened()):
    ret, frame = cap1.read()
    ret2, frame2 = cap2.read()

    #Extract frame
    try:
        framed = cv2.resize(frame, (960, 540))
        framed2 = cv2.resize(frame2, (960, 540))

        cv2.imshow('frame', framed)
        cv2.imshow('frame2', framed2)

        #Only run every 15 frames for efficiency
        if counter % 15 == 0:
            try:
                predictions = yoloPlate.return_predict(frame)
                firstCropImg = firstCrop(frame, predictions)
                secondCropImg = secondCrop(firstCropImg)
                cv2.imshow('Second crop plate', secondCropImg)
                secondCropImg = cv2.cvtColor(secondCropImg, cv2.COLOR_BGR2GRAY)

                if not (os.path.exists(dirname + '/results')):
                    os.mkdir(dirname + '/results')

                cv2.imwrite(
                    dirname + '/results/result' + str(plateNum) + '.jpg',
                    secondCropImg)
                rtnJSON = ('{"source": "./results/result' + str(plateNum) +
                           '.jpg", "camera": "1" , "time": "' +
                           str(datetime.datetime.now()) + '"}')
                print(rtnJSON)
class DogDetector():
    def __init__(self, cfg='yolo', weights='yolov2', threshold=0.3):
        model_path = os.path.join(root_path, 'cfg/' + cfg + '.cfg')
        load_path = os.path.join(root_path, 'weights/' + weights + '.weights')
        self.options = {
            'model': model_path,
            'load': load_path,
            'threshold': threshold,
            'gpu': 1.0,
            'savepb': True
        }
        print(self.options['model'], self.options['load'])
        self.tfnet = TFNet(self.options)

    def detectsOneDog(self, img):
        result = self.tfnet.return_predict(img)
        dog_list = []
        for res in result:
            if res['label'] == 'dog':
                dog_list.append(res)
        if len(dog_list) == 0:
            return False
        else:
            print(dog_list)
            temp_area = 0
            index = 0
            for i in range(len(dog_list)):
                rect = self.getDogRect(dog_list[i], img)
                area = rect[2] * rect[3]
                if area > temp_area:
                    temp_area = area
                    index = i

            print(dog_list[index])
            return dog_list[index]

    def detectDogHead(self, img):
        detector = dlib.simple_object_detector(
            os.path.join(root_path, "data/dog_detector.svm"))
        dets = detector(img)

        if (len(dets) == 0):
            print("Can't detect Head")
            return False
        else:
            dets_pop = dets.pop()
        return dets_pop

    def isLeft(self, dog_rect, head_rect):
        if (head_rect[0] + head_rect[2] > dog_rect[0] + dog_rect[2]):
            return False
        else:
            return True

    def getDogRect(self, result, originalImg):
        tl = result['topleft']
        br = result['bottomright']

        width = br['x'] - tl['x']
        height = br['y'] - tl['y']
        error_pixel = round(max(width, height) / 4)
        width += error_pixel
        height += error_pixel
        x = max(tl['x'] - round(error_pixel / 2), 1)
        y = max(tl['y'] - round(error_pixel / 2), 1)

        imgHeight, imgWidth = originalImg.shape[:2]

        if (x + width) >= imgWidth:
            width = imgWidth - x - 1
        if (y + height) >= imgHeight:
            height = imgHeight - y - 1
        return (x, y, width, height)

    def getDogPartRect(self, result, originalImg):
        t = result.top()
        l = result.left()
        b = result.bottom()
        r = result.right()

        print(t, l, b, r)

        width = r - l
        height = b - t
        error_pixel = round(max(width, height) / 4)
        width += error_pixel
        height += error_pixel
        x = max(l - round(error_pixel / 2), 0)
        y = max(t - round(error_pixel / 2), 0)

        imgHeight, imgWidth = originalImg.shape[:2]

        if (x + width) >= imgWidth:
            width = imgWidth - x - 1
        if (y + height) >= imgHeight:
            height = imgHeight - y - 1
        return (x, y, width, height)
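# A hedged usage sketch for the DogDetector class above, assuming the
# module-level setup it relies on (root_path, TFNet, dlib data files) is in
# place; the image file names below are assumptions.
import cv2

detector = DogDetector()
frame = cv2.imread('dog.jpg')
dog = detector.detectsOneDog(frame)
if dog:
    # crop the padded rectangle that getDogRect() builds around the detection
    x, y, w, h = detector.getDogRect(dog, frame)
    cv2.imwrite('dog_crop.jpg', frame[y:y + h, x:x + w])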
Example #30
0
class FoodLabeller(Labeller):

    def __init__(self, env_spec, policy, options=None, darkflow_path=None, im_is_rgb=False, **kwargs):
        super(FoodLabeller, self).__init__(env_spec, policy)

        if options is None:
            options = {"model": "cfg/yolo.cfg", "load": "bin/yolov2.weights", "threshold": 1.e-2, "gpu": True}
        if darkflow_path is None:
            darkflow_path = os.path.join(str(Path.home()), 'darkflow')
        old_cwd = os.getcwd()
        os.chdir(darkflow_path)
        self._tfnet = TFNet(options)
        os.chdir(old_cwd)
        self._labels = []
        for key in self._env_spec.goal_spec:
            if key[-5:] == '_diff':
                self._labels.append(key[:-5])
        self._im_is_rgb = im_is_rgb

    def label(self, observations, curr_goals):
        obs_ims = observations[0]
        goals = []
        for obs_im, curr_goal in zip(obs_ims, curr_goals):
            if self._im_is_rgb:
                obs_im = obs_im[..., ::-1] # make bgr
            im_height, im_width, _ = obs_im.shape

            results = self._tfnet.return_predict(obs_im)
            boxes = self._get_boxes(results)
            goal = self._get_goal(boxes, curr_goal, im_height, im_width)
            goals.append(goal)
        return goals

    def _get_boxes(self, results):
        boxes = []
        for result in results:
            if result['label'] in self._labels:
                boxes.append(result)
        return boxes

    def _get_goal(self, boxes, curr_goal, im_height, im_width):
        # goal is weight of each goal and then desired center pixel
        goals = {}
        min_areas = {}
        for label in self._labels:
            goals[label] = (0., 0.) # (in image, normalized diff to middle)
            min_areas[label] = np.inf

        im_mid = 0.5 * (im_width - 1.)

        for box in boxes:
            label = box['label']
            br_x = box['bottomright']['x']
            br_y = box['bottomright']['y']
            tl_x = box['topleft']['x']
            tl_y = box['topleft']['y']
            mid_point = 0.5 * np.array((br_x + tl_x, br_y + tl_y))
            assert(mid_point[0] >= 0 and mid_point[0] < im_width)
            assert(mid_point[1] >= 0 and mid_point[1] < im_height)
            area = (tl_x - br_x) * (tl_y - br_y)
            if area < min_areas[label]:
                min_areas[label] = area
                norm_diff = (im_mid - mid_point[0]) / im_mid
                goals[label] = (1., norm_diff)  

        goal_keys = list(self._env_spec.goal_spec.keys())
        goal = np.array(curr_goal)

        for label in self._labels:
            idx = min([idx for idx, k in enumerate(goal_keys) if label in k])
            goal[idx:idx+2] = goals[label] # NOTE: assumes in image, normalized diff to middle

        return goal
Example #31
0
FRAME_HEIGHT = 480  #240

if __name__ == '__main__':

    camera = cv2.VideoCapture(0)
    print(camera.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH))
    print(camera.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT))

    tfnet = TFNet(options)

    #imgcv = cv2.imread("/Users/marco/Documents/Datasets/drAIver/object_detector/test_images/image1.jpg")

    #print(results)

    while True:
        if camera.isOpened():
            _, imgcv = camera.read()

            results = tfnet.return_predict(imgcv)

            for result in results:
                if result['confidence'] > 0.5:
                    cv2.rectangle(
                        imgcv,
                        (result['topleft']['x'], result['topleft']['y']),
                        (result['bottomright']['x'],
                         result['bottomright']['y']), (255, 0, 0))

            # show the frame once all boxes for this capture have been drawn
            cv2.imshow("Image", imgcv)
            cv2.waitKey(1)
Example #32
0
#for feed2
tl2 = ()
bl2 = ()
fixed_top_left2 = ()
fixed_bottom_right2 = ()
flag2 = 0
time_flag2 = 0

while True:
    #start_time=time.time()
    ret1, frame1 = capture_1.read()
    ret2, frame2 = capture_2.read()
    countbottle1 = 0
    countbottle2 = 0
    if ret1 and ret2:
        results1 = tfnet.return_predict(frame1)
        results2 = tfnet.return_predict(frame2)
        for color, result in zip(colors, results1):
            tl = (result['topleft']['x'], result['topleft']['y'])
            br = (result['bottomright']['x'], result['bottomright']['y'])
            #the keys in result dictionary are:
            #[label], [confidence], [topleft][x or y], [bottomright][x or y]
            label = result['label']
            confidence = result['confidence']
            if label == 'bottle':
                time_flag = 0
                text = '{}: {:.0f}%'.format(label, confidence * 100)
                frame1 = cv2.rectangle(frame1, tl, br, color, 5)
                frame1 = cv2.putText(frame1, text, tl,
                                     cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
                btl_1 = tl
Example #33
0
    img_origin = cv2.imread('bin/yrm_origin.jpg')  # original frame, no danmaku overlay
    img_dm = cv2.imread('bin/yrm_dm.jpg')  # same frame with danmaku (bullet comments) overlaid
    if img_origin is None or img_dm is None:
        print('img_origin is None or img_dm is None')
        exit(0)

    options = {
        'model': 'cfg/yolo.cfg',
        'load': 'bin/yolo.weights',
        'threshold': 0.5,
        'gpu': 0.7,
        # 'imgdir': 'sample_img/wa-yolo/',
    }
    tfnet = TFNet(options)

    boxes = tfnet.return_predict(img_origin)

    if len(boxes) == 0:
        print('there is no obj')
        exit(0)

    for result in boxes:
        if result['label'] != 'person':
            continue
        # this box is a person: let the original pixels show through the overlay
        (tx, ty) = (result['topleft']['x'], result['topleft']['y'])
        (bx, by) = (result['bottomright']['x'], result['bottomright']['y'])
        img_dm[ty:by, tx:bx] = img_origin[ty:by, tx:bx] // 4 * 3 + img_dm[ty:by, tx:bx] // 4

    while (1):
        cv2.imshow('image', img_dm)
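# A hedged helper sketch generalising the per-box blend above: inside every
# detected person box, keep 3/4 of the original pixels and 1/4 of the
# overlaid frame so people are not hidden by the danmaku. The function name
# and the keep/4 ratio parameter are assumptions, not the original code.
def unblock_boxes(img_overlay, img_original, boxes, keep=3):
    # boxes: iterable of (tx, ty, bx, by) corner tuples; keep/4 of the original shows through
    out = img_overlay.copy()
    for (tx, ty, bx, by) in boxes:
        out[ty:by, tx:bx] = (img_original[ty:by, tx:bx] // 4 * keep
                             + img_overlay[ty:by, tx:bx] // 4 * (4 - keep))
    return out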
class SubscribeImage(object):
	def __init__(self):

 		self.options = {"model": "cfg/tiny-yolo-voc.cfg", "load": "bin/tiny-yolo-voc.weights", "threshold": 0.1, "gpu": 0.2}
#		self.options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1, "gpu": 0.8}
		self.tfnet = TFNet(self.options)
		self.class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
							'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
							'dog', 'horse', 'motorbike', 'person', 'pottedplant',
							'sheep', 'sofa', 'train', 'tvmonitor']

		self.num_classes = len(self.class_names)

		# build the color list
		self.hsv_tuples = [(x / 80, 1., 1.) for x in range(80)]
		self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), self.hsv_tuples))
		self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),self.colors))
		random.seed(10101)  # Fixed seed for consistent colors across runs.
		random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
		random.seed(None)  # Reset seed to default.
	
		self.periods = []
		self.count = 0
		self.class_num = 0

		rospy.init_node('kinect_v1_image_sub')
		self.bridge = CvBridge()
		rospy.Subscriber('/camera/rgb/image_color', Image, self.callback)
		rospy.spin()


	def callback(self, data):

		try:
			cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
		except CvBridgeError as e:
			print(e)

		(rows,cols,channels) = cv_image.shape
		if cols > 60 and rows > 60 :
			cv2.circle(cv_image, (50,50), 10, 255)

		cv2.imshow("Image window", cv_image)
		self.DarkflowRos(cv_image)


	def DarkflowRos(self, frame):
		# run detection
		self.start = time.time()
		self.items = self.tfnet.return_predict(frame)
		self.count += 1
		self.period = time.time() - self.start
		if self.count % 30 == 0:
			print('FrameRate:' + str(1.0 / (sum(self.periods)/self.count)))

		self.periods.append(self.period)
		for item in self.items:
			self.tlx = item['topleft']['x']
			self.tly = item['topleft']['y']
			self.brx = item['bottomright']['x']
			self.bry = item['bottomright']['y']
			self.label = item['label']
			self.conf = item['confidence']

		# show only the confident detections
		if self.conf > 0.4:
			for i in self.class_names:
				if self.label == i:
					self.class_num = self.class_names.index(i)
					break

		# draw the detection position
		cv2.rectangle(frame, (self.tlx, self.tly), (self.brx, self.bry), self.colors[self.class_num], 2)
		self.text = self.label + " " + ('%.2f' % self.conf)
		cv2.putText(frame, self.text, (self.tlx+10, self.tly-5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, self.colors[self.class_num], 2)

		# display
		cv2.imshow("View", frame)

		k = cv2.waitKey(10)
		if k == ord('q'):
			cv2.destroyAllWindows()