def test_TRAIN_FROM_WEIGHTS_CLI__LOAD_CHECKPOINT_RETURNPREDICT_YOLOv2():
    # Test training using pre-generated weights for tiny-yolo-voc
    # NOTE: This test verifies that the code executes properly and that the expected checkpoint file
    #       (tiny-yolo-voc-20.meta in this case) is generated. In addition, predictions are generated
    #       using the checkpoint file to verify that training completed successfully.
    testString = "flow --model {0} --load {1} --train --dataset {2} --annotation {3} --epoch 20".format(
        tiny_yolo_voc_CfgPath, tiny_yolo_voc_WeightPath,
        os.path.join(buildPath, "test", "training", "images"),
        os.path.join(buildPath, "test", "training", "annotations"))

    with pytest.raises(SystemExit):
        executeCLI(testString)

    checkpointPath = os.path.join(buildPath, "ckpt", "tiny-yolo-voc-20.meta")
    assert os.path.exists(checkpointPath), \
        "Expected output checkpoint file: {0} was not found.".format(checkpointPath)

    # Using trained weights
    options = {"model": tiny_yolo_voc_CfgPath, "load": 20,
               "config": generalConfigPath, "threshold": 0.1}
    tfnet = TFNet(options)

    # Make sure predictions very roughly match the expected values for the image with a bike and a person
    imgcv = cv2.imread(trainImgBikePerson["path"])
    loadedPredictions = tfnet.return_predict(imgcv)

    assert compareObjectData(trainImgBikePerson["expected-objects"]["tiny-yolo-voc"], loadedPredictions,
                             trainImgBikePerson["width"], trainImgBikePerson["height"], 0.7, 0.25), \
        "Generated object predictions from training (for image with person on the bike) were not anywhere close to what they are expected to be.\nTraining may not have completed successfully."

    differentThanExpectedBike = compareObjectData(trainImgBikePerson["expected-objects"]["tiny-yolo-voc"],
                                                  loadedPredictions, trainImgBikePerson["width"],
                                                  trainImgBikePerson["height"], 0.01, 0.001)

    # Make sure predictions very roughly match the expected values for the image with a horse and a person
    imgcv = cv2.imread(trainImgHorsePerson["path"])
    loadedPredictions = tfnet.return_predict(imgcv)

    assert compareObjectData(trainImgHorsePerson["expected-objects"]["tiny-yolo-voc"], loadedPredictions,
                             trainImgHorsePerson["width"], trainImgHorsePerson["height"], 0.7, 0.25), \
        "Generated object predictions from training (for image with person on the horse) were not anywhere close to what they are expected to be.\nTraining may not have completed successfully."

    differentThanExpectedHorse = compareObjectData(trainImgHorsePerson["expected-objects"]["tiny-yolo-voc"],
                                                   loadedPredictions, trainImgHorsePerson["width"],
                                                   trainImgHorsePerson["height"], 0.01, 0.001)

    assert not (differentThanExpectedBike and differentThanExpectedHorse), \
        "The generated object predictions for both images appear to be exactly the same as the ones generated with the original weights.\nTraining may not have completed successfully.\n\nNOTE: It is possible this is a fluke error and training did complete properly (try running this build again to confirm) - but most likely something is wrong."
def test_RETURNPREDICT_PBLOAD_YOLOv2():
    # Test the .pb and .meta files generated in the previous step
    # NOTE: This test verifies that the code executes properly and that the .pb and .meta files
    #       that were created can be loaded and used for inference.
    #       The generated predictions are compared against expected predictions.
    options = {"pbLoad": pbPath, "metaLoad": metaPath, "threshold": 0.4}
    tfnet = TFNet(options)

    imgcv = cv2.imread(testImg["path"])
    loadedPredictions = tfnet.return_predict(imgcv)

    assert compareObjectData(testImg["expected-objects"]["yolo"], loadedPredictions,
                             testImg["width"], testImg["height"],
                             threshCompareThreshold, posCompareThreshold), \
        "Generated object predictions from return_predict() were not within margin of error compared to expected values."
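# Both tests above call a compareObjectData() helper that is defined elsewhere in
# the test suite. The sketch below is a hypothetical reconstruction, inferred only
# from the call sites (expected objects, predictions, image width/height, a
# confidence tolerance, and a position tolerance as a fraction of the image size);
# it illustrates the kind of fuzzy comparison being made and is not the project's
# actual helper.
def compareObjectData(expectedObjects, predictions, imgWidth, imgHeight,
                      confDiffThreshold, posDiffThreshold):
    # True when every expected object has a prediction with the same label whose
    # confidence and box corners are within the given tolerances.
    for expected in expectedObjects:
        matched = False
        for pred in predictions:
            if pred["label"] != expected["label"]:
                continue
            confOk = abs(pred["confidence"] - expected["confidence"]) <= confDiffThreshold
            xOk = (abs(pred["topleft"]["x"] - expected["topleft"]["x"]) <= posDiffThreshold * imgWidth
                   and abs(pred["bottomright"]["x"] - expected["bottomright"]["x"]) <= posDiffThreshold * imgWidth)
            yOk = (abs(pred["topleft"]["y"] - expected["topleft"]["y"]) <= posDiffThreshold * imgHeight
                   and abs(pred["bottomright"]["y"] - expected["bottomright"]["y"]) <= posDiffThreshold * imgHeight)
            if confOk and xOk and yOk:
                matched = True
                break
        if not matched:
            return False
    return True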
def run(self): option = { # 'model': 'cfg/tiny-yolo-voc-1c.cfg', # 'load': 2375, 'pbLoad': 'built_graph/tiny-yolo-voc-1c.pb', 'metaLoad': 'built_graph/tiny-yolo-voc-1c.meta', 'threshold': 0.5, 'gpu': 0.7 } tfnet = TFNet(option) avg = 0 divider = 700 #horizontal line after which we count the bolls checkedbolls = 0 #initialized check boll index refinedunique_bolls = [] colors = [tuple(np.random.randint(255, size=3)) for i in range(10000) ] #make multiple colors to display bolls and tracklets #out = cv.VideoWriter('video_'+self.videoname+'.mp4',fourcc, 12.0, (1280,720)) bollcnt = 0 while True: stime = time.time() _ret, frame = self.cam.read() if _ret == False: print("Bad frame, video ended") # out.release() exit(1) frame_gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY) vis = frame.copy() results = tfnet.return_predict(frame) cur_match = [] print("Image # " + str(self.frame_idx)) # print(len(results)) ## refinedBoxes = non_max_suppression_slow(results, 0.4) ## overlap # refinedBoxes = self.non_max_suppression(results, 0.55) #print(len(refinedBoxes)) check_mask = 0 for i, (color, result) in enumerate(zip(colors, results)): # if i in refinedBoxes: cur_match.append( (result['topleft']['x'], result['topleft']['y'], result['bottomright']['x'], result['bottomright']['y'])) ##color segmentation to get undetected bolls #----------Bgr---------- lower = np.array([190, 190, 150], dtype="uint16") upper = np.array([255, 255, 255], dtype="uint16") mask = cv.inRange(frame, lower, upper) kernel = np.ones((4, 4), np.uint8) mask = cv.erode(mask, kernel, iterations=2) mask = cv.dilate(mask, kernel, iterations=5) #check_mask = 1 output = cv.bitwise_and(frame, frame, mask=mask) fg_mask = cv.cvtColor(output, cv.COLOR_BGR2GRAY) #if check_mask == 0: # continue # for (i, match) in enumerate(cur_match): # xi, yi, xj, yj = match # fg_mask_ = cv.rectangle(fg_mask, (xi, yi), (xj+1, yj+1), (0,0,0),-1,8) if imutils.is_cv2(): (contours, hier) = cv.findContours(fg_mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE) # check to see if we are using OpenCV 3 elif imutils.is_cv3(): (_, contours, hier) = cv.findContours(fg_mask, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE) matches_ = [] for cnt in contours: if (80 < cv.contourArea(cnt)): cv.drawContours(fg_mask, [cnt], 0, 255, -1) (x, y, w, h) = cv.boundingRect(cnt) centroid = self.get_centroid(x, y, w, h) matches_.append(((x, y, x + w - 1, y + h - 1), centroid, cv.contourArea(cnt))) ##check the false positives of the YOLOv2 refinedMatches_ = self.non_max_suppression_no_dict(matches_, 0.40) for (i, match) in enumerate(matches_): contour, centroid, contour_area = match x, y, xw, yh = contour w = xw - x h = yh - y cx = x + w / 2 cy = y + h / 2 yolocovered = False res = False for j, (result) in enumerate(zip(results)): xi = result[0]['topleft']['x'] yi = result[0]['topleft']['y'] xj = result[0]['bottomright']['x'] yj = result[0]['bottomright']['y'] res = True # if j in refinedBoxes: if (xi <= cx and yi <= cy and xj >= cx and yj >= cy): yolocovered = True break if not (yolocovered) and (res): if i in refinedMatches_: # frame = cv.rectangle(frame, (x, y), (xw-1, yh-1), (255,0,0), 3) cur_match.append((x, y, xw - 1, yh - 1)) vis = frame.copy() if len(self.tracks) > 0: img0, img1 = self.prev_gray, frame_gray p0 = np.float32([tr[-1] for tr in self.tracks]).reshape(-1, 1, 2) p1, _st, _err = cv.calcOpticalFlowPyrLK( img0, img1, p0, None, **lk_params) p0r, _st, _err = cv.calcOpticalFlowPyrLK( img1, img0, p1, None, **lk_params) d = abs(p0 - p0r).reshape(-1, 2).max(-1) good = d < 1 new_tracks = [] new_bolls = [] 
new_counts = [] pts_src = p0.reshape(-1, 2) pts_dst = p1.reshape(-1, 2) H, status = cv.findHomography(pts_src, pts_dst) present_bolls = [] for boll_bbox in zip(cur_match): for k, (tr, bo, cnt, (x, y), good_flag) in enumerate( zip(self.tracks, self.bolls, self.count_bolls, p1.reshape(-1, 2), good)): if not good_flag: continue xi, yi, xj, yj = boll_bbox[0] area = (xj - xi + 1) * (yj - yi + 1) cx, cy = self.get_bbox_centre(xi, yi, xj, yj) xi = cx - 50 yi = cy - 50 xj = cx + 50 yj = cy + 50 if (xi <= x and yi <= y and xj >= x and yj >= y): xii, yii, xjj, yjj, area2 = bo[ -1] # get previous bolls x0, y0 = tr[-1] #get previos tracklet head if k not in present_bolls: tr.append((x, y)) bo.append((xi, yi, xj, yj, area)) cnt.append((cnt[-1])) if len(tr) > self.track_len: del tr[0] del bo[0] del cnt[0] new_tracks.append(tr) new_bolls.append(bo) present_bolls.append(k) new_counts.append(cnt) cv.circle(vis, (x, y), 2, (0, 255, 0), -1) break #restore the lost tracklets that are greater than 200 and less than 700 in vertical pixel position new_tracklets = [] update_tracklets = [] tracklets = [] for k, (tr, bo, cnt) in enumerate( zip(self.tracks, self.bolls, self.count_bolls)): if len(tr) > 2: xii, yii, xjj, yjj, area2 = bo[ -1] # get previous boll bbox position x0, y0 = tr[-1] #get previos tracklet head position for ki, (tri, boo, cntt) in enumerate( zip(self.tracks, self.bolls, self.count_bolls)): # if ki not in present_bolls: xiii, yiii, xjjj, yjjj, area3 = boo[ -1] # get previous boll bbox position if len(tri) > 0: x0i, y0i = tri[ -1] #get previos tracklet head position #get newer points only if (len(boo) <= 2 and xii <= x0i and yii <= y0i and xjj >= x0i and yjj >= y0i): tracklets.append((x0i, y0i)) update_tracklets.append(k) new_tracklets.append(ki) break for k, (tr, bo, cnt) in enumerate( zip(self.tracks, self.bolls, self.count_bolls)): # if k in update_tracklets: # x0i, y0i = tracklets[update_tracklets.index(k)] # self.tracks[k][-1] = (x0i, y0i) if len(tr) > 0 and k not in present_bolls: xii, yii, xjj, yjj, area2 = bo[ -1] # get previous boll bbox position x0, y0 = tr[-1] #get previos tracklet head position if (len(tr) > 7 and y0 >= 360 and y0 <= 700 and len(present_bolls) > 0): ##check if the tracklet is old NOT new dst_pts_1 = (x0, y0, 1) dst_pts_2 = (xii, yii, 1) dst_pts_3 = (xjj, yjj, 1) src_pts_1 = np.matmul(H, dst_pts_1) src_pts_2 = np.matmul(H, dst_pts_2) src_pts_3 = np.matmul(H, dst_pts_3) x = np.int32(src_pts_1[0] / src_pts_1[2]) y = np.int32(src_pts_1[1] / src_pts_1[2]) xi = np.int32(src_pts_2[0] / src_pts_2[2]) yi = np.int32(src_pts_2[1] / src_pts_2[2]) xj = np.int32(src_pts_3[0] / src_pts_3[2]) yj = np.int32(src_pts_3[1] / src_pts_3[2]) tr.append((x, y)) bo.append((xi, yi, xj, yj, area)) cnt.append((cnt[-1])) if (len(tr) > self.track_len): del tr[0] del bo[0] del cnt[0] new_tracks.append(tr) new_bolls.append(bo) new_counts.append(cnt) cv.circle(vis, (x, y), 2, (0, 255, 0), -1) if (k in new_tracklets): del tr[-1] del bo[-1] del cnt[-1] self.tracks = new_tracks self.bolls = new_bolls self.count_bolls = new_counts for j, (tr, bo, cnt) in enumerate( zip(self.tracks, self.bolls, self.count_bolls)): k = cnt[-1] b, g, r = colors[k] b = int(b) g = int(g) r = int(r) if (len(tr) > 1): cv.polylines(vis, [np.int32(tr)], False, (b, g, r), 2) xc1, yc1 = np.int32(tr[-1]) xc2, yc2 = np.int32(tr[-2]) if (yc2 <= divider and yc1 > divider and len(tr) > 7): self.boll_count = self.boll_count + 1 xi, yi, xj, yj, area = np.int32(bo[-1]) cx, cy = self.get_bbox_centre(xi, yi, xj, yj) # text = 
'{}'.format(cnt[-1]) vis = cv.rectangle(vis, (xi, yi), (xj, yj), (b, g, r), 2) # vis = cv.putText(vis, text, (cx, cy), cv.FONT_HERSHEY_COMPLEX, 1, (160, 0, 200), 2) color if self.frame_idx % self.detect_interval == 0: mask = np.zeros_like(frame_gray) mask[:] = 255 for x, y in [np.int32(tr[-1]) for tr in self.tracks]: cv.circle(mask, (x, y), 5, 0, -1) p = cv.goodFeaturesToTrack(frame_gray, mask=mask, **feature_params) print("--------------------------------------------------") if p is not None: for boll_bbox in zip(cur_match): for x, y in np.float32(p).reshape(-1, 2): xi, yi, xj, yj = boll_bbox[0] area = (xj - xi + 1) * (yj - yi + 1) #if (x,y) not in self.tracks: cx, cy = self.get_bbox_centre(xi, yi, xj, yj) xi = cx - 50 yi = cy - 50 xj = cx + 50 yj = cy + 50 # if (x,y) not in self.tracks: if (xi <= x and yi <= y and xj >= x and yj >= y): self.count_bolls.append([(bollcnt)]) self.tracks.append([(x, y)]) self.bolls.append([(xi, yi, xj, yj, area)]) bollcnt = bollcnt + 1 break cv.line(vis, (0, 360), (1280, 360), (245, 8, 0), thickness=2, lineType=8) cv.line(vis, (0, 700), (1280, 700), (10, 255, 90), thickness=2, lineType=8) draw_str(vis, (20, 20), 'track count: %d' % self.boll_count) avg = (avg * (self.frame_idx) + (1 / (time.time() - stime))) / (self.frame_idx + 1) fps = (1 / (time.time() - stime)) vis = cv.putText(vis, 'FPS {:.1f} AFPS {:.1f}'.format(fps, avg), (900, 20), cv.FONT_HERSHEY_COMPLEX, 1, (25, 0, 200), 2) file_name_format = IMAGE_DIR_RES + "/proc_" + self.videoname + "_%04d.jpg" file_name = file_name_format % self.frame_idx print(file_name) cv.imwrite(file_name, vis) #cv.imshow('lk_track', vis) #Write the frame into the file 'output.avi' # out.write(vis) self.frame_idx += 1 self.prev_gray = frame_gray ##-------------calculate FPS ---------------------------------------------- # avg = (avg*(self.frame_idx-1) + (1 / (time.time() - stime)))/self.frame_idx # print('FPS {:.1f}'.format(avg)) #fps = (1 / (time.time() - stime)) print('FPS {:.1f}'.format(fps)) #if self.frame_idx > 80: # exit(1) ch = cv.waitKey(0) if ch == 27: break
import pyttsx3

engine = pyttsx3.init()

option = {
    'model': 'cfg/yolo.cfg',
    'load': 'bin/yolo.weights',
    'threshold': 0.5,
    'gpu': 1.0
}
tfnet = TFNet(option)

choice = 'y'
while (choice == 'y'):
    path = input("Enter Path: ")
    img = cv2.imread(path)
    result = tfnet.return_predict(img)
    print(result)
    print(len(result))

    base = 0
    for j in result:
        tl = (result[base]['topleft']['x'], result[base]['topleft']['y'])
        br = (result[base]['bottomright']['x'], result[base]['bottomright']['y'])
        label = (result[base]['label'])
        conf = (result[base]['confidence'])
        print("\n\ntl: ", tl, " br: ", br, " label: ", label, "Confidence: ", conf)
        base += 1
        if (conf > 0.25):
            img = cv2.rectangle(img, tl, br, (0, 255, 0), 7)
            img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 4,
    'threshold': 0.15,
    'gpu': 0.7}
tfnet = TFNet(options)

C = []  # Center
R = []  # Radius
L = []  # Label

im_name = 'HRI001'
image = cv2.imread('data/' + im_name + '.jpg')

for h in range(0, 2592, 890):
    for w in range(0, 3872, 1290):
        im = image[h:h + 890, w:w + 1290]
        output = tfnet.return_predict(im)

        RBC = 0
        WBC = 0
        Platelets = 0

        for prediction in output:
            label = prediction['label']
            confidence = prediction['confidence']
            tl = (prediction['topleft']['x'], prediction['topleft']['y'])
            br = (prediction['bottomright']['x'], prediction['bottomright']['y'])
            height, width, _ = image.shape
            center_x = int((tl[0] + br[0]) / 2)
            center_y = int((tl[1] + br[1]) / 2)
options = {
    'model': 'C:/Users/Dell/Desktop/Programming/YOLO/darkflow-master/cfg/yolo.cfg',
    'load': 'C:/Users/Dell/Desktop/Programming/YOLO/darkflow-master/bin/yolov2.weights',
    'threshold': 0.15,
}
tfnet = TFNet(options)

capture = cv2.VideoCapture('vid1.mp4')
colors = [tuple(255 * np.random.rand(3)) for i in range(5)]
fourcc = cv2.VideoWriter_fourcc(*'MP4V')
output = cv2.VideoWriter('labelled.mp4', fourcc, 20.0, (640, 480))

while (capture.isOpened()):
    ret, frame = capture.read()
    if ret:
        # run the detector only after confirming the frame was read successfully
        results = tfnet.return_predict(frame)
        for color, result in zip(colors, results):
            tl = (result['topleft']['x'], result['topleft']['y'])
            br = (result['bottomright']['x'], result['bottomright']['y'])
            label = result['label']
            frame = cv2.rectangle(frame, tl, br, (0, 255, 0), 4)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                                (0, 0, 0), 2)
        output.write(frame)
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        break
capture.release()
def main():
    xml_dir = os.path.join(os.getcwd(), 'train/Annotations')
    xml_df = xml_to_csv(xml_dir)
    xml_df.to_csv('455_labels.csv', index=None)

    options = {"model": "cfg/455.cfg", "load": 18375, "threshold": 0.4}
    tfnet = TFNet(options)

    img = cv2.imread('/home/dan/darkflow/train/Images/dolphin_batch600027.jpg',
                     cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # use YOLO to predict the image
    result = tfnet.return_predict(img)
    img.shape

    if os.path.exists("yolo.csv"):
        os.remove("yolo.csv")

    with open('yolo.csv', 'a') as writeFile:
        writer = csv.writer(writeFile)
        writer.writerow(
            ['id', 'label', 'confidence', 'xmin', 'ymin', 'xmax', 'ymax'])

        # Get the ground truth results in a list for the given image
        gtList = getListOfGroundTruthForImage('dolphin_batch600027.jpg')

        # Get the IOU value for each detection
        gtList.reverse()
        id = 0
        for i in range(0, len(result) - 1):
            tl = (result[i]['topleft']['x'], result[i]['topleft']['y'])
            br = (result[i]['bottomright']['x'], result[i]['bottomright']['y'])
            label = result[i]['label']
            boxA = {
                'x1': int(result[i]['topleft']['x']),
                'y1': int(result[i]['topleft']['y']),
                'x2': int(result[i]['bottomright']['x']),
                'y2': int(result[i]['bottomright']['y'])
            }
            boxB = {
                'x1': int(gtList[i]['xmin']),
                'y1': int(gtList[i]['ymin']),
                'x2': int(gtList[i]['xmax']),
                'y2': int(gtList[i]['ymax'])
            }
            print(" get iou = " + str(get_iou(boxA, boxB)))

            box_1 = [[int(result[i]['topleft']['x']), int(result[i]['topleft']['y'])],
                     [int(result[i]['bottomright']['x']), int(result[i]['topleft']['y'])],
                     [int(result[i]['bottomright']['x']), int(result[i]['bottomright']['y'])],
                     [int(result[i]['topleft']['x']), int(result[i]['bottomright']['y'])]]
            box_2 = [[int(gtList[i]['xmin']), int(gtList[i]['ymin'])],
                     [int(gtList[i]['xmax']), int(gtList[i]['ymin'])],
                     [int(gtList[i]['xmax']), int(gtList[i]['ymax'])],
                     [int(gtList[i]['xmin']), int(gtList[i]['ymax'])]]
            iou = calculate_iou(box_1, box_2)
            print(str(iou))

            tl2 = (int(gtList[i]['xmin']), int(gtList[i]['ymin']))
            br2 = (int(gtList[i]['xmax']), int(gtList[i]['ymax']))
            id = id + 1

            img = cv2.rectangle(img, tl, br, (0, 255, 0), 7)  # draw a rectangle onto the image
            img = cv2.rectangle(img, tl2, br2, (0, 100, 0), 7)
            img = cv2.putText(img, "IOU {0:.2f}".format(iou) + " " + label, tl,
                              cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)  # add label name

    writeFile.close()
    plt.imshow(img)
    plt.show()
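# The loop above relies on get_iou() (dict-corner boxes) and calculate_iou()
# (polygon corner lists), both defined elsewhere. As a rough, hypothetical
# illustration of what the dict-based helper computes, a minimal IoU sketch is
# shown below; it is an assumption based on the call site, not the original code.
def get_iou(boxA, boxB):
    # intersection rectangle of the two axis-aligned boxes
    ix1 = max(boxA['x1'], boxB['x1'])
    iy1 = max(boxA['y1'], boxB['y1'])
    ix2 = min(boxA['x2'], boxB['x2'])
    iy2 = min(boxA['y2'], boxB['y2'])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)

    areaA = (boxA['x2'] - boxA['x1']) * (boxA['y2'] - boxA['y1'])
    areaB = (boxB['x2'] - boxB['x1']) * (boxB['y2'] - boxB['y1'])
    union = areaA + areaB - inter
    return inter / union if union > 0 else 0.0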
cv2.namedWindow(windowName1)
cv2.namedWindow(windowName2)
cv2.setMouseCallback(windowName1, draw_shape)
cv2.setMouseCallback(windowName2, draw_shape)

if startPoint == True and endPoint == True:
    #print('inside of if')
    #frame = cv2.rectangle(frame, (rect[0], rect[1]), (rect[2], rect[3]), (255, 0, 255), -1)
    x = frame[rect[1]:rect[3], rect[0]:rect[2]]
    #print('x: ', x)
    #img_arr = np.array(bytearray(x.content), dtype=np.uint8)
    #x = cv2.imdecode(x, -1)
    #capture0 = cv2.resize(capture0, (640, 480))
    x = cv2.resize(x, (640, 480))
    results1 = tfnet.return_predict(x)
    count = 0
    for color1, result1 in zip(colors, results1):
        print('inside for of video cam')
        tl = (result1['topleft']['x'], result1['topleft']['y'])
        br = (result1['bottomright']['x'], result1['bottomright']['y'])
        label = result1['label']
        confidence = result1['confidence']
        if label == 'person' and xCount > 50:
            print('inside if too..')
            diff = cv2.absdiff(frame1, frame2)
            gray = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)
            blur = cv2.GaussianBlur(gray, (5, 5), 0)
            _, thresh = cv2.threshold(blur, 20, 255, cv2.THRESH_BINARY)
            dilated = cv2.dilate(thresh, None, iterations=3)
inlane = "no" # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file. out = cv2.VideoWriter('eman.avi', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 30, (frame_width, frame_height)) while (cap.isOpened()): stime = time.time() ret, frame = cap.read() newframe = frame.copy() if ret == False: break tls = [] brs = [] tock = time.time() if ret: detecteds = tfnet.return_predict(frame) for color, detected in zip(colors, detecteds): tl = (detected['topleft']['x'], detected['topleft']['y']) br = (detected['bottomright']['x'], detected['bottomright']['y']) label = detected['label'] frame = cv2.rectangle(frame, tl, br, color, 7) frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2) newframe[tl[1]:br[1], tl[0]:br[0], :] = 0 tls.append(tl) brs.append(br) binary = b.binary(newframe) warped, m, im = p.perspective_view(binary) fl.find_lane(warped)
def rotateServo():
    pi = pigpio.pi()

    # initializing hw pwm pins
    pin_gpio_z = 12
    pin_gpio_x = 13
    pi.hardware_PWM(pin_gpio_z, 0, 0)
    pi.hardware_PWM(pin_gpio_x, 0, 0)
    time.sleep(0.1)

    options = {
        "model": "darkflow/cfg/tiny-yolo.cfg",
        "load": "darkflow/bin/tiny-yolo.weights",
        "threshold": 0.2,
        "config": "darkflow/cfg/",
        "gpu": 1.0
    }
    print("loading TFnet")
    tfnet = TFNet(options)

    camera = picamera.PiCamera()
    camera.resolution = (320, 240)
    rawCapture = picamera.array.PiRGBArray(camera, size=(320, 240))
    time.sleep(1)

    z, x = 75000, 75000

    # cycle through the stream of images from the camera
    for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
        image = frame.array  # the frame will be stored in the variable called image
        x = 75000

        # find the amount that is needed to rotate
        # z, x = blackFollow(image, z, x)
        print("Predicting")
        results = tfnet.return_predict(image)
        dz, dx = 0, 0
        print("Found ", len(results), "objects")
        for result in results:
            #cv2.rectangle(image,
            #              (result["topleft"]["x"], result["topleft"]["y"]),
            #              (result["bottomright"]["x"], result["bottomright"]["y"]),
            #              (0, 255, 0), 4)
            #text_x, text_y = result["topleft"]["x"] - 10, result["topleft"]["y"] - 10
            #cv2.putText(image, result["label"], (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)
            if (result["label"] == "person"):
                # how much should we rotate left/right and up/down
                dz = result["topleft"]["x"] + result["bottomright"]["x"] - image.shape[1]
                dx = result["topleft"]["y"] + result["bottomright"]["y"] - image.shape[0]
                print(result)

        z -= int(10000 * (1.0 * dz / image.shape[1]))
        x += int(10000 * (1.0 * dx / image.shape[0]))
        z = max(z, 25000)
        z = min(z, 125000)
        x = max(x, 25000)
        x = min(x, 90000)
        print("dz", dz, "dx", dx, "z", z, "x", x)

        # end rotating logic
        if (dz < 10 and dx < 10):
            break
        pi.hardware_PWM(pin_gpio_z, 50, z)  # 50Hz
        pi.hardware_PWM(pin_gpio_x, 50, x)  # 50Hz
        time.sleep(0.1)
        rawCapture.truncate(0)
def main(): """ Interface with a ZED depth camera (via ROS), using YOLO for object detection, and a custom CNN for target classification. """ # Argument parser for command line arguments ap = argparse.ArgumentParser() ap.add_argument("--skipInit", help="Flag whether to skip initialization or not", action='store_true') args = vars(ap.parse_args()) # TF Options (either build from config/weights, or load pre-built from pb/meta) # # Build tiny-yolo from the config and weights # options = {"model": "darkflow/cfg/tiny-yolo-voc.cfg", # "load":"darkflow/bin/tiny-yolo-voc.weights", # "threshold":0.5, "gpu":0.5} # Load the pre-trained and pre-built model of tiny-yolo options = { "pbLoad": "darkflow/built_graph/tiny-yolo-voc.pb", "metaLoad": "darkflow/built_graph/tiny-yolo-voc.meta", "threshold": 0.5, "gpu": 0.4 } # Initialize YOLO tfnet = TFNet(options) # Initialize CNN CNN = TargetCNN() # ROS Setup rospy.init_node('ZED_test', anonymous=True) # Subscribe to topics depth_sub = rospy.Subscriber("/zed/depth/depth_registered", Image, depth_cb, queue_size=1) left_sub = rospy.Subscriber("/zed/left/image_rect_color", Image, bgr_cb, queue_size=1) # Publish depth, angle, and target lost depth_pub = rospy.Publisher("/ZED_Tracker/depth", Float32, queue_size=10) angle_pub = rospy.Publisher("/ZED_Tracker/angle", Float32, queue_size=10) lost_pub = rospy.Publisher("/ZED_Tracker/targetLost", Bool, queue_size=10) # Thickness of cv draw objects thick = int((IMG_H + IMG_W) // 300) # Initialize variables lastTime = time() frame_count = -1 # index video frame init_target_count = 0 # number of Target samples captured other_count = 0 # number of Others(other people, not the Target) sample captured xc, yc = 0, 0 # Centroid of Bounding Box # Keep track of what 'state' the algorithm is in state = 'WAITING_1' # First state, wait for user # Check if skipInit if (args["skipInit"]): state = "TRAIN" print(" State: %s" % state) ############################# ### Main Processing Loop ### ############################# while (not rospy.is_shutdown()): frame_count += 1 if (state == 'TRAIN' ): # Training is a blocking function call, so visuals don't update print("[INFO] Starting training") CNN.train(PLOT_LEARNING=True, epochs=20, trainDir=trainDir) print("[INFO] Done Training") state = 'PREDICT' print(" State: %s" % state) # Use YOLO for Object Detection results = tfnet.return_predict( bgrImg) # predict using bgrImg from ROS callback vis = bgrImg.copy() # Create copy for visual # Initialize max values max_pred = 0 # Store max prediction value max_pred_coords = [0, 0, 0, 0] # Store corresponding coords # Loop through all detections (from YOLO) for i in range(len(results)): # Skip any non-'person' detections if (results[i]['label'] != 'person'): continue # Bounding Box indexes x1, y1 = results[i]['topleft']['x'], results[i]['topleft']['y'] x2, y2 = results[i]['bottomright']['x'], results[i]['bottomright'][ 'y'] xc, yc = toCentroid(x1, y1, x2, y2) # Get centroid # Draw Bounding Box cv2.rectangle(vis, (x1, y1), (x2, y2), (255, 255, 255), thick) cv2.circle(vis, (xc, yc), 3, (255, 255, 255), thick) # Center circle cv2.putText(vis, results[i]['label'], (x1 + thick * 2, y1 + thick * 2), 0, 1e-3 * IMG_H, (255, 255, 255), thick // 3) if (state == 'INIT_1'): # Only consider objects within middle window # (Center window extends full top to bottom, but only +/- INIT_WIN_W) if (abs(xc - MID_X) < INIT_WIN_W): obj = bgrImg[y1:y2, x1:x2].copy() obj_name = targetDir + "Target-%s.png" % ( str(init_target_count).zfill(4)) init_target_count += 1 
cv2.imwrite(obj_name, obj) if (state == 'INIT_2'): # Save any person, no restriction on location other = bgrImg[y1:y2, x1:x2].copy() other_name = notTargetDir + "Other-%s.png" % ( str(other_count).zfill(4)) other_count += 1 cv2.imwrite(other_name, other) if (state == 'PREDICT'): # Use trained CNN to classify as Target/Not-Target label, prob, prediction = CNN.predict(bgrImg[y1:y2, x1:x2].copy()) # Keep track of highest classification if (prediction > max_pred): max_pred = prediction max_pred_coords = [x1, y1, x2, y2] # Wait 1ms (allows visuals to update), and take in user input if any charin = cv2.waitKey(1) ## Draw State specific graphics/instructions/info ## and transition states when appropriate if (state == 'WAITING_1'): # Draw initialization box area cv2.rectangle(vis, (MID_X - INIT_WIN_W, 0), (MID_X + INIT_WIN_W, IMG_H), (0, 150, 0), thick // 3) # Show instructions on screen instr = [ "Press 'i' to continue,", "then you will have %d seconds" % time_till_init, "before initialization starts" ] cv2.rectangle(vis, (0, 0), (300, 100), color=(255, 255, 255), thickness=-1) # box for text cv2.putText(vis, instr[0], (20, 20), FONT, 0.5, (0, 0, 255), 2) # Show instructions cv2.putText(vis, instr[1], (20, 40), FONT, 0.5, (0, 0, 255), 2) # Show instructions cv2.putText(vis, instr[2], (20, 60), FONT, 0.5, (0, 0, 255), 2) # Show instructions # User press 'i' to move to initialization if (charin == ord('i')): cdown_start = time() # Start the time state = 'COUNTDOWN_1' print(" State: %s" % state) if (state == 'COUNTDOWN_1'): # Draw initialization box area cv2.rectangle(vis, (MID_X - INIT_WIN_W, 0), (MID_X + INIT_WIN_W, IMG_H), (0, 150, 0), thick // 3) # Countdown time, wait for X seconds before capturing init data dur = time() - cdown_start cv2.putText(vis, "%d" % (time_till_init - (dur) + 1), (IMG_W // 4, IMG_H // 4), FONT, 4, (0, 0, 255), 2) # Show countdown if (dur > time_till_init): state = 'INIT_1' print(" State: %s" % state) print(" Saving Target samples as: %sTarget-####.png" % targetDir) if (state == 'INIT_1'): # Draw initialization box area cv2.rectangle(vis, (MID_X - INIT_WIN_W, 0), (MID_X + INIT_WIN_W, IMG_H), (0, 150, 0), thick // 3) # Display progress on image acquisition cv2.rectangle(vis, (0, 0), (400, 45), color=(255, 255, 255), thickness=-1) # box for text cv2.putText(vis, "%d of %d images taken" % (init_target_count, NUM_INIT_DATA), (10, 30), FONT, 1, (0, 0, 255), 2) # Show instructions # Continue once X samples are taken if (init_target_count >= NUM_INIT_DATA): state = 'WAITING_2' print(" State: %s" % state) if (state == 'WAITING_2'): # Show instructions on screen instr = [ "Press 'b' to continue,", "then you will have %d seconds" % time_till_init_2, "before the background capture starts" ] cv2.rectangle(vis, (0, 0), (300, 100), color=(255, 255, 255), thickness=-1) # box for text cv2.putText(vis, instr[0], (20, 20), FONT, 0.5, (0, 0, 255), 2) # Show instructions cv2.putText(vis, instr[1], (20, 40), FONT, 0.5, (0, 0, 255), 2) # Show instructions cv2.putText(vis, instr[2], (20, 60), FONT, 0.5, (0, 0, 255), 2) # Show instructions # User press 'b' to move to background initialization if (charin == ord('b')): cdown_start = time() state = 'COUNTDOWN_2' print(" State: %s" % state) if (state == 'COUNTDOWN_2'): dur = time() - cdown_start cv2.putText(vis, "%d" % (time_till_init_2 - (dur) + 1), (IMG_W // 4, IMG_H // 4), FONT, 4, (0, 0, 255), 2) # Show countdown # Wait for X seconds before capturing init data if (dur > time_till_init_2): cdown_start = time() state = 'INIT_2' print(" State: %s" 
% state) print(" Saving Others samples as: %sOther-####.png" % notTargetDir) if (state == 'INIT_2'): dur = time() - cdown_start # Wait for X seconds before capturing background if (dur > time_till_init_3): state = 'INIT_3' print(" State: %s" % state) if (state == 'INIT_3'): # Save 'background' patches to notTarget training directory grabBackground(bgrImg) msg = "Please wait while the CNN is trained..." cv2.rectangle(vis, (0, 0), (IMG_W, 100), color=(255, 255, 255), thickness=-1) # box for text cv2.putText(vis, msg, (50, 50), FONT, 2, (0, 0, 255), 2) # Show instructions cv2.imshow('ZED Tracking', vis) cv2.waitKey(1) state = 'TRAIN' print(" State: %s" % state) if (state == 'PREDICT'): # Display max classification x1, y1, x2, y2 = max_pred_coords cv2.rectangle(vis, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=thick) cv2.rectangle(vis, (0, 0), (300, 160), color=(255, 255, 255), thickness=-1) cv2.putText(vis, "Pred: %.3f" % max_pred, (10, 30), FONT, 1, color=(255, 0, 0), thickness=2) xc, yc = toCentroid(x1, y1, x2, y2) # Display and publish Depth depth = getDepth(xc, yc, depthImg) if (not np.isnan(depth)): cv2.putText(vis, "Depth: %.2f m" % depth, (10, 60), FONT, 1, color=(255, 0, 0), thickness=2) # Publish depth depth_pub.publish(depth) else: cv2.putText(vis, "Depth: ---- m", (10, 60), FONT, 1, color=(255, 0, 0), thickness=2) ## Uncomment line below if you want constant publishing of depth data # depth_pub.publish(-1.0) # -1.0 is simply an 'error' value # Display and publish Angle angle = getAngle(xc) if (not angle == -45.0): cv2.putText(vis, "Angle: %.1f deg" % angle, (10, 90), FONT, 1, color=(255, 0, 0), thickness=2) angle_pub.publish(angle) else: cv2.putText(vis, "Angle: ---- deg", (10, 90), FONT, 1, color=(255, 0, 0), thickness=2) ## Uncomment line below if you want constant publishing of angle data # angle_pub.publish(-180.0) # -180.0 is simply an 'error' value # Target Lost LOST = max_pred < pred_thresh if (LOST): cv2.putText(vis, "TARGET LOST", (10, 150), FONT, 1.4, color=(0, 0, 255), thickness=2) lost_pub.publish(LOST) # Press 'q' to quit if (charin == ord('q')): print("Quitting program...") break # FPS calculation f = 1 / (time() - lastTime) fps = "FPS: %.2f" % f lastTime = time() # Display FPS data cv2.rectangle(vis, (IMG_W - 170, IMG_H - 50), (IMG_W, IMG_H), (0, 0, 0), -1) cv2.putText(vis, fps, (IMG_W - 160, IMG_H - 15), FONT, 1, (0, 0, 255), 2) # Show visuals cv2.imshow('ZED Tracking', vis) # Cleanup cv2.destroyAllWindows() depth_sub.unregister() left_sub.unregister()
while (True):
    # Capture frame-by-frame
    ret, frame = cap.read()

    # Our operations on the frame come here
    RGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Display the resulting frame
    if x == ord('d') and mark <= 440:
        mark += 10
    if x == ord('a') and mark >= 60:
        mark -= 10
    if x == 27:
        break

    threshold = 0.00225 * (mark - 50) + 0.1
    result = tfnet.return_predict(RGB)
    for detection in result:
        label = detection['label']
        confidence = format(float(detection['confidence']), '.2f')
        tl = (int(detection['topleft']['x']), int(detection['topleft']['y']))
        br = (int(detection['bottomright']['x']), int(detection['bottomright']['y']))
        if float(confidence) >= threshold:
            cv2.rectangle(frame, tl, br, (0, 255, 0), 3)
            cv2.putText(frame, label + ' ' + confidence, tl,
                        cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 255), 1,
                        cv2.LINE_AA)
licensePlate = "".join(sortedList) return licensePlate cap = cv2.VideoCapture('vid1.MOV') counter = 0 while (cap.isOpened()): ret, frame = cap.read() h, w, l = frame.shape frame = imutils.rotate(frame, 270) if counter % 6 == 0: licensePlate = [] try: predictions = yoloPlate.return_predict(frame) firstCropImg = firstCrop(frame, predictions) secondCropImg = secondCrop(firstCropImg) cv2.imshow('Second crop plate', secondCropImg) secondCropImgCopy = secondCropImg.copy() licensePlate.append(opencvReadPlate(secondCropImg)) print("OpenCV+CNN : " + licensePlate[0]) except: pass try: predictions = yoloCharacter.return_predict(secondCropImg) licensePlate.append( yoloCharDetection(predictions, secondCropImgCopy)) print("Yolo+CNN : " + licensePlate[1]) except: pass
class darkflow_prediction(): def __init__(self): self.options = {"model": "cfg/yolo.cfg", "load": "bin/yolov2.weights", "threshold": 0.5} self.tfnet = TFNet(self.options) self.cluster = [] self.FRAME_BUFFER = 5 def image(self, image_file): self.image = cv2.imread(image_file, 1) self.result = self.tfnet.return_predict(self.image) print(self.result) self.print_box() cv2.waitKey(0) def print_box(self, frame_result, frame_image): font = cv2.FONT_HERSHEY_PLAIN for i in range(len(frame_result)): coordtl = (frame_result[i]['topleft']['x'], frame_result[i]['topleft']['y']) coordbr = (frame_result[i]['bottomright']['x'], frame_result[i]['bottomright']['y']) cv2.rectangle(frame_image, coordtl, coordbr, (0,255,0), 2) s = str(frame_result[i]['label'] + ": " + str(frame_result[i]['confidence'])) text_coord = (coordtl[0], coordtl[1]-10) cv2.putText(frame_image, s, text_coord, font, 1, (250,250,0)) #cv2.imshow("memes", frame_image) def print_box_with_clusters(self, asd): font = cv2.FONT_HERSHEY_PLAIN for i in range(len(self.result)): coordtl = (self.result[i]['topleft']['x'], self.result[i]['topleft']['y']) coordbr = (self.result[i]['bottomright']['x'], self.result[i]['bottomright']['y']) cv2.rectangle(self.image,coordtl,coordbr,(0,255,0),2) s = str(self.result[i]['label'] + ": " + str(self.result[i]['confidence'])) text_coord = (coordtl[0], coordtl[1]-10) cv2.putText(self.image, s, text_coord, font, 1, (250,250,0)) for i, val in enumerate(asd): cv2.putText(self.image, str(val[1]), tuple(val[0]), font, 1, (250,250,0), 3) #cv2.imshow("memes", self.image) def video(self, video_file): self.video = cv2.VideoCapture(video_file) self.images, interm, count = [], [], 1 # Results: list of lists of object dictionaries [frame1[{object1}, {object2}, {object3}], frame2[{}, {}, {}]] self.video_results_full = [] # Same as above, but results are split into groups of 5 self.video_results_split = [] # Grand boxes for each group of 5 frames self.group_grand_boxes = [] # Actual trajectories of objects through frames self.object_trajectories = {} try: while self.video.isOpened(): # Read 1 frame of the video (as image), append to our list of images ret, frame_image = self.video.read() self.images.append(np.copy(frame_image)) # Run YOLO to get the detected objects for this particular frame frame_result = self.tfnet.return_predict(frame_image) self.print_box(frame_result, frame_image) frame_result = self.refactor_result(frame_result) self.video_results_full.append(frame_result) interm.append(frame_result) # Use a sliding window approach to compute groups/grand boxes if count > self.FRAME_BUFFER: interm.pop(0) if len(interm) == self.FRAME_BUFFER: self.video_results_split.append(interm[:]) grand_boxes = self.get_clusters(self.video_results_split[-1]) if not grand_boxes: continue self.print_grand_box(grand_boxes) self.group_grand_boxes.append(grand_boxes) if len(self.group_grand_boxes) >= 2: self.track_objects_between_frames(self.group_grand_boxes[-2], self.group_grand_boxes[-1]) count += 1 #cv2.waitKey(1) except AssertionError: pass #print("GROUP GRAND BOXES: ", self.group_grand_boxes) for group in self.group_grand_boxes: # group is a list with dictionaries for obj in group: # obj is a dictionary if "prev" not in obj: # prev is the string key in the dictionary self.object_trajectories[self.hash_object(obj)] = [obj] else: #print(self.object_trajectories[obj['prev']]) self.object_trajectories[obj['prev']].append(obj) self.object_trajectories[self.hash_object(obj)] = self.object_trajectories[obj['prev']] 
#self.object_trajectories[obj['prev']] = None self.object_trajectories = {k: v for k, v in self.object_trajectories.items() if v is not None} for i in self.object_trajectories: pass #print("\n\n") #print(self.object_trajectories[i]) # trainingSize = int(round(0.75 * len(self.object_trajectories))) # trainingSet = [self.object_trajectories[i] for i in range(trainingSize)] # testSet = [self.object_trajectories[i] for i in range(trainingSize, len(self.object_trajectories))] dataIn, dataOut = self.getXYAll(self.object_trajectories) #trainX = [] #trainY = [] ''''testX = [] testY = [] for i in range(len(dataX)): #length is the number of objects we have in frame, same for both X and Y pathX = dataX[i] pathY = dataY[i] trainingSize = int(round(0.75 * len(pathX))) trainX.extend([[pathX[i]] for i in range(trainingSize)]) testX.extend([[pathX[i]] for i in range(trainingSize, len(pathX))]) [trainY.append(pathY[i]) for i in range(trainingSize)] [testY.append(pathY[i]) for i in range(trainingSize, len(pathX))] look_back = 1 trainX = np.array(trainX) testX = np.array(testX) trainY = np.array(trainY) testY = np.array(testY) trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1])) testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))''' # create and fit the LSTM network print("DATA IN (OBJECT DATA) SHAPE: ", dataIn.shape) print("DATA OUT (OBJECT OUTPUT) SHAPE: ", dataOut.shape) print("Model save name: ", video_file[3:]) model = Sequential() model.add(LSTM(200, input_shape=(5, 2))) model.add(Dense(2)) model.compile(loss='mean_squared_error', optimizer=Adam(decay=0.0001)) try: model.fit(dataIn, dataOut, epochs=200, batch_size=1, verbose=2) except: print("ERROR IN PROCESSING VID") return model.save(video_file[3:]) # make predictions trainPredict = model.predict(dataIn) #testPredict = model.predict(testX) #print("TRAIN PREDICT: ", trainPredict) """ def separateXandY(self, object_trajectories): #print("OBJECT TRAJECTORIES") #print(object_trajectories) objectDataX, objectDataY = [], [] for object in object_trajectories: #print("OBJECT " + object) dataX, dataY = [], [] for point in object_trajectories[object]: #print("POINT") #print(point) dataX.append(point["x"]) dataY.append(point["y"]) objectDataX.append(dataX) objectDataY.append(dataY) print(objectDataX) print(objectDataY) return np.array(objectDataX), np.array(objectDataY) """ def getXYAll(self, object_trajectories): objectData = [] objectOutput = [] #print("OBJECT TRAJECTORIES: ", object_trajectories) for obj in object_trajectories: #print("LEN OBJECT TRAJ OF OBJ: ", len(object_trajectories[obj])) for i in range(0, len(object_trajectories[obj])-5): data = [] for j in range(i, i+5): data.append([object_trajectories[obj][j]["x"], object_trajectories[obj][j]["y"]]) objectOutput.append([object_trajectories[obj][i+5]["x"], object_trajectories[obj][i+5]["y"]]) # next position after 5 frames objectData.append(data[:]) # append single sample, data = an instance of 5 time steps #print('OBJECT DATA: ', objectData) # [[[x, y], [x, y], [x, y], [x, y], [x, y]], # [[x, y], [x, y], [x, y], [x, y], [x, y]]] #print('OBJECT OUTPUT: ', objectOutput) # output = where is the next location #print('objectOutput', objectOutput) # output matches samples [[x, y], [x, y], [x, y], [x, y], [x, y]] return np.array(objectData), np.array(objectOutput) def hash_object(self, detected_object): return str(detected_object["x"]) + str(detected_object["y"]) + str(detected_object["class"]) + str(detected_object["confidence"]) def refactor_result(self, result): new_result = [] 
for elem in result: coordtl = (elem['topleft']['x'], elem['topleft']['y']) coordbr = (elem['bottomright']['x'], elem['bottomright']['y']) classif = elem['label'] confidence = elem['confidence'] width = coordbr[0] - coordtl[0] height = coordbr[1] - coordtl[1] x_center = coordtl[0] + width//2 y_center = coordtl[1] + height//2 new_elem = {'x': x_center, 'y': y_center, 'width': width, 'height': height, "class": classif, "confidence": confidence} new_result.append(new_elem) return new_result def get_clusters(self, group): cluster_points = [] #still need all the individual point data to get individual box data for box averaging for frame in group: #each frame object is 5 video frames for object_det in frame: cluster_points.append([object_det['x'], object_det['y']]) #extracts the coordinates of each object #print("cluster_points", cluster_points) if cluster_points == []: return None model = DBSCAN(eps=100, min_samples=2).fit(np.array(cluster_points)) clusters = [(cluster_points[i], model.labels_[i]) for i in range(len(cluster_points))] clustered_points = {} for point in clusters: if point[1] not in clustered_points: clustered_points[point[1]] = [] clustered_points[point[1]].append((point[0][0], point[0][1])) #print(clustered_points) grand_boxes = [] #creating each single grand box for cluster_val in clustered_points: avgd_x, avgd_y = 0, 0 for point in clustered_points[cluster_val]: avgd_x += point[0] avgd_y += point[1] avgd_x /= len(clustered_points[cluster_val]) avgd_y /= len(clustered_points[cluster_val]) width, height = 0, 0 classif, confidence = '', '' for point in clustered_points[cluster_val]: for frame in group: for object_det in frame: if object_det['x'] == point[0] and object_det['y'] == point[1]: width += object_det['width'] height += object_det['height'] classif = object_det['class'] confidence = object_det['confidence'] width /= len(clustered_points[cluster_val]) height /= len(clustered_points[cluster_val]) box = {'x': avgd_x, 'y': avgd_y, 'width': width, 'height': height, "class": classif, "confidence": confidence} grand_boxes.append(box) #creating a grand box for each object for every 5 frames #print("grand boxes", grand_boxes) return grand_boxes def print_grand_box(self, grand_boxes): x_points = [box['x'] for box in grand_boxes] y_points = [box['y'] for box in grand_boxes] image = self.images[-1].copy() for i in range(len(x_points)): cv2.circle(image, (int(x_points[i]), int(y_points[i])), 15, (255-int(255*i//len(x_points)),0,int(255*i//len(x_points))), -1) #cv2.imshow("centroids", image) #cv2.waitKey(2) def track_objects_between_frames(self, curr_frame, next_frame): for grand_object in curr_frame: closest_dist = 999999999999 closest_obj, closest_obj_idx = None, None for next_object_idx in range(len(next_frame)): next_object = next_frame[next_object_idx] euclidean_dist = np.sqrt((next_object['x'] - grand_object['x'])**2 + (next_object['y'] - grand_object['y'])**2) if euclidean_dist < closest_dist: closest_dist = euclidean_dist closest_obj = next_object closest_obj_idx = next_object_idx if closest_dist <= 25: grand_object['next'] = self.hash_object(closest_obj) next_frame[closest_obj_idx]['prev'] = self.hash_object(grand_object) def video_with_frame_drop(self, video_file, FPS=30): self.video = cv2.VideoCapture(video_file) skip_frames = 0 t = time.time() try: while self.video.isOpened(): for i in range(skip_frames): _, _ = self.video.read() ret, self.image = self.video.read() self.result = self.tfnet.return_predict(self.image) self.print_box() cv2.waitKey(1) skip_frames = 
int((time.time()-t)*FPS) t = time.time() except AssertionError: pass
"load": "4000", "gpu": 1.0, "threshold": 0.1 } tfnet = TFNet(options) video = cv2.VideoCapture(0) video.set(cv2.CAP_PROP_FRAME_HEIGHT, 1280) video.set(cv2.CAP_PROP_FRAME_WIDTH, 720) colors = [tuple(np.random.rand(3) * 255) for _ in range(100)] while True: start_time = time.time() ret, frame = video.read() returns = tfnet.return_predict(frame) if ret: for c, r in zip(colors, returns): tl = (r["topleft"]["x"], r["topleft"]["y"]) br = (r["bottomright"]["x"], r["bottomright"]["y"]) label = "{}: {:.0f}%".format(r["label"], r["confidence"] * 100) frame = cv2.rectangle(frame, tl, br, c, 4) frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, c, 2) cv2.imshow("frame", frame) print("FPS {:.1f}".format(1 / (time.time() - start_time))) else: video.release()
from io import BytesIO
import time
import requests
from PIL import Image, ImageDraw
import numpy as np
import glob

options = {"model": "cfg/tiny-yolo-voc.cfg",
           "load": "bin/tiny-yolo-voc.weights",
           "threshold": 0.15}
tfnet = TFNet(options)

counter = 0
for filename in glob.glob('birds/*.jpg'):
    curr_img = Image.open(filename).convert('RGB')
    curr_imgcv2 = cv2.cvtColor(np.array(curr_img), cv2.COLOR_RGB2BGR)
    result = tfnet.return_predict(curr_imgcv2)
    print(result)

    draw = ImageDraw.Draw(curr_img)
    for det in result:
        draw.rectangle([det['topleft']['x'], det['topleft']['y'],
                        det['bottomright']['x'], det['bottomright']['y']],
                       outline=(255, 0, 0))
        draw.text([det['topleft']['x'], det['topleft']['y'] - 13],
                  det['label'], fill=(255, 0, 0))
    curr_img.save('birds_labeled/%i.jpg' % counter)
    counter += 1
def detection(inputImg):
    global size
    global predictions
    global tfnet2

    options = {
        "model": "cfg/tiny-yolo-voc-3c.cfg",
        "load": 8625,
        "gpu": 1.0,
        "threshold": 0.5
    }
    tfnet2 = TFNet(options)

    predictions = tfnet2.return_predict(inputImg)
    print(type(predictions))

    # draw the raw detections returned by the model
    newImage = np.copy(inputImg)
    for i in range(len(predictions)):
        top_x = predictions[i]['topleft']['x']
        top_y = predictions[i]['topleft']['y']
        btm_x = predictions[i]['bottomright']['x']
        btm_y = predictions[i]['bottomright']['y']
        newImage = cv2.rectangle(newImage, (top_x, top_y), (btm_x, btm_y),
                                 (255, 0, 0), 10)
    cv2.imwrite("detected.jpg", newImage)
    size = newImage.shape

    # filter the results obtained from the model:
    # sort by position, then drop the lower-confidence box of any overlapping pair with the same label
    predictions = sorted(predictions,
                         key=lambda i: (i['topleft']['x'], i['topleft']['y']))
    newImage = np.copy(inputImg)
    for result in predictions:
        for r in predictions:
            if result != r:
                # print('distinct values')
                if result["label"] == r["label"] and Overlap(
                        result['topleft'], result['bottomright'],
                        r['topleft'], r['bottomright']):
                    print("match")
                    if result['confidence'] > r['confidence']:
                        predictions.remove(r)
                    else:
                        predictions.remove(result)
    print(predictions)

    for result in predictions:
        top_x = result['topleft']['x']
        top_y = result['topleft']['y']
        btm_x = result['bottomright']['x']
        btm_y = result['bottomright']['y']
        confidence = result['confidence']
        label = result['label'] + " " + str(round(confidence, 3))
        newImage = cv2.rectangle(newImage, (top_x, top_y), (btm_x, btm_y),
                                 (255, 0, 0), 1)
        newImage = cv2.putText(newImage, label, (top_x, top_y + 20),
                               cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), 1,
                               cv2.LINE_AA)
    size = newImage.shape
    # cv2.imwrite("out1.jpg", newImage)
    return newImage
    'threshold': 0.4,
    'gpu': 0
}

print('Loading darkflow.')
tfnet = TFNet(options)

print('Read image')
# read the color image and convert to RGB
img = cv2.imread('media/input/1395447642477-linzerstr.jpg', cv2.IMREAD_COLOR)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

print('Predict the image')
# use YOLO to predict the image
predictions = tfnet.return_predict(img)

# pull out some info from the results
print('Render result')
for prediction in predictions:
    tl = (prediction['topleft']['x'], prediction['topleft']['y'])
    br = (prediction['bottomright']['x'], prediction['bottomright']['y'])
    label = prediction['label']
    # add the box and label and display it
    img = cv2.rectangle(img, tl, br, (0, 255, 0), 4)
    img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)

plt.imshow(img)
plt.show()
class stereo_vision: def __init__(self, threshold, gpu): options = { 'model': 'cfg/yolo.cfg', 'load': 'bin/yolo.weights', 'threshold': threshold, 'gpu': gpu } cal = StereoCalibration('') self.data, self.stereo, self.map = cal.read_data() self.tfnet = TFNet(options) def distance_measurement(self, resL, resR, frameL, frameR, rang): count = 0 subtracted_mean_pre = 1000 differeceL = abs(resL['bottomright']['x'] - resL['topleft']['x']) differece = abs(resL['bottomright']['y'] - resL['topleft']['y']) gray_croped_L = cv2.cvtColor(frameL, cv2.COLOR_RGB2GRAY) gray_croped_L = gray_croped_L[ resL['topleft']['y'] + int(0.1 * differece):resL['bottomright']['y'] - int(0.1 * differece), resL['topleft']['x'] + int(0.1 * differeceL):resL['bottomright']['x'] - int(0.1 * differeceL)] gray_R = cv2.cvtColor(frameR, cv2.COLOR_RGB2GRAY) cv2.imshow("gray_croped", gray_croped_L) cv2.waitKey(0) for i in range(rang): gray_croped_R = gray_R[ resL['topleft']['y']:resL['bottomright']['y'], resR['topleft']['x'] + i - 25:resR['topleft']['x'] + i - 25 + differeceL] cv2.imshow("CR", gray_croped_R) subtracted_image = np.abs(np.subtract(gray_croped_R, gray_croped_L)) subtracted_mean = np.sum(subtracted_image.flatten(), axis=0) #print("subtracted_mean"+str(subtracted_mean)) if subtracted_mean < subtracted_mean_pre: count = i subtracted_mean_pre = subtracted_mean disparity1 = abs((resR['topleft']['x'] + count - 15) - resL['topleft']['x']) disparity = abs(resL["middleL"] - resR["middleR"]) x = frameL.shape[0] print("disparity1:" + str(disparity1) + "dis:" + str(disparity)) distance = (13 * x) / (math.tan(math.radians(30)) * disparity) distance1 = (13 * x) / (math.tan(math.radians(30)) * disparity1) distance = distance - 3.3 distance1 = distance1 - 1 return distance, distance1 def object_finding(self, frameL, frameR, search): Left = [] Right = [] imgU1 = cv2.remap(frameL, self.map["map1x"], self.map["map1y"], cv2.INTER_LANCZOS4) imgU2 = cv2.remap(frameR, self.map["map2x"], self.map["map2y"], cv2.INTER_LANCZOS4) resultL = self.tfnet.return_predict(imgU1) resultR = self.tfnet.return_predict(imgU2) distance = {} if not resultL == []: for res in resultL: if res['label'] == search: res.update({ "AreaL": (res['bottomright']['x'] - res['topleft']['x']) * (res['bottomright']['y'] - res['topleft']['y']) }) res.update({ "middleL": res['topleft']['x'] + (res['bottomright']['x'] - res['topleft']['x']) / 2 }) Left.append(res) if not resultR == []: for res in resultR: if res['label'] == search: res.update({ "AreaR": (res['bottomright']['x'] - res['topleft']['x']) * (res['bottomright']['y'] - res['topleft']['y']) }) res.update({ "middleR": res['topleft']['x'] + (res['bottomright']['x'] - res['topleft']['x']) / 2 }) Right.append(res) m = 0 for left in Left: for right in Right: if abs(left["AreaL"] - right["AreaR"]) < 1100: distance.update({ "dis" + str(m): self.distance_measurement(left, right, imgU1, imgU2, 50) }) m += 1 print("Left->" + str(left) + " Right->" + str(right)) return distance
import cv2
from darkflow.net.build import TFNet

# darkflow options
options = {"model": "cfg/my-tiny-yolo.cfg", "load": -1, "threshold": 0.5}
tfnet = TFNet(options)

curr_frame = cv2.imread("./test_pic1.jpg")
results = tfnet.return_predict(curr_frame)

for result in results:
    cv2.rectangle(curr_frame,
                  (result["topleft"]["x"], result["topleft"]["y"]),
                  (result["bottomright"]["x"], result["bottomright"]["y"]),
                  (255, 0, 0), 4)
    text_x, text_y = result["topleft"]["x"] - 10, result["topleft"]["y"] - 10
    cv2.putText(curr_frame, result["label"], (text_x, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2, cv2.LINE_AA)

cv2.imwrite("video_capture/test.jpg", curr_frame)
    'load': 'bin/yolo.weights',
    'threshold': 0.3,
    'gpu': 0.7
}
tfnet = TFNet(option)

capture = cv2.VideoCapture('test.mp4')  # read the video
colors = [tuple(255 * np.random.rand(3)) for i in range(10)]  # create 10 random colors in RGB form

# process frames while the video is open
while capture.isOpened():
    stime = time.time()  # record the start time
    # read one frame; ret is a bool telling whether a frame was read, frame is the current image
    ret, frame = capture.read()
    if ret:
        results = tfnet.return_predict(frame)  # feed the frame to the network for prediction
        # pair up colors and results
        for color, result in zip(colors, results):
            tl = (result['topleft']['x'], result['topleft']['y'])
            br = (result['bottomright']['x'], result['bottomright']['y'])
            label = result['label']
            frame = cv2.rectangle(frame, tl, br, color, 3)
            frame = cv2.putText(frame, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1, color, 1)
        cv2.imshow('frame', frame)  # show the current frame
        print('FPS {:.1f}'.format(1 / (time.time() - stime)))  # compute the frame rate
        # press the q key to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        capture.release()
from darkflow.net.build import TFNet
import argparse

# the cfg file and weights location; change this according to your model location
model = {"model": "cfg/yolov2.cfg", "load": "yolov2.weights", "threshold": 0.4}

# creating the object
tfnet = TFNet(model)

# to get the image path
parser = argparse.ArgumentParser()
parser.add_argument('--img', type=str, help='path of the image')
arg = parser.parse_args()

imgcv = cv2.imread(arg.img)  # read the image
result = tfnet.return_predict(imgcv)  # predict the classes and coordinates of the objects

# Draw the bounding box around the predicted classes
tl = []
br = []
labels = []
for i in range(len(result)):
    # each prediction is a dictionary holding the label and box corners
    topleft = (result[i]['topleft']['x'], result[i]['topleft']['y'])
    bottomright = (result[i]['bottomright']['x'], result[i]['bottomright']['y'])
    label = (result[i]['label'])
    st = result[i]['topleft']['x']
    nd = result[i]['bottomright']['x']
    mid_x = (nd - st) // 2 + st  # mid point of the top box line
    return dx * dy


options = {"model": "cfg\\yolo_custom.cfg", "load": -1, "gpu": 0.1}
tfnet2 = TFNet(options)
tfnet2.load_from_ckpt()

# Change the path according to the Windows directory
# Don't forget to keep \\ as the separator in a Windows path
# Check that all images have the jpg extension
# Keep images and annotations in separate folders
test_image_paths = glob(
    os.path.join('/home/falcon01/Desktop/test/images/', '*.jpg'))

for image_path in test_image_paths:
    original_img = cv2.imread(image_path)
    original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
    results = tfnet2.return_predict(original_img)

    image_name = os.path.basename(image_path)
    xml_file_name = image_name[:-4] + '.xml'
    # Change the path according to requirements
    xml_file = open(
        os.path.join(os.path.dirname(image_path),
                     '../annotations/{}'.format(xml_file_name)))
    pred_rectcoord = predicted_rectangle(results)

    tree = ET.parse(xml_file)
    root = tree.getroot()
    for obj in root.iter('object'):
        xml_box = obj.find('bndbox')
        xmin = int(float(xml_box.find('xmin').text))
        ymin = int(float(xml_box.find('ymin').text))
        xmax = int(float(xml_box.find('xmax').text))
class ObjectCountingAPI: def __init__(self, options): self.options = options self.tfnet = TFNet(options) def _write_quantities(self, frame, labels_quantities_dic): for i, (label, quantity) in enumerate(labels_quantities_dic.items()): class_id = [ i for i, x in enumerate(labels_quantities_dic.keys()) if x == label ][0] color = [int(c) for c in COLORS[class_id % len(COLORS)]] cv2.putText( frame, f"{label}: {quantity}", (10, (i + 1) * 35), OBJECTS_ON_FRAME_COUNTER_FONT, OBJECTS_ON_FRAME_COUNTER_FONT_SIZE, color, 2, cv2.FONT_HERSHEY_SIMPLEX, ) def _draw_detection_results(self, frame, results, labels_quantities_dic): for start_point, end_point, label, confidence in results: x1, y1 = start_point class_id = [ i for i, x in enumerate(labels_quantities_dic.keys()) if x == label ][0] color = [int(c) for c in COLORS[class_id % len(COLORS)]] cv2.rectangle(frame, start_point, end_point, color, DETECTION_FRAME_THICKNESS) cv2.putText(frame, label, (x1, y1 - 5), OBJECTS_ON_FRAME_COUNTER_FONT, OBJECTS_ON_FRAME_COUNTER_FONT_SIZE, color, 2) def _convert_detections_into_list_of_tuples_and_count_quantity_of_each_label( self, objects): labels_quantities_dic = {} results = [] for object in objects: x1, y1 = object["topleft"]["x"], object["topleft"]["y"] x2, y2 = object["bottomright"]["x"], object["bottomright"]["y"] confidence = object["confidence"] label = object["label"] try: labels_quantities_dic[label] += 1 except KeyError: labels_quantities_dic[label] = 1 start_point = (x1, y1) end_point = (x2, y2) results.append((start_point, end_point, label, confidence)) return results, labels_quantities_dic def count_objects_on_image(self, frame, targeted_classes=[], output_path="count_people_output.jpg", show=False): objects = self.tfnet.return_predict(frame) if targeted_classes: objects = list( filter(lambda res: res["label"] in targeted_classes, objects)) results, labels_quantities_dic = self._convert_detections_into_list_of_tuples_and_count_quantity_of_each_label( objects) self._draw_detection_results(frame, results, labels_quantities_dic) self._write_quantities(frame, labels_quantities_dic) if show: cv2.imshow("frame", frame) cv2.waitKey() cv2.destroyAllWindows() cv2.imwrite(output_path, frame) # return frame, objects def count_objects_on_video(self, cap, targeted_classes=[], output_path="the_output.avi", show=False): ret, frame = cap.read() fps, height, width = get_output_fps_height_and_width(cap) fourcc = cv2.VideoWriter_fourcc(*'XVID') output_movie = cv2.VideoWriter(output_path, fourcc, fps, (width, height)) while ret: objects = self.tfnet.return_predict(frame) if targeted_classes: objects = list( filter(lambda res: res["label"] in targeted_classes, objects)) results, labels_quantities_dic = self._convert_detections_into_list_of_tuples_and_count_quantity_of_each_label( objects) self._draw_detection_results(frame, results, labels_quantities_dic) self._write_quantities(frame, labels_quantities_dic) output_movie.write(frame) if show: cv2.imshow('frame', frame) if cv2.waitKey(1) & 0xFF == ord('q'): break ret, frame = cap.read() cap.release() cv2.destroyAllWindows() def count_objects_crossing_the_virtual_line(self, cap, line_begin, line_end, targeted_classes=[], output_path="the_output.avi", show=False): ret, frame = cap.read() fps, height, width = get_output_fps_height_and_width(cap) fourcc = cv2.VideoWriter_fourcc(*'XVID') output_movie = cv2.VideoWriter(output_path, fourcc, fps, (width, height)) tracker = Sort() memory = {} line = [line_begin, line_end] counter = 0 while ret: objects = 
        while ret:
            objects = self.tfnet.return_predict(frame)
            if targeted_classes:
                objects = list(
                    filter(lambda res: res["label"] in targeted_classes,
                           objects))
            results, _ = self._convert_detections_into_list_of_tuples_and_count_quantity_of_each_label(
                objects)
            # convert to the format required for dets: [x1, y1, x2, y2, confidence]
            dets = [[*start_point, *end_point]
                    for (start_point, end_point, label, confidence) in results]
            np.set_printoptions(
                formatter={'float': lambda x: "{0:0.3f}".format(x)})
            dets = np.asarray(dets)
            tracks = tracker.update(dets)
            boxes = []
            indexIDs = []
            previous = memory.copy()
            memory = {}
            for track in tracks:
                boxes.append([track[0], track[1], track[2], track[3]])
                indexIDs.append(int(track[4]))
                memory[indexIDs[-1]] = boxes[-1]
            if len(boxes) > 0:
                i = int(0)
                for box in boxes:
                    (x, y) = (int(box[0]), int(box[1]))
                    (w, h) = (int(box[2]), int(box[3]))
                    color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
                    cv2.rectangle(frame, (x, y), (w, h), color,
                                  DETECTION_FRAME_THICKNESS)
                    if indexIDs[i] in previous:
                        previous_box = previous[indexIDs[i]]
                        (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                        (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                        p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2))
                        p1 = (int(x2 + (w2 - x2) / 2), int(y2 + (h2 - y2) / 2))
                        cv2.line(frame, p0, p1, color, 3)
                        if intersect(p0, p1, line[0], line[1]):
                            counter += 1
                    text = "{}".format(indexIDs[i])
                    cv2.putText(frame, text, (x, y - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
                    i += 1
            cv2.line(frame, line[0], line[1], LINE_COLOR, LINE_THICKNESS)
            cv2.putText(frame, str(counter), LINE_COUNTER_POSITION,
                        LINE_COUNTER_FONT, LINE_COUNTER_FONT_SIZE,
                        LINE_COLOR, 2)
            output_movie.write(frame)
            if show:
                cv2.imshow('frame', frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            ret, frame = cap.read()
        cap.release()
        cv2.destroyAllWindows()
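The counting methods above rely on two helpers that are not shown in this excerpt, get_output_fps_height_and_width(cap) and intersect(p0, p1, q0, q1). A minimal sketch of plausible implementations, assuming the usual counter-clockwise segment-intersection test and the standard OpenCV capture properties:

def get_output_fps_height_and_width(cap):
    # Assumed helper: read writer parameters from the input capture.
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    return fps, height, width


def ccw(a, b, c):
    # True if points a, b, c are ordered counter-clockwise.
    return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])


def intersect(p0, p1, q0, q1):
    # Segments p0-p1 and q0-q1 intersect iff each segment's endpoints
    # straddle the other segment.
    return ccw(p0, q0, q1) != ccw(p1, q0, q1) and ccw(p0, p1, q0) != ccw(p0, p1, q1)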
    'threshold': 0.6,
    'gpu': 1.0
}
tfnet = TFNet(option)

# Read the video from the current directory (the one containing this .py file).
capture = cv2.VideoCapture('aot-id-card.mp4')
color = [tuple(255 * np.random.rand(3)) for i in range(5)]

while (capture.isOpened()):  # loop while the capture is open
    stime = time.time()  # start time, used to measure per-frame processing time
    # Read a frame; ret is True if a frame was read, False otherwise.
    ret, frame = capture.read()
    if ret:
        result = tfnet.return_predict(frame)  # run prediction on the frame
        # Drawing below is the same as in the single-image case.
        for colr, reslt in zip(color, result):
            t1 = (reslt['topleft']['x'], reslt['topleft']['y'])
            t2 = (reslt['bottomright']['x'], reslt['bottomright']['y'])
            label = reslt['label']
            confidence = reslt['confidence']
            label = '{} : {:0.2f}%'.format(label, confidence * 100)
            frame = cv2.rectangle(frame, t1, t2, colr, 3)
            frame = cv2.putText(frame, label, t1, cv2.FONT_HERSHEY_COMPLEX, 1,
                                colr, 2)
        cv2.imshow('frame', frame)
def yolo_in_R(myDir, imageName, threshold):
    import os
    os.chdir(myDir)

    # import libraries
    from darkflow.net.build import TFNet
    import cv2
    import matplotlib.pyplot as plt
    import matplotlib.image as mpimg

    # options for tiny-yolo-voc-1class (trained model)
    options = {
        'model': 'cfg/tiny-yolo-voc-1class_toh.cfg',
        'load': 1000,
        'threshold': threshold,
        'gpu': 1.0
    }
    tfnet = TFNet(options)

    # load the image and convert it to RGB
    img = cv2.imread("Downloaded_images/" + imageName + ".jpg",
                     cv2.IMREAD_COLOR)
    # img = cv2.imread("sample_img/sample_toh.jpg", cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # plt.imshow(img)
    # plt.show()

    # predict objects in the image
    result = tfnet.return_predict(img)

    # set up the bounding box for the first detection
    tl = (int(result[0]['topleft']['x']), int(result[0]['topleft']['y']))
    br = (int(result[0]['bottomright']['x']),
          int(result[0]['bottomright']['y']))
    label = result[0]['label']

    # draw the bounding box
    img = cv2.rectangle(img, tl, br, (0, 255, 0), 5)
    # add the label to the bounding box
    img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                      (0, 253, 0), 2)

    # display image with bounding box
    # plt.imshow(img)
    # plt.show()

    # save the labelled image
    mpimg.imsave("Labeled_images/" + imageName + "_labeled.jpg", img)
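A hypothetical call (the directory and image name below are placeholders); from R the function would typically be exposed through reticulate after source_python()-ing this file:

# placeholder arguments for illustration only
yolo_in_R(myDir="/path/to/darkflow_project", imageName="example_image", threshold=0.3)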
from darkflow.net.build import TFNet
import cv2
import matplotlib.pyplot as plt

options = {
    'model': 'cfg/yolov2-tiny-voc-8c.cfg',
    'load': 21750,
    'threshold': 0.2,
    'gpu': 0.8
}
tf = TFNet(options)

img_dir = 'check_img/'
img_name = 'strawberry0'
img_type = '.jpg'
img = cv2.imread(img_dir + img_name + img_type, cv2.IMREAD_COLOR)

result = tf.return_predict(img)
res_len = len(result)
for i in range(res_len):
    tl = (result[i]['topleft']['x'], result[i]['topleft']['y'])
    br = (result[i]['bottomright']['x'], result[i]['bottomright']['y'])
    label = result[i]['label']
    img = cv2.rectangle(img, tl, br, (0, 255, 0), 4)
    img = cv2.putText(img, label, tl, cv2.FONT_HERSHEY_COMPLEX, 1,
                      (0, 0, 0), 2)

cv2.imwrite(img_dir + img_name + '_detected' + img_type, img)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img)
while (cap1.isOpened() and cap2.isOpened()):
    ret, frame = cap1.read()
    ret2, frame2 = cap2.read()
    # Extract frame
    try:
        framed = cv2.resize(frame, (960, 540))
        framed2 = cv2.resize(frame2, (960, 540))
        cv2.imshow('frame', framed)
        cv2.imshow('frame2', framed2)
        # Only run every 15 frames for efficiency
        if counter % 15 == 0:
            try:
                predictions = yoloPlate.return_predict(frame)
                firstCropImg = firstCrop(frame, predictions)
                secondCropImg = secondCrop(firstCropImg)
                cv2.imshow('Second crop plate', secondCropImg)
                secondCropImg = cv2.cvtColor(secondCropImg, cv2.COLOR_BGR2GRAY)
                if not (os.path.exists(dirname + '/results')):
                    os.mkdir(dirname + '/results')
                cv2.imwrite(
                    dirname + '/results/result' + str(plateNum) + '.jpg',
                    secondCropImg)
                rtnJSON = ('{"source": "./results/result' + str(plateNum) +
                           '.jpg", "camera": "1" , "time": "' +
                           str(datetime.datetime.now()) + '"}')
                print(rtnJSON)
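The plate-detection loop above uses firstCrop() and secondCrop() helpers that are not defined in this excerpt. A minimal sketch of what firstCrop() might look like, assuming it should crop the highest-confidence plate detection out of the frame (secondCrop(), presumably a contour-based refinement of that crop, is not sketched here):

def firstCrop(img, predictions):
    # Hypothetical helper: crop the highest-confidence detection from the frame.
    if not predictions:
        return img
    best = max(predictions, key=lambda d: d['confidence'])
    x1, y1 = best['topleft']['x'], best['topleft']['y']
    x2, y2 = best['bottomright']['x'], best['bottomright']['y']
    return img[y1:y2, x1:x2]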
class DogDetector():
    def __init__(self, cfg='yolo', weights='yolov2', threshold=0.3):
        model_path = os.path.join(root_path, 'cfg/' + cfg + '.cfg')
        load_path = os.path.join(root_path, 'weights/' + weights + '.weights')
        self.options = {
            'model': model_path,
            'load': load_path,
            'threshold': threshold,
            'gpu': 1.0,
            'savepb': True
        }
        print(self.options['model'], self.options['load'])
        self.tfnet = TFNet(self.options)

    def detectsOneDog(self, img):
        result = self.tfnet.return_predict(img)
        dog_list = []
        for res in result:
            if res['label'] == 'dog':
                dog_list.append(res)
        if len(dog_list) == 0:
            return False
        else:
            print(dog_list)
            # keep the dog detection with the largest bounding-box area
            temp_area = 0
            index = 0
            for i in range(len(dog_list)):
                rect = self.getDogRect(dog_list[i], img)
                area = rect[2] * rect[3]
                if area > temp_area:
                    temp_area = area
                    index = i
            print(dog_list[index])
            return dog_list[index]

    def detectDogHead(self, img):
        detector = dlib.simple_object_detector(
            os.path.join(root_path, "data/dog_detector.svm"))
        dets = detector(img)
        if (len(dets) == 0):
            print("Can't detect Head")
            return False
        else:
            dets_pop = dets.pop()
            return dets_pop

    def isLeft(self, dog_rect, head_rect):
        if (head_rect[0] + head_rect[2] > dog_rect[0] + dog_rect[2]):
            return False
        else:
            return True

    def getDogRect(self, result, originalImg):
        tl = result['topleft']
        br = result['bottomright']
        width = br['x'] - tl['x']
        height = br['y'] - tl['y']
        # pad the box by roughly a quarter of its larger side
        error_pixel = round(max(width, height) / 4)
        width += error_pixel
        height += error_pixel
        x = max(tl['x'] - round(error_pixel / 2), 1)
        y = max(tl['y'] - round(error_pixel / 2), 1)
        imgHeight, imgWidth = originalImg.shape[:2]
        if (x + width) >= imgWidth:
            width = imgWidth - x - 1
        if (y + height) >= imgHeight:
            height = imgHeight - y - 1
        return (x, y, width, height)

    def getDogPartRect(self, result, originalImg):
        t = result.top()
        l = result.left()
        b = result.bottom()
        r = result.right()
        print(t, l, b, r)
        width = r - l
        height = b - t
        error_pixel = round(max(width, height) / 4)
        width += error_pixel
        height += error_pixel
        x = max(l - round(error_pixel / 2), 0)
        y = max(t - round(error_pixel / 2), 0)
        imgHeight, imgWidth = originalImg.shape[:2]
        if (x + width) >= imgWidth:
            width = imgWidth - x - 1
        if (y + height) >= imgHeight:
            height = imgHeight - y - 1
        return (x, y, width, height)
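A hypothetical single-image use of DogDetector (the image path is a placeholder, and root_path is assumed to point at a darkflow checkout with the cfg and weights files in place):

import cv2

detector = DogDetector(cfg='yolo', weights='yolov2', threshold=0.3)
img = cv2.imread('sample_img/sample_dog.jpg')   # placeholder path
dog = detector.detectsOneDog(img)               # largest detected dog, or False
if dog:
    x, y, w, h = detector.getDogRect(dog, img)
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imwrite('sample_dog_detected.jpg', img)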
class FoodLabeller(Labeller):
    def __init__(self, env_spec, policy, options=None, darkflow_path=None,
                 im_is_rgb=False, **kwargs):
        super(FoodLabeller, self).__init__(env_spec, policy)
        if options is None:
            options = {"model": "cfg/yolo.cfg",
                       "load": "bin/yolov2.weights",
                       "threshold": 1.e-2,
                       "gpu": True}
        if darkflow_path is None:
            darkflow_path = os.path.join(str(Path.home()), 'darkflow')
        old_cwd = os.getcwd()
        os.chdir(darkflow_path)
        self._tfnet = TFNet(options)
        os.chdir(old_cwd)
        self._labels = []
        for key in self._env_spec.goal_spec:
            if key[-5:] == '_diff':
                self._labels.append(key[:-5])
        self._im_is_rgb = im_is_rgb

    def label(self, observations, curr_goals):
        obs_ims = observations[0]
        goals = []
        for obs_im, curr_goal in zip(obs_ims, curr_goals):
            if self._im_is_rgb:
                obs_im = obs_im[..., ::-1]  # make bgr
            im_height, im_width, _ = obs_im.shape
            results = self._tfnet.return_predict(obs_im)
            boxes = self._get_boxes(results)
            goal = self._get_goal(boxes, curr_goal, im_height, im_width)
            goals.append(goal)
        return goals

    def _get_boxes(self, results):
        boxes = []
        for result in results:
            if result['label'] in self._labels:
                boxes.append(result)
        return boxes

    def _get_goal(self, boxes, curr_goal, im_height, im_width):
        # goal is weight of each goal and then desired center pixel
        goals = {}
        min_areas = {}
        for label in self._labels:
            goals[label] = (0., 0.)  # (in image, normalized diff to middle)
            min_areas[label] = np.inf
        im_mid = 0.5 * (im_width - 1.)
        for box in boxes:
            label = box['label']
            br_x = box['bottomright']['x']
            br_y = box['bottomright']['y']
            tl_x = box['topleft']['x']
            tl_y = box['topleft']['y']
            mid_point = 0.5 * np.array((br_x + tl_x, br_y + tl_y))
            assert (mid_point[0] >= 0 and mid_point[0] < im_width)
            assert (mid_point[1] >= 0 and mid_point[1] < im_height)
            area = (tl_x - br_x) * (tl_y - br_y)
            if area < min_areas[label]:
                min_areas[label] = area
                norm_diff = (im_mid - mid_point[0]) / im_mid
                goals[label] = (1., norm_diff)
        goal_keys = list(self._env_spec.goal_spec.keys())
        goal = np.array(curr_goal)
        for label in self._labels:
            idx = min([idx for idx, k in enumerate(goal_keys) if label in k])
            goal[idx:idx + 2] = goals[label]  # NOTE: assumes in image, normalized diff to middle
        return goal
FRAME_HEIGHT = 480  # 240

if __name__ == '__main__':
    camera = cv2.VideoCapture(0)
    print(camera.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH))
    print(camera.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT))

    tfnet = TFNet(options)

    # imgcv = cv2.imread("/Users/marco/Documents/Datasets/drAIver/object_detector/test_images/image1.jpg")
    # print(results)

    while True:
        if camera.isOpened():
            _, imgcv = camera.read()
            results = tfnet.return_predict(imgcv)
            for result in results:
                if result['confidence'] > 0.5:
                    cv2.rectangle(
                        imgcv,
                        (result['topleft']['x'], result['topleft']['y']),
                        (result['bottomright']['x'], result['bottomright']['y']),
                        (255, 0, 0))
            cv2.imshow("Image", imgcv)
            cv2.waitKey(1)
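The webcam loop above references FRAME_WIDTH and options that are defined earlier in the original script and are not part of this excerpt. Purely illustrative placeholder definitions (both the width and the model/weights paths are assumptions, not the original values):

FRAME_WIDTH = 640  # assumed; the original script defines this above the excerpt
options = {
    # assumed darkflow options; paths and threshold are placeholders
    'model': 'cfg/tiny-yolo-voc.cfg',
    'load': 'bin/tiny-yolo-voc.weights',
    'threshold': 0.5,
}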
# for feed2
tl2 = ()
bl2 = ()
fixed_top_left2 = ()
fixed_bottom_right2 = ()
flag2 = 0
time_flag2 = 0

while True:
    # start_time = time.time()
    ret1, frame1 = capture_1.read()
    ret2, frame2 = capture_2.read()
    countbottle1 = 0
    countbottle2 = 0
    if ret1 and ret2:
        results1 = tfnet.return_predict(frame1)
        results2 = tfnet.return_predict(frame2)
        for color, result in zip(colors, results1):
            tl = (result['topleft']['x'], result['topleft']['y'])
            br = (result['bottomright']['x'], result['bottomright']['y'])
            # the keys in the result dictionary are:
            # [label], [confidence], [topleft][x or y], [bottomright][x or y]
            label = result['label']
            confidence = result['confidence']
            if label == 'bottle':
                time_flag = 0
                text = '{}: {:.0f}%'.format(label, confidence * 100)
                frame1 = cv2.rectangle(frame1, tl, br, color, 5)
                frame1 = cv2.putText(frame1, text, tl,
                                     cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 2)
                btl_1 = tl
from darkflow.net.build import TFNet
import cv2

img_origin = cv2.imread('bin/yrm_origin.jpg')  # frame without the danmaku overlay
img_dm = cv2.imread('bin/yrm_dm.jpg')          # frame with the danmaku overlay
if img_origin is None or img_dm is None:
    print('img_origin is None or img_dm is None')
    exit(0)

options = {
    'model': 'cfg/yolo.cfg',
    'load': 'bin/yolo.weights',
    'threshold': 0.5,
    'gpu': 0.7,
    # 'imgdir': 'sample_img/wa-yolo/',
}
tfnet = TFNet(options)

boxes = tfnet.return_predict(img_origin)
if len(boxes) == 0:
    print('there is no obj')
    exit(0)

for result in boxes:
    if result['label'] != 'person':
        continue
    # this box is a person: blend the original pixels back in so the danmaku
    # does not cover the person (3/4 original frame + 1/4 danmaku frame)
    (tx, ty) = (result['topleft']['x'], result['topleft']['y'])
    (bx, by) = (result['bottomright']['x'], result['bottomright']['y'])
    img_dm[ty:by, tx:bx] = img_origin[ty:by, tx:bx] // 4 * 3 + \
        img_dm[ty:by, tx:bx] // 4

while (1):
    cv2.imshow('image', img_dm)
class SubscribeImage(object):
    def __init__(self):
        self.options = {"model": "cfg/tiny-yolo-voc.cfg",
                        "load": "bin/tiny-yolo-voc.weights",
                        "threshold": 0.1, "gpu": 0.2}
        # self.options = {"model": "cfg/yolo.cfg", "load": "bin/yolo.weights", "threshold": 0.1, "gpu": 0.8}
        self.tfnet = TFNet(self.options)
        self.class_names = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
                            'bus', 'car', 'cat', 'chair', 'cow', 'diningtable',
                            'dog', 'horse', 'motorbike', 'person',
                            'pottedplant', 'sheep', 'sofa', 'train',
                            'tvmonitor']
        self.num_classes = len(self.class_names)

        # build the color list
        self.hsv_tuples = [(x / 80, 1., 1.) for x in range(80)]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x),
                               self.hsv_tuples))
        self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255),
                                          int(x[2] * 255)), self.colors))
        random.seed(10101)  # Fixed seed for consistent colors across runs.
        random.shuffle(self.colors)  # Shuffle colors to decorrelate adjacent classes.
        random.seed(None)  # Reset seed to default.

        self.periods = []
        self.count = 0
        self.class_num = 0

        rospy.init_node('kinect_v1_image_sub')
        self.bridge = CvBridge()
        rospy.Subscriber('/camera/rgb/image_color', Image, self.callback)
        rospy.spin()

    def callback(self, data):
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)

        (rows, cols, channels) = cv_image.shape
        if cols > 60 and rows > 60:
            cv2.circle(cv_image, (50, 50), 10, 255)

        cv2.imshow("Image window", cv_image)
        self.DarkflowRos(cv_image)

    def DarkflowRos(self, frame):
        # run detection and track per-frame inference time
        self.start = time.time()
        self.items = self.tfnet.return_predict(frame)
        self.count += 1
        self.period = time.time() - self.start
        self.periods.append(self.period)
        if self.count % 30 == 0:
            print('FrameRate:' + str(1.0 / (sum(self.periods) / self.count)))

        for item in self.items:
            self.tlx = item['topleft']['x']
            self.tly = item['topleft']['y']
            self.brx = item['bottomright']['x']
            self.bry = item['bottomright']['y']
            self.label = item['label']
            self.conf = item['confidence']

            # only draw confident detections
            if self.conf > 0.4:
                for i in self.class_names:
                    if self.label == i:
                        self.class_num = self.class_names.index(i)
                        break
                # draw the detection box and label
                cv2.rectangle(frame, (self.tlx, self.tly),
                              (self.brx, self.bry),
                              self.colors[self.class_num], 2)
                self.text = self.label + " " + ('%.2f' % self.conf)
                cv2.putText(frame, self.text, (self.tlx + 10, self.tly - 5),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                            self.colors[self.class_num], 2)

        # display the annotated frame
        cv2.imshow("View", frame)
        k = cv2.waitKey(10)
        if k == ord('q'):
            cv2.destroyAllWindows()
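The ROS node above assumes the usual imports and an entry point, which are not shown in this excerpt. A minimal sketch of what they would look like (the node is driven entirely by the subscriber callback, so constructing the class is enough):

import colorsys
import random
import time

import cv2
import rospy
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image
from darkflow.net.build import TFNet

if __name__ == '__main__':
    # __init__ registers the subscriber and blocks in rospy.spin()
    SubscribeImage()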