def detect_attributes(image, yolo_dim, yolov3, encoder):
    '''detect_attributes'''
    text_results = []
    image, orig_img, im_dim = prep_image(image, yolo_dim)
    im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

    image_tensor = image.to(device)
    im_dim = im_dim.to(device)

    # Generate a caption from the image
    detections = yolov3(image_tensor, device, True)  # prediction mode for yolo-v3
    detections = write_results(
        detections,
        args.confidence,
        device,
        num_classes=80,
        nms=True,
        nms_conf=args.nms_thresh,
    )
    # original image dimension --> im_dim

    # view_image(detections)
    os.system("clear")

    if not isinstance(detections, int):
        if detections.shape[0]:
            bboxs = detections[:, 1:5].clone()
            im_dim = im_dim.repeat(detections.shape[0], 1)
            scaling_factor = torch.min(yolo_dim / im_dim, 1)[0].view(-1, 1)

            # Undo the letterbox padding, then rescale boxes to the original image.
            detections[:, [1, 3]] -= (yolo_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            detections[:, [2, 4]] -= (yolo_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            detections[:, 1:5] /= scaling_factor

            small_object_ratio = torch.FloatTensor(detections.shape[0])
            for i in range(detections.shape[0]):
                detections[i, [1, 3]] = torch.clamp(detections[i, [1, 3]], 0.0, im_dim[i, 0])
                detections[i, [2, 4]] = torch.clamp(detections[i, [2, 4]], 0.0, im_dim[i, 1])
                object_area = (detections[i, 3] - detections[i, 1]) * \
                              (detections[i, 4] - detections[i, 2])
                orig_img_area = im_dim[i, 0] * im_dim[i, 1]
                small_object_ratio[i] = object_area / orig_img_area

            # Drop detections covering less than 2% of the image area.
            detections = detections[small_object_ratio > 0.02]
            im_dim = im_dim[small_object_ratio > 0.02]

            if detections.size(0) > 0:
                feature = yolov3.get_feature()
                feature = feature.repeat(detections.size(0), 1, 1, 1)

                # Map boxes from input resolution to the feature-map grid (stride 16).
                scaling_val = 16
                bboxs /= scaling_val
                bboxs = bboxs.round()
                bboxs_index = torch.arange(bboxs.size(0), dtype=torch.int)
                bboxs_index = bboxs_index.to(device)
                bboxs = bboxs.to(device)

                roi_align = RoIAlign(args.roi_size, args.roi_size,
                                     transform_fpcoor=True).to(device)
                roi_features = roi_align(feature, bboxs, bboxs_index)
                outputs = encoder(roi_features)

                for i in range(detections.shape[0]):
                    sampled_caption = []
                    for j in range(len(outputs) - 1):
                        max_index = torch.max(outputs[j][i].data, 0)[1]
                        word = attribute_pool[j][max_index]
                        sampled_caption.append(word)

                    # Swap lower length and lower type back into order.
                    c11 = sampled_caption[11]
                    sampled_caption[11] = sampled_caption[10]
                    sampled_caption[10] = c11

                    sentence = " ".join(sampled_caption)
                    print(str(i + 1) + ": " + sentence)
                    write(detections[i], orig_img, sentence, i + 1, coco_classes, colors)

    return text_results, orig_img
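# A minimal, self-contained sketch of the box "un-letterboxing" arithmetic used
# above, assuming the standard letterbox preprocessing (resize preserving aspect
# ratio, then pad to a yolo_dim x yolo_dim square). The function name and
# signature are illustrative, not taken from this repo.
import torch

def unletterbox_boxes(boxes_xyxy, yolo_dim, orig_w, orig_h):
    """Map x1,y1,x2,y2 boxes from the padded network input back to the
    original image, mirroring the scaling_factor arithmetic above."""
    scale = min(yolo_dim / orig_w, yolo_dim / orig_h)
    boxes = boxes_xyxy.clone()
    boxes[:, [0, 2]] -= (yolo_dim - scale * orig_w) / 2  # remove x padding
    boxes[:, [1, 3]] -= (yolo_dim - scale * orig_h) / 2  # remove y padding
    boxes /= scale                                       # undo the resize
    boxes[:, [0, 2]] = boxes[:, [0, 2]].clamp(0.0, orig_w)
    boxes[:, [1, 3]] = boxes[:, [1, 3]].clamp(0.0, orig_h)
    return boxes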
def OpenPro1():

    def get_test_input(input_dim, CUDA):
        img = cv2.imread("imgs/messi.jpg")
        img = cv2.resize(img, (input_dim, input_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1))  # BGR -> RGB, HWC -> CHW
        img_ = img_[np.newaxis, :, :, :] / 255.0
        img_ = torch.from_numpy(img_).float()
        img_ = Variable(img_)
        if CUDA:
            img_ = img_.cuda()
        return img_

    def prep_image(img, inp_dim):
        """Prepare an image for input to the network. Returns a Variable."""
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = cv2.resize(orig_im, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        return img_, orig_im, dim

    def write(x, img):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        print(label)
        write_db(label)
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    def write_db(label):
        conn = None
        try:
            conn = sqlite3.connect('C:\\yolo\\pytorch-yolo-v3\\db\\final_bill.db')
            print('connected to db')
            sql = ''' INSERT INTO bill(item_name,price,weight,amount) VALUES(?,?,?,?) '''
            sql1 = "SELECT * FROM rate WHERE item_name = ?"
            sql2 = "SELECT * FROM weights WHERE item_name = ?"
            cur = conn.cursor()
            q = [label]

            cur.execute(sql1, q)
            records = cur.fetchall()
            gk = 0
            for row in records:
                print("price = ", row[1])
                gk = row[1]

            cur.execute(sql2, q)
            weights = cur.fetchall()
            mk = 0
            for rows in weights:
                print("weight = ", rows[1])
                mk = rows[1]

            amt = gk * mk
            p = [label, gk, mk, amt]
            cur.execute(sql, p)
            conn.commit()
            conn.close()
            print('inserted')
        except sqlite3.Error as e:
            print(e)

    def arg_parse():
        """Parse arguments for the detect module."""
        parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
        parser.add_argument("--confidence", dest="confidence",
                            help="Object confidence to filter predictions",
                            default=0.25)
        parser.add_argument("--nms_thresh", dest="nms_thresh",
                            help="NMS threshold", default=0.4)
        parser.add_argument("--reso", dest='reso',
                            help="Input resolution of the network. "
                                 "Increase to increase accuracy, decrease to increase speed.",
                            default="160", type=str)
        return parser.parse_args()

    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    videofile = 'video.avi'
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)
            # print(output)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # Boxes are relative to the square network input; rescale to frame size.
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
            # im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))
            list(map(lambda x: write(x, orig_im), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break
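# write_db() above assumes three tables already exist in final_bill.db. Below is
# a hedged sketch of a schema that would satisfy those queries; the column names
# are inferred from the SELECT/INSERT statements, so treat this as an
# assumption, not the project's actual schema.
import sqlite3

def create_bill_schema(db_path):
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    cur.execute("CREATE TABLE IF NOT EXISTS rate (item_name TEXT, price REAL)")
    cur.execute("CREATE TABLE IF NOT EXISTS weights (item_name TEXT, weight REAL)")
    cur.execute("CREATE TABLE IF NOT EXISTS bill "
                "(item_name TEXT, price REAL, weight REAL, amount REAL)")
    conn.commit()
    conn.close()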
def video_yolo_ready2():
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 5
    confidence = 0.6
    nms_thesh = 0.5
    start = 0
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    # A value lower than 160 is faster (but it must be a multiple of 32).
    model.net_info["height"] = 416
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    videofile = 'yolo/static/videos/cctv3_video.mp4'
    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'

    global frames
    global picture
    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            global label_list
            global flag
            if type(output) == int:
                # No detections this frame; stream the raw frame with an empty label list.
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                picture = orig_im
                label_list = []
                print("label_list : ", label_list)
                collision(label_list)
                flag = 0

                # Encode the frame and yield it as one MJPEG chunk.
                ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
                detect_image_byte = jpeg2.tobytes()
                yield (b'--frame\r\n'
                       b'Content-Type: image/jpeg\r\n\r\n' + detect_image_byte + b'\r\n\r\n')

                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            picture = orig_im
            label_list = list(map(lambda x: write(x, orig_im), output))
            print("label_list : ", label_list)
            collision(label_list)
            flag = 0

            # Encode the annotated frame and yield it as one MJPEG chunk.
            ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
            detect_image_byte = jpeg2.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + detect_image_byte + b'\r\n\r\n')

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break
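# video_yolo_ready2() yields multipart JPEG chunks, i.e. an MJPEG stream. A
# minimal sketch of how such a generator is typically served with Flask; the
# app object and route name are assumptions for illustration only.
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # Each yielded b'--frame...' chunk becomes one JPEG frame in the browser.
    return Response(video_yolo_ready2(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')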
    videofile = args.video
    cap = cv2.VideoCapture(videofile)
else:
    # On Mac, 0 is the built-in camera and 1 is a USB webcam.
    # On Linux, 0 is video0, 1 is video1, and so on.
    cap = cv2.VideoCapture(args.source)

assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        img, orig_im, orig_dim = prep_image(frame, model_dim)
        orig_dim = torch.FloatTensor(orig_dim).repeat(1, 2)

        with torch.no_grad():
            output = model(img)
        # After write_results, each row is
        # [batch index, x1, y1, x2, y2, objectness score, class index, class prob].
        output = write_results(output, confidence, num_classes, model_dim,
                               orig_dim, nms=True, nms_conf=nms_thesh)

        # If there are no predictions, just show the image and move to the next frame.
def main_main():
    global classes, colors
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    videofile = 'video.avi'
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
            # print(float(inp_dim), inp_dim)
            # im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))
            list(map(lambda x: write(x, orig_im), output))

            cv2.imshow("frame", orig_im)  # show the annotated frame
            key = cv2.waitKey(1)
            # time.sleep(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break
def object_detection():
    """
    Load the pre-trained weight file and the cfg file, which covers 80
    object classes. Using arg_parse(), every detection in a frame is
    filtered by the confidence and NMS threshold values.
    """
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    num_classes = 80
    width, height = 640, 480
    q = queue.Queue()
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    #### Test the performance of the model on a static image
    # model(get_test_input(inp_dim, CUDA), CUDA)
    # model.eval()

    #### Test the performance of the model on a video file
    videofile = 'video3.avi'

    #### If you access a third-party camera via IP address, use this part of the code
    # address = ConnectionServer.connect()
    # address = 'http://' + address[0] + ':8000/stream.mjpg'
    # print("Fetching Video from", address)

    # assert cap.isOpened(), 'Cannot capture source'  #### assert this message if no camera is found

    count = 0
    frames = 0
    start = time.time()
    cap = cv2.VideoCapture(0)

    while True:
        ret, frame = cap.read()

        #### Pre-process every frame that comes from the source
        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if CUDA:  #### If a GPU is properly installed, run on the GPU
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():  #### Run the model in evaluation mode
            output = model(Variable(img), CUDA)

        #### Localize the objects in a frame
        output = write_results(output, confidence, num_classes,
                               nms=True, nms_conf=nms_thesh)

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
            cv2.imshow("Object Detection Window", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            continue

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
        im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        classes = load_classes('data/coco.names')
        colors = pkl.load(open("pallete", "rb"))
        list(map(lambda x: write(x, orig_im, classes, colors), output))

        cv2.imshow("Object Detection Window", orig_im)  #### Generate the window
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
        frames += 1
        # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

        l = print_labels()[0]
        print(l)

        # HOG-based people detector used for the distance estimate.
        hog = cv2.HOGDescriptor()
        hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
        found, w = hog.detectMultiScale(frame, winStride=(8, 8),
                                        padding=(32, 32), scale=1.05)

        get_number_of_object, get_distance = draw_detections(frame, found)
        if get_number_of_object >= 1 and get_distance != 0:
            feedback = ("{}".format(get_number_of_object) + " " + l +
                        " at {}".format(round(get_distance)) + " inches")
            speak.Speak(feedback)
            print(feedback)
        else:
            feedback = ("{}".format("1") + " " + l)
            speak.Speak(feedback)
            print(feedback)

    # Stop the capture
    cap.release()
    # Destroy the window
    cv2.destroyAllWindows()
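# draw_detections() above is external and not shown; as a hedged sketch, a
# distance estimate like the one spoken ("... at N inches") can come from the
# pinhole model: distance = (known_width * focal_length) / pixel_width.
# The constants below are illustrative, not values from this project.
def estimate_distance_inches(pixel_width, known_width_in=16.0, focal_length_px=600.0):
    """Rough pinhole-camera range estimate from a bounding-box width in pixels."""
    if pixel_width <= 0:
        return 0.0
    return (known_width_in * focal_length_px) / pixel_width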
def detect_objects(self, image_path):
    image_prep = prep_image(image_path, self.inp_dim)
    im_batches = [image_prep[0]]
    orig_ims = [image_prep[1]]
    im_dim_list = [image_prep[2]]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
    img_path = image_path

    if self.CUDA:
        im_dim_list = im_dim_list.cuda()

    write = False
    self.model(get_test_input(self.inp_dim, self.CUDA), self.CUDA)

    objs = {}
    i = 0
    for batch in im_batches:
        if self.CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = self.model(Variable(batch), self.CUDA)

        prediction = prediction[:, self.scales_indices]
        prediction = write_results(prediction, self.confidence, self.num_classes,
                                   nms=True, nms_conf=self.nms_thresh)
        prediction[:, 0] += i * self.batch_size

        if not write:
            output = prediction
            write = 1
        else:
            output = torch.cat((output, prediction))

        for im_num, image in enumerate(
                img_path[i * self.batch_size:min((i + 1) * self.batch_size, len(img_path))]):
            im_id = i * self.batch_size + im_num
            objs = [self.classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")
        i += 1

        if self.CUDA:
            torch.cuda.synchronize()

    try:
        output
    except NameError:
        print("No detections were made")
        exit()

    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(self.inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(self.classes[cls])
        color = random.choice(self.colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))

    det_names = pd.Series(img_path).apply(
        lambda x: "{}/det_{}".format(self.save_directory, x.split("/")[-1]))
    cv2.imwrite(det_names[0], orig_ims[0])
    torch.cuda.empty_cache()

    ret_path = det_names[0]
    return ret_path, objs, orig_ims[0]
def main(args):
    # Image preprocessing
    transform = transforms.Compose([transforms.ToTensor()])

    # Load vocabulary wrapper
    # Build the models
    # CUDA = torch.cuda.is_available()
    num_classes = 80

    yolov3 = Darknet(args.cfg_file)
    yolov3.load_weights(args.weights_file)
    yolov3.net_info["height"] = args.reso
    inp_dim = int(yolov3.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32
    print("yolo-v3 network successfully loaded")

    attribute_size = [15, 7, 3, 5, 8, 4, 15, 7, 3, 5, 3, 3, 4]
    encoder = EncoderClothing(args.embed_size, device, args.roi_size, attribute_size)

    # Prepare the images
    images = "test"
    try:
        list_dir = os.listdir(images)
        # list_dir.sort(key=lambda x: int(x[:-4]))
        imlist = [osp.join(osp.realpath('.'), images, img) for img in list_dir
                  if os.path.splitext(img)[1] in ('.jpg', '.JPG', '.png')]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
        print('Not a directory error')
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    yolov3.to(device)
    encoder.to(device)
    yolov3.eval()
    encoder.eval()
    encoder.load_state_dict(torch.load(args.encoder_path))

    for inx, image in enumerate(imlist):
        # print(image)
        image, orig_img, im_dim = prep_image(image, inp_dim)
        im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

        image_tensor = image.to(device)
        im_dim = im_dim.to(device)

        # Generate a caption from the image
        detections = yolov3(image_tensor, device, True)  # prediction mode for yolo-v3
        detections = write_results(detections, args.confidence, num_classes,
                                   device, nms=True, nms_conf=args.nms_thresh)
        # original image dimension --> im_dim

        # view_image(detections)
        os.system('clear')

        if type(detections) != int:
            if detections.shape[0]:
                bboxs = detections[:, 1:5].clone()
                im_dim = im_dim.repeat(detections.shape[0], 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

                detections[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                detections[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
                detections[:, 1:5] /= scaling_factor

                small_object_ratio = torch.FloatTensor(detections.shape[0])
                for i in range(detections.shape[0]):
                    detections[i, [1, 3]] = torch.clamp(detections[i, [1, 3]], 0.0, im_dim[i, 0])
                    detections[i, [2, 4]] = torch.clamp(detections[i, [2, 4]], 0.0, im_dim[i, 1])
                    object_area = (detections[i, 3] - detections[i, 1]) * \
                                  (detections[i, 4] - detections[i, 2])
                    orig_img_area = im_dim[i, 0] * im_dim[i, 1]
                    small_object_ratio[i] = object_area / orig_img_area

                detections = detections[small_object_ratio > 0.02]
                im_dim = im_dim[small_object_ratio > 0.02]

                if detections.size(0) > 0:
                    feature = yolov3.get_feature()
                    feature = feature.repeat(detections.size(0), 1, 1, 1)

                    # orig_img_dim = im_dim[:, 1:]
                    # orig_img_dim = orig_img_dim.repeat(1, 2)

                    scaling_val = 16
                    bboxs /= scaling_val
                    bboxs = bboxs.round()
                    bboxs_index = torch.arange(bboxs.size(0), dtype=torch.int)
                    bboxs_index = bboxs_index.to(device)
                    bboxs = bboxs.to(device)

                    roi_align = RoIAlign(args.roi_size, args.roi_size,
                                         transform_fpcoor=True).to(device)
                    roi_features = roi_align(feature, bboxs, bboxs_index)
                    # print(roi_features)
                    # print(roi_features.size())
                    # roi_features = roi_features.reshape(roi_features.size(0), -1)
                    # roi_align_feature = encoder(roi_features)
                    outputs = encoder(roi_features)

                    # attribute_size = [15, 7, 3, 5, 7, 4, 15, 7, 3, 5, 4, 3, 4]
                    # losses = [criteria[i](outputs[i], targets[i]) for i in range(len(attribute_size))]

                    for i in range(detections.shape[0]):
                        sampled_caption = []
                        # attr_fc = outputs[]
                        for j in range(len(outputs)):
                            # temp = outputs[j][i].data
                            max_index = torch.max(outputs[j][i].data, 0)[1]
                            word = attribute_pool[j][max_index]
                            sampled_caption.append(word)

                        # Swap lower length and lower type back into order.
                        c11 = sampled_caption[11]
                        sampled_caption[11] = sampled_caption[10]
                        sampled_caption[10] = c11

                        sentence = ' '.join(sampled_caption)
                        # again sampling for testing
                        print(str(i + 1) + ': ' + sentence)
                        write(detections[i], orig_img, sentence, i + 1, coco_classes, colors)
                        # list(map(lambda x: write(x, orig_img, captions), detections[i].unsqueeze(0)))

        cv2.imshow("frame", orig_img)
        key = cv2.waitKey(0)
        os.system('clear')
        if key & 0xFF == ord('q'):
            break
def yolo_human_det(img, model=None, reso=416, confidence=0.70):
    args = arg_parse()
    # args.reso = reso
    inp_dim = reso
    num_classes = 80

    CUDA = torch.cuda.is_available()
    if model is None:
        model = load_model(args, CUDA, inp_dim)

    if type(img) == str:
        assert os.path.isfile(img), 'The image path does not exist'
        img = cv2.imread(img)

    img, ori_img, img_dim = preprocess.prep_image(img, inp_dim)
    img_dim = torch.FloatTensor(img_dim).repeat(1, 2)

    with torch.no_grad():
        if CUDA:
            img_dim = img_dim.cuda()
            img = img.cuda()
        output = model(img, CUDA)
        output = write_results(output, confidence, num_classes,
                               nms=True, nms_conf=args.nms_thresh, det_hm=True)
        if len(output) == 0:
            return None, None

        img_dim = img_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim / img_dim, 1)[0].view(-1, 1)

    output[:, [1, 3]] -= (inp_dim - scaling_factor * img_dim[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * img_dim[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, img_dim[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, img_dim[i, 1])

    bboxs = []
    scores = []
    for i in range(len(output)):
        item = output[i]
        bbox = item[1:5].cpu().numpy()
        # Convert float32 coordinates to values rounded to 2 decimals.
        bbox = [round(i, 2) for i in list(bbox)]
        score = item[5].cpu().numpy()
        bboxs.append(bbox)
        scores.append(score)
    scores = np.expand_dims(np.array(scores), 1)
    bboxs = np.array(bboxs)

    return bboxs, scores
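# A short usage sketch for yolo_human_det(); the image path is illustrative.
if __name__ == '__main__':
    bboxs, scores = yolo_human_det('imgs/messi.jpg', reso=416, confidence=0.70)
    if bboxs is None:
        print('no person detected')
    else:
        for box, score in zip(bboxs, scores):
            print('person at {} with confidence {:.2f}'.format(box, float(score)))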
def SlowFast(model, CUDA, videofile, video_name):
    # Video file on which to run the model
    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    last = np.array([])
    last_time = time.time()
    start = time.time()

    ####### for SlowFast detection ##########
    buffer = deque(maxlen=64)
    resize_width = 400
    resize_height = 300
    count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            ####### for SlowFast detection ##########
            f = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Resize frames if they are not already the final size
            # (note: resize the RGB copy, not the raw frame).
            f = cv2.resize(f, (resize_width, resize_height))
            f = normalize(f)
            buffer.append(f)

            frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            print(frame_height, frame_width)
            scale = [resize_width / frame_width, resize_height / frame_height]

            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                # cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            output = output.cpu().data.numpy()

            # Convert corner boxes to center format for deep sort.
            bbox_xywh = output[:, 1:5]
            bbox_xywh[:, 2] = bbox_xywh[:, 2] - bbox_xywh[:, 0]
            bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1]
            bbox_xywh[:, 0] = bbox_xywh[:, 0] + (bbox_xywh[:, 2]) / 2
            bbox_xywh[:, 1] = bbox_xywh[:, 1] + (bbox_xywh[:, 3]) / 2
            cls_conf = output[:, 5]
            cls_ids = output[:, 7]

            if bbox_xywh is not None:
                mask = cls_ids == 0.0  # keep only the person class
                bbox_xywh = bbox_xywh[mask]
                cls_conf = cls_conf[mask]
                outputs = deepsort.update(bbox_xywh, cls_conf, orig_im)

                if len(outputs) > 0:
                    bbox_xyxy = outputs[:, :4]
                    identities = outputs[:, -1]

                    if len(buffer) == 64 and count % 3 == 0:
                        b = buffer
                        a = time.time()
                        b = np.array(b, dtype=np.float32)
                        print("time:", time.time() - a)
                        b = to_tensor(b)
                        image_batch = torch.tensor(b, dtype=torch.float).unsqueeze(0).cuda()

                        # Scale tracked boxes to the resized clip resolution
                        # (np.float is deprecated; use np.float64).
                        bbox_xyxy = np.array(bbox_xyxy, dtype=np.float64)
                        bbox_xyxy[:, [0, 2]] *= scale[0]
                        bbox_xyxy[:, [1, 3]] *= scale[1]
                        detector_bboxes = torch.tensor(bbox_xyxy,
                                                       dtype=torch.float).unsqueeze(0).cuda()

                        with torch.no_grad():
                            detection_bboxes, detection_classes, detection_probs = \
                                model_sf.eval().forward(image_batch,
                                                        detector_bboxes_batch=detector_bboxes)

                        detection_bboxes = np.array(detection_bboxes.cpu())
                        detection_classes = np.array(detection_classes)
                        detection_probs = np.array(detection_probs)

                        # Get the corresponding classification label and map
                        # the boxes back to the original frame size.
                        detection_bboxes[:, [0, 2]] /= scale[0]
                        detection_bboxes[:, [1, 3]] /= scale[1]

                        imshow(video_name, frame, detection_bboxes, detection_classes,
                               detection_probs, identities, count)

            count += 1
            # cv2.imshow("frame", orig_im)
            key = cv2.waitKey(0)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break

    convert_to_video = (f'ffmpeg -framerate 30 -pattern_type glob '
                        f'-i "demo/outputs/{video_name}/frames/*.jpg" '
                        f'-c:v libx264 -pix_fmt yuv420p '
                        f'demo/outputs/{video_name}/videos/video.mp4')
    os.system(convert_to_video)
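# os.system() with an f-string breaks if video_name contains spaces or shell
# metacharacters. A hedged alternative using subprocess with an argument list
# (same ffmpeg flags as above; -pattern_type glob lets ffmpeg expand the
# pattern itself, so no shell is needed):
import subprocess

def frames_to_video(video_name):
    subprocess.run([
        "ffmpeg", "-framerate", "30", "-pattern_type", "glob",
        "-i", f"demo/outputs/{video_name}/frames/*.jpg",
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        f"demo/outputs/{video_name}/videos/video.mp4",
    ], check=True)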
if __name__ == '__main__':
    args = arg_parse()

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights("yolov3.weights")
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    model.eval()

    img = cv2.imread(args.image)
    img, orig_im, dim = prep_image(args.image, inp_dim)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)

    with torch.no_grad():
        output = model(torch.autograd.Variable(img), False)

    classes = load_classes(args.classes)
    output = write_results(output, confidence=0.5, num_classes=len(classes),
                           nms=True, nms_conf=0.4)

    class_counter = Counter([classes[int(obj[-1])] for obj in output])
    print("Class counts: " + str(class_counter))
    tot_objects = output.size(0)
def main():
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    # print("loc: ", loc)
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    status = False
    option = args.option

    # Open the sources based on the option.
    if option == "webcam":
        # if loc == "front":
        cap_front = cv2.VideoCapture(0)
        # else:
        cap_back = cv2.VideoCapture(1)
    elif option == "video":
        videofile1 = args.file1
        videofile2 = args.file2
        cap_front = cv2.VideoCapture(videofile1)
        cap_back = cv2.VideoCapture(videofile2)
    else:
        imagefile1 = args.file1
        imagefile2 = args.file2
        cap_front = cv2.VideoCapture(imagefile1)
        cap_back = cv2.VideoCapture(imagefile2)
        status = True

    assert cap_back.isOpened(), 'Cannot capture source'
    assert cap_front.isOpened(), 'Cannot capture source'

    max_val_f = 0
    max_val_b = 0
    tmp = 0
    classes = load_classes('data/coco.names')
    colors = pkl.load(open("pallete", "rb"))

    while cap_back.isOpened() or cap_front.isOpened():
        print("-----------------------------------")
        start = time.time()

        # Read both videos.
        ret_front, frame_front = cap_front.read()
        ret_back, frame_back = cap_back.read()

        if ret_front and ret_back:
            # Preprocess both images.
            img_f, orig_im_f, dim_f = prep_image(frame_front, inp_dim)
            img_b, orig_im_b, dim_b = prep_image(frame_back, inp_dim)
            im_dim_f = torch.FloatTensor(dim_f).repeat(1, 2)
            im_dim_b = torch.FloatTensor(dim_b).repeat(1, 2)

            if CUDA:
                im_dim_f = im_dim_f.cuda()
                img_f = img_f.cuda()
                im_dim_b = im_dim_b.cuda()
                img_b = img_b.cuda()

            with torch.no_grad():
                output_f = model(Variable(img_f), CUDA)
                output_b = model(Variable(img_b), CUDA)

            output_f = write_results(output_f, confidence, num_classes,
                                     nms=True, nms_conf=nms_thesh)
            output_b = write_results(output_b, confidence, num_classes,
                                     nms=True, nms_conf=nms_thesh)

            im_dim_f = im_dim_f.repeat(output_f.size(0), 1)
            scaling_factor_f = torch.min(inp_dim / im_dim_f, 1)[0].view(-1, 1)
            im_dim_b = im_dim_b.repeat(output_b.size(0), 1)
            scaling_factor_b = torch.min(inp_dim / im_dim_b, 1)[0].view(-1, 1)

            # Front camera boxes.
            output_f[:, [1, 3]] -= (inp_dim - scaling_factor_f * im_dim_f[:, 0].view(-1, 1)) / 2
            output_f[:, [2, 4]] -= (inp_dim - scaling_factor_f * im_dim_f[:, 1].view(-1, 1)) / 2
            output_f[:, 1:5] /= scaling_factor_f
            for i in range(output_f.shape[0]):
                output_f[i, [1, 3]] = torch.clamp(output_f[i, [1, 3]], 0.0, im_dim_f[i, 0])
                output_f[i, [2, 4]] = torch.clamp(output_f[i, [2, 4]], 0.0, im_dim_f[i, 1])

            # Back camera boxes.
            output_b[:, [1, 3]] -= (inp_dim - scaling_factor_b * im_dim_b[:, 0].view(-1, 1)) / 2
            output_b[:, [2, 4]] -= (inp_dim - scaling_factor_b * im_dim_b[:, 1].view(-1, 1)) / 2
            output_b[:, 1:5] /= scaling_factor_b
            for i in range(output_b.shape[0]):
                output_b[i, [1, 3]] = torch.clamp(output_b[i, [1, 3]], 0.0, im_dim_b[i, 0])
                output_b[i, [2, 4]] = torch.clamp(output_b[i, [2, 4]], 0.0, im_dim_b[i, 1])

            # Count persons in each view.
            cnt_f = list(map(lambda x: write(x, orig_im_f, classes, colors)[1],
                             output_f)).count("person")
            cnt_b = list(map(lambda x: write(x, orig_im_b, classes, colors)[1],
                             output_b)).count("person")

            if max_val_f < cnt_f:
                max_val_f = cnt_f
            if max_val_b < cnt_b:
                max_val_b = cnt_b

            print("front person : " + str(cnt_f))
            print("back person : " + str(cnt_b))
            print("max_val_f : " + str(max_val_f))
            print("max_val_b : " + str(max_val_b))

            # Divide into cases.
            case_f = check_person(max_val_f, "front")
            case_b = check_person(max_val_b, "back")
            after_img_f = represent_case(orig_im_f, case_f)
            after_img_b = represent_case(orig_im_b, case_b)

            # Visualization: put the two views side by side.
            f_h, f_w, f_d = after_img_f.shape
            b_h, b_w, b_d = after_img_b.shape
            h = max(f_h, b_h)
            after_img = np.zeros((h, f_w + b_w, f_d), np.uint8)
            after_img[0:f_h, 0:f_w] = after_img_f[:, :]
            after_img[0:b_h, f_w:f_w + b_w] = after_img_b[:, :]

            cv2.imshow("frame", after_img)
            if status:
                cv2.waitKey(-1)

            cv2.imwrite('output/frame%04d.jpg' % (tmp), after_img)
            tmp += 1

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break

            print("\ndetecting time : " + str(time.time() - start))
            if case_f == "red" and case_b == "green":
                print("Go back!")
        else:
            break
def getFrames():
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 80

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    videofile = 'video.avi'
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            # This assignment was missing before the .cuda() call below.
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                continue

            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            # imencode expects an extension such as ".jpg"; tobytes() replaces
            # the deprecated tostring().
            ret, jpg = cv2.imencode(".jpg", orig_im)
            yield (b'--boundary\r\nContent-Type: image/jpeg\r\n\r\n' +
                   jpg.tobytes() + b'\r\n\r\n')

            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break
def video_demo(frame, inp_dim, quadrangle, onnx2trt, deepsort, classes, colors, h_inv):
    img, orig_im, dim = prep_image(frame, inp_dim)
    im_dim = torch.FloatTensor(dim).repeat(1, 2)
    im_dim = im_dim.cuda()

    start = time.time()
    output = onnx2trt.detect_thread(frame, img)
    end = time.time() - start

    if type(output) == int:
        return orig_im, []

    # Rescale boxes from the 416x416 input back to 1920x1080.
    im_dim = im_dim.repeat(output.size(0), 1)
    scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    # target_num = output.shape[0]
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

    # Targeted improvement (BaiYu): write_select replaces write and only
    # reports detections inside the quadrangle.
    result_list = list(map(lambda x: write_select(x, orig_im, classes, colors, quadrangle)[1],
                           output))

    # Track targets of every class.
    bbox_Tracking = []     # bounding boxes
    cls_ids_Tracking = []  # class indices
    cls_conf = []          # confidence scores
    for bi in range(len(result_list) - 1, -1, -1):
        # if result_list[bi][0] == 2 or result_list[bi][0] == 3:
        if result_list[bi][-1] <= 0:
            # Drop targets outside the ROI based on their confidence.
            continue
        bbox_Tracking.append(result_list[bi][3])
        cls_ids_Tracking.append(result_list[bi][4])
        cls_conf.append(result_list[bi][5])

    outputs_tracking = []
    # if bbox_Tracking is not None:
    bbox_xcycwh = []
    # Convert to (centerX, centerY, width, height) box format.
    for i in range(len(bbox_Tracking)):
        (cx, cy) = ((bbox_Tracking[i][0] + bbox_Tracking[i][2]) / 2.0,
                    (bbox_Tracking[i][1] + bbox_Tracking[i][3]) / 2.0)
        (w, h) = (bbox_Tracking[i][2] - bbox_Tracking[i][0],
                  bbox_Tracking[i][3] - bbox_Tracking[i][1])
        bbox_xcycwh.append([cx, cy, w, h])

    bbox_xcycwh = np.asarray(bbox_xcycwh)
    cls_conf = np.asarray(cls_conf)

    # global deepsort
    if bbox_xcycwh is not None and len(bbox_xcycwh) > 0:
        outputs_tracking = deepsort.update(bbox_xcycwh, cls_conf, cls_ids_Tracking, frame)

    end = time.time()
    print('runtime: {0:.2f} ms'.format((end - start) * 1000))

    if outputs_tracking is not None and len(outputs_tracking) > 0:
        bbox_xyxy = outputs_tracking[:, :4]   # x1, y1, x2, y2
        identities = outputs_tracking[:, 5]   # track_id
        clsTracking = outputs_tracking[:, 4]  # class label index
        trace = outputs_tracking[:, -1]       # trace of the object
        # Draw the tracked boxes and ids.
        ori_im = draw_bboxes(frame, bbox_xyxy, identities, clsTracking, trace, h_inv)

    return orig_im, outputs_tracking
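# The per-box loop above converts x1,y1,x2,y2 into cx,cy,w,h one box at a
# time; a vectorized NumPy equivalent, sketched under the assumption that the
# input is an N x 4 array of corner boxes:
import numpy as np

def xyxy_to_xcycwh(boxes_xyxy):
    boxes = np.asarray(boxes_xyxy, dtype=np.float32)
    out = np.empty_like(boxes)
    out[:, 0] = (boxes[:, 0] + boxes[:, 2]) / 2.0  # center x
    out[:, 1] = (boxes[:, 1] + boxes[:, 3]) / 2.0  # center y
    out[:, 2] = boxes[:, 2] - boxes[:, 0]          # width
    out[:, 3] = boxes[:, 3] - boxes[:, 1]          # height
    return out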
def detect_sign(frame, confidence, inp_dim, CUDA, model, num_classes, nms_thesh, classes_gtsrb):
    try:
        b, g, r = cv2.split(frame)        # get the b, g, r channels
        frame_rgb = cv2.merge([r, g, b])  # switch to RGB
    except:
        return None, "fsdaf"

    img, orig_im, dim = prep_image(frame, inp_dim)
    sign = True
    im_dim = torch.FloatTensor(dim).repeat(1, 2)

    if CUDA:
        im_dim = im_dim.cuda()
        img = img.cuda()

    with torch.no_grad():
        output = model(Variable(img), CUDA)
    output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh)

    if type(output) == int:
        print('no prediction observed')
    else:
        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
        output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        gtsrb_labels = np.zeros(output.shape[0])
        _signs_ = []
        _outputs_ = []
        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            if output[i][-1].round() == 0:
                y1, y2, x1, x2 = (output[i][1].int(), output[i][3].int(),
                                  output[i][2].int(), output[i][4].int())
                img = frame_rgb[x1:x2, y1:y2]
                if img.shape[0] == 0 and img.shape[1] == 0:
                    return None, frame_rgb

                out_vector = output[i][1:].cpu().numpy()
                try:
                    processed_img = np.array(preprocess_img(img))
                    processed_img_uint = np.transpose((processed_img * 255).astype(np.uint8),
                                                      (1, 2, 0))
                    processed_img_batch = np.expand_dims(processed_img, axis=0)
                    gtsrb_labels[i] = 0  # classification_model.predict_classes(processed_img_batch)
                    output[i][-1] = gtsrb_labels[i]
                    out_vector[-1] = gtsrb_labels[i]
                    frame_rgb, c1, c2, cls = write(output[i], frame_rgb,
                                                   gtsrb_labels[i], classes_gtsrb)
                    out_vector[0] = c1[0]
                    out_vector[1] = c1[1]
                    out_vector[2] = c2[0]
                    out_vector[3] = c2[1]
                    if (int(c1[0]) == 0 and int(c1[1]) == 0) or \
                       (int(c2[1]) == 0 and int(c2[1]) == 0):
                        return None, frame_rgb
                except Exception as e:
                    print(e)

                _outputs_.append(out_vector)
                print(_outputs_)
            else:
                output[i][-1] = output[i][-1] + 21

        if _outputs_ == []:
            return None, frame_rgb
        return np.array(_outputs_), frame_rgb
def drone():
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model(get_test_input(inp_dim, CUDA), CUDA)
    model.eval()

    videofile = args.video
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            if output[0][0] == 0.0:
                print("Drone Detected")
                break

            # colors = pkl.load(open("pallete", "rb"))
            list(map(lambda x: write(x, orig_im), output))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 600
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break
def demo():
    params = {
        "video": "video.avi",             # Video to run detection upon
        "dataset": "pascal",              # Dataset the network was trained on
        "confidence": 0.5,                # Object confidence to filter predictions
        "nms_thresh": 0.4,                # NMS threshold
        "cfgfile": "cfg/yolov3.cfg",      # Config file
        "weightsfile": "yolov3.weights",  # Weights file
        "repo": 416  # Input resolution: increase for accuracy, decrease for speed
    }

    confidence = float(params["confidence"])
    nms_thesh = float(params["nms_thresh"])
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5  # num_classes
    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    model.net_info["height"] = params["repo"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    videofile = params["video"]

    # Activate our centroid tracker.
    (H, W) = (None, None)
    ct = CentroidTracker(maxDisappeared=40, maxDistance=50)
    trackers = []
    trackableObjects = {}
    totalFrames = 0
    totalDown = 0
    totalUp = 0

    # Set source 0 (webcam) for debugging.
    cap = cv2.VideoCapture(0)
    fps = FPS().start()
    rects = []
    status = "Waiting.."

    assert cap.isOpened(), 'Cannot capture source'
    frames = 0
    start = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            for i in output:
                x0 = i[1].int()
                y0 = i[2].int()
                x1 = i[3].int()
                y1 = i[4].int()
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)

                # Start a dlib correlation tracker for each detection.
                tracker = dlib.correlation_tracker()
                rect = dlib.rectangle(x0, y0, x1, y1)
                tracker.start_track(rgb, rect)
                trackers.append(tracker)

            for tracker in trackers:
                # Set the status of the system to tracking.
                status = "Tracking.."
                # Update the tracker and grab the updated position.
                tracker.update(rgb)
                pos = tracker.get_position()

                # Unpack the position.
                x0 = int(pos.left())
                y0 = int(pos.top())
                x1 = int(pos.right())
                y1 = int(pos.bottom())

                # Add the bounding-box coordinates to the rectangles list.
                rects.append((x0, y0, x1, y1))

            # The center line decides whether an object moves 'up' or 'down'.
            cv2.line(frame, (0, h // 2), (w, h // 2), (0, 255, 255), 2)

            objects = ct.update(rects)

            # Loop over the tracked objects.
            for (objectID, centroid) in objects.items():
                to = trackableObjects.get(objectID, None)
                if to is None:
                    to = TrackableObject(objectID, centroid)
                else:
                    y = [c[1] for c in to.centroids]
                    direction = centroid[1] - np.mean(y)
                    to.centroids.append(centroid)

                    if not to.counted:
                        # A negative direction means the object is moving up;
                        # count it once its centroid is above the center line.
                        if direction < 0 and centroid[1] < h // 2:
                            totalUp += 1
                            to.counted = True
                        # A positive direction means the object is moving down;
                        # count it once its centroid is below the center line.
                        elif direction > 0 and centroid[1] > h // 2:
                            totalDown += 1
                            to.counted = True

                # Store the trackable object in the dictionary.
                trackableObjects[objectID] = to

                # Draw both the ID and the centroid of the object on the frame.
                text = "ID {}".format(objectID)
                cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

            info = [("Up", totalUp), ("Down", totalDown), ("Status", status)]
            for (i, (k, v)) in enumerate(info):
                text = "{}: {}".format(k, v)
                cv2.putText(frame, text, (10, h - ((i * 20) + 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 225), 2)

            # return bboxes
            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # Draw the detection boxes.
            list(map(lambda x: write(x, orig_im, classes, colors), output))
            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            fps.update()
            fps.stop()
            print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))
            # return xywh
        else:
            break
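# The up/down counting above compares the current centroid y against the mean
# of the object's previous y values; the same decision, isolated as a small
# sketch with illustrative names:
import numpy as np

def crossing_direction(centroid_y, previous_ys, line_y):
    """Return 'up', 'down', or None for a tracked centroid history."""
    direction = centroid_y - np.mean(previous_ys)
    if direction < 0 and centroid_y < line_y:
        return 'up'
    if direction > 0 and centroid_y > line_y:
        return 'down'
    return None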
stanfordBookstore = cv2.VideoCapture(stanfordBookstorePath)
assert capCam1.isOpened(), 'Cannot capture source'
assert stanfordBookstore.isOpened(), 'Cannot capture Stanford bookstore video'

frames = 0
start = time.time()
while capCam1.isOpened() and stanfordBookstore.isOpened():
    capCam1 = cv2.VideoCapture(cam1.url)
    ret, frame = capCam1.read()
    retSB, frameSB = stanfordBookstore.read()
    if ret and retSB:
        img, orig_im, dim = prep_image(frame, inp_dim)
        imgSB, orig_imSB, dimSB = prep_image(frameSB, inp_dim)
        # im_dim was missing before the .cuda() call, and im_dimSB was built
        # from dim instead of dimSB; both are fixed here.
        im_dim = torch.FloatTensor(dim).repeat(1, 2)
        im_dimSB = torch.FloatTensor(dimSB).repeat(1, 2)
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()
            im_dimSB = im_dimSB.cuda()
            imgSB = imgSB.cuda()

        output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes,
def process(videofile, model, args):
    print(videofile)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    num_classes = 80
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    cap = cv2.VideoCapture(videofile)
    FRAME_WIDTH = cap.get(3)   # cv2.CAP_PROP_FRAME_WIDTH
    FRAME_HEIGHT = cap.get(4)  # cv2.CAP_PROP_FRAME_HEIGHT
    FRAME_FPS = cap.get(5)     # cv2.CAP_PROP_FPS
    FRAME_FOURCC = cap.get(6)  # cv2.CAP_PROP_FOURCC
    # print(FRAME_WIDTH, FRAME_HEIGHT, FRAME_FPS, FRAME_FOURCC)

    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_file = args.output + 'result_' + videofile.replace(args.videos, '')
    print(output_file)
    out = cv2.VideoWriter(output_file, int(FRAME_FOURCC), FRAME_FPS,
                          (int(FRAME_WIDTH), int(FRAME_HEIGHT)))
    print(FRAME_WIDTH, FRAME_HEIGHT)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start_time = time.time()
    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

            if type(output) == int:
                frames += 1
                # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
                if not args.noshow:
                    cv2.imshow("frame", orig_im)
                if args.output is not None:
                    out.write(orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
            output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

            list(map(lambda x: write(x, orig_im), output))

            if not args.noshow:
                cv2.imshow("frame", orig_im)
            if args.output is not None:
                out.write(orig_im)

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break

    cap.release()
    out.release()
    end_time = time.time()
    print("time: {}".format(str(end_time - start_time)))
def image_get(q, window_name):
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    timeF = 20
    k = 0
    n = 0  # frame counter
    frames = 0
    i = 0
    start = time.time()

    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 2
    bbox_attrs = 5 + num_classes

    model = Darknet(args.cfgfile)
    if args.weights_path.endswith(".weights"):
        # Load darknet weights
        model.load_darknet_weights(args.weights_path)
    else:
        # Load checkpoint weights
        model.load_state_dict(torch.load(args.weights_path))

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    cv2.namedWindow(window_name, flags=cv2.WINDOW_FREERATIO)
    while True:
        frame = q.get()
        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes,
                               nms=True, nms_conf=nms_thesh)

        output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
        # im_dim = im_dim.repeat(output.size(0), 1)
        output[:, [1, 3]] *= frame.shape[1]
        output[:, [2, 4]] *= frame.shape[0]

        classes = load_classes('data/classes.names')
        colors = pkl.load(open("pallete", "rb"))
        list(map(lambda x: write(classes, colors, x, orig_im), output))
        list1 = list(map(lambda x: write1(x, orig_im), output))

        cv2.imshow(window_name, orig_im)  # display the video
        cv2.waitKey(1)
        frames += 1
        print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

        n = n + 1
        i += 1
        if n % timeF == 0:  # save every timeF frames
            for j in range(0, len(list1)):
                if list1[j] == 1:
                    k = k + 1
                if list1[j] == 0:
                    k = 0
            if k != 0:
                # Save an image whenever someone without a helmet is detected.
                cv2.imwrite('camera/{}.jpg'.format(i), orig_im)
ttl_num = len(imlist)
inter = 0
detect_flag = False
center_pos = lambda x: (x[0] + x[1]) / 2

# Deal with initial identities.
offset = 0
print('Computing initial identities...')
while True:
    frame, ogl, dim = prep_image(imlist[offset], inp_dim)
    position_pre = measure(frame, dim)[:, 1:5].numpy()
    position_pre[:, 0] = [(x[0] + x[2]) / 2 for x in position_pre]
    position_pre[:, 1] = [(x[1] + x[3]) / 2 for x in position_pre]

    frame, ogl, dim = prep_image(imlist[offset + interval], inp_dim)
    position_post = measure(frame, dim)[:, 1:5].numpy()
    position_post[:, 0] = [(x[0] + x[2]) / 2 for x in position_post]
    position_post[:, 1] = [(x[1] + x[3]) / 2 for x in position_post]

    identity.max_dim = dim
    pos_map, paired = pair_position(position_pre, position_post)
    if paired:
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
queue = Queue()
recorderthread = RecorderThread(queue, './output', fourcc, (width, height))
recorderthread.start()

frames = 0
start = time.time()
while cap.isOpened():
    ret, frame = cap.read()
    if ret:
        img, orig_im, dim = prep_image(frame, inp_dim, args.rotation)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)
        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            output = model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes,
                               nms=True, nms_conf=nms_thesh)

        if type(output) == int:
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
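# RecorderThread is referenced above but not shown; a minimal sketch of a
# queue-fed recorder matching the constructor call RecorderThread(queue,
# './output', fourcc, (width, height)). The output file name, fps default,
# and None stop sentinel are assumptions.
import threading
import cv2

class RecorderThread(threading.Thread):
    def __init__(self, queue, out_path, fourcc, size, fps=30.0):
        super().__init__(daemon=True)
        self.queue = queue
        self.writer = cv2.VideoWriter(out_path + '/record.avi', fourcc, fps, size)

    def run(self):
        while True:
            frame = self.queue.get()
            if frame is None:  # None acts as the stop sentinel
                break
            self.writer.write(frame)
        self.writer.release()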
def main(args, model):
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names')

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on the GPU.
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode.
    model.eval()

    read_dir = time.time()

    # Detection phase
    try:
        imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)
                  if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')]
    except NotADirectoryError:
        imlist = [osp.join(osp.realpath('.'), images)]
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()

    batches = [prep_image(img, inp_dim) for img in imlist]
    im_batches = [x[0] for x in batches]  # each of shape (1, 3, H, W), resized
    orig_ims = [x[1] for x in batches]    # original, unresized images
    im_dim_list = torch.FloatTensor([x[2] for x in batches]).repeat(1, 2)  # (nr_img, 4)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    if batch_size != 1:
        leftover = 1 if len(im_dim_list) % batch_size else 0
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [torch.cat(im_batches[i * batch_size:
                                           min((i + 1) * batch_size, len(im_batches))])
                      for i in range(num_batches)]

    i = 0
    write = False
    start_det_loop = time.time()
    for batch in im_batches:
        # Load the batch.
        if CUDA:
            batch = batch.cuda()

        with torch.no_grad():
            prediction = model(batch, CUDA)
        prediction = write_results(prediction, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

        if type(prediction) == int:
            i += 1
            continue

        prediction[:, 0] += i * batch_size
        if not write:
            output = prediction
            write = 1
        else:
            output = torch.cat((output, prediction))
        i += 1

        if CUDA:
            torch.cuda.synchronize()

    try:
        output
    except NameError:
        print("No detections were made")
        exit()

    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    output_recast = time.time()
    class_load = time.time()
    draw = time.time()

    def _pad_bbox_to_square(c1, c2, pad_ratio=0.1):
        x1, y1 = c1  # top left
        x2, y2 = c2  # bottom right
        w, h = x2 - x1, y2 - y1
        if w > h:
            a, x, y = w, x1, y1 - (w - h) / 2.0
        else:
            a, x, y = h, x1 - (h - w) / 2.0, y1
        # Expand the bbox by pad_ratio.
        x = int(x - a * pad_ratio / 2)
        y = int(y - a * pad_ratio / 2)
        a = int(a + a * pad_ratio)
        return a, x, y

    def _write(a, x, y, img, filename):
        crop = img[y:y + a, x:x + a]
        crop = cv2.resize(crop, (224, 224))
        cv2.imwrite(filename, crop)

    # Crop, resize, and save the largest person detection per image.
    img_idx2size = {}
    for o in output:
        if int(o[-1]) == 0:  # person class is 0
            img_idx = int(o[0])
            a, x, y = _pad_bbox_to_square(as_numpy(o[1:3].int()).tolist(),
                                          as_numpy(o[3:5].int()).tolist())
            img = orig_ims[img_idx]
            if 0 < y and y + a < img.shape[0] and 0 < x and x + a < img.shape[1]:
                if img_idx in img_idx2size.keys() and a < img_idx2size[img_idx]:
                    continue
                save_filename = "{}/{}_cropped.png".format(args.det, img_idx)
                _write(a, x, y, img, save_filename)
                img_idx2size[img_idx] = a

    end = time.time()

    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)",
                                   output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img",
                                   (end - load_batch) / len(imlist)))
    print("----------------------------------------------------------")
    torch.cuda.empty_cache()
def main():
    confidence = 0.5
    nms_thesh = 0.4
    num_classes = 80
    classes = load_classes('data/coco.names')

    print('cuda device count: ', torch.cuda.device_count())
    print("Loading network.....")
    net = Darknet('cfg/yolov3.cfg')
    net.load_weights('yolov3.weights')
    print("Network successfully loaded")

    net = net.to('cuda:0')
    net = net.eval()
    print('print model')
    print('model: ', net)

    # ------------------------ input images ------------------------
    input, origin, dim = prep_image('imgs/dog.jpg', 320)
    print('input:', input)
    input = input.to('cuda:0')
    print(input.shape)

    prediction = net(input, True)
    print('pre shape: ', prediction.shape)
    print('pre : ', prediction)
    prediction = write_results(prediction, confidence, num_classes,
                               nms=True, nms_conf=nms_thesh)
    print('pre shape1: ', prediction.shape)
    print('pre1: ', prediction)

    scaling_factor = min(320 / dim[0], 320 / dim[1], 1)
    print(scaling_factor)
    prediction[:, [1, 3]] -= (320 - scaling_factor * dim[0]) / 2
    prediction[:, [2, 4]] -= (320 - scaling_factor * dim[1]) / 2
    print('pre2: ', prediction)
    prediction[:, 1:5] /= scaling_factor
    print('pre3: ', prediction)

    for i in range(prediction.shape[0]):
        prediction[i, [1, 3]] = torch.clamp(prediction[i, [1, 3]], 0.0, dim[0])
        prediction[i, [2, 4]] = torch.clamp(prediction[i, [2, 4]], 0.0, dim[1])
    print('pre4: ', prediction)

    def write(x, batches, res):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = res
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, (255, 0, 0), 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, (255, 0, 0), -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, input, origin), prediction))
    cv2.imwrite('infout.png', origin)

    # ------------------------ input ones ------------------------
    # print('state dict: ', net.state_dict().keys())
    tmp = torch.ones(1, 3, 320, 320).to('cuda:0')
    print('input: ', tmp)
    out = net(tmp)
    print('output:', out)
    summary(net, (3, 320, 320))
    # return

    # Dump each tensor as hex-encoded big-endian float32 words, one per line.
    f = open("yolov3.wts", 'w')
    f.write("{}\n".format(len(net.state_dict().keys())))
    for k, v in net.state_dict().items():
        print('key: ', k)
        print('value: ', v.shape)
        vr = v.reshape(-1).cpu().numpy()
        f.write("{} {}".format(k, len(vr)))
        for vv in vr:
            f.write(" ")
            f.write(struct.pack(">f", float(vv)).hex())
        f.write("\n")
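# The .wts dump above stores each tensor as space-separated, hex-encoded
# big-endian float32 words after a "name length" prefix. A hedged sketch of
# the matching reader, with the layout inferred from the writer loop:
import struct

def read_wts(path):
    weights = {}
    with open(path) as f:
        count = int(f.readline())
        for _ in range(count):
            parts = f.readline().split()
            name, length = parts[0], int(parts[1])
            values = [struct.unpack('>f', bytes.fromhex(h))[0] for h in parts[2:]]
            assert len(values) == length
            weights[name] = values
    return weights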
def run(self, frame, frames):
    if self.skip_flag == 0:
        inp_dim = int(self.model.net_info["height"])
        assert inp_dim % 32 == 0
        assert inp_dim > 32

        self.model.eval()

        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if self.CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            output = self.model(Variable(img), self.CUDA)
        output = write_results(output,
                               self.confidence,
                               self.num_classes,
                               nms=True,
                               nms_conf=self.nms_thesh)

        # no detections in this frame: return it unmodified
        if isinstance(output, int):
            return frame

        # map box coordinates back to the original frame
        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

        output[:, [1, 3]] -= (inp_dim -
                              scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (inp_dim -
                              scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                            im_dim[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                            im_dim[i, 1])

        # note: loading these per frame is wasteful; they could be cached
        # on self at construction time
        classes = load_classes('data/coco.names')
        colors = pkl.load(open("pallete", "rb"))

        # record one row per detection: frame, x1, x2, y1, y2, class
        for i in range(output.shape[0]):
            data_list = np.array([[
                frames,
                int(output[i, 1]),
                int(output[i, 3]),
                int(output[i, 2]),
                int(output[i, 4]), classes[int(output[i, 7])]
            ]])
            self.data = np.vstack([self.data, data_list])

        list(
            map(lambda x: write(
                x,
                orig_im,
                classes,
                colors,
                frames,
            ), output))

        return orig_im
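# run() accumulates one row per detection in self.data
# (frame, x1, x2, y1, y2, class). A small sketch of persisting those rows
# once processing finishes, assuming self.data keeps the layout above;
# `save_detections` is a hypothetical helper, not part of the class:
import numpy as np

def save_detections(data, path="detections.csv"):
    header = "frame,x1,x2,y1,y2,class"
    # rows become strings after np.vstack on mixed types, so fmt="%s"
    np.savetxt(path, data, fmt="%s", delimiter=",", header=header,
               comments="")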
def run(self):
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    start = 0
    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model(get_test_input(inp_dim, CUDA), CUDA)
    model.eval()

    videofile = args.video
    cap = cv2.VideoCapture(videofile)
    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    first_iteration_indicator = 1
    count = 0
    fgbg = cv2.createBackgroundSubtractorMOG2()

    while cap.isOpened():
        # skip 11 frames between processed frames to keep up with the video
        for x in range(11):
            cap.grab()
        if first_iteration_indicator == 1:
            ret, frame = cap.read()
            first_frame = copy.deepcopy(frame)
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            height, width = gray.shape[:2]
            accum_image = np.zeros((height, width), np.float64)
            first_iteration_indicator = 0
        else:
            ret, frame = cap.read()

        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thresh)

            if isinstance(output, int):
                frames += 1
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (
                inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (
                inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            m = list(map(lambda x: write(x, orig_im), output))
            cv2.imshow("frame", orig_im)
            orig_im.fill(0)
            h = list(map(lambda x: write_heatmap(x, orig_im), output))
            s = len(m)
            # report the current person count to the UI; the original
            # `interface.self.update_people_number(self, s)` was not valid
            # Python, this assumes `interface` exposes the method directly
            interface.update_people_number(self, s)

            # append "frame_index,count" to count.txt
            mode = "w+" if count == 0 else "a+"
            with open("count.txt", mode) as f:
                f.write("%d,%d \r\n" % (count, s))
            count += 1

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            # print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))

            # accumulate thresholded motion into the heatmap
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            fgmask = fgbg.apply(gray)
            thresh = 150
            maxValue = 10
            ret, th1 = cv2.threshold(fgmask, thresh, maxValue,
                                     cv2.THRESH_BINARY)
            cv2.imwrite('diff-th1.jpg', th1)
            accum_image = cv2.add(accum_image, th1, dtype=cv2.CV_64F)
        else:
            break

    accum_image = np.uint8(accum_image)
    color_image = cv2.applyColorMap(accum_image, cv2.COLORMAP_JET)

    # overlay the color-mapped heatmap onto the first frame
    result_overlay = cv2.addWeighted(first_frame, 0.4, color_image, 0.4, 0)

    # save the final overlay image
    cv2.imwrite('diff-overlay.jpg', result_overlay)

    # plot the detected person count per processed frame
    graph_data = open('count.txt', 'r').read()
    lines = graph_data.split('\n')
    xs = []
    ys = []
    for line in lines:
        if len(line) > 1:
            x, y = line.split(',')
            xs.append(int(x))
            ys.append(int(y))
    plt.plot(xs, ys)
    plt.savefig('test.jpg')

    # plot the ground-truth person count per frame for comparison; each
    # line of the ground-truth file is
    # "person_id,frame,...,x_1,y_1,x_2,y_2,x_3,y_3,x_4,y_4"
    graph_data_2 = open('TownCentre-groundtruth.txt', 'r').read()
    lines = graph_data_2.split('\n')
    xs_g = []
    ys_g = []
    x_check = 0
    count_g = 0
    for line in lines:
        if len(line) > 1:
            (person_id, frame_id, cq, cq2, x_1, y_1, x_2, y_2, x_3, y_3,
             x_4, y_4) = line.split(',')
            if x_check == frame_id:
                count_g += 1
            else:
                # flush the count for the previous frame; skip the empty
                # initial count, and plot frame on x and count on y so
                # the axes match the detected-count plot above
                if count_g > 0:
                    xs_g.append(int(x_check))
                    ys_g.append(int(count_g))
                x_check = frame_id
                count_g = 1
    # flush the final frame's count
    if count_g > 0:
        xs_g.append(int(x_check))
        ys_g.append(int(count_g))
    plt.plot(xs_g, ys_g)
    plt.savefig('ground.jpg')

    # cleanup
    cap.release()
    cv2.destroyAllWindows()
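# The heatmap above is built frame by frame: background-subtract,
# threshold, accumulate, then color-map the sum. A minimal standalone
# sketch of that pipeline, assuming frames come from any cv2.VideoCapture;
# `build_heatmap` is a hypothetical helper:
import cv2
import numpy as np

def build_heatmap(cap, thresh=150, max_value=10):
    fgbg = cv2.createBackgroundSubtractorMOG2()
    accum = None
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        mask = fgbg.apply(gray)
        _, th = cv2.threshold(mask, thresh, max_value, cv2.THRESH_BINARY)
        if accum is None:
            accum = np.zeros(gray.shape, np.float64)
        accum = cv2.add(accum, th, dtype=cv2.CV_64F)
    # clip into 0..255 before color mapping
    accum = np.uint8(np.clip(accum, 0, 255))
    return cv2.applyColorMap(accum, cv2.COLORMAP_JET)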
def stream_yolo_ready():
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    num_classes = 5
    confidence = 0.7
    nms_thresh = 0.5
    start = 0
    CUDA = torch.cuda.is_available()
    bbox_attrs = 5 + num_classes

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    model.net_info["height"] = 416
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    # webcam stream
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), 'Cannot capture source'
    cap.set(3, 416)  # frame width
    cap.set(4, 416)  # frame height

    global frames
    global picture
    frames = 0
    start = time.time()

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thresh)

            global label_list
            global flag

            if isinstance(output, int):
                # no detections: stream the raw frame
                frames += 1
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))

                picture = orig_im
                label_list = []
                print("label_list : ", label_list)
                collision(label_list)
                flag = 0

                # code that saves the image
                #cv2.imwrite('yolo/static/images/fire_accident.jpg',orig_im)

                # encode and yield the frame as one MJPEG part
                ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
                detect_image_byte = jpeg2.tobytes()
                yield (b'--frame\r\n'
                       b'Content-Type: image/jpeg\r\n\r\n' +
                       detect_image_byte + b'\r\n\r\n')

                #cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            # map boxes back to the original frame (prep_image does a
            # plain resize, so scale by the frame dimensions)
            output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0,
                                         float(inp_dim)) / inp_dim
            im_dim = im_dim.repeat(output.size(0), 1)
            output[:, [1, 3]] *= frame.shape[1]
            output[:, [2, 4]] *= frame.shape[0]

            # global variables consumed by the streaming app
            picture = orig_im
            label_list = list(map(lambda x: write(x, orig_im), output))
            print("label_list : ", label_list)
            collision(label_list)
            flag = 0

            #cv2.imshow("frame", orig_im)

            # encode and yield the annotated frame as one MJPEG part
            ret2, jpeg2 = cv2.imencode('.jpg', orig_im)
            detect_image_byte = jpeg2.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + detect_image_byte +
                   b'\r\n\r\n')

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(frames /
                                                       (time.time() - start)))
        else:
            break
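# stream_yolo_ready() yields multipart JPEG chunks, which is the wire
# format Flask's Response can serve directly. A minimal sketch of the
# consuming route, assuming the app object and route name (neither shown
# in this file):
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    # each yielded chunk is a complete "--frame" part with its own headers
    return Response(stream_yolo_ready(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')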
def demo():
    params = {
        "video": "video.avi",  # Video to run detection upon
        "dataset": "pascal",  # Dataset on which the network has been trained
        "confidence": 0.5,  # Object Confidence to filter predictions
        "nms_thresh": 0.4,  # NMS Threshold
        "cfgfile": "cfg/yolov3.cfg",  # Config file
        "weightsfile": "yolov3.weights",  # Weights file
        "reso": 416  # Input resolution of the network. Increase to increase accuracy. Decrease to increase speed
    }
    confidence = float(params["confidence"])
    nms_thresh = float(params["nms_thresh"])
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80
    bbox_attrs = 5 + num_classes

    bboxes = []
    xywh = []

    print("Loading network.....")
    model = Darknet(params["cfgfile"])
    model.load_weights(params["weightsfile"])
    print("Network successfully loaded")

    model.net_info["height"] = params["reso"]
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()

    model.eval()

    videofile = params["video"]

    # set 0 for debug: read from the webcam instead of videofile
    cap = cv2.VideoCapture(0)

    assert cap.isOpened(), 'Cannot capture source'

    frames = 0
    start = time.time()
    while cap.isOpened():

        ret, frame = cap.read()
        print("ret: ", ret)
        if ret:
            print("frame: ", frame.shape)
            img, orig_im, dim = prep_image(frame, inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)

            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()

            with torch.no_grad():
                output = model(Variable(img), CUDA)
            output = write_results(output,
                                   confidence,
                                   num_classes,
                                   nms=True,
                                   nms_conf=nms_thresh)

            if isinstance(output, int):
                frames += 1
                print("no detections in this frame")
                print("FPS of the video is {:5.2f}".format(
                    frames / (time.time() - start)))
                cv2.imshow("frame", orig_im)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                continue

            im_dim = im_dim.repeat(output.size(0), 1)
            scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)

            output[:, [1, 3]] -= (inp_dim -
                                  scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
            output[:, [2, 4]] -= (inp_dim -
                                  scaling_factor * im_dim[:, 1].view(-1, 1)) / 2

            output[:, 1:5] /= scaling_factor

            for i in range(output.shape[0]):
                output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0,
                                                im_dim[i, 0])
                output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0,
                                                im_dim[i, 1])

            print("output: ", output)
            print("output: ", output.shape)

            # collect boxes as corners and as (x, y, w, h); note both
            # lists keep growing across the whole run
            for i in output:
                x0 = i[1].int()
                y0 = i[2].int()
                x1 = i[3].int()
                y1 = i[4].int()
                bbox = (x0, y0, x1, y1)
                bboxes.append(bbox)
                print(bbox)
                w = x1 - x0
                h = y1 - y0
                xywh.append((x0, y0, w, h))
                print(x0, y0, w, h)
            #return bboxes

            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

            # draw bboxes
            list(map(lambda x: write(x, orig_im, classes, colors), output))

            cv2.imshow("frame", orig_im)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print("FPS of the video is {:5.2f}".format(
                frames / (time.time() - start)))
            #return xywh
        else:
            break
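# demo() converts each corner box (x0, y0, x1, y1) to (x, y, w, h) inline,
# one element at a time. The same conversion as a small vectorised sketch,
# assuming `output` keeps boxes in columns 1:5 as above; `corners_to_xywh`
# is a hypothetical helper:
def corners_to_xywh(output):
    boxes = output[:, 1:5].int()
    xywh = boxes.clone()
    xywh[:, 2] = boxes[:, 2] - boxes[:, 0]  # w = x1 - x0
    xywh[:, 3] = boxes[:, 3] - boxes[:, 1]  # h = y1 - y0
    return xywh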
tl_camera.start_video_stream(display=False)
tl_camera.set_fps("low")
tl_camera.set_resolution("low")
tl_camera.set_bitrate(6)

frames = 0
start = time.time()
i = 0
# cap = cv2.VideoCapture('udp://192.168.10.1:11111')
# assert cap.isOpened(), 'Cannot capture source'
while True:
    frame = tl_camera.read_video_frame(strategy="newest")
    img, orig_im, dim = prep_image(frame, inp_dim)
    if CUDA:
        img = img.cuda()

    with torch.no_grad():
        output = model(Variable(img), CUDA)
    output = write_results(output,
                           confidence,
                           num_classes,
                           nms=True,
                           nms_conf=nms_thesh)

    if isinstance(output, int):
        frames += 1
        print("FPS of the video is {:5.2f}".format(frames /
                                                   (time.time() - start)))
        cv2.imshow("frame", orig_im)
        key = cv2.waitKey(1)
        if key & 0xFF == ord('q'):
            break
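# The FPS prints in these loops divide total frames by total elapsed time,
# so the figure lags behind the current rate. A sketch of a rolling
# estimate over the last N frames; `RollingFPS` is a hypothetical helper:
import time
from collections import deque

class RollingFPS:
    def __init__(self, window=30):
        self.times = deque(maxlen=window)

    def tick(self):
        # call once per processed frame; returns frames/sec over the window
        self.times.append(time.time())
        if len(self.times) < 2:
            return 0.0
        return (len(self.times) - 1) / (self.times[-1] - self.times[0])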
videofile = args.video
print(videofile)

cap = cv2.VideoCapture(videofile)

assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
while cap.isOpened():

    ret, frame = cap.read()  # grab the next frame from the video
    if ret:
        frame = laneDetection(frame)
        # get the input tensor, original image, and its dimensions
        img, orig_im, dim = prep_image(frame, inp_dim)
        im_dim = torch.FloatTensor(dim).repeat(1, 2)

        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            output = model(Variable(img), CUDA)
        output = write_results(output,
                               confidence,
                               num_classes,
                               nms=True,
                               nms_conf=nms_thesh)

        if isinstance(output, int):
            frames += 1