def __init__(self, id):
    """Start the webcam stream and load the YOLOv3 network.

    Args:
        id: Capture source handed to ``WebcamVideoStream`` as ``src``.
            The name shadows the ``id`` builtin; it is kept so existing
            callers keep working.
    """
    # Alternative capture backend: cv2.VideoCapture(id)
    self.cap = WebcamVideoStream(src=id).start()

    # Full YOLOv3. The tiny variant would use 'cfg/yolov3-tiny.cfg'
    # together with 'yolov3-tiny.weights'.
    self.cfgfile = "cfg/yolov3.cfg"
    self.weightsfile = "yolov3.weights"

    self.confidence = 0.5
    self.nms_thesh = 0.4
    self.num_classes = 80
    self.classes = load_classes('data/coco.names')
    # Close the palette file deterministically instead of leaking the handle.
    with open("pallete", "rb") as palette_file:
        self.colors = pkl.load(palette_file)

    self.model = Darknet(self.cfgfile)
    self.CUDA = torch.cuda.is_available()
    self.model.load_weights(self.weightsfile)

    # The network input height is forced to 160 regardless of the cfg file.
    self.model.net_info["height"] = 160
    self.inp_dim = int(self.model.net_info["height"])

    self.width = 1280  # 640
    self.height = 720  # 360

    print("Loading network.....")
    if self.CUDA:
        self.model.cuda()
    print("Network successfully loaded")

    # Sanity checks on the input size: a multiple of 32, larger than 32.
    assert self.inp_dim % 32 == 0
    assert self.inp_dim > 32

    self.model.eval()
def __init__(self, darknet_path=os.environ['HOME'] + '/programs/darknet/',
             model_name='yolov3', dataset='coco', dim=480):
    """
    Construct the YOLOObjectDetector class.

    Args:
        darknet_path: Path to darknet
        model_name: Model to use
        dataset: Dataset for model
        dim: Input dimension of YOLO network. A value that is not a
            multiple of 32 greater than 32 is rounded down to a multiple
            of 32, but never below 64 (the smallest valid size).
    """
    # YOLO file paths
    cfg_file = '%s/cfg/%s.cfg' % (darknet_path, model_name)
    weights_file = '%s/weights/%s.weights' % (darknet_path, model_name)
    classes_file = '%s/data/%s.names' % (darknet_path, dataset)

    # Load classes
    self.classes = load_classes(classes_file)
    self.np_classes = np.array(self.classes)
    # Close the palette file deterministically instead of leaking the handle.
    with open(DIR_NAME + '/pytorch-yolo-v3/pallete', 'rb') as palette_file:
        self.colors = pickle.load(palette_file)

    # Load model
    print('Loading ' + model_name + ' network')
    self.model = Darknet(cfg_file)
    self.model.load_weights(weights_file)
    print(model_name + ' network loaded successfully')

    # Check dimension to see if it is valid. An explicit test is used rather
    # than ``assert`` so the check still runs under ``python -O``.
    self.dim = dim
    if self.dim % 32 != 0 or self.dim <= 32:
        print('Invalid model dimension -- must be multiple of 32')
        # Rounding down alone could yield 0 or 32, which are still invalid,
        # so clamp to the smallest size that passes the check.
        self.dim = max(int(self.dim / 32) * 32, 64)
        print('Moded dimension changed to {}'.format(self.dim))

    # Change input size
    self.model.net_info['height'] = self.dim

    # Move to gpu if available
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        self.model.cuda()

    # Do one pass and set to evaluation mode
    self.model(self.get_test_input(), cuda_available)
    self.model.eval()
def __init__(self,
             cfg_file: pathlib.Path,
             weights_file: pathlib.Path,
             class_names_file: pathlib.Path,
             resolution: int = 416,
             class_filters: List[str] = None) -> None:
    """Load the network and the class-name list.

    Args:
        cfg_file: Network configuration file, forwarded to ``load_net``.
        weights_file: Weights file, forwarded to ``load_net``.
        class_names_file: File read by ``load_classes``.
        resolution: Value forwarded to ``load_net``.
        class_filters: Stored unchanged on the instance; ``None`` by default.
    """
    self.class_filters = class_filters

    # Placeholders that exist before ``load_net`` runs.
    self.net: Any = None
    self.input_dim: int = None
    self.load_net(cfg_file, weights_file, resolution)

    names = load_classes(class_names_file)
    self.class_names = names
    self.num_classes = len(names)
def align(graph1, graph2, threshold=1.0, method=stringdistances.equal_distance):
    """Collect correspondences between the entities of two graphs.

    Classes are compared with classes, properties with properties and
    individuals with individuals. Every pair whose
    ``_correspondence_measure`` value is at least ``threshold`` yields one
    ``Correspondence`` with relation ``'='``.

    Args:
        graph1: First graph.
        graph2: Second graph.
        threshold: Minimum measure value for a pair to be reported.
        method: Distance function forwarded to ``_correspondence_measure``.

    Returns:
        List of ``Correspondence`` objects, ordered classes first, then
        properties, then individuals.
    """
    # Entity lists are loaded up front, in the same order as before.
    prop_list1 = util.load_properties(graph1)
    prop_list2 = util.load_properties(graph2)
    class_list1 = util.load_classes(graph1)
    class_list2 = util.load_classes(graph2)
    ind_list1 = util.load_individuals(graph1)
    ind_list2 = util.load_individuals(graph2)

    entity_groups = (
        (class_list1, class_list2),
        (prop_list1, prop_list2),
        (ind_list1, ind_list2),
    )

    corr_list = []
    for entities1, entities2 in entity_groups:
        # Iterate the entities directly; the former xrange/zip index
        # counters were never used and tied the code to Python 2.
        for entity1 in entities1:
            for entity2 in entities2:
                value = _correspondence_measure(graph1, graph2,
                                                entity1, entity2,
                                                method=method)
                if value >= threshold:
                    corr_list.append(
                        Correspondence(entity1, entity2, '=', value))
    return corr_list
def __init__(self, model_def_file='', weights_file=''):
    """Load a Darknet model, its class names and the colour palette.

    Args:
        model_def_file: Network definition file passed to ``Darknet``.
        weights_file: Weights file passed to ``load_weights``.
    """
    self.model_def_file = model_def_file
    self.weights_file = weights_file
    self.model = Darknet(self.model_def_file)
    self.model.load_weights(self.weights_file)

    # NOTE(review): CUDA is hard-coded on, so construction fails on a
    # machine without a GPU -- confirm whether that is intended.
    self.CUDA = True
    if self.CUDA:
        self.model.cuda()
    print('load network finish')

    self.confidence = 0.5
    self.nms_thresh = 0.4
    self.num_classes = 80
    self.yolo_dir = '/home/yfji/SourceCode/pytorch-yolo-v3'

    # NOTE(review): this path is relative to the working directory, not to
    # ``yolo_dir`` (single-argument join is a no-op) -- confirm.
    self.classes = util.load_classes(op.join('data/coco.names'))

    # The mode 'rb' belongs to open(), not to the path join: previously the
    # code tried to open '<yolo_dir>/pallete/rb' in text mode.
    with open(op.join(self.yolo_dir, 'pallete'), 'rb') as palette_file:
        self.colors = pickle.load(palette_file)
def __init__(self, id_num, cfg_file, wt_file, class_file, pallete_file,
             nms_threshold=.3, conf=0.7, resolution=1024, num_classes=80,
             nms_classwise=True):
    """Load a Darknet model and bind it to a GPU selected by ``id_num``.

    Args:
        id_num: 0 or 1 selects that CUDA device; any other value keeps the
            model on the CPU.
        cfg_file: Network configuration file passed to ``Darknet``.
        wt_file: Weights file passed to ``load_weights``.
        class_file: File read by ``load_classes``.
        pallete_file: Pickled colour palette.
        nms_threshold: Stored as ``self.nms``.
        conf: Stored as ``self.conf``.
        resolution: Network input height; must be a multiple of 32 and
            greater than 32.
        num_classes: Stored as ``self.num_classes``.
        nms_classwise: Stored as ``self.nms_classwise``.
    """
    # Set up the neural network
    print("Loading network.....")
    self.model = Darknet(cfg_file)
    self.model.load_weights(wt_file)
    print("Network successfully loaded")

    self.nms = nms_threshold
    self.conf = conf
    self.nms_classwise = nms_classwise
    self.resolution = resolution  # sets size of max dimension

    # Devices 0 and 1 were handled by two identical branches; select the
    # device by its id instead.
    if id_num in (0, 1):
        self.CUDA = True
        torch.cuda.set_device(int(id_num))
        torch.cuda.empty_cache()
    else:
        self.CUDA = False

    # Close the palette file deterministically instead of leaking the handle.
    with open(pallete_file, "rb") as palette_handle:
        self.colors = pkl.load(palette_handle)
    self.num_classes = num_classes
    self.classes = load_classes(class_file)

    self.model.net_info["height"] = self.resolution
    inp_dim = int(self.model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If a GPU was selected, put the model on it
    if self.CUDA:
        self.model.cuda()

    # Set the model in evaluation mode
    self.model.eval()
def __init__(self, cfg_file, wt_file, class_file, pallete_file,
             nms_threshold=.3, conf=0.7, resolution=1024, num_classes=80,
             nms_classwise=True):
    """Build the Darknet model and store the detection settings.

    Args:
        cfg_file: Network configuration file passed to ``Darknet``.
        wt_file: Weights file passed to ``load_weights``.
        class_file: File read by ``load_classes``.
        pallete_file: Pickled colour palette.
        nms_threshold: Stored as ``self.nms``.
        conf: Stored as ``self.conf``.
        resolution: Network input height; asserted to be a multiple of 32
            and greater than 32.
        num_classes: Stored as ``self.num_classes``.
        nms_classwise: Stored as ``self.nms_classwise``.
    """
    # Build the network and read its weights.
    print("Loading network.....")
    network = Darknet(cfg_file)
    network.load_weights(wt_file)
    self.model = network
    print("Network successfully loaded")

    # Detection settings.
    self.nms, self.conf = nms_threshold, conf
    self.nms_classwise = nms_classwise
    self.resolution = resolution  # sets size of max dimension
    self.CUDA = torch.cuda.is_available()

    # Palette and class names.
    with open(pallete_file, 'rb') as palette_handle:
        self.colors = pkl.load(palette_handle)
    self.num_classes = num_classes
    self.classes = load_classes(class_file)

    # Apply the requested input size and sanity-check it.
    network.net_info["height"] = self.resolution
    side = int(network.net_info["height"])
    assert side % 32 == 0
    assert side > 32

    # Use the GPU when one is available, then switch to evaluation mode.
    if self.CUDA:
        network.cuda()
    network.eval()
def load_trained():
    """Build the model and load the fine-tuned weights into it.

    ``util.load_classes()`` is called first; the model is then created with
    ``nb_class`` equal to ``len(config.classes)``.

    Returns:
        The model with the fine-tuned weights loaded.
    """
    util.load_classes()
    class_count = len(config.classes)
    trained = load_model(nb_class=class_count)
    weights_path = config.get_fine_tuned_weights_path()
    trained.load_weights(weights_path)
    return trained
import time import torch import cv2 from torch.autograd import Variable from net import Net from util import load_classes, prep_image, write_results cfgfile = "cfg/yolov3.cfg" weightsfile = "weights/yolov3.weights" classes = load_classes("classes/coco.names") confidence = 0.5 num_classes = 80 nms_thesh = 0.4 print("Loading network.....") model = Net(cfgfile) model.load_weights(weightsfile) print("Network successfully loaded") model.eval() # Capture image cap = cv2.VideoCapture(0) inp_dim = int(model.net_info["height"]) def write(x, results): c1 = tuple(x[1:3].int())
# Build the network and load the pretrained weights.
model = Darknet(args.cfgfile)
model.load_weights("yolov3.weights")
print("Network successfully loaded")

# Override the network input height with the requested resolution.
model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
model.eval()

# prep_image receives the image path directly. The earlier separate
# cv2.imread() result was overwritten immediately, so that dead read is gone.
img, orig_im, dim = prep_image(args.image, inp_dim)
im_dim = torch.FloatTensor(dim).repeat(1, 2)

# Forward pass without gradient tracking; the second argument is the CUDA flag.
with torch.no_grad():
    output = model(torch.autograd.Variable(img), False)

classes = load_classes(args.classes)
output = write_results(output, confidence=0.5, num_classes=len(classes),
                       nms=True, nms_conf=0.4)

# The last column of each detection row is used as the class index.
class_counter = Counter([classes[int(obj[-1])] for obj in output])
print("Class counts: " + str(class_counter))

tot_objects = output.size(0)
tot_objs_str = f"Total objects detected: {tot_objects}"
print(tot_objs_str)

# Close the palette file deterministically instead of leaking the handle.
with open("pallete", "rb") as palette_file:
    colors = pkl.load(palette_file)
# Collapse the per-scale grid sizes into one box count, then collect the
# index range [(scale - 1) * n / 3, scale * n / 3) for every entry in scales.
num_boxes = sum(3 * (x**2) for x in num_boxes)
for scale in scales:
    scales_indices.extend(
        range((scale - 1) * num_boxes // 3, scale * num_boxes // 3))

images = args.images
batch_size = int(args.bs)
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()

num_classes = 80
classes = util.load_classes('data/coco.names')

# Set up the neural network
print("Loading network.....")
model = Darknet(args.cfgfile)
# The URL literal previously ended in a stray space, which is not a valid
# URL character and can make the download fail.
util.download('https://pjreddie.com/media/files/yolov3.weights',
              args.weightsfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

# Override the network input height; it must be a multiple of 32 and > 32.
model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32
temp_threshold = 96 # Threshold of CPU temperature to enter time.sleep t0 = time.time() ############################################################ ##### FOR YOLO ############################################# ############################################################ confidence = float(config['yolo']['confidence']) #float(args.confidence) nms_thesh = 0.4 #float(args.nms_thresh) CUDA = torch.cuda.is_available() num_classes = 80 # bbox_attrs = 5 + num_classes print("Loading network.....") model = Darknet(cwd + "cfg/yolov3.cfg") model.load_weights(cwd + "yolov3.weights") classes = load_classes(cwd + 'data/coco.names') print("Network successfully loaded") model.net_info["height"] = config['yolo']['height'] inp_dim = int(model.net_info["height"]) assert inp_dim % 32 == 0 assert inp_dim > 32 if CUDA: model.cuda() ############################################################ ############################################################ ############################################################ # HOG face detector hog = dlib.get_frontal_face_detector()
'Increase to increase accuracy. Decrease to increase speed'), default='416', type=str) return parser.parse_args() args = arg_parse() images = args.images batch_size = int(args.bs) confidence = float(args.confidence) nms_thresh = float(args.nms_thresh) start = 0 CUDA = torch.cuda.is_available() num_classes = 80 # Num classes in COCO classes = load_classes('data/coco.names') # Set up the neural network print('Loading network...') model = Darknet(args.cfgfile) model.load_weights(args.weightsfile) print('Network successfully loaded') model.net_info['height'] = args.reso inp_dim = int(model.net_info['height']) assert inp_dim % 32 == 0 assert inp_dim > 32 # if there's a GPU available, put the model on it. if CUDA: model.cuda()
if __name__ == '__main__': import torch from util import load_classes,imwrite,readcfg,prep_image import cv2 import time import os args = arg_parse() confidence = args.confidence nms_thresh = args.nms_thresh weightsfile = args.weightsfile[0] dv = args.dv class_names = load_classes('data/voc.names') class_num = len(class_names) CUDA = torch.cuda.is_available() cfg = readcfg('cfg/yolond') inp_dim = int(cfg['inp_size']) side = int(cfg['side']) num = int(cfg['num']) print('Loading network') model = load_model(args.model,weightsfile,1,CUDA) print('network successfully loaded') if CUDA: model.to(gpudevice) videofile = args.videofile if videofile is None:
string2 = string2.lower() dist_subs = stringdistances.substring_distance(string1, string2) synsets = wn.synsets(string1, wn.NOUN) if len(synsets) == 0: tokens = wordpunct_tokenize(string1) for token in tokens: synsets = wn.synsets(string1, wn.NOUN) if len(synsets) > 0: break if len(synsets) > 0: for synset in synsets: for lemma in synset.lemmas(): dist = stringdistances.substring_distance(lemma.name(), string2) if (dist < dist_subs): dist_subs = dist return dist_subs graph1 = util.graph_from_uri('http://purl.org/dc/elements/1.1/') graph2 = util.graph_from_uri('http://purl.org/dc/terms/') print 'graph sizes:', len(graph1), len(graph2) print 'num classes:', len(util.load_classes(graph1)), len(util.load_classes(graph2)) corr_list = alignment.align(graph1, graph2, threshold=0.9, method=jwnl_basic_synonym_distance) print 'num correspondences:', len(corr_list) for corr in corr_list: print corr.entity1, corr.relation, corr.entity2, corr.measure
# -*- coding: utf-8 -*- """ Created on Thu Jun 6 11:26:49 2019 @author: hasee """ import yolov3 import util import torch import cv2 num_classes=1 confidence=0.3 nms_theshold=0.4 classes=util.load_classes('data/animeface.names') model=yolov3.yolov3_darknet('cfg/animeface.cfg') #model.load_weights('weights/yolov3.weights') model.load_state_dict(torch.load('model_state_dict.pt')) img=cv2.imread('samples/test2.png') net_h,net_w=int(model.net_info['height']),int(model.net_info['width']) new_img,img_tensor=util.resize_img(img,net_h,net_w) _,prediction=model(img_tensor,torch.cuda.is_available()) prediction=util.write_results(prediction,confidence,num_classes) write_img=util.writebox(img,model,prediction,classes) cv2.imwrite('test.png',write_img)
import numpy as np import os from util import write_results, load_classes ### Model initialization CUDA = False model = Darknet('yolov3.cfg') if CUDA: model = model.cuda() model.load_weights('yolov3.weights') ### End Model initialization # These 2 lines are just for us to test our model frames_path = '../Istanbul_traffic_annotated/Istanbul_traffic_annotated/images' classes = load_classes('coco.names') # Predict Function def predict(frame_img): result = [] h, w, _ = frame_img.shape # Making it square by padding frame_img = cv2.copyMakeBorder(frame_img, 0, max(0, w - h), 0, max(0, h - w), cv2.BORDER_CONSTANT, value=0)
if type(output) == int: frames += 1 #print("FPS of the video is {:5.2f}".format( frames / (time.time() - start))) cv2.imshow("frame", orig_im) key = cv2.waitKey(1) if key & 0xFF == ord('q'): break continue output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim output[:, [1, 3]] *= frame.shape[1] output[:, [2, 4]] *= frame.shape[0] classes = load_classes('data/coco.names') print('129', classes) colors = pkl.load(open("pallete", "rb")) #list(map(lambda x: write(x, orig_im), output)) list(map(lambda x: write(x, orig_im), output)) cv2.imshow("frame", orig_im) key = cv2.waitKey(1) if key & 0xFF == ord('q'): break frames += 1 #print("FPS of the video is {:5.2f}".format( frames / (time.time() - start))) else: break
def main():
    """Count people on a front and a back source and show both side by side.

    The sources are two webcams (devices 0 and 1), two video files or two
    image files, chosen by ``args.option``. Each frame pair is run through
    the network, the number of "person" labels returned by ``write`` is
    counted per side, the running maxima are passed to ``check_person``, and
    the frames produced by ``represent_case`` are shown next to each other
    and written to ``output/``. The loop ends when either source stops
    delivering frames or when 'q' is pressed.
    """
    args = arg_parse()
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()
    num_classes = 80

    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    # Override the network input height; it must be a multiple of 32 and > 32.
    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    # Open the two sources. ``status`` is True only for still images, where
    # the window waits for a key press before continuing.
    option = args.option
    if option == "webcam":
        cap_front = cv2.VideoCapture(0)
        cap_back = cv2.VideoCapture(1)
        status = False
    else:
        cap_front = cv2.VideoCapture(args.file1)
        cap_back = cv2.VideoCapture(args.file2)
        status = option != "video"

    assert cap_back.isOpened(), 'Cannot capture source'
    assert cap_front.isOpened(), 'Cannot capture source'

    max_val_f = 0
    max_val_b = 0
    tmp = 0  # running index for the saved output frames

    classes = load_classes('data/coco.names')
    # Close the palette file deterministically instead of leaking the handle.
    with open("pallete", "rb") as palette_file:
        colors = pkl.load(palette_file)

    while cap_back.isOpened() or cap_front.isOpened():
        print("-----------------------------------")
        start = time.time()

        # read video
        ret_front, frame_front = cap_front.read()
        ret_back, frame_back = cap_back.read()
        if ret_front and ret_back:
            # preprocessing image
            img_f, orig_im_f, dim_f = prep_image(frame_front, inp_dim)
            img_b, orig_im_b, dim_b = prep_image(frame_back, inp_dim)

            im_dim_f = torch.FloatTensor(dim_f).repeat(1, 2)
            im_dim_b = torch.FloatTensor(dim_b).repeat(1, 2)

            if CUDA:
                im_dim_f = im_dim_f.cuda()
                img_f = img_f.cuda()
                im_dim_b = im_dim_b.cuda()
                img_b = img_b.cuda()

            with torch.no_grad():
                output_f = model(Variable(img_f), CUDA)
                output_b = model(Variable(img_b), CUDA)

            # NOTE(review): write_results may return a non-tensor when nothing
            # is detected; that case is not handled here -- confirm.
            output_f = write_results(output_f, confidence, num_classes,
                                     nms=True, nms_conf=nms_thesh)
            output_b = write_results(output_b, confidence, num_classes,
                                     nms=True, nms_conf=nms_thesh)

            im_dim_f = im_dim_f.repeat(output_f.size(0), 1)
            scaling_factor_f = torch.min(inp_dim / im_dim_f, 1)[0].view(-1, 1)

            im_dim_b = im_dim_b.repeat(output_b.size(0), 1)
            scaling_factor_b = torch.min(inp_dim / im_dim_b, 1)[0].view(-1, 1)

            # front: map box corners from network input coordinates back to
            # the source frame and clamp them to the frame size
            output_f[:, [1, 3]] -= (
                inp_dim - scaling_factor_f * im_dim_f[:, 0].view(-1, 1)) / 2
            output_f[:, [2, 4]] -= (
                inp_dim - scaling_factor_f * im_dim_f[:, 1].view(-1, 1)) / 2
            output_f[:, 1:5] /= scaling_factor_f

            for i in range(output_f.shape[0]):
                output_f[i, [1, 3]] = torch.clamp(output_f[i, [1, 3]], 0.0,
                                                  im_dim_f[i, 0])
                output_f[i, [2, 4]] = torch.clamp(output_f[i, [2, 4]], 0.0,
                                                  im_dim_f[i, 1])

            # back: same mapping for the second source
            output_b[:, [1, 3]] -= (
                inp_dim - scaling_factor_b * im_dim_b[:, 0].view(-1, 1)) / 2
            output_b[:, [2, 4]] -= (
                inp_dim - scaling_factor_b * im_dim_b[:, 1].view(-1, 1)) / 2
            output_b[:, 1:5] /= scaling_factor_b

            for i in range(output_b.shape[0]):
                output_b[i, [1, 3]] = torch.clamp(output_b[i, [1, 3]], 0.0,
                                                  im_dim_b[i, 0])
                output_b[i, [2, 4]] = torch.clamp(output_b[i, [2, 4]], 0.0,
                                                  im_dim_b[i, 1])

            # result: count the detections that write() reports as "person"
            cnt_f = list(
                map(lambda x: write(x, orig_im_f, classes, colors)[1],
                    output_f)).count("person")
            cnt_b = list(
                map(lambda x: write(x, orig_im_b, classes, colors)[1],
                    output_b)).count("person")

            if max_val_f < cnt_f:
                max_val_f = cnt_f
            if max_val_b < cnt_b:
                max_val_b = cnt_b

            print("front person : " + str(cnt_f))
            print("back person : " + str(cnt_b))
            print("max_val_f : " + str(max_val_f))
            print("max_val_b : " + str(max_val_b))

            # decide the case for each side from the running maxima
            case_f = check_person(max_val_f, "front")
            case_b = check_person(max_val_b, "back")

            after_img_f = represent_case(orig_im_f, case_f)
            after_img_b = represent_case(orig_im_b, case_b)

            # visualization: paste both frames next to each other on one canvas
            f_h, f_w, f_d = after_img_f.shape
            b_h, b_w, b_d = after_img_b.shape
            h = max(f_h, b_h)
            after_img = np.zeros((h, f_w + b_w, f_d), np.uint8)
            after_img[0:f_h, 0:f_w] = after_img_f[:, :]
            after_img[0:b_h, f_w:f_w + b_w] = after_img_b[:, :]

            cv2.imshow("frame", after_img)
            if status:
                cv2.waitKey(-1)
            cv2.imwrite('output/frame%04d.jpg' % (tmp), after_img)
            tmp += 1

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break

            print("\ndetecting time : " + str(time.time() - start))

            if case_f == "red" and case_b == "green":
                print("Go back!")
        else:
            break
def load_trained():
    """Load the saved model and then the class list.

    The model is read from ``config.get_model_path()``;
    ``util.load_classes()`` is called afterwards.

    Returns:
        The loaded model.
    """
    model_path = config.get_model_path()
    trained = load_model(model_path)
    util.load_classes()
    return trained
img, orig_im, orig_im_dim = prep_image(frame, inp_dim) orig_im_dim = torch.FloatTensor(orig_im_dim).to(device) img = img.to(device) with torch.no_grad(): output = model(img) # output = center_to_corner(output) # output = output.unsqueeze(0).view(-1, bbox_attrs) # output = np.asarray(output.squeeze(0)) output = write_results(output, confidence, num_classes, nms=True, nms_conf=nms_thesh) # Classes for labels and colors for bbox classes = load_classes(args.datafile) colors = pkl.load(open("pallete", "rb")) if output.shape[0] > 0: # Get x1y1x2y2 # Remember original image is square (or should be) output[:, 1] *= np.asarray(orig_im_dim[0])/inp_dim output[:, 2] *= np.asarray(orig_im_dim[1])/inp_dim output[:, 3] *= np.asarray(orig_im_dim[0])/inp_dim output[:, 4] *= np.asarray(orig_im_dim[1])/inp_dim for i in range(output.shape[0]): score = output[i, 6] if score >= confidence: img = np.asarray(img) # print(output[i, :])
break continue im_dim = im_dim.repeat(output.size(0), 1) scaling_factor = torch.min(inp_dim/im_dim,1)[0].view(-1,1) output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim[:,0].view(-1,1))/2 output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim[:,1].view(-1,1))/2 output[:,1:5] /= scaling_factor for i in range(output.shape[0]): output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim[i,0]) output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim[i,1]) classes = load_classes('data/obj.names') colors = pkl.load(open("pallete", "rb")) list(map(lambda x: write(x, orig_im), output)) cv2.imshow("frame", orig_im) cv2.imwrite('detection.png', orig_im) key = cv2.waitKey(1) if key & 0xFF == ord('q'): break frames += 1 print("FPS of the video is {:5.2f}".format( frames / (time.time() - start))) else: break
def main(args, model):
    """Detect people in the input images and save one square crop per image.

    Images come from ``args.images`` (a directory of .png/.jpeg/.jpg files
    or a single file). They are run through ``model`` in batches of
    ``args.bs``. For every detection of class 0 (person) a padded square
    box is computed; if it lies fully inside the image it is cropped,
    resized to 224x224 and written to ``<args.det>/<index>_cropped.png``.
    A later detection replaces an earlier crop of the same image unless its
    square is smaller. A timing summary is printed at the end.

    Args:
        args: Parsed options; reads ``images``, ``bs``, ``confidence``,
            ``nms_thresh``, ``reso`` and ``det``.
        model: The detection network; its ``net_info["height"]`` is
            overwritten with ``args.reso``.

    Raises:
        SystemExit: If the input path does not exist or nothing is detected.
    """
    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thesh = float(args.nms_thresh)
    CUDA = torch.cuda.is_available()

    num_classes = 80
    # Not used below; the call is kept so a missing names file still fails here.
    classes = load_classes('data/coco.names')

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    read_dir = time.time()
    # Detection phase
    try:
        imlist = [
            osp.join(osp.realpath('.'), images, img)
            for img in os.listdir(images)
            if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')
        ]
    except NotADirectoryError:
        # ``images`` names a single file rather than a directory.
        imlist = [osp.join(osp.realpath('.'), images)]
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        raise SystemExit

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()

    batches = [prep_image(img, inp_dim) for img in imlist]
    im_batches = [x[0] for x in batches]  # each shape (1, 3, H, W) resized H, W
    orig_ims = [x[1] for x in batches]  # each shape (1, 3, H0, W0) not resized
    im_dim_list = torch.FloatTensor([x[2] for x in batches]).repeat(1, 2)  # (nr_img, 4)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    if batch_size != 1:
        # One extra batch holds the remainder when the count does not divide.
        leftover = 1 if len(im_dim_list) % batch_size else 0
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [
            torch.cat(im_batches[k * batch_size:
                                 min((k + 1) * batch_size, len(im_batches))])
            for k in range(num_batches)
        ]

    # ``None`` marks "no detections yet"; it replaces probing for an unbound
    # name via NameError.
    output = None

    start_det_loop = time.time()
    for batch_idx, batch in enumerate(im_batches):
        # load the image
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(batch, CUDA)

        prediction = write_results(prediction, confidence, num_classes,
                                   nms=True, nms_conf=nms_thesh)

        # An int result means this batch produced no detections.
        if isinstance(prediction, int):
            continue

        # Turn the within-batch image index into an index into imlist.
        prediction[:, 0] += batch_idx * batch_size

        if output is None:
            output = prediction
        else:
            output = torch.cat((output, prediction))

        if CUDA:
            torch.cuda.synchronize()

    if output is None:
        print("No detections were made")
        raise SystemExit

    # Map box corners from network input coordinates back to each original
    # image and clamp them to the image size.
    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)

    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    output_recast = time.time()
    class_load = time.time()
    draw = time.time()

    def _pad_bbox_to_square(c1, c2, pad_ratio=0.1):
        """Return (side, x, y) of a centred square around the box, grown by pad_ratio."""
        x1, y1 = c1  # left up
        x2, y2 = c2  # right down
        w, h = x2 - x1, y2 - y1
        if w > h:
            a, x, y = w, x1, y1 - (w - h) / 2.0
        else:
            a, x, y = h, x1 - (h - w) / 2.0, y1
        # expand bbox
        x = int(x - a * pad_ratio / 2)
        y = int(y - a * pad_ratio / 2)
        a = int(a + a * pad_ratio)
        return a, x, y

    def _write(a, x, y, img, filename):
        """Crop the square from img, resize it to 224x224 and save it."""
        crop = img[y:y + a, x:x + a]
        crop = cv2.resize(crop, (224, 224))
        cv2.imwrite(filename, crop)

    # crop, resize and save person detection
    img_idx2size = {}  # image index -> side length of the crop saved so far
    for o in output:
        if int(o[-1]) != 0:  # person: 0
            continue
        img_idx = int(o[0])
        a, x, y = _pad_bbox_to_square(
            as_numpy(o[1:3].int()).tolist(),
            as_numpy(o[3:5].int()).tolist())
        img = orig_ims[img_idx]
        # Only save squares that lie strictly inside the image.
        if not (0 < y and y + a < img.shape[0]
                and 0 < x and x + a < img.shape[1]):
            continue
        # Skip a square smaller than the one already saved for this image.
        if img_idx in img_idx2size and a < img_idx2size[img_idx]:
            continue
        save_filename = "{}/{}_cropped.png".format(args.det, img_idx)
        _write(a, x, y, img, save_filename)
        img_idx2size[img_idx] = a

    end = time.time()

    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format(
        "Detection (" + str(len(imlist)) + " images)",
        output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img",
                                   (end - load_batch) / len(imlist)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()
im_dim = im_dim.repeat(output.size(0), 1) scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1) output[:, [1, 3]] -= ( inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2 output[:, [2, 4]] -= ( inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2 output[:, 1:5] /= scaling_factor for i in range(output.shape[0]): output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0]) output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1]) classes = load_classes('data_output/data/obj.names') print(classes, "classes") colors = pkl.load(open("pallete", "rb")) list(map(lambda x: write(x, orig_im), output)) cv2.imshow("frame", orig_im) cv2.imwrite('detection.png', orig_im) key = cv2.waitKey(1) if key & 0xFF == ord('q'): break frames += 1 print("FPS of the video is {:5.2f}".format( frames / (time.time() - start))) else:
def gen_dataset(N, data_path, compute_embed=True):
    """Sample N recipes and build a pickled dataset under ``data_path``.

    For every sampled recipe the entry holds its id, image id, raw class,
    a compact class index and the preprocessed image; the resized image is
    also saved under ``<data_path>/imgs``. Recipes whose image file is
    missing are skipped. When ``compute_embed`` is true, an external script
    is run to produce embeddings, which are attached to the matching
    entries. The result is written to ``<data_path>/data.pkl``.

    Args:
        N: Number of recipe ids to sample.
        data_path: Output directory (made absolute).
        compute_embed: Whether to compute and attach recipe embeddings.
    """
    data_path = os.path.abspath(data_path)
    raw_img_path = os.path.join(data_path, 'imgs')
    temp_path = os.path.join(data_path, 'temp_lmdb')
    for directory in (data_path, raw_img_path, temp_path):
        util.create_dir(directory)

    # Data from the entire raw dataset
    print('Loading data...')
    lmdb_data = util.load_lmdb()
    recipe2img_id = util.map_recipe_id_to_img_id(lmdb_data)
    recipe_classes = util.load_classes()

    # Data to go into our custom set
    print('Sampling recipe ids...')
    recipe_ids = util.sample_ids(lmdb_data, N)
    recipe_ids_decode = [raw_id.decode('utf-8') for raw_id in recipe_ids]
    util.repickle(recipe_ids_decode, os.path.join(data_path, 'temp_keys.pkl'))

    # Fill in everything except embeddings
    print('Filling in everything except embeddings...')
    dataset = {}
    classes = {}  # Maps raw class to the smallest number needed
    error_recipe_ids = []
    for position, (raw_id, text_id) in enumerate(
            zip(recipe_ids, recipe_ids_decode)):
        if position % 100 == 0:
            print('Filling in entry %d/%d...' % (position, N))

        sample = {}
        sample['recipe_id'] = text_id
        sample['img_id'] = recipe2img_id[raw_id][-1]
        sample['class_raw'] = recipe_classes[text_id]
        # A raw class seen for the first time gets the next free index.
        sample['class'] = classes.setdefault(sample['class_raw'], len(classes))

        try:
            source_path = util.get_img_path(sample['img_id'], util.RAW_IMG_PATH)
            img = util.resize_crop_img(source_path)
            save_img(sample['img_id'], img, raw_img_path)
            sample['img_pre'] = preprocess_img(img)
            dataset[text_id] = sample
        except FileNotFoundError:
            print('Could not find image with id %s. Skipping' % sample['img_id'])
            error_recipe_ids.append(sample['recipe_id'])

    # Hacky business to compute embeddings
    if compute_embed:
        print('Computing embeddings...')
        lmdb_slice = util.slice_lmdb(recipe_ids, lmdb_data)
        util.save_lmdb_data(lmdb_slice, temp_path)
        command = [
            'python', util.GEN_EMBEDDINGS_ROOT,
            '--model_path=%s' % util.MODEL_PATH,
            '--data_path=%s' % data_path,
            '--path_results=%s' % data_path,
        ]
        subprocess.run(command)

        embeddings = util.unpickle2(os.path.join(data_path, 'rec_embeds.pkl'))
        embedding_ids = util.unpickle2(os.path.join(data_path, 'rec_ids.pkl'))
        # Attach each embedding to its entry; ids without an entry are ignored.
        for row, embedding_id in enumerate(embedding_ids):
            if embedding_id in dataset:
                dataset[embedding_id]['recipe_emb'] = embeddings[row]
    else:
        print('Skipping embeddings...')

    print('Saving dataset...')
    payload = {'data': dataset, 'class_mapping': classes}
    util.repickle(payload, os.path.join(data_path, 'data.pkl'))
    print('...done!')