def load_yolov2(image, output_file, dataset='coco', threshold=0.5, nms_thresh=0.4):
    batch_size = 1
    confidence = threshold
    start = 0
    global num_classes
    imlist = image
    output_file_names = [output_file]
    CUDA = torch.cuda.is_available()

    if dataset == "pascal":
        inp_dim = 416
        num_classes = 20
        classes = load_classes('data/voc.names')
        weightsfile = 'yolov2-voc.weights'
        cfgfile = "cfg/yolo-voc.cfg"
    elif dataset == "coco":
        inp_dim = 544
        num_classes = 80
        classes = load_classes('data/coco.names')
        weightsfile = 'yolov2.weights'
        cfgfile = "cfg/yolo.cfg"
    else:
        print("Invalid dataset")
        exit()

    stride = 32

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(cfgfile)
    model.load_weights(weightsfile)
    print("Network successfully loaded")

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()
        model(get_test_input(inp_dim, CUDA))

    # Set the model in evaluation mode
    model.eval()

    read_dir = time.time()

    # Detection phase
    load_batch = time.time()
    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    leftover = 0
    if len(im_dim_list) % batch_size:
        leftover = 1

    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [torch.cat((im_batches[i * batch_size:
                                            min((i + 1) * batch_size, len(im_batches))]))
                      for i in range(num_batches)]

    i = 0
    output = torch.FloatTensor(1, 8)
    write = False

    start_det_loop = time.time()
    for batch in im_batches:
        # load the image
        start = time.time()
        if CUDA:
            batch = batch.cuda()

        # Inference only: no gradients needed (volatile=True is deprecated)
        with torch.no_grad():
            prediction = model(Variable(batch))
        prediction = prediction.data

        # Apply offsets to the result predictions.
        # Transform the predictions as described in the YOLO paper:
        # flatten the prediction vector
        # B x (bbox coord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes),
        # putting every proposed box as a row.
        # Keep the boxes with object confidence > threshold and
        # convert the coordinates to absolute coordinates.
        prediction = predict_transform(prediction, inp_dim, stride, model.anchors,
                                       num_classes, confidence, CUDA)

        if type(prediction) == int:
            i += 1
            continue

        # Perform NMS on these boxes, and save the results.
        # NMS and saving could be done separately for better abstraction,
        # but both operations require looping, so they are clubbed in one
        # loop instead of two -- loops are slower than vectorised operations.
        prediction = write_results(prediction, num_classes, nms=True, nms_conf=nms_thresh)
        end = time.time()
        # print(end - start)

        prediction[:, 0] += i * batch_size

        if not write:
            output = prediction
            write = True
        else:
            output = torch.cat((output, prediction))

        for image in imlist[i * batch_size: min((i + 1) * batch_size, len(imlist))]:
            im_id = imlist.index(image)
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(
                image.split("/")[-1], (end - start) / batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")
        i += 1

        if CUDA:
            torch.cuda.synchronize()

    output_recast = time.time()
    output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim))
    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long()) / inp_dim
    output[:, 1:5] *= im_dim_list

    class_load = time.time()
    colors = pkl.load(open("pallete", "rb"))

    draw = time.time()

    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                    cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))

    # det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det, x.split("/")[-1]))
    det_names = output_file_names
    list(map(cv2.imwrite, det_names, orig_ims))
    end = time.time()

    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)",
                                   output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch) / len(imlist)))
    print("----------------------------------------------------------")
    torch.cuda.empty_cache()
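# A minimal usage sketch for load_yolov2 above, assuming the repo's helpers
# (Darknet, prep_image, predict_transform, write_results, ...) are importable.
# The input and output paths are hypothetical:
#
#   load_yolov2(['imgs/dog.jpg'], 'det/dog.jpg', dataset='coco',
#               threshold=0.5, nms_thresh=0.4)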
# Get the dataloader for test data
test_loader = torch.utils.data.DataLoader(
    dataset.listDataset(testlist,
                        shape=(test_width, test_height),
                        shuffle=False,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                        ]),
                        train=False),
    batch_size=1, shuffle=False, **kwargs)

# Pass the model to GPU
if use_cuda:
    model = model.cuda()
    # model = torch.nn.DataParallel(model, device_ids=[0]).cuda()  # Multiple GPU parallelism

# Get the optimizer
params_dict = dict(model.named_parameters())
params = []
for key, value in params_dict.items():
    if key.find('.bn') >= 0 or key.find('.bias') >= 0:
        params += [{'params': [value], 'weight_decay': 0.0}]
    else:
        params += [{'params': [value], 'weight_decay': decay * batch_size}]
# Note: the per-group `params` list built above is not passed to the optimizer
# below (model.parameters() is used instead), so the weight-decay exemption for
# BN/bias parameters has no effect as written.
optimizer = optim.SGD(model.parameters(),
                      lr=learning_rate / batch_size,
                      momentum=momentum,
                      dampening=0,
                      weight_decay=decay * batch_size)
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert valid_batchsize > 1

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False, **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1
    conf_thresh = 0.005
    nms_thresh = 0.45
    if m.net_name() == 'region':  # region_layer
        shape = (0, 0)
    else:
        shape = (m.width, m.height)

    for _, (data, target, org_w, org_h) in enumerate(valid_loader):
        data = data.cuda()
        output = m(data)
        batch_boxes = get_all_boxes(output, shape, conf_thresh, m.num_classes,
                                    only_objectness=0, validation=True)

        for i in range(len(batch_boxes)):
            lineId += 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            # width, height = get_image_size(valid_files[lineId])
            width, height = float(org_w[i]), float(org_h[i])
            print(valid_files[lineId])
            boxes = batch_boxes[i]
            correct_yolo_boxes(boxes, width, height, m.width, m.height)
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                for j in range((len(box) - 5) // 2):
                    cls_conf = box[5 + 2 * j]
                    cls_id = box[6 + 2 * j]
                    prob = det_conf * cls_conf
                    fps[cls_id].write('%s %f %f %f %f %f\n' %
                                      (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
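# Illustrative call for the valid() routine above; the data/cfg/weight paths
# and the output prefix are assumptions, not fixed by this file:
#
#   valid('cfg/voc.data', 'cfg/yolo-voc.cfg', 'yolov2-voc.weights',
#         'comp4_det_test_')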
def valid(datacfg, cfgfile, weightfile, outfile):
    def truths_length(truths):
        for i in range(50):
            if truths[i][1] == 0:
                return i

    # Parse configuration files
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    meshname = options['mesh']
    backupdir = options['backup']
    name = options['name']
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    prefix = 'results'
    seed = int(time.time())
    gpus = '0'  # Specify which gpus to use
    test_width = 544
    test_height = 544
    torch.manual_seed(seed)
    use_cuda = True
    if use_cuda:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
    save = False
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    eps = 1e-5
    notpredicted = 0
    conf_thresh = 0.1
    nms_thresh = 0.4
    match_thresh = 0.5
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []
    edges_corners = [[0, 1], [0, 2], [0, 4], [1, 3], [1, 5], [2, 3],
                     [2, 6], [3, 7], [4, 5], [4, 6], [5, 7], [6, 7]]

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices),
                     np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    # diam = calc_pts_diameter(np.array(mesh.vertices))
    diam = float(options['diam'])

    # Read intrinsic camera parameters
    internal_calibration = get_camera_intrinsic()

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(cfgfile)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 1

    # Specify the number of workers for multiple processing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=valid_batchsize,
                                              shuffle=False, **kwargs)

    logging(" Testing {}...".format(name))
    logging(" Number of test samples: %d" % len(test_loader.dataset))

    # Iterate through test batches (Batch size for test data is 1)
    count = 0
    z = np.zeros((3, 1))
    for batch_idx, (data, target) in enumerate(test_loader):
        # Images
        img = data[0, :, :, :]
        img = img.numpy().squeeze()
        img = np.transpose(img, (1, 2, 0))

        t1 = time.time()
        # Pass data to GPU
        if use_cuda:
            data = data.cuda()
            target = target.cuda()

        # Wrap tensors in Variable class, set volatile=True for inference mode
        # and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()

        # Forward pass
        output = model(data).data
        t3 = time.time()

        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, conf_thresh, num_classes)
        t4 = time.time()

        # Iterate through all images in the batch
        for i in range(output.size(0)):
            # For each image, get all the predictions
            boxes = all_boxes[i]

            # For each image, get all the targets (for multiple object pose
            # estimation, there might be more than 1 target per image)
            truths = target[i].view(-1, 21)

            # Get how many objects are present in the scene
            num_gts = truths_length(truths)

            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = [truths[k][1], truths[k][2], truths[k][3], truths[k][4],
                          truths[k][5], truths[k][6], truths[k][7], truths[k][8],
                          truths[k][9], truths[k][10], truths[k][11], truths[k][12],
                          truths[k][13], truths[k][14], truths[k][15], truths[k][16],
                          truths[k][17], truths[k][18], 1.0, 1.0, truths[k][0]]
                best_conf_est = -1

                # If the prediction has the highest confidence, choose it as our
                # prediction for single object pose estimation
                for j in range(len(boxes)):
                    if boxes[j][18] > best_conf_est:
                        match = corner_confidence9(box_gt[:18],
                                                   torch.FloatTensor(boxes[j][:18]))
                        box_pr = boxes[j]
                        best_conf_est = boxes[j][18]

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [9, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [9, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * 640
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * 480
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * 640
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * 480
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr, axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by pnp
                R_gt, t_gt = pnp(
                    np.array(np.transpose(np.concatenate(
                        (np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),
                    corners2D_gt,
                    np.array(internal_calibration, dtype='float32'))
                R_pr, t_pr = pnp(
                    np.array(np.transpose(np.concatenate(
                        (np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),
                    corners2D_pr,
                    np.array(internal_calibration, dtype='float32'))

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)
                    np.savetxt(backupdir + '/test/gt/R_' + valid_files[count][-8:-3] + 'txt',
                               np.array(R_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/t_' + valid_files[count][-8:-3] + 'txt',
                               np.array(t_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/R_' + valid_files[count][-8:-3] + 'txt',
                               np.array(R_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/t_' + valid_files[count][-8:-3] + 'txt',
                               np.array(t_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/corners_' + valid_files[count][-8:-3] + 'txt',
                               np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/corners_' + valid_files[count][-8:-3] + 'txt',
                               np.array(corners2D_pr, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, internal_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, internal_calibration)
                proj_corners_gt = np.transpose(
                    compute_projection(corners3D, Rt_gt, internal_calibration))
                proj_corners_pr = np.transpose(
                    compute_projection(corners3D, Rt_pr, internal_calibration))
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Visualize
                fig = plt.figure()
                plt.xlim((0, 640))
                plt.ylim((0, 480))
                plt.imshow(scipy.misc.imresize(img, (480, 640)))
                # Projections
                for edge in edges_corners:
                    plt.plot(proj_corners_gt[edge, 0], proj_corners_gt[edge, 1],
                             color='g', linewidth=3.0)
                    plt.plot(proj_corners_pr[edge, 0], proj_corners_pr[edge, 1],
                             color='b', linewidth=3.0)
                plt.gca().invert_yaxis()
                # plt.show()
                plt.savefig(outfile + '/output_' + str(count) + '_.png',
                            bbox_inches='tight')
                fig.canvas.draw()
                count = count + 1

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5
    acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc5cm5deg = len(np.where((np.array(errs_trans) <= 0.05) &
                              (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) + eps)
    acc3d10 = len(np.where(np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('         predict : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print('            eval : %f' % (t5 - t4))
        print('           total : %f' % (t5 - t1))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging(' Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
    logging(' Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.format(diam * 0.1, acc3d10))
    logging(' Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging(" Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f" %
            (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging(' Translation error: %f m, angle error: %f degree, pixel error: %f pix' %
            (testing_error_trans / nts, testing_error_angle / nts, testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(predfile, {'R_gts': gts_rot, 't_gts': gts_trans,
                                    'corner_gts': gts_corners2D, 'R_prs': preds_rot,
                                    't_prs': preds_trans, 'corner_prs': preds_corners2D})
def main(camera_id):
    ip = str('192.168.0.2')
    name = str('admin')
    pw = str('a1234567')
    camera = HKCamera(ip, name, pw)

    threadPubMsg_shelfID_1 = pubmsg.MsgPublishClass(cameraID=camera_id, shelfID=1)
    threadPubMsg_shelfID_1.setDaemon(True)
    threadPubMsg_shelfID_1.start()

    threadPubMsg_shelfID_2 = pubmsg.MsgPublishClass(cameraID=camera_id, shelfID=2)
    threadPubMsg_shelfID_2.setDaemon(True)
    threadPubMsg_shelfID_2.start()

    threadPubMsg_dict = {
        'shelfID_1': threadPubMsg_shelfID_1,
        'shelfID_2': threadPubMsg_shelfID_2
    }

    model = loadDataset()
    cfg = Darknet('cfg/yolov3.cfg')
    cfg.load_weights('yolov3.weights')
    cfg.cuda()

    # global frame_number
    frame_number2 = [0]
    flag = [0]
    bridge = CvBridge()
    dic_change = {}
    dic_change_huojia2 = {}
    huojia1_id = 1
    huojia2_id = 2

    while not rospy.is_shutdown():
        frame_origin = camera.getFrame()
        frame_origin = np.array(frame_origin)
        frame_origin = cv2.resize(frame_origin, None, fx=0.75, fy=0.75,
                                  interpolation=cv2.INTER_AREA)
        frame_trans = copy.deepcopy(frame_origin)

        # # draw the shangpin area
        # left_x, top_y, right_m, bottom_n = shangpin_area()
        # cv2.rectangle(frame_origin, (left_x, top_y), (right_m, bottom_n), (0, 255, 0), 2)
        #
        # left_x_2, top_y_2, right_m_2, bottom_n_2 = shangpin_area_huojia2()
        # cv2.rectangle(frame_origin, (left_x_2, top_y_2), (right_m_2, bottom_n_2), (255, 0, 0), 2)

        res, camera_id = callback((None, cfg, model, frame_number2, bridge,
                                   camera_id, flag, frame_origin))
        if res == []:
            threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia1_id)]
            threadPubMsg.set_commodity_recognition_trigger_with_image(
                camera_id=camera_id, person_id=-1, shelf_id=-1, flag=0,
                flag1=0, flag2=0, flag_list=[], frame=frame_trans)

            threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia2_id)]
            threadPubMsg.set_commodity_recognition_trigger_with_image(
                camera_id=camera_id, person_id=-1, shelf_id=-1, flag=0,
                flag1=0, flag2=0, flag_list=[], frame=frame_trans)
            continue

        dic, dic_huojia2 = xuanze_original(res, frame_origin, model, cfg, camera_id,
                                           dic_change, dic_change_huojia2,
                                           huojia1_id, huojia2_id)

        if compare_dic(dic, dic_change) == False and \
                compare_dic(dic_huojia2, dic_change_huojia2) == False:
            pass
        else:
            dic, dic_huojia2 = xuanze(res, frame_origin, model, cfg,
                                      threadPubMsg_dict, camera_id, dic, dic_change,
                                      dic_huojia2, dic_change_huojia2,
                                      huojia1_id, huojia2_id, frame_trans)

        print("**********************")
        print("dic_change shelf1: {}".format(dic))
        print("dic_change_shelf2: {}".format(dic_huojia2))
        print("")
        dic_change = dic
        dic_change_huojia2 = dic_huojia2

    HKIPcamera.release()
init_epoch = model.seen / nsamples

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
test_loader = torch.utils.data.DataLoader(
    dataset.listDataset(testlist,
                        shape=(init_width, init_height),
                        shuffle=False,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                        ]),
                        train=False),
    batch_size=batch_size, shuffle=False, **kwargs)

if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()

params_dict = dict(model.named_parameters())
params = []
for key, value in params_dict.items():
    if key.find('.bn') >= 0 or key.find('.bias') >= 0:
        params += [{'params': [value], 'weight_decay': 0.0}]
    else:
        params += [{'params': [value], 'weight_decay': decay * batch_size}]
# Note: as in the snippet above, `params` is never handed to the optimizer;
# model.parameters() is used, so every parameter gets the same weight decay.
optimizer = optim.SGD(model.parameters(),
                      lr=learning_rate / batch_size,
                      momentum=momentum,
                      dampening=0,
                      weight_decay=decay * batch_size)


def adjust_learning_rate(optimizer, batch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = learning_rate
    for i in range(len(steps)):
        scale = scales[i] if i < len(scales) else 1
def generate_det(seq_dir, npy_dir, cfg_file, weight_file):
    # load yolo model
    m = Darknet(cfg_file)
    m.print_network()
    m.load_weights(weight_file)
    print('Loading weights from %s... Done!' % (weight_file))

    use_cuda = 1
    if use_cuda:
        m.cuda()

    if not os.path.exists(npy_dir):
        os.makedirs(npy_dir)

    seq_list = glob.glob(os.path.join(seq_dir, "*-YOLO"))
    seq_list = sorted(seq_list)
    for seq in seq_list:
        seq_name = os.path.basename(seq)
        print("processing: %s" % seq_name)

        det_dir = os.path.join(seq, "det")
        if not os.path.exists(det_dir):
            os.makedirs(det_dir)
        txt_file = os.path.join(det_dir, "det.txt")
        fid = open(txt_file, 'w')
        npy_file = os.path.join(npy_dir, seq_name + ".npy")

        img_dir = os.path.join(seq, "img1")
        img_list = os.listdir(img_dir)
        img_list = sorted(img_list)

        # One detection on the first frame up front (warm-up; result is discarded)
        img = cv2.imread(os.path.join(img_dir, img_list[0]))
        sized = cv2.resize(img, (m.width, m.height))
        sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
        boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)

        total_time = 0.0
        npy_list = []
        for i in range(len(img_list)):
            print("processing: %d/%d" % (i + 1, len(img_list)))
            img_name = img_list[i][:-4]
            img_idx = int(img_name)
            img_path = os.path.join(img_dir, img_list[i])
            img = cv2.imread(img_path)
            sized = cv2.resize(img, (m.width, m.height))
            sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB)
            # time_0 = time.time()
            boxes = do_detect(m, sized, 0.5, 0.4, use_cuda)
            # time_1 = time.time()
            # total_time += time_1 - time_0
            height, width = img.shape[:2]
            for j in range(len(boxes)):
                box = boxes[j]
                cls_id = box[6]
                if cls_id != 0:  # keep only class 0 (person in COCO)
                    continue
                x = (box[0] - box[2] / 2.0) * width
                y = (box[1] - box[3] / 2.0) * height
                w = box[2] * width
                h = box[3] * height
                cls_conf = box[5]
                txt_tmp = "%d,-1,%.1f,%.1f,%.1f,%.1f,%.3f\n" % (img_idx, x, y, w, h, cls_conf)
                fid.write(txt_tmp)
                npy_list.append([img_idx, -1.0, x, y, w, h, cls_conf, -1.0, -1.0, -1.0, 1.0])
        fid.close()
        np.save(npy_file, np.asarray(npy_list, dtype=np.float32), allow_pickle=False)
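# Hypothetical invocation of generate_det for a MOT-style directory layout;
# every path below is illustrative, not fixed by this file:
#
#   generate_det(seq_dir='data/MOT16/train', npy_dir='data/detections',
#                cfg_file='cfg/yolov3.cfg', weight_file='yolov3.weights')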
def valid(datacfg, cfgfile, weightfile, outfile):
    options = read_data_cfg(datacfg)
    valid_images = options['valid']
    name_list = options['names']
    prefix = 'results'
    names = load_class_names(name_list)

    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    m = Darknet(cfgfile)
    m.print_network()
    m.load_weights(weightfile)
    m.cuda()
    m.eval()

    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(m.width, m.height),
                                        shuffle=False,
                                        transform=transforms.Compose([
                                            transforms.ToTensor(),
                                        ]))
    valid_batchsize = 2
    assert valid_batchsize > 1

    kwargs = {'num_workers': 4, 'pin_memory': True}
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batchsize,
                                               shuffle=False, **kwargs)

    fps = [0] * m.num_classes
    if not os.path.exists('results'):
        os.mkdir('results')
    for i in range(m.num_classes):
        buf = '%s/%s%s.txt' % (prefix, outfile, names[i])
        fps[i] = open(buf, 'w')

    lineId = -1
    conf_thresh = 0.005
    nms_thresh = 0.45
    for batch_idx, (data, target) in enumerate(valid_loader):
        data = data.cuda()
        data = Variable(data, volatile=True)
        output = m(data).data
        batch_boxes = get_region_boxes(output, conf_thresh, m.num_classes,
                                       m.anchors, m.num_anchors, 0)

        for i in range(output.size(0)):
            lineId = lineId + 1
            fileId = os.path.basename(valid_files[lineId]).split('.')[0]
            width, height = get_image_size(valid_files[lineId])
            print(valid_files[lineId])
            boxes = batch_boxes[i]
            boxes = nms(boxes, nms_thresh)
            for box in boxes:
                x1 = (box[0] - box[2] / 2.0) * width
                y1 = (box[1] - box[3] / 2.0) * height
                x2 = (box[0] + box[2] / 2.0) * width
                y2 = (box[1] + box[3] / 2.0) * height

                det_conf = box[4]
                cls_conf = box[5]
                cls_id = box[6]
                prob = det_conf * cls_conf
                fps[cls_id].write('%s %f %f %f %f %f\n' % (fileId, prob, x1, y1, x2, y2))

    for i in range(m.num_classes):
        fps[i].close()
def main(camera_id, shelf_id):
    rospy.init_node('MultiProcessingNode', anonymous=True)
    ip = '192.168.0.' + str(camera_id)
    name = str('admin')
    pw = str('a1234567')
    camera = HKCamera(ip, name, pw)

    threadPubMsg_shelfID_1 = pubmsg.MsgPublishClass(cameraID=camera_id,
                                                    shelfID=shelf_id[0])
    threadPubMsg_shelfID_1.setDaemon(True)
    threadPubMsg_shelfID_1.start()

    shelf1 = 'shelfID_' + str(shelf_id[0])
    threadPubMsg_dict = {shelf1: threadPubMsg_shelfID_1}

    model = loadDataset()
    cfg = Darknet('cfg/yolov3.cfg')
    cfg.load_weights('yolov3.weights')
    cfg.cuda()

    # global frame_number
    frame_number2 = [0]
    flag = [0]
    bridge = CvBridge()
    dic_change = {}
    pre_res = {}
    huojia1_id = shelf_id[0]
    print("huojia1_id: {}".format(huojia1_id))
    tmp = 0

    while not rospy.is_shutdown():
        frame_origin = camera.getFrame()
        frame_origin = np.array(frame_origin)
        frame_origin = cv2.resize(frame_origin, None, fx=0.75, fy=0.75,
                                  interpolation=cv2.INTER_AREA)
        frame_trans = copy.deepcopy(frame_origin)

        # draw the shangpin area
        # left_x, top_y, right_m, bottom_n = shangpin_area(huojia1_id)
        # cv2.rectangle(frame_origin, (left_x, top_y), (right_m, bottom_n), (0, 255, 0), 2)

        res, camera_id, dict_res = callback((None, cfg, model, frame_number2,
                                             bridge, camera_id, flag, frame_origin,
                                             huojia1_id, pre_res))
        if res == []:
            if tmp > 30:
                threadPubMsg = threadPubMsg_dict['shelfID_' + str(huojia1_id)]
                threadPubMsg.set_commodity_recognition_trigger_with_image(
                    camera_id=camera_id, person_id=-1, shelf_id=huojia1_id,
                    flag=0, flag1=0, flag2=0, flag_list=[], frame=None)
                tmp = 0
            else:
                tmp += 1
            continue
        else:
            tmp = 0

        dic = xuanze_original(res, frame_origin, model, cfg, camera_id,
                              dic_change, huojia1_id, pre_res)

        if compare_dic(dic, dic_change) == False:
            pass
        else:
            dic = xuanze(res, frame_origin, model, cfg, threadPubMsg_dict,
                         camera_id, dic, dic_change, huojia1_id, frame_trans,
                         pre_res)

        # print("**********************")
        # print("dic_change_shelf_{}: {}".format(shelf_id[0], dic))
        # print("")

        change_idnum = len(pre_res.keys()) == len(res)
        if change_idnum:
            pre_res = dict_res
        else:
            pre_res = {}
        dic_change = dic

    HKIPcamera.release()
class Detector:
    def __init__(self, resolution=416):
        '''
        :param resolution: int, multiple of 32 greater than 32
        '''
        self.batch_size = 1
        self.scales = [1, 2, 3]
        self.resolution = resolution

        self.num_boxes = [self.resolution // 8,
                          self.resolution // 16,
                          self.resolution // 32]
        self.num_boxes = sum([3 * (x ** 2) for x in self.num_boxes])

        self.scales_indices = []
        for scale in self.scales:
            li = list(range((scale - 1) * self.num_boxes // 3,
                            scale * self.num_boxes // 3))
            self.scales_indices.extend(li)

        self.confidence = 0.5
        self.nms_thresh = 0.4
        self.start = 0
        self.save_directory = '.'
        self.cfg_file = 'cfg/yolov3.cfg'
        self.weights_file = "yolov3.weights"
        self.colors = pkl.load(open("pallete", "rb"))
        self.CUDA = torch.cuda.is_available()
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')

        # Set up the neural network
        print("Loading network.....")
        self.model = Darknet(self.cfg_file)
        self.model.load_weights(self.weights_file)
        print("Network successfully loaded")

        self.model.net_info["height"] = self.resolution
        self.inp_dim = self.model.net_info["height"]
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        # If there's a GPU available, put the model on GPU
        if self.CUDA:
            self.model.cuda()

        # Set the model in evaluation mode
        self.model.eval()

    def detect_objects(self, image_path):
        image_prep = prep_image(image_path, self.inp_dim)
        im_batches = [image_prep[0]]
        orig_ims = [image_prep[1]]
        im_dim_list = [image_prep[2]]
        im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)
        img_path = image_path

        if self.CUDA:
            im_dim_list = im_dim_list.cuda()

        write = False
        # Warm-up pass on a dummy input
        self.model(get_test_input(self.inp_dim, self.CUDA), self.CUDA)

        objs = {}
        i = 0
        for batch in im_batches:
            if self.CUDA:
                batch = batch.cuda()

            with torch.no_grad():
                prediction = self.model(Variable(batch), self.CUDA)

            prediction = prediction[:, self.scales_indices]
            prediction = write_results(prediction, self.confidence,
                                       self.num_classes, nms=True,
                                       nms_conf=self.nms_thresh)
            prediction[:, 0] += i * self.batch_size

            if not write:
                output = prediction
                write = True
            else:
                output = torch.cat((output, prediction))

            for im_num, image in enumerate(
                    img_path[i * self.batch_size:
                             min((i + 1) * self.batch_size, len(img_path))]):
                im_id = i * self.batch_size + im_num
                objs = [self.classes[int(x[-1])] for x in output if int(x[0]) == im_id]
                print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
                print("----------------------------------------------------------")
            i += 1

            if self.CUDA:
                torch.cuda.synchronize()

        try:
            output
        except NameError:
            print("No detections were made")
            exit()

        im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
        scaling_factor = torch.min(self.inp_dim / im_dim_list, 1)[0].view(-1, 1)
        output[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

        def write(x, batches, results):
            c1 = tuple(x[1:3].int())
            c2 = tuple(x[3:5].int())
            img = results[int(x[0])]
            cls = int(x[-1])
            label = "{0}".format(self.classes[cls])
            color = random.choice(self.colors)
            cv2.rectangle(img, c1, c2, color, 1)
            t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
            c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
            cv2.rectangle(img, c1, c2, color, -1)
            cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4),
                        cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
            return img

        list(map(lambda x: write(x, im_batches, orig_ims), output))

        det_names = pd.Series(img_path).apply(
            lambda x: "{}/det_{}".format(self.save_directory, x.split("/")[-1]))
        cv2.imwrite(det_names[0], orig_ims[0])
        torch.cuda.empty_cache()

        ret_path = det_names[0]
        return ret_path, objs, orig_ims[0]
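# A minimal usage sketch for the Detector class above; 'imgs/dog.jpg' is a
# hypothetical input path:
#
#   detector = Detector(resolution=416)
#   det_path, objs, annotated = detector.detect_objects('imgs/dog.jpg')
#   print(det_path, objs)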
def evaluate_with_gt_pos(cfgfile, weightfile, listfile, append, bestCnt,
                         withZoom=True, use_cuda=True, zoom_type=None):
    import cv2
    all_channels = [32, 64, 32, 64, 128, 64, 128, 64, 128, 256, 128, 256, 128,
                    256, 128, 256, 128, 256, 128, 256, 128, 256, 128, 256, 128,
                    256, 512, 256, 512, 256, 512, 256, 512, 256, 512, 256, 512,
                    256, 512, 256, 512, 256, 512, 1024, 512, 1024, 512, 1024,
                    512, 1024, 512, 1024, 512, 1024, 512, 1024, 512, 256, 256,
                    512, 256, 512, 256, 128, 128, 256, 128, 256, 128, 256, 2,
                    512, 1024, 512, 1024, 512, 256, 256, 512, 256, 512, 256,
                    128, 128, 256, 128, 256, 128, 256, 24]
    m = Darknet(cfgfile, all_channels)
    m.print_network()
    m.load_state_dict(torch.load(weightfile))
    print('Loading weights from %s... Done!' % (weightfile))
    if use_cuda:
        m.cuda()
    m.print_bn_weights()

    with open(listfile, 'r') as file:
        imglines = file.readlines()

    failed_pred = 0
    total_pred = 0
    for idx in range(len(imglines)):
        max_conf = 0
        imgfile = imglines[idx].rstrip()
        img = cv2.imread(imgfile)
        dirname, filename = os.path.split(imgfile)
        baseName, _ = os.path.splitext(filename)
        dirname = os.path.splitext(dirname[dirname.rfind('/') + 1:])[0]
        outFileName = dirname + '_' + baseName

        start = time.time()
        gtPoses = [None] * 3
        rawk = K_tango
        target_shape = (704, 704)
        max_conf = 0
        best_pred = None
        best_border = None
        save = False
        print('imgfile', imgfile)
        print(str(failed_pred) + ' ' + str(total_pred))

        # predPose, conf, deviation, p2d = do_detect(m, img, rawk, gtPoses, bestCnt, 0, 0, use_cuda)
        predPose = None  # ensure a defined value when do_detect fails below
        try:
            total_pred = total_pred + 1
            predPose, conf, p2d = do_detect(m, img, rawk, gtPoses, bestCnt, 0, 0, use_cuda)
        except Exception:
            failed_pred = failed_pred + 1
        finish = time.time()

        name = 'img/' + filename + '.png'
        if predPose is not None and (len(predPose) != 0):
            pose = predPose[0][1]
            print(str(pose))
            r = pose[:, 3]
            name = 'img/' + filename + '_' + str(conf) + '.png'
            # save_img_with_label(img, pose, rawk, name)
            print(name)
            quat = np.delete(pose, 3, axis=1).T
            q0, qvec = dcm2quat(quat)
            q = [q0, qvec[0], qvec[1], qvec[2]]
            print(conf)
        else:
            name = 'img/missing' + filename + '.png'
            print('problem', name, save)

    print(str(failed_pred) + ' ' + str(total_pred))
class Main(QtWidgets.QMainWindow, Ui_MainWindow):
    logQueue = multiprocessing.Queue()  # Log queue, used to pass data between processes
    receiveLogSignal = pyqtSignal(str)

    def __init__(self):
        QtWidgets.QMainWindow.__init__(self)
        self.setupUi(self)
        self.cap = None  # video
        self.center()
        self.openFIleButton.clicked.connect(self.open_video)
        self.closeFileButton.clicked.connect(self.close_video)

        # Create a stop event, initially cleared (not triggered)
        self.stopEvent = threading.Event()
        self.stopEvent.clear()

        # Model loading
        self.load_models.clicked.connect(self.load_model)

        # Logging
        self.receiveLogSignal.connect(lambda log: self.logOutput(log))
        self.logOutputThread = threading.Thread(target=self.receiveLog, daemon=True)
        self.logOutputThread.start()

        # Frame-rate control
        self.changeFrameSlider.valueChanged.connect(self.frameChange)
        self.frameInterval = self.changeFrameSlider.value()

    def frameChange(self):
        self.label_14.setText(str(self.changeFrameSlider.value()))
        # print("frameInterval:" + str(self.frameInterval))

    def logOutput(self, log):
        # Get the current system time
        time = datetime.now().strftime('[%Y/%m/%d %H:%M:%S]')
        log = time + '\n' + log
        self.logFile.write(log)
        self.textEdit.moveCursor(QTextCursor.End)
        self.textEdit.insertPlainText(log)
        self.textEdit.ensureCursorVisible()  # Auto-scroll

    def receiveLog(self):
        while True:
            data = self.logQueue.get()
            if data:
                self.receiveLogSignal.emit(data)
            else:
                continue

    def center(self, screenNum=0):
        screen = QDesktopWidget().screenGeometry()
        size = self.geometry()
        self.normalGeometry2 = QRect((screen.width() - size.width()) / 2 + screen.left(),
                                     (screen.height() - size.height()) / 2,
                                     size.width(), size.height())
        self.setGeometry((screen.width() - size.width()) / 2 + screen.left(),
                         (screen.height() - size.height()) / 2,
                         size.width(), size.height())

    def open_video(self):
        fileName, _ = QFileDialog.getOpenFileName(self, "Load surveillance video", '../videos')
        self.cap = cv2.VideoCapture(fileName)
        self.frameRate = self.cap.get(cv2.CAP_PROP_FPS)
        video_thread = threading.Thread(target=self.display_video)
        video_thread.start()
        self.logFile = open('../log/log_info.txt', 'a')

    def close_video(self):
        self.stopEvent.set()

    def display_video(self):
        self.openFIleButton.setEnabled(False)
        self.closeFileButton.setEnabled(True)

        frames = 0
        while self.cap.isOpened():
            ret, frame = self.cap.read()
            if ret:
                if frames % self.changeFrameSlider.value() == 0:
                    # frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)  # RGB to BGR
                    plate_frame = frame.copy()
                    img = frame.copy()
                    output = None
                    orign_img = None

                    # System log entries
                    count_info_log = ""
                    event_info_log = ""
                    break_info_log = ""
                    try:
                        if self.target_detect.isChecked():
                            # Object detection
                            output, orign_img, img, pedestrians_num = target_detect(self.model, frame)
                            if int(pedestrians_num) != 0:
                                break_info_log = str(pedestrians_num) + " pedestrian(s) running a red light;\n"
                                self.break_traffic_warning.setVisible(True)
                                self.break_traffic_label.setVisible(True)
                                self.break_traffic_label.setText(break_info_log)
                            else:
                                self.break_traffic_label.setVisible(False)
                                self.break_traffic_warning.setVisible(False)

                        # Traffic-light detection
                        if self.traffic_light_detect.isChecked():
                            traffic_light_color = traffic_light_detect(output, orign_img)
                            if traffic_light_color == "green":
                                self.red_light.setVisible(False)
                                self.green_light.setVisible(True)
                            elif traffic_light_color == "red":
                                self.green_light.setVisible(False)
                                self.red_light.setVisible(True)
                            else:
                                self.green_light.setVisible(False)
                                self.red_light.setVisible(False)
                        else:
                            self.green_light.setVisible(False)
                            self.red_light.setVisible(False)

                        # Vehicle and pedestrian counting
                        people_num = 0
                        cars_num = 0
                        motors_num = 0
                        if self.cars_detect.isChecked():
                            _, cars_num, motors_num = classNum_detect(output)
                            self.tableWidget.setItem(0, 1, QTableWidgetItem(str(cars_num)))
                            self.tableWidget.setItem(0, 2, QTableWidgetItem(str(motors_num)))
                        else:
                            self.tableWidget.setItem(0, 1, QTableWidgetItem(str(0)))
                            self.tableWidget.setItem(0, 2, QTableWidgetItem(str(0)))

                        if self.people_detect.isChecked():
                            people_num, _, _ = classNum_detect(output)
                            self.tableWidget.setItem(0, 0, QTableWidgetItem(str(people_num)))
                        else:
                            self.tableWidget.setItem(0, 0, QTableWidgetItem(str(0)))

                        count_info_log = "people:" + str(people_num) + ", cars:" + str(cars_num) + \
                                         ", motors:" + str(motors_num) + ";\n"

                        # License-plate recognition
                        if self.license_plate_detect.isChecked():
                            plate_info_list = recognize_plate(plate_frame)
                            for plate_info in plate_info_list:
                                plate = plate_info[0]        # plate number
                                conficdence = plate_info[1]  # confidence
                                self.license_result.setText(plate)

                                rect = plate_info[2]         # plate location
                                plate_img = plate_frame[int(rect[1]):int(rect[3] + rect[1]),
                                                        int(rect[0]):int(rect[2] + rect[0])]
                                plate_img = cv2.cvtColor(plate_img, cv2.COLOR_RGB2BGR)
                                plate_img = cv2.resize(plate_img, (140, 30))
                                plate_img = QImage(plate_img.data, plate_img.shape[1],
                                                   plate_img.shape[0], QImage.Format_RGB888)
                                self.license_graph.setPixmap(QPixmap.fromImage(plate_img))
                                img = drawRectBox(img, rect, plate)

                                # Center of the plate, used to look up the car's color
                                plate_center = [int(rect[0] + rect[2]), int(rect[1] + rect[3])]
                                car_color = detect_car_color(output, plate_frame, plate_center)
                                event_info_log = car_color + " car, plate: " + plate + \
                                                 ", recognition confidence: " + str(conficdence)[:5] + '\n'
                        else:
                            self.license_graph.clear()
                            self.license_result.clear()
                    except:
                        pass

                    log_info = count_info_log + event_info_log + break_info_log
                    self.logQueue.put(log_info)

                    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                    img = cv2.resize(img, (1080, 540))
                    img = QImage(img.data, img.shape[1], img.shape[0], QImage.Format_RGB888)
                    self.video_plate.setPixmap(QPixmap.fromImage(img))
                    cv2.waitKey(1)
                frames += 1

                if self.stopEvent.is_set():
                    self.stopEvent.clear()
                    self.video_plate.clear()
                    self.tableWidget.setItem(0, 0, QTableWidgetItem(str(0)))
                    self.tableWidget.setItem(0, 1, QTableWidgetItem(str(0)))
                    self.tableWidget.setItem(0, 2, QTableWidgetItem(str(0)))
                    break
            else:
                self.video_plate.clear()
                break

        try:
            self.openFIleButton.setEnabled(True)
            self.cap.release()
            self.logFile.close()
            self.green_light.setVisible(False)
            self.red_light.setVisible(False)
            self.break_traffic_warning.setVisible(False)
            self.break_traffic_label.setVisible(False)
            self.license_graph.clear()
            self.license_result.clear()
        except:
            print("Error releasing resources")

    def load_model(self):
        CUDA = torch.cuda.is_available()
        print("Loading network.....")
        self.model = Darknet("../yolov3/cfg/yolov3.cfg")
        self.model.load_weights("../yolov3/weights/yolov3.weights")
        print("Network successfully loaded")

        self.model.net_info["height"] = 416
        inp_dim = int(self.model.net_info["height"])
        assert inp_dim % 32 == 0
        assert inp_dim > 32

        if CUDA:
            self.model.cuda()  # Move the model to the GPU
        self.model.eval()
        load_completed = QMessageBox.information(self, 'message', 'Model loaded.',
                                                 QMessageBox.Ok)
class YOLO:
    def __init__(self,
                 cfg_file: pathlib.Path,
                 weights_file: pathlib.Path,
                 class_names_file: pathlib.Path,
                 resolution: int = 416,
                 class_filters: List[str] = None) -> None:
        self.net: Any = None
        self.input_dim: int = None
        self.load_net(cfg_file, weights_file, resolution)
        self.class_names = load_classes(class_names_file)
        self.num_classes = len(self.class_names)
        self.class_filters = class_filters

    def load_net(self, cfg_file: pathlib.Path, weights_file: pathlib.Path,
                 resolution: int) -> None:
        self.net = Darknet(str(cfg_file))
        self.net.load_weights(str(weights_file))
        self.net.net_info['height'] = resolution
        self.net.cuda()
        self.input_dim = self.net.net_info['height']
        if self.input_dim % 32 != 0 or self.input_dim <= 32:
            raise ValueError("Bad input dimension. Resolution is bad")
        # self.net(get_test_input(self.input_dim, True), True)
        self.net.eval()

    def prep_frame(self, frame: np.ndarray
                   ) -> Tuple[np.ndarray, np.ndarray, Tuple[int, int]]:
        original_frame = frame
        dim = original_frame.shape[1], original_frame.shape[0]
        frame = letterbox_image(original_frame, (self.input_dim, self.input_dim))
        frame_ = frame[:, :, ::-1].transpose((2, 0, 1)).copy()
        frame_ = torch.from_numpy(frame_).float().div(255.0).unsqueeze(0)
        return frame_, original_frame, dim

    def format_output(self, output: Any, threshold: float,
                      frame_dimensions: Tuple[int, int]) -> Optional[Any]:
        output = write_results(output, threshold, self.num_classes,
                               nms=True, nms_conf=threshold)
        if isinstance(output, int):  # means no output
            return None

        frame_dimensions = frame_dimensions.repeat(output.size(0), 1)
        scaling_factor = torch.min(self.input_dim / frame_dimensions, 1)[0].view(-1, 1)
        output[:, [1, 3]] -= (self.input_dim - scaling_factor * frame_dimensions[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.input_dim - scaling_factor * frame_dimensions[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor
        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, frame_dimensions[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, frame_dimensions[i, 1])
        return output

    def get_detections(self, frame: np.ndarray,
                       threshold: float = 0.7) -> FrameAnnotations:
        if frame is None:
            return FrameAnnotations(frame=frame, objects=list(),
                                    image_width=None, image_height=None)

        new_frame, frame, dimensions = self.prep_frame(frame)
        new_frame = new_frame.cuda()
        frame_dimensions = torch.FloatTensor(dimensions).repeat(1, 2).cuda()

        with torch.no_grad():
            output = self.net(Variable(new_frame), True)
        output = self.format_output(output, threshold, frame_dimensions)

        objects = list()
        if output is not None:
            for obj in output:
                if self.class_filters is not None:
                    if self.class_names[int(obj[-1])] not in self.class_filters:
                        continue
                objects.append(Object(class_name=self.class_names[int(obj[-1])],
                                      bbox=BBox(left=int(obj[1]),
                                                top=int(obj[2]),
                                                right=int(obj[3]),
                                                bottom=int(obj[4]))))

        return FrameAnnotations(frame=frame, objects=objects,
                                image_width=frame.shape[1],
                                image_height=frame.shape[0])
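# A minimal usage sketch for the YOLO wrapper above. The file paths and input
# image are illustrative; FrameAnnotations/Object/BBox are this repo's types:
#
#   yolo = YOLO(pathlib.Path('cfg/yolov3.cfg'),
#               pathlib.Path('yolov3.weights'),
#               pathlib.Path('data/coco.names'),
#               resolution=416,
#               class_filters=['person'])
#   annotations = yolo.get_detections(cv2.imread('frame.jpg'), threshold=0.7)
#   for obj in annotations.objects:
#       print(obj.class_name, obj.bbox)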
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        # if args.arch == 'resnet50':
        #     import resnet_model
        #     model = resnet_model.resnet50_new(pretrained=True)
        #     print('save resnet50 to resnet50.weights')
        #     model.saveas_darknet_weights('resnet50.weights')
        if args.arch == 'resnet50-darknet':
            from darknet import Darknet
            model = Darknet('cfg/resnet50.cfg')
            print('load weights from resnet50.weights')
            model.load_weights('resnet50.weights')
        elif args.arch == 'resnet50-kaiming':
            from caffenet import CaffeNet
            model = CaffeNet('ResNet-50-deploy.prototxt')
            print('load weights from ResNet-50-model.caffemodel')
            model.load_weights('ResNet-50-model.caffemodel')
        elif args.arch == 'resnet50-kaiming-dk':
            from darknet import Darknet
            model = Darknet('ResNet-50-model.cfg')
            print('load weights from ResNet-50-model.weights')
            model.load_weights('ResNet-50-model.weights')
        elif args.arch == 'resnet18-caffe':
            from caffenet import CaffeNet
            model = CaffeNet('cfg/resnet-18.prototxt')
            print('load weights from resnet-18.caffemodel')
            model.load_weights('resnet-18.caffemodel')
        elif args.arch == 'resnet18-darknet':
            from darknet import Darknet
            model = Darknet('resnet-18.cfg')
            print('load weights from resnet-18.weights')
            model.load_weights('resnet-18.weights')
        elif args.arch == 'resnet50-test':
            from darknet import Darknet
            model = Darknet('test/ResNet-50-model.cfg')
            print('load weights from test/ResNet-50-model.weights')
            model.load_weights('test/ResNet-50-model.weights')
        else:
            model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        if args.arch.startswith('mobilenet'):
            model = Net()
            print(model)
        else:
            model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    if args.arch == 'resnet50-test' or args.arch == 'resnet50-kaiming' or args.arch == 'resnet50-kaiming-dk':
        normalize = transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0])
    elif args.arch == 'resnet18-darknet' or args.arch == 'resnet18-caffe':
        normalize = transforms.Normalize(mean=[104 / 255.0, 117 / 255.0, 123 / 255.0],
                                         std=[1.0, 1.0, 1.0])
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    train_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(traindir, transforms.Compose([
            transforms.RandomSizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Scale(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)
class Detector:
    def __init__(self, showTags=False, showCordenates=False):
        self.showTags = showTags
        self.showCordenates = showCordenates

        self.model = Darknet(cfgfile)
        self.model.load_weights(weightsfile)
        self.model.net_info["height"] = resolution
        self.inp_dim = int(self.model.net_info["height"])
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32

        # If there's a GPU available, put the model on GPU
        if CUDA:
            self.model.cuda()
        self.model.eval()

        self.reconocimiento_facil = ReconocimientoFacial()

    def detect(self, frame, debugFrames=[], frame_counter=0):
        img = prep_image(frame, self.inp_dim)
        im_dim = frame.shape[1], frame.shape[0]
        im_dim = torch.FloatTensor(im_dim).repeat(1, 2)

        if CUDA:
            im_dim = im_dim.cuda()
            img = img.cuda()

        with torch.no_grad():
            # output = self.model(Variable(img, volatile=True), CUDA)
            output = self.model(Variable(img), CUDA)
        output = write_results(output, confidence, num_classes, nms_conf=nms_thesh)

        if isinstance(output, int):
            return frame

        im_dim = im_dim.repeat(output.size(0), 1)
        scaling_factor = torch.min(416 / im_dim, 1)[0].view(-1, 1)
        output[:, [1, 3]] -= (self.inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [2, 4]] -= (self.inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        output[:, 1:5] /= scaling_factor

        for i in range(output.shape[0]):
            output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
            output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])

        if self.showCordenates:
            frame = lineaGol.add_linea_gol(frame, frame_counter)

        list(map(lambda x: write(x, frame, self.showTags, self.reconocimiento_facil),
                 output))
        return frame
CUDA = torch.cuda.is_available()  # whether CUDA can be used
num_classes = 80
bbox_attrs = 5 + num_classes  # bounding-box attributes

model = Darknet(cfgfile)  # Darknet
model.load_weights(weightsfile)  # load the weights file into the model

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
    model.cuda()  # move the model to the GPU when CUDA is in use

model.eval()  # set the model to evaluation mode

# cap = cv2.VideoCapture(0)  # VideoCapture(0) opens the local webcam
cap = cv2.VideoCapture("http://192.168.0.54:8409/?action=snapshot")  # VideoCapture("url") opens a network stream

assert cap.isOpened(), 'Cannot capture source'  # assert raises AssertionError if the condition is not True

frames = 0  # frame counter, starts at 0
start = time.time()  # start the timer
while cap.isOpened():  # check that the capture is properly initialized
# (tail of a truncated remote-debugger call, kept as in the original snippet)
                stdoutToServer=True, stderrToServer=True)

datacfg = 'cfg/voc.data'
cfgfile = 'cfg/yolov2-tiny-voc.cfg'
weightfile = '../yolov2-tiny-bnn/weights/yolov2-tiny-voc.weights'

options = read_data_cfg(datacfg)
valid_images = options['valid']
with open(valid_images) as fp:
    tmp_files = fp.readlines()
    valid_files = [item.rstrip() for item in tmp_files]

m = Darknet(cfgfile)
m.print_network()
m.load_weights(weightfile)
m.cuda()
m.eval()

valid_dataset = dataset.listDataset(valid_images,
                                    shape=(m.width, m.height),
                                    shuffle=False,
                                    transform=transforms.Compose([
                                        transforms.ToTensor(),
                                    ]))
valid_batchsize = 2
assert valid_batchsize > 1

kwargs = {'num_workers': 0, 'pin_memory': True}
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=valid_batchsize,
                                           shuffle=False,
def valid(datacfg, modelcfg, weightfile):
    def truths_length(truths, max_num_gt=50):
        for i in range(max_num_gt):
            if truths[i][1] == 0:
                return i
        return max_num_gt  # all slots filled; avoids returning None

    # Parse configuration files
    data_options = read_data_cfg(datacfg)
    valid_images = data_options['valid']
    meshname = data_options['mesh']
    backupdir = data_options['backup']
    name = data_options['name']
    gpus = data_options['gpus']
    fx = float(data_options['fx'])
    fy = float(data_options['fy'])
    u0 = float(data_options['u0'])
    v0 = float(data_options['v0'])
    im_width = int(data_options['width'])
    im_height = int(data_options['height'])
    if not os.path.exists(backupdir):
        makedirs(backupdir)

    # Parameters
    seed = int(time.time())
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)
    save = False
    testtime = True
    num_classes = 1
    testing_samples = 0.0
    if save:
        makedirs(backupdir + '/test')
        makedirs(backupdir + '/test/gt')
        makedirs(backupdir + '/test/pr')

    # To save
    testing_error_trans = 0.0
    testing_error_angle = 0.0
    testing_error_pixel = 0.0
    errs_2d = []
    errs_3d = []
    errs_trans = []
    errs_angle = []
    errs_corner2D = []
    preds_trans = []
    preds_rot = []
    preds_corners2D = []
    gts_trans = []
    gts_rot = []
    gts_corners2D = []

    # Read object model information, get 3D bounding box corners
    mesh = MeshPly(meshname)
    vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
    corners3D = get_3D_corners(vertices)
    try:
        diam = float(data_options['diam'])  # was `options['diam']`, an undefined name
    except KeyError:
        diam = calc_pts_diameter(np.array(mesh.vertices))

    # Read intrinsic camera parameters
    intrinsic_calibration = get_camera_intrinsic(u0, v0, fx, fy)

    # Get validation file names
    with open(valid_images) as fp:
        tmp_files = fp.readlines()
        valid_files = [item.rstrip() for item in tmp_files]

    # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
    model = Darknet(modelcfg)
    model.print_network()
    model.load_weights(weightfile)
    model.cuda()
    model.eval()
    test_width = model.test_width
    test_height = model.test_height
    num_keypoints = model.num_keypoints
    num_labels = num_keypoints * 2 + 3  # +2 for width, height, +1 for class label

    # Get the parser for the test dataset
    valid_dataset = dataset.listDataset(valid_images,
                                        shape=(test_width, test_height),
                                        shuffle=False,
                                        transform=transforms.Compose([transforms.ToTensor(), ]))

    # Specify the number of workers for multiprocessing, get the dataloader for the test dataset
    kwargs = {'num_workers': 4, 'pin_memory': True}
    test_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=1, shuffle=False, **kwargs)

    logging("   Testing {}...".format(name))
    logging("   Number of test samples: %d" % len(test_loader.dataset))

    # Iterate through test batches (batch size for test data is 1)
    count = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        t1 = time.time()
        # Pass data to GPU
        # import IPython; IPython.embed()
        data = data.cuda()
        # target = target.cuda()
        # Wrap tensor in Variable class, set volatile=True for inference mode and to use minimal memory during inference
        data = Variable(data, volatile=True)
        t2 = time.time()
        # Forward pass
        output = model(data).data
        t3 = time.time()
        # Using confidence threshold, eliminate low-confidence predictions
        all_boxes = get_region_boxes(output, num_classes, num_keypoints)
        all_boxes = [t.cpu() for t in all_boxes]
        t4 = time.time()
        # Evaluation
        # Iterate through all batch elements
        for box_pr, target in zip([all_boxes], [target[0]]):
            # For each image, get all the targets (for multiple object pose estimation, there might be more than 1 target per image)
            truths = target.view(-1, num_labels)
            # Get how many objects are present in the scene
            num_gts = truths_length(truths)
            # Iterate through each ground-truth object
            for k in range(num_gts):
                box_gt = list()
                for j in range(1, 2 * num_keypoints + 1):
                    box_gt.append(truths[k][j])
                box_gt.extend([1.0, 1.0])
                box_gt.append(truths[k][0])

                # Denormalize the corner predictions
                corners2D_gt = np.array(np.reshape(box_gt[:18], [-1, 2]), dtype='float32')
                corners2D_pr = np.array(np.reshape(box_pr[:18], [-1, 2]), dtype='float32')
                corners2D_gt[:, 0] = corners2D_gt[:, 0] * im_width
                corners2D_gt[:, 1] = corners2D_gt[:, 1] * im_height
                corners2D_pr[:, 0] = corners2D_pr[:, 0] * im_width
                corners2D_pr[:, 1] = corners2D_pr[:, 1] * im_height
                preds_corners2D.append(corners2D_pr)
                gts_corners2D.append(corners2D_gt)

                # Compute corner prediction error
                corner_norm = np.linalg.norm(corners2D_gt - corners2D_pr, axis=1)
                corner_dist = np.mean(corner_norm)
                errs_corner2D.append(corner_dist)

                # Compute [R|t] by PnP
                R_gt, t_gt = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),
                                 corners2D_gt,
                                 np.array(intrinsic_calibration, dtype='float32'))
                R_pr, t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), corners3D[:3, :]), axis=1)), dtype='float32'),
                                 corners2D_pr,
                                 np.array(intrinsic_calibration, dtype='float32'))

                # Compute translation error
                trans_dist = np.sqrt(np.sum(np.square(t_gt - t_pr)))
                errs_trans.append(trans_dist)

                # Compute angle error
                angle_dist = calcAngularDistance(R_gt, R_pr)
                errs_angle.append(angle_dist)

                # Compute pixel error
                Rt_gt = np.concatenate((R_gt, t_gt), axis=1)
                Rt_pr = np.concatenate((R_pr, t_pr), axis=1)
                proj_2d_gt = compute_projection(vertices, Rt_gt, intrinsic_calibration)
                proj_2d_pred = compute_projection(vertices, Rt_pr, intrinsic_calibration)
                norm = np.linalg.norm(proj_2d_gt - proj_2d_pred, axis=0)
                pixel_dist = np.mean(norm)
                errs_2d.append(pixel_dist)

                # Compute 3D distances
                transform_3d_gt = compute_transformation(vertices, Rt_gt)
                transform_3d_pred = compute_transformation(vertices, Rt_pr)
                norm3d = np.linalg.norm(transform_3d_gt - transform_3d_pred, axis=0)
                vertex_dist = np.mean(norm3d)
                errs_3d.append(vertex_dist)

                # Sum errors
                testing_error_trans += trans_dist
                testing_error_angle += angle_dist
                testing_error_pixel += pixel_dist
                testing_samples += 1
                count = count + 1

                if save:
                    preds_trans.append(t_pr)
                    gts_trans.append(t_gt)
                    preds_rot.append(R_pr)
                    gts_rot.append(R_gt)
                    np.savetxt(backupdir + '/test/gt/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/R_' + valid_files[count][-8:-3] + 'txt', np.array(R_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/t_' + valid_files[count][-8:-3] + 'txt', np.array(t_pr, dtype='float32'))
                    np.savetxt(backupdir + '/test/gt/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_gt, dtype='float32'))
                    np.savetxt(backupdir + '/test/pr/corners_' + valid_files[count][-8:-3] + 'txt', np.array(corners2D_pr, dtype='float32'))

        t5 = time.time()

    # Compute 2D projection error, 6D pose error, 5cm5degree error
    px_threshold = 5  # a 5-pixel threshold for 2D reprojection error is standard in recent SOTA 6D object pose estimation work
    eps = 1e-5
    acc = len(np.where(np.array(errs_2d) <= px_threshold)[0]) * 100. / (len(errs_2d) + eps)
    acc3d10 = len(np.where(np.array(errs_3d) <= diam * 0.1)[0]) * 100. / (len(errs_3d) + eps)
    acc5cm5deg = len(np.where((np.array(errs_trans) <= 0.05) & (np.array(errs_angle) <= 5))[0]) * 100. / (len(errs_trans) + eps)
    corner_acc = len(np.where(np.array(errs_corner2D) <= px_threshold)[0]) * 100. / (len(errs_corner2D) + eps)
    mean_err_2d = np.mean(errs_2d)
    mean_corner_err_2d = np.mean(errs_corner2D)
    nts = float(testing_samples)

    if testtime:
        print('-----------------------------------')
        print('  tensor to cuda : %f' % (t2 - t1))
        print('    forward pass : %f' % (t3 - t2))
        print('get_region_boxes : %f' % (t4 - t3))
        print(' prediction time : %f' % (t4 - t1))
        print('            eval : %f' % (t5 - t4))
        print('-----------------------------------')

    # Print test statistics
    logging('Results of {}'.format(name))
    logging('   Acc using {} px 2D Projection = {:.2f}%'.format(px_threshold, acc))
    logging('   Acc using 10% threshold - {} vx 3D Transformation = {:.2f}%'.format(diam * 0.1, acc3d10))
    logging('   Acc using 5 cm 5 degree metric = {:.2f}%'.format(acc5cm5deg))
    logging('   Mean 2D pixel error is %f, Mean vertex error is %f, mean corner error is %f' % (mean_err_2d, np.mean(errs_3d), mean_corner_err_2d))
    logging('   Translation error: %f m, angle error: %f degree, pixel error: %f pix' % (testing_error_trans / nts, testing_error_angle / nts, testing_error_pixel / nts))

    if save:
        predfile = backupdir + '/predictions_linemod_' + name + '.mat'
        scipy.io.savemat(predfile, {'R_gts': gts_rot, 't_gts': gts_trans, 'corner_gts': gts_corners2D,
                                    'R_prs': preds_rot, 't_prs': preds_trans, 'corner_prs': preds_corners2D})
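# Added usage sketch (not in the original file): one plausible way to invoke valid()
# from the command line. The argparse flag names and default paths below are
# illustrative assumptions, not the project's actual CLI.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Evaluate a trained single-shot 6D pose model')
    parser.add_argument('--datacfg', default='cfg/ape.data')              # assumed path
    parser.add_argument('--modelcfg', default='cfg/yolo-pose.cfg')        # assumed path
    parser.add_argument('--weightfile', default='backup/model.weights')   # assumed path
    args = parser.parse_args()
    valid(args.datacfg, args.modelcfg, args.weightfile)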
    'cautery': 'cfg/my_config_webcam.yaml'
}
# weightfile = {'hands': 'backup/hands/000500.weights'}
namesfile = {'hands': 'data/hands.names'}

#######################################################
# Setting up YOLO-hand
#######################################################
model_hand = Darknet(cfgfile['hands'])
model_hand.load_weights(weightfile['hands'])
print('Loading weights from %s... Done!' % (weightfile['hands']))
if use_cuda:
    model_hand.cuda()
class_names = uyolo.load_class_names(namesfile['hands'])

#######################################################
# Setting up DOPE
#######################################################
yaml_path = cfgfile['cautery']
with open(yaml_path, 'r') as stream:
    try:
        print("Loading DOPE parameters from '{}'...".format(yaml_path))
        params = yaml.safe_load(stream)  # safe_load avoids PyYAML's unsafe default loader
        print('    Parameters loaded.')
    except yaml.YAMLError as exc:
        print(exc)
m = Darknet(cfgfile)
region_loss = m.loss
m.load_weights(weightfile)

print('--- bn weight ---')
print(m.models[0][1].weight)
print('--- bn bias ---')
print(m.models[0][1].bias)
print('--- bn running_mean ---')
print(m.models[0][1].running_mean)
print('--- bn running_var ---')
print(m.models[0][1].running_var)

m.train()
if torch.cuda.is_available():
    m = m.cuda()

optimizer = optim.SGD(m.parameters(), lr=1e-2, momentum=0.9, weight_decay=0.1)

img = Image.open(imgpath)
img = image2torch(img)
if torch.cuda.is_available():
    img = img.cuda()
img = Variable(img)
target = Variable(label)

print('----- img ---------------------')
print(img.data.storage()[0:100])
print('----- target -----------------')
print(target.data.storage()[0:100])
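# Added sketch (assumption, not in the original snippet): the overfit-one-image
# training step this debug script is presumably building toward. It assumes
# region_loss follows the pytorch-yolo2 RegionLoss(output, target) convention.
for it in range(10):
    optimizer.zero_grad()
    output = m(img)                      # forward pass through the Darknet model
    loss = region_loss(output, target)   # YOLOv2 region loss against the label tensor
    loss.backward()
    optimizer.step()
    print('iter %d, loss %f' % (it, loss.item()))  # use loss.data[0] on pre-0.4 PyTorch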
class ObjectDetection:
    def __init__(self, id):
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()
        self.cfgfile = "cfg/yolov3.cfg"
        # self.cfgfile = 'cfg/yolov3-tiny.cfg'
        self.weightsfile = "yolov3.weights"
        # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = float(0.5)
        self.nms_thesh = float(0.4)
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')
        self.colors = pkl.load(open("pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 640   # 640
        self.height = 480  # 360
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def main(self):
        q = queue.Queue()

        def frame_render(queue_from_cam):
            frame = self.cap.read()
            frame = cv2.resize(frame, (self.width, self.height))
            queue_from_cam.put(frame)

        cam = threading.Thread(target=frame_render, args=(q,))
        cam.start()
        cam.join()
        frame = q.get()
        q.task_done()
        fps = FPS().start()
        try:
            img, orig_im, dim = prep_image(frame, self.inp_dim)
            im_dim = torch.FloatTensor(dim).repeat(1, 2)
            if self.CUDA:  # if a GPU is properly installed, inference runs on the GPU
                im_dim = im_dim.cuda()
                img = img.cuda()
            # with torch.no_grad():  # the model is already in evaluation mode
            output = self.model(Variable(img), self.CUDA)
            # localize the objects in the frame
            output = write_results(output, self.confidence, self.num_classes, nms=True, nms_conf=self.nms_thesh)
            output = output.type(torch.half)
            if list(output.size()) == [1, 86]:
                pass
            else:
                output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
                # im_dim = im_dim.repeat(output.size(0), 1)
                output[:, [1, 3]] *= frame.shape[1]
                output[:, [2, 4]] *= frame.shape[0]
                list(map(lambda x: write(x, frame, self.classes, self.colors), output))
                # b_boxes and labels are globals assumed to be populated by write()
                x, y, w, h = b_boxes["bbox"][0], b_boxes["bbox"][1], b_boxes["bbox"][2], b_boxes["bbox"][3]
                distance = (2 * 3.14 * 180) / (w + h * 360) * 1000 + 3  # rough distance estimate in inches
                feedback = "{} is at {} Inches".format(labels["Current Object"], round(distance))
                # speak.Speak(feedback)  # on a Linux-based OS use espeak instead; this speech library adds unnecessary latency on Windows
                # print(feedback)
        except:
            pass
        fps.update()
        fps.stop()
        print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
        print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
        # x, y and distance come from the try block above
        frame = cv2.putText(frame, str("{:.2f} Inches".format(distance)), (x, y),
                            cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 255), 1, cv2.LINE_AA)
        ret, jpeg = cv2.imencode('.jpg', frame)
        return jpeg.tobytes()  # tostring() is deprecated in favor of tobytes()
def main():
    args = arg_parse()
    confidence = args.confidence
    nms_thresh = args.nms_thresh
    start = 0
    CUDA = torch.cuda.is_available()
    classes = load_classes("data/coco.names")
    num_classes = len(classes)

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()

    def write(x, img, color):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        cv2.rectangle(img, c1, c2, color, 4)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (150, 150, 150), thickness=1)

    # Detection phase
    cap = cv2.VideoCapture(0)
    assert cap.isOpened(), "Cannot capture source"

    frames = 0
    start = time.time()

    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(map(lambda x: (int(x[0] * 200), int(x[1] * 200), int(x[2] * 200)), colors))
    np.random.seed(10000)
    np.random.shuffle(colors)
    np.random.seed(None)  # reset seed to default

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            frame = cv2.resize(frame, dsize=(1280, 960))
            img = prep_image(frame, inp_dim)
            print(f"IMG_SHAPE: {img.shape}")
            im_dim = frame.shape[1], frame.shape[0]
            im_dim = torch.FloatTensor(im_dim).repeat(1, 2)
            if CUDA:
                im_dim = im_dim.cuda()
                img = img.cuda()
            with torch.no_grad():
                # volatile=True is deprecated and redundant inside torch.no_grad()
                outputs = model(Variable(img), CUDA)
            outputs = write_results(outputs, confidence, num_classes, nms_conf=nms_thresh)
            if outputs is not None:
                im_dim = im_dim.repeat(outputs.size(0), 1)
                scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
                outputs[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                outputs[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
                outputs[:, 1:5] /= scaling_factor
                for i in range(outputs.shape[0]):
                    outputs[i, [1, 3]] = torch.clamp(outputs[i, [1, 3]], 0.0, im_dim[i, 0])
                    outputs[i, [2, 4]] = torch.clamp(outputs[i, [2, 4]], 0.0, im_dim[i, 1])
                for output in outputs:
                    color = colors[int(output[-1])]
                    write(output, frame, color)
            cv2.imshow("frame", frame)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
            frames += 1
            print(time.time() - start)
            print("FPS of the video is {:5.2f}".format(frames / (time.time() - start)))
        else:
            break
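# Added aside (hedged): the padding/scaling arithmetic above undoes the letterbox
# resize performed by prep_image. The same mapping for a single (x1, y1, x2, y2)
# box, written out as a standalone helper for clarity:
def letterbox_to_image(box, inp_dim, img_w, img_h):
    scale = min(inp_dim / img_w, inp_dim / img_h)
    pad_x = (inp_dim - scale * img_w) / 2  # horizontal letterbox padding
    pad_y = (inp_dim - scale * img_h) / 2  # vertical letterbox padding
    x1, y1, x2, y2 = box
    return ((x1 - pad_x) / scale, (y1 - pad_y) / scale,
            (x2 - pad_x) / scale, (y2 - pad_y) / scale)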
CUDA = torch.cuda.is_available()

num_classes = 80
bbox_attrs = 5 + num_classes

print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
    model.cuda().half()

# model(get_test_input(inp_dim, CUDA), CUDA)
# model.eval()

videofile = 'sample.mp4'

cap = cv2.VideoCapture(videofile)
assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
while cap.isOpened():
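# Added note (hedged): with the model converted to FP16 via model.cuda().half(),
# the frames fed inside the truncated read loop above must be converted to half
# precision too, e.g. (sketch, assuming the prep_image helper used elsewhere here):
#     img = prep_image(frame, inp_dim)[0].cuda().half()
#     with torch.no_grad():
#         output = model(Variable(img), CUDA)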
class ObjectDetection:
    def __init__(self, id):
        # self.cap = cv2.VideoCapture(id)
        self.cap = WebcamVideoStream(src=id).start()
        self.cfgfile = "cfg/yolov3.cfg"
        # self.cfgfile = 'cfg/yolov3-tiny.cfg'
        self.weightsfile = "yolov3.weights"
        # self.weightsfile = 'yolov3-tiny.weights'
        self.confidence = float(0.6)
        self.nms_thesh = float(0.8)
        self.num_classes = 80
        self.classes = load_classes('data/coco.names')
        self.colors = pkl.load(open("pallete", "rb"))
        self.model = Darknet(self.cfgfile)
        self.CUDA = torch.cuda.is_available()
        self.model.load_weights(self.weightsfile)
        self.model.net_info["height"] = 160
        self.inp_dim = int(self.model.net_info["height"])
        self.width = 1280  # 640
        self.height = 720  # 360
        print("Loading network.....")
        if self.CUDA:
            self.model.cuda()
        print("Network successfully loaded")
        assert self.inp_dim % 32 == 0
        assert self.inp_dim > 32
        self.model.eval()

    def main(self):
        q = queue.Queue()
        while True:
            def frame_render(queue_from_cam):
                frame = self.cap.read()
                # If you capture the stream with OpenCV (cv2.VideoCapture()), then use:
                # ret, frame = self.cap.read()
                frame = cv2.resize(frame, (self.width, self.height))
                queue_from_cam.put(frame)

            cam = threading.Thread(target=frame_render, args=(q,))
            cam.start()
            cam.join()
            frame = q.get()
            q.task_done()
            fps = FPS().start()
            try:
                img, orig_im, dim = prep_image(frame, self.inp_dim)
                im_dim = torch.FloatTensor(dim).repeat(1, 2)
                if self.CUDA:  # if a GPU is properly installed, inference runs on the GPU
                    im_dim = im_dim.cuda()
                    img = img.cuda()
                # with torch.no_grad():  # the model is already in evaluation mode
                output = self.model(Variable(img), self.CUDA)
                # localize the objects in the frame
                output = write_results(output, self.confidence, self.num_classes, nms=True, nms_conf=self.nms_thesh)
                output = output.type(torch.half)
                if list(output.size()) == [1, 86]:
                    print(output.size())
                    pass
                else:
                    output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(self.inp_dim)) / self.inp_dim
                    # im_dim = im_dim.repeat(output.size(0), 1)
                    output[:, [1, 3]] *= frame.shape[1]
                    output[:, [2, 4]] *= frame.shape[0]
                    list(map(lambda boxes: write(boxes, frame, self.classes, self.colors), output))
            except:
                pass
            fps.update()
            fps.stop()
            ret, jpeg = cv2.imencode('.jpg', frame)
            print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
            print("[INFO] approx. FPS: {:.1f}".format(fps.fps()))
            return jpeg.tobytes()  # tostring() is deprecated in favor of tobytes()
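# Added usage sketch (assumption, not in the original file): both ObjectDetection
# classes return one JPEG-encoded frame per call, which is the shape expected by a
# Flask MJPEG streaming endpoint. The route and app wiring below are illustrative.
from flask import Flask, Response

app = Flask(__name__)
detector = ObjectDetection(id=0)

def gen(det):
    while True:
        # each det.main() call grabs, annotates, and JPEG-encodes one frame
        yield (b'--frame\r\nContent-Type: image/jpeg\r\n\r\n' + det.main() + b'\r\n')

@app.route('/video_feed')
def video_feed():
    return Response(gen(detector), mimetype='multipart/x-mixed-replace; boundary=frame')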
def predict():
    target = os.path.join(APP_ROOT, 'static/')
    print(target)
    if not os.path.isdir(target):
        os.mkdir(target)
    else:
        print("Upload directory already exists: {}".format(target))
    print(request.files.getlist("file"))
    for upload in request.files.getlist("file"):
        print(upload)
        print("{} is the file name".format(upload.filename))
        filename = upload.filename
        destination = "/".join([target, filename])
        print("Accept incoming file:", filename)
        print("Save it to:", destination)
        upload.save(destination)

    scales = "1,2,3"
    print(filename)
    images = "static/" + str(filename)
    batch_size = int(1)
    confidence = float(0.5)
    nms_thesh = float(0.4)
    start = 0
    CUDA = torch.cuda.is_available()
    num_classes = 80
    classes = load_classes('data/coco.names')

    print("Loading network.....")
    model = Darknet("cfg/yolov3.cfg")
    model.load_weights("yolov3.weights")
    print("Network successfully loaded")

    model.net_info["height"] = "416"
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    if CUDA:
        model.cuda()
    model.eval()

    read_dir = time.time()
    try:
        imlist = [osp.join(osp.realpath('.'), images, img)
                  for img in os.listdir(images)
                  if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    load_batch = time.time()
    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    leftover = 0
    if len(im_dim_list) % batch_size:
        leftover = 1
    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [torch.cat((im_batches[i * batch_size:min((i + 1) * batch_size, len(im_batches))]))
                      for i in range(num_batches)]

    i = 0
    write = False
    model(get_test_input(inp_dim, CUDA), CUDA)
    start_det_loop = time.time()
    objs = {}
    f = open("result.txt", "w+")
    for batch in im_batches:
        start = time.time()
        if CUDA:
            batch = batch.cuda()
        with torch.no_grad():
            prediction = model(Variable(batch), CUDA)
        prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thesh)
        if type(prediction) == int:
            i += 1
            continue
        end = time.time()
        prediction[:, 0] += i * batch_size
        if not write:
            output = prediction
            write = 1
        else:
            output = torch.cat((output, prediction))
        for im_num, image in enumerate(imlist[i * batch_size:min((i + 1) * batch_size, len(imlist))]):
            im_id = i * batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            # f.write(listToString(objs))
            obj6 = []
            for obj in objs:  # deduplicate while preserving order; the loop variable must not shadow the batch index i
                if obj not in obj6:
                    obj6.append(obj)
            f.write(listToString(obj6))
            f.seek(0)
            print(f.read())  # rewind before reading, otherwise the freshly written text is not visible
            print("----------------------------------------------------------")
        i += 1
        if CUDA:
            torch.cuda.synchronize()
    f.close()

    try:
        output
    except NameError:
        print("No detections were made")
        exit()

    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    output_recast = time.time()
    class_load = time.time()
    colors = pkl.load(open("pallete", "rb"))
    draw = time.time()

    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 2)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))
    det_names = pd.Series(imlist).apply(lambda x: "{}/{}".format("static", x.split("/")[-1]))
    list(map(cv2.imwrite, det_names, orig_ims))
    end = time.time()
    torch.cuda.empty_cache()

    main1()
    main2()

    with open("result.txt", "r") as read_file:
        with open("result2.txt", "w") as write_file:
            write_file.write(read_file.read().replace(" ", '\n'))
    with open("hashesfromcaption.txt", "r") as read_file:
        with open("hsh.txt", "w") as write_file:
            write_file.write(read_file.read().replace(" ", '_'))
    with open("hsh.txt", "r") as read_file:
        with open("h.txt", "w") as write_file:
            write_file.write(read_file.read().replace("#", '\n#'))

    text = open('output.txt', 'r+')
    content = text.read()
    text.close()
    objk = open('result.txt', 'r+')
    contentobjk = objk.read()
    objk.close()
    hashcode = open('h.txt', 'r+')
    h = hashcode.read()
    hashcode.close()

    return render_template("results.html", image_name=filename, text=content, objects=contentobjk, last=h)
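# Added wiring sketch (assumption): the route decorator for predict() is not part
# of this excerpt, so the app setup below is illustrative only.
from flask import Flask

app = Flask(__name__)
APP_ROOT = os.path.dirname(os.path.abspath(__file__))

# predict() above would be registered as, e.g.:
# @app.route('/predict', methods=['GET', 'POST'])

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)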
CUDA = torch.cuda.is_available()

bbox_attrs = 5 + num_classes

print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
    model.cuda()

model(get_test_input(inp_dim, CUDA), CUDA)
model.eval()

videofile = args.video
cap = cv2.VideoCapture(videofile)

video_frame_cnt = int(cap.get(7))  # cv2.CAP_PROP_FRAME_COUNT
video_width = int(cap.get(3))      # cv2.CAP_PROP_FRAME_WIDTH
video_height = int(cap.get(4))     # cv2.CAP_PROP_FRAME_HEIGHT
video_fps = int(cap.get(5))        # cv2.CAP_PROP_FPS
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))
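# Added sketch (hedged): the read/detect/write loop that typically follows this
# setup; detect_frame is an assumed placeholder for the per-frame detection and
# box drawing done elsewhere in this file.
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    # frame = detect_frame(frame)   # placeholder: detection + box drawing
    videoWriter.write(frame)
cap.release()
videoWriter.release()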
class Detector(object):
    def __init__(self, model_def_file='', weights_file=''):
        self.model_def_file = model_def_file
        self.weights_file = weights_file
        self.model = Darknet(self.model_def_file)
        self.model.load_weights(self.weights_file)
        self.CUDA = True
        if self.CUDA:
            self.model.cuda()
        print('load network finish')

        self.confidence = 0.5
        self.nms_thresh = 0.4
        self.num_classes = 80
        self.yolo_dir = '/home/yfji/SourceCode/pytorch-yolo-v3'
        self.classes = util.load_classes(op.join('data/coco.names'))
        self.colors = pickle.load(open(op.join(self.yolo_dir, 'pallete'), 'rb'))  # 'rb' belongs to open(), not op.join()

    def detect(self, image):
        prediction = self.model(Variable(image), self.CUDA)
        output = self.filter_results(prediction)  # list of [score, x1, y1, x2, y2]
        return output

    def filter_results(self, prediction, nms=True):
        conf_mask = (prediction[:, :, 4] > self.confidence).float().unsqueeze(2)
        prediction = prediction * conf_mask
        try:
            torch.nonzero(prediction[:, :, 4]).transpose(0, 1).contiguous()
        except:
            return 0

        # Convert (cx, cy, w, h) to corner coordinates (x1, y1, x2, y2)
        box_a = prediction.new(prediction.shape)
        box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
        box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
        box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
        box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
        prediction[:, :, :4] = box_a[:, :, :4]

        batch_size = prediction.size(0)
        output = prediction.new(1, prediction.size(2) + 1)
        write = False

        for ind in range(batch_size):
            image_pred = prediction[ind]
            max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + self.num_classes], 1)
            max_conf = max_conf.float().unsqueeze(1)
            max_conf_score = max_conf_score.float().unsqueeze(1)
            seq = (image_pred[:, :5], max_conf, max_conf_score)
            image_pred = torch.cat(seq, 1)
            non_zero_ind = (torch.nonzero(image_pred[:, 4]))
            try:
                image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)
            except:
                continue

            img_classes = util.unique(image_pred_[:, -1])
            for cls in img_classes:
                # get the detections with one particular class
                cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
                class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
                image_pred_class = image_pred_[class_mask_ind].view(-1, 7)
                conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
                image_pred_class = image_pred_class[conf_sort_index]
                idx = image_pred_class.size(0)
                if nms:
                    for i in range(idx):
                        try:
                            ious = util.bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                        except ValueError:
                            break
                        except IndexError:
                            break
                        iou_mask = (ious < self.nms_thresh).float().unsqueeze(1)
                        image_pred_class[i + 1:] *= iou_mask
                        non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze()
                        image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

                batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
                seq = batch_ind, image_pred_class
                if not write:
                    output = torch.cat(seq, 1)
                    write = True
                else:
                    out = torch.cat(seq, 1)
                    output = torch.cat((output, out))
        return output
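# Added usage sketch (assumed file names): constructing the Detector and running
# one forward pass. A real caller would use prep_image-style preprocessing rather
# than the random stand-in tensor below.
detector = Detector(model_def_file='cfg/yolov3.cfg', weights_file='yolov3.weights')
image = torch.randn(1, 3, 416, 416).cuda()   # stand-in for a preprocessed frame
detections = detector.detect(image)          # rows: [batch_idx, x1, y1, x2, y2, objectness, class_score, class_idx]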
def start(self):
    # Initialization of global variables
    global classes, BBox, colors, phase, frame, initBBox, true_class_filter

    # DETECTION PHASE SETUP
    CUDA = torch.cuda.is_available()
    text = 'No class filter selected'
    classes = load_classes('model/{}/model.names'.format(self.model_folder))
    colors = pkl.load(open('pallete', 'rb'))
    num_classes = len(classes)
    if [i for i in self.class_filter if not (i in classes)]:
        if self.label_info:
            text = 'WARNING: {} class/classes are not included in the selected model. Updating the searching list...'.format(
                [i for i in self.class_filter if not (i in classes)])
            self.label_info.setText(text)
        else:
            print('WARNING: {} class/classes are not included in the selected model. Updating the searching list...'.format(
                [i for i in self.class_filter if not (i in classes)]))
    true_class_filter = [i for i in self.class_filter if (i in classes)]

    # Network configuration
    if self.label_info:
        text += '\nLoading network...'
        self.label_info.setText(text)
    else:
        print('Loading network.....')
    model = Darknet('model/{}/model.cfg'.format(self.model_folder))
    model.load_weights('model/{}/model.weights'.format(self.model_folder))
    if self.label_info:
        text += '\nNetwork successfully loaded'
        self.label_info.setText(text)
    else:
        print('Network successfully loaded')
    model.net_info['height'] = self.reso_det
    inp_dim_det = int(model.net_info['height'])
    assert inp_dim_det % 32 == 0
    assert inp_dim_det > 32

    # If a CUDA device is available, load the model onto it
    if CUDA:
        model.cuda()

    # Model in evaluation mode
    model.eval()

    # TRACKING PHASE SETUP
    inp_dim_track = int(self.reso_track)
    OPENCV_OBJECT_TRACKERS = {
        'csrt': cv2.TrackerCSRT_create,
        'kcf': cv2.TrackerKCF_create,
        'boosting': cv2.TrackerBoosting_create,
        'mil': cv2.TrackerMIL_create,
        'tld': cv2.TrackerTLD_create,
        'medianflow': cv2.TrackerMedianFlow_create,
        'mosse': cv2.TrackerMOSSE_create
    }

    # SOURCE INITIALIZATION
    if self.source == '0' or self.source == '1':
        self.cap = cv2.VideoCapture(int(self.source))
        mode = 'cam'
        self.window_name = 'Camera ' + self.source
    else:
        if self.label_info:  # via the GUI we get the full path
            self.cap = cv2.VideoCapture(self.source)
        else:  # via the terminal we only type the file name
            self.cap = cv2.VideoCapture('videos/{}'.format(self.source))
        mode = 'file'
        self.window_name = self.source
    assert self.cap.isOpened(), 'Cannot capture source'

    phase = 'det'
    initBBox = []
    cont = 0
    frames = 0
    cv2.namedWindow(self.window_name)
    cv2.setMouseCallback(self.window_name, click_det2track)

    while self.cap.isOpened():
        grab, frame = self.cap.read()
        start = time.time()
        if grab:
            # Detection phase
            if phase == 'det':
                if mode == 'cam':
                    img = prep_image_c(frame, inp_dim_det)
                elif mode == 'file':
                    img = prep_image_f(frame, inp_dim_det)
                im_dim = frame.shape[1], frame.shape[0]
                im_dim = torch.FloatTensor(im_dim).repeat(1, 2)
                if CUDA:
                    im_dim = im_dim.cuda()
                    img = img.cuda()

                # Initialize the list of detected BBoxes
                BBox = []
                output = model.forward(Variable(img), CUDA)
                output = write_results(output, self.confidence, num_classes, nms_conf=self.nms_thresh)
                if type(output) == int:
                    frames += 1
                    cv2.imshow(self.window_name, frame)
                    key = cv2.waitKey(1)
                    if key & 0xFF == ord('q'):
                        break
                    continue
                if mode == 'cam':
                    output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim_det))
                    im_dim = im_dim.repeat(output.size(0), 1) / inp_dim_det
                    output[:, 1:5] *= im_dim
                elif mode == 'file':
                    im_dim = im_dim.repeat(output.size(0), 1)
                    scaling_factor = torch.min(inp_dim_det / im_dim, 1)[0].view(-1, 1)
                    output[:, [1, 3]] -= (inp_dim_det - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
                    output[:, [2, 4]] -= (inp_dim_det - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
                    output[:, 1:5] /= scaling_factor
                    for i in range(output.shape[0]):
                        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim[i, 0])
                        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim[i, 1])
                list(map(lambda x: write(x, frame), output))
                cv2.imshow(self.window_name, frame)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                frames += 1
                if self.label_info:
                    self.label_info.setText(text + '\nDETECTION PHASE:' + '\n {0: .2f} fps'.format(float(1 / (time.time() - start))))

            # Tracking phase
            elif phase == 'track':
                ratio = frame.shape[0] / inp_dim_track
                img = imutils.resize(frame, height=inp_dim_track)
                if initBBox:
                    (success, box) = tracker.update(img)
                    if success:
                        cont = 0
                        (x, y, w, h) = [int(v) for v in box]
                        x, y, w, h = prep_rect(x, y, w, h, ratio)
                        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    else:
                        cont += 1
                        if self.label_info:
                            self.label_info.setText(text + '\nTRACKING PHASE' + '\nObject lost ({})'.format(cont))
                        else:
                            print('Object lost ', cont)
                else:
                    (x, y, w, h) = [int(v) for v in track_rect]
                    initBBox = (prep_rect(x, y, w, h, float(1 / ratio)))
                    tracker = OPENCV_OBJECT_TRACKERS[self.tracker_alg]()
                    tracker.init(img, initBBox)
                if cont > 100:
                    phase = 'det'
                    cont = 0
                    initBBox = []
                cv2.imshow(self.window_name, frame)
                key = cv2.waitKey(1)
                if key & 0xFF == ord('q'):
                    break
                frames += 1
                if self.label_info:
                    self.label_info.setText(text + '\nTRACKING PHASE:' + '\n {0: .2f} fps'.format(float(1 / (time.time() - start))))
            else:
                break
        else:
            break

    if not self.label_info:
        cv2.destroyWindow(self.window_name)
    self.cap.release()
    torch.cuda.empty_cache()
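# Added sketch (hedged): the detect-then-track handoff above, reduced to the bare
# OpenCV contrib tracker API (create -> init -> update). The helper name and its
# inputs are assumptions for illustration.
def track_until_lost(cap, init_box, tracker_name='kcf'):
    tracker = {'kcf': cv2.TrackerKCF_create, 'csrt': cv2.TrackerCSRT_create}[tracker_name]()
    ok, frame = cap.read()
    if not ok:
        return
    tracker.init(frame, init_box)             # init_box = (x, y, w, h) from the detector
    while True:
        ok, frame = cap.read()
        if not ok:
            return
        success, box = tracker.update(frame)  # success=False means the object was lost
        if not success:
            return                            # caller falls back to the detection phase
        yield box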
class ssp_rosbag:
    def __init__(self):
        rospy.init_node('eval_baseline', anonymous=True)
        ##############################################################################
        self.b_first_rb_loop = True
        self.first_time = None
        self.ns = rospy.get_param('~ns')  # robot namespace
        modelcfg = rospy.get_param('~modelcfg')
        weightfile = rospy.get_param('~weightfile')
        datacfg = rospy.get_param('~datacfg')
        rb_name = rospy.get_param('~rb_name')
        self.ado_names = [rospy.get_param('~tracked_name')]

        # Parse configuration files
        data_options = read_data_cfg(datacfg)
        valid_images = data_options['valid']
        if 'mesh' in data_options:
            meshname = data_options['mesh']
        else:
            meshname = None
        assert ('box_length' in data_options)
        box_length = float(data_options['box_length'])
        box_width = float(data_options['box_width'])
        box_height = float(data_options['box_height'])
        self.ego_name = data_options['name']
        gpus = data_options['gpus']
        self.im_width = int(data_options['width'])
        self.im_height = int(data_options['height'])

        # Parameters
        seed = int(time.time())
        os.environ['CUDA_VISIBLE_DEVICES'] = gpus
        torch.cuda.manual_seed(seed)
        self.num_classes = 1

        # Read object model information, get 3D bounding box corners
        if meshname is None:
            # vertices must be 4 x N for compute_projections to work later
            vertices = np.array([[ box_length / 2,  box_width / 2,  box_height / 2, 1.],
                                 [ box_length / 2,  box_width / 2, -box_height / 2, 1.],
                                 [ box_length / 2, -box_width / 2, -box_height / 2, 1.],
                                 [ box_length / 2, -box_width / 2,  box_height / 2, 1.],
                                 [-box_length / 2, -box_width / 2,  box_height / 2, 1.],
                                 [-box_length / 2, -box_width / 2, -box_height / 2, 1.],
                                 [-box_length / 2,  box_width / 2, -box_height / 2, 1.],
                                 [-box_length / 2,  box_width / 2,  box_height / 2, 1.]]).T
            self.diam = float(data_options['diam'])
        else:
            mesh = MeshPly(meshname)
            vertices = np.c_[np.array(mesh.vertices), np.ones((len(mesh.vertices), 1))].transpose()
            try:
                self.diam = float(data_options['diam'])
            except KeyError:
                self.diam = calc_pts_diameter(np.array(mesh.vertices))
        self.vertices = vertices
        self.corners3D = get_3D_corners(vertices)

        # Specify model, load pretrained weights, pass to GPU and set the module in evaluation mode
        torch.set_grad_enabled(False)  # since we are just doing forward passes
        self.model = Darknet(modelcfg)
        self.model.load_weights(weightfile)
        self.model.cuda()
        self.model.eval()
        self.shape = (self.model.test_width, self.model.test_height)
        num_keypoints = self.model.num_keypoints
        num_labels = num_keypoints * 2 + 3  # +2 for width, height, +1 for class label

        ##############################################################################
        self.result_list = []  # save the results as they are processed
        self.itr = 0
        self.time_prev = -1
        self.bridge = CvBridge()
        self.pose_buffer_len = 20
        self.ado_pose_msg_buf = []
        self.ego_pose_msg_buf = []
        self.ego_pose_est_msg_buf = []
        self.ego_pose_est_time_msg_buf = []
        self.ado_pose_time_msg_buf = []
        self.ego_pose_time_msg_buf = []

        # Create camera (camera extrinsics from quad7.param in the msl_raptor project):
        self.tf_cam_ego = np.eye(4)
        self.tf_cam_ego[0:3, 3] = np.asarray([0.01504337, -0.06380886, -0.13854437])
        self.tf_cam_ego[0:3, 0:3] = np.reshape([-6.82621737e-04, -9.99890488e-01, -1.47832690e-02,
                                                 3.50423970e-02,  1.47502748e-02, -9.99276969e-01,
                                                 9.99385593e-01, -1.20016936e-03,  3.50284906e-02], (3, 3))
        # Correct rotation with a manual calibration
        Angle_x = 8. / 180.
        Angle_y = 8. / 180.
        Angle_z = 0. / 180.
        R_deltax = np.array([[1., 0., 0.],
                             [0., np.cos(Angle_x), -np.sin(Angle_x)],
                             [0., np.sin(Angle_x),  np.cos(Angle_x)]])
        R_deltay = np.array([[ np.cos(Angle_y), 0., np.sin(Angle_y)],
                             [0., 1., 0],
                             [-np.sin(Angle_y), 0., np.cos(Angle_y)]])
        R_deltaz = np.array([[np.cos(Angle_z), -np.sin(Angle_z), 0.],
                             [np.sin(Angle_z),  np.cos(Angle_z), 0.],
                             [0., 0., 1.]])
        R_delta = np.dot(R_deltax, np.dot(R_deltay, R_deltaz))
        self.tf_cam_ego[0:3, 0:3] = np.matmul(R_delta, self.tf_cam_ego[0:3, 0:3])

        #########################################################################################
        camera_info = rospy.wait_for_message(self.ns + '/camera/camera_info', CameraInfo, 30)
        self.K = np.reshape(camera_info.K, (3, 3))
        self.dist_coefs = np.reshape(camera_info.D, (5,))
        self.new_camera_matrix, _ = cv2.getOptimalNewCameraMatrix(self.K, self.dist_coefs,
                                                                  (camera_info.width, camera_info.height), 0,
                                                                  (camera_info.width, camera_info.height))

        self.log_out_dir = '/mounted_folder/ssp_logs'
        # ssp_log_name = self.log_out_dir + "/log_" + rb_name.split("_")[-1] + "_SSP.log"
        # param_log_name = self.log_out_dir + "/log_" + rb_name.split("_")[-1] + "_PARAM.log"
        # self.logger = raptor_logger(source="SSP", mode="write", ssp_fn=ssp_log_name, param_fn=param_log_name)
        base_path = self.log_out_dir + "/log_" + rb_name.split("_")[-1]
        self.rb_name = rb_name
        self.bb_3d_dict_all = {self.ado_names[0]: [box_length, box_width, box_height, self.diam]}
        self.logger = RaptorLogger(mode="write", names=self.ado_names, base_path=base_path, b_ssp=True)

        # Write params to log file ###############################################################
        param_data = {}
        if self.new_camera_matrix is not None:
            param_data['K'] = np.array([self.new_camera_matrix[0, 0], self.new_camera_matrix[1, 1],
                                        self.new_camera_matrix[0, 2], self.new_camera_matrix[1, 2]])
        else:
            param_data['K'] = np.array([self.K[0, 0], self.K[1, 1], self.K[0, 2], self.K[1, 2]])
        param_data['3d_bb_dims'] = np.array([box_length, box_width, box_height, self.diam])
        param_data['tf_cam_ego'] = np.reshape(copy(self.tf_cam_ego), (16,))
        # self.logger.write_data_to_log(log_data, mode='prms')
        self.logger.write_params(param_data)

        ###########################################################################################
        self.t0 = None
        self.time_arr = []
        # self.raptor_metrics = pose_metric_tracker(px_thresh=5, prct_thresh=10, trans_thresh=0.05, ang_thresh=5, name=self.name, diam=self.diam)
        self.raptor_metrics = PoseMetricTracker(px_thresh=5, prct_thresh=10, trans_thresh=0.05,
                                                ang_thresh=5, names=self.ado_names,
                                                bb_3d_dict=self.bb_3d_dict_all)

        rospy.Subscriber(self.ns + '/mavros/vision_pose/pose', PoseStamped, self.ego_pose_gt_cb, queue_size=10)  # optitrack pose
        rospy.Subscriber(self.ns + '/mavros/local_position/pose', PoseStamped, self.ego_pose_est_cb, queue_size=10)  # onboard ekf pose est
        rospy.Subscriber('/quad4' + '/mavros/vision_pose/pose', PoseStamped, self.ado_pose_gt_cb, queue_size=10)  # optitrack pose
        rospy.Subscriber(self.ns + '/camera/image_raw', ROS_IMAGE, self.image_cb, queue_size=1, buff_size=2**21)

    def ado_pose_gt_cb(self, msg):
        # if self.first_time is not None and self.first_time >= msg.header.stamp.to_sec():
        #     return
        self.ado_pose_gt_rosmsg = msg.pose
        pose_tm = msg.header.stamp.to_sec()
        self.ado_pose_msg_buf.append(msg)
        self.ado_pose_time_msg_buf.append(pose_tm)

    def ego_pose_gt_cb(self, msg):
        # if self.first_time is not None and self.first_time >= msg.header.stamp.to_sec():
        #     return
        self.ego_pose_gt_rosmsg = msg.pose
        pose_tm = msg.header.stamp.to_sec()
        self.ego_pose_msg_buf.append(msg)
        self.ego_pose_time_msg_buf.append(pose_tm)

    def ego_pose_est_cb(self, msg):
        # if self.first_time is not None and self.first_time >= msg.header.stamp.to_sec():
        #     return
        self.ego_pose_est_rosmsg = msg.pose
        pose_tm = msg.header.stamp.to_sec()
        self.ego_pose_est_msg_buf.append(msg)
        self.ego_pose_est_time_msg_buf.append(pose_tm)

    def image_cb(self, msg):
        """
        Maintains a buffer of images & times. The first element is the earliest.
        Stored in a way to interface with a quick method for finding the closest match by time.
        """
        tic = time.time()
        img_tm = msg.header.stamp.to_sec()
        if len(self.result_list) > 0 and img_tm <= self.result_list[-1][5]:  # was `program.result_list`, a stray global
            return
        if self.t0 is None:
            self.t0 = img_tm
        img_cv2 = self.bridge.imgmsg_to_cv2(msg, desired_encoding="passthrough")
        img_cv2 = cv2.undistort(img_cv2, self.K, self.dist_coefs, None, self.new_camera_matrix)
        img_pil = PIL.Image.fromarray(img_cv2).resize(self.shape)
        # volatile=True is deprecated; the forward pass below is already wrapped in no_grad
        img = Variable(transforms.ToTensor()(img_pil).resize(1, 3, img_pil.size[0], img_pil.size[1]).cuda())

        with torch.no_grad():
            output = self.model(img).data  # forward pass

        # Using confidence threshold, eliminate low-confidence predictions
        box_pr = get_region_boxes(output, self.num_classes, self.model.num_keypoints)

        # Denormalize the corner predictions
        corners2D_pr = np.array(np.reshape(box_pr[:18], [-1, 2]), dtype='float32')
        corners2D_pr[:, 0] = corners2D_pr[:, 0] * self.im_width
        corners2D_pr[:, 1] = corners2D_pr[:, 1] * self.im_height

        # Compute [R|t] by PnP
        R_pr, t_pr = pnp(np.array(np.transpose(np.concatenate((np.zeros((3, 1)), self.corners3D[:3, :]), axis=1)), dtype='float32'),
                         corners2D_pr,
                         np.array(self.K, dtype='float32'))
        tf_cam_ado_est = rotm_and_t_to_tf(R_pr, t_pr)

        if (len(self.ado_pose_time_msg_buf) == 0 or len(self.ego_pose_time_msg_buf) == 0
                or len(self.ego_pose_est_time_msg_buf) == 0):
            print("still waiting for other rosbag messages")
            return

        ado_msg, _ = find_closest_by_time(img_tm, self.ado_pose_time_msg_buf, message_list=self.ado_pose_msg_buf)
        ego_gt_msg, _ = find_closest_by_time(img_tm, self.ego_pose_time_msg_buf, message_list=self.ego_pose_msg_buf)
        ego_est_msg, _ = find_closest_by_time(img_tm, self.ego_pose_est_time_msg_buf, message_list=self.ego_pose_est_msg_buf)
        tf_w_ado_gt = pose_to_tf(ado_msg.pose)
        tf_w_ego_gt = pose_to_tf(ego_gt_msg.pose)
        tf_w_ego_est = pose_to_tf(ego_est_msg.pose)

        tf_w_cam_gt = tf_w_ego_gt @ invert_tf(self.tf_cam_ego)
        tf_w_ado_est = tf_w_cam_gt @ tf_cam_ado_est
        quat_pr = rotm_to_quat(tf_w_ado_est[0:3, 0:3])
        state_pr = np.concatenate((tf_w_ado_est[0:3, 3], quat_pr))  # shape = (7,)

        b_remove_yaw = True
        if b_remove_yaw:
            quat_pr_with_yaw = quat_pr  # quat with yaw
            quat_gt = rotm_to_quat(tf_w_ado_gt[0:3, 0:3])  # quat with yaw
            quat_pr = remove_yaw(quat_pr)  # remove yaw
            quat_gt = remove_yaw(quat_gt)  # remove yaw
            tf_w_ado_est[0:3, 0:3] = quat_to_rotm(quat_pr)  # update tf
            tf_w_ado_gt[0:3, 0:3] = quat_to_rotm(quat_gt)   # update tf

        img_to_save = copy(np.array(img.cpu()))
        self.result_list.append((state_pr, copy(tf_w_ado_est), copy(tf_w_ado_gt), copy(corners2D_pr),
                                 img_to_save, img_tm, time.time(), copy(R_pr), copy(t_pr),
                                 invert_tf(tf_w_cam_gt), copy(tf_w_ego_gt), copy(tf_w_ego_est)))
        del img
        self.itr += 1
        self.time_arr.append(time.time() - tic)
        if self.itr > 0 and self.itr % 50 == 0:
            print("Finished processing image #{}, mean time: {}".format(self.itr, np.mean(self.time_arr)))
            torch.cuda.empty_cache()

    def post_process_data(self):
        print("Post-processing data now ({} itrs)".format(len(self.result_list)))
        b_save_bb_imgs = True
        name = self.ado_names[0]
        bb_im_path = os.path.dirname(os.path.relpath(__file__)) + '/output_imgs'  # PATH MUST BE RELATIVE
        create_dir_if_missing(bb_im_path)
        N = len(self.result_list)

        # To save
        trans_dist = 0.0
        angle_dist = 0.0
        pixel_dist = 0.0
        testing_samples = 0.0
        testing_error_trans = 0.0
        testing_error_angle = 0.0
        testing_error_pixel = 0.0
        errs_2d = []
        errs_3d = []
        errs_trans = []
        errs_angle = []
        errs_corner2D = []
        preds_trans = []
        preds_rot = []
        preds_corners2D = []
        gts_trans = []
        gts_rot = []
        gts_corners2D = []
        corners2D_gt = None
        log_data = {}

        for i, res in enumerate(self.result_list):
            # extract / compute values for comparison
            (state_pr, tf_w_ado_est, tf_w_ado_gt, corners2D_pr, img, img_tm, sys_time,
             R_cam_ado_pr, t_cam_ado_pr, tf_cam_w_gt, tf_w_ego_gt, tf_w_ego_est) = res
            tf_cam_ado_gt = tf_cam_w_gt @ tf_w_ado_gt
            R_cam_ado_gt = tf_cam_ado_gt[0:3, 0:3]
            t_cam_ado_gt = tf_cam_ado_gt[0:3, 3].reshape(t_cam_ado_pr.shape)

            if img_tm - self.t0 > 34 and self.rb_name == "rosbag_for_post_process_2019-12-18-02-10-28":
                print("STOPPING EARLY")
                break  # quad crashes

            Rt_cam_ado_gt = np.concatenate((R_cam_ado_gt, t_cam_ado_gt), axis=1)
            Rt_cam_ado_pr = np.concatenate((R_cam_ado_pr, t_cam_ado_pr), axis=1)
            corners2D_gt = compute_projection(np.hstack((np.reshape([0, 0, 0, 1], (4, 1)), self.vertices)),
                                              Rt_cam_ado_gt, self.new_camera_matrix).T
            if b_save_bb_imgs:
                draw_2d_proj_of_3D_bounding_box(img, corners2D_pr, corners2D_gt=corners2D_gt, epoch=None,
                                                batch_idx=None, detect_num=i, im_save_dir=bb_im_path)
            if self.raptor_metrics is not None:
                # self.raptor_metrics.update_all_metrics(vertices=self.vertices, R_gt=R_gt, t_gt=t_gt, R_pr=R_pr, t_pr=t_pr, K=self.new_camera_matrix)
                self.raptor_metrics.update_all_metrics(name=name, vertices=self.vertices,
                                                       tf_w_cam=invert_tf(tf_cam_w_gt),
                                                       R_cam_ado_gt=R_cam_ado_gt, t_cam_ado_gt=t_cam_ado_gt,
                                                       R_cam_ado_pr=R_cam_ado_pr, t_cam_ado_pr=t_cam_ado_pr,
                                                       K=self.new_camera_matrix)

            # Write data to log file #############################
            log_data['time'] = img_tm - self.t0
            log_data['state_est'] = tf_to_state_vec(tf_w_ado_est)
            log_data['state_gt'] = tf_to_state_vec(tf_w_ado_gt)
            log_data['ego_state_est'] = tf_to_state_vec(tf_w_ego_est)
            log_data['ego_state_gt'] = tf_to_state_vec(tf_w_ego_gt)
            corners3D_pr = (tf_w_ado_est @ self.vertices)[0:3, :]
            corners3D_gt = (tf_w_ado_gt @ self.vertices)[0:3, :]
            log_data['corners_3d_est'] = np.reshape(corners3D_pr, (corners3D_pr.size,))
            log_data['corners_3d_gt'] = np.reshape(corners3D_gt, (corners3D_gt.size,))
            log_data['proj_corners_est'] = np.reshape(self.raptor_metrics.proj_2d_pr[name].T,
                                                      (self.raptor_metrics.proj_2d_pr[name].size,))
            log_data['proj_corners_gt'] = np.reshape(self.raptor_metrics.proj_2d_gt[name].T,
                                                     (self.raptor_metrics.proj_2d_gt[name].size,))
            log_data['x_err'] = tf_w_ado_est[0, 3] - tf_w_ado_gt[0, 3]
            log_data['y_err'] = tf_w_ado_est[1, 3] - tf_w_ado_gt[1, 3]
            log_data['z_err'] = tf_w_ado_est[2, 3] - tf_w_ado_gt[2, 3]
            log_data['ang_err'] = calcAngularDistance(tf_w_ado_est[0:3, 0:3], tf_w_ado_gt[0:3, 0:3])
            log_data['pix_err'] = np.mean(la.norm(self.raptor_metrics.proj_2d_pr[name] - self.raptor_metrics.proj_2d_gt[name], axis=0))
            log_data['add_err'] = np.mean(la.norm(corners3D_pr - corners3D_gt, axis=0))
            log_data['measurement_dist'] = la.norm(tf_w_ego_gt[0:3, 3] - tf_w_ado_gt[0:3, 3])
            self.logger.write_data_to_log(log_data, name, mode='ssp')
            self.logger.write_data_to_log(log_data, name, mode='ssperr')

            if (np.any(np.isnan(corners3D_pr)) or np.any(np.isnan(corners3D_gt))
                    or np.any(np.isnan(self.raptor_metrics.proj_2d_pr[name]))):  # or la.norm(tf_cam_ado_gt[0:3, 3] - t_cam_ado_pr) > 10:
                print("ISSUE DETECTED!!")
                pdb.set_trace()

        ######################################################
        if self.raptor_metrics is not None:
            self.raptor_metrics.calc_final_metrics()
            self.raptor_metrics.print_final_metrics()
        self.logger.close_files()
        print("done with post process!")

    def truths_length(self, truths, max_num_gt=50):
        for i in range(max_num_gt):
            if truths[i][1] == 0:
                return i

    def run(self):
        rate = rospy.Rate(100)
        b_flag = True
        while not rospy.is_shutdown():
            try:
                rate.sleep()
            except:  # this will happen if the clock goes backwards (i.e. the rosbag loops)
                self.post_process_data()
                return
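# Added entry-point sketch (assumption; the original launch wiring is not in this
# excerpt):
if __name__ == '__main__':
    try:
        program = ssp_rosbag()
        program.run()   # spins until the rosbag clock loops, then post-processes
    except rospy.ROSInterruptException:
        pass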
test_loader = torch.utils.data.DataLoader(InriaDataset(img_dir, lab_dir, shuffle=True),
                                          batch_size=3, shuffle=True)

cfgfile = "cfg/yolov2.cfg"
weightfile = "weights/yolov2.weights"
printfile = "non_printability/30values.txt"
patch_size = 400

darknet_model = Darknet(cfgfile)
darknet_model.load_weights(weightfile)
darknet_model = darknet_model.cuda()
patch_applier = PatchApplier().cuda()
patch_transformer = PatchTransformer().cuda()
prob_extractor = MaxProbExtractor(0, 80).cuda()
nms_calculator = NMSCalculator(printfile, patch_size)
total_variation = TotalVariation()

'''
img = Image.open('data/horse.jpg').convert('RGB')
img = img.resize((darknet_model.width, darknet_model.height))
width = img.width
height = img.height
img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes()))
img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous()
img = img.view(1, 3, height, width)
img = img.float().div(255.0)
img = torch.autograd.Variable(img)
'''
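# Added sketch (hedged): the adversarial-patch optimization step these modules
# support, in the style of adversarial-yolo. The Adam setup, the loss weighting,
# and the patch_transformer call signature are assumptions, not from this file.
adv_patch = torch.rand(3, patch_size, patch_size).cuda().requires_grad_(True)
optimizer = torch.optim.Adam([adv_patch], lr=0.03)
for img_batch, lab_batch in test_loader:
    img_batch, lab_batch = img_batch.cuda(), lab_batch.cuda()
    adv_batch = patch_transformer(adv_patch, lab_batch, darknet_model.height)  # warp the patch onto each labeled box
    patched = patch_applier(img_batch, adv_batch)                              # composite the patch into the images
    output = darknet_model(patched)
    det_loss = prob_extractor(output).mean()   # max objectness for the target class, to be suppressed
    tv_loss = total_variation(adv_patch)       # smoothness prior that helps printability
    loss = det_loss + 2.5 * tv_loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    adv_patch.data.clamp_(0, 1)                # keep the patch a valid image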
#Set up the neural network
print("Loading network.....")
model = Darknet(args.cfgfile)
model.load_weights(args.weightsfile)
print("Network successfully loaded")

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32

#If there's a GPU available, put the model on GPU
if CUDA:
    model.cuda()

#Set the model in evaluation mode
model.eval()

read_dir = time.time()
#Detection phase
try:
    imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images)]
except NotADirectoryError:
    imlist = []
    imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
    print("No file or directory with the name {}".format(images))
    exit()