def build_feature_db(net, images_info, imdbs, out_obj):
    paths1 = [os.path.basename(imdbs[0].image_path_at(i)) for i in range(imdbs[0].num_images)]
    paths2 = [os.path.basename(imdbs[1].image_path_at(i)) for i in range(imdbs[1].num_images)]
    paths = [paths1, paths2]
    _t = Timer()
    for i in range(len(images_info['image_name'])):
        print 'caching features for image {:d}/{:d}'.format(i + 1, len(images_info['image_name']))
        _t.tic()
        # Locate the image in whichever imdb contains it
        if images_info['image_name'][i] in paths[0]:
            im = cv2.imread(imdbs[0].image_path_at(paths[0].index(images_info['image_name'][i])))
        elif images_info['image_name'][i] in paths[1]:
            im = cv2.imread(imdbs[1].image_path_at(paths[1].index(images_info['image_name'][i])))
        print 'Done running NN'
        # Ground-truth features
        if 'gt' in images_info.keys():
            scores, boxes = im_detect(net, im, images_info['gt'][i])
            # Copy the blob: net.blobs[...].data would otherwise be overwritten
            # by the next forward pass
            feat_pos = net.blobs['fc7'].data.copy()
        # RoI features
        scores, boxes = im_detect(net, im, images_info['roi'][i])
        feat_neg = net.blobs['fc7'].data.copy()
        print 'Done extracting features from fc7'
        # Build the per-image db entry
        if 'gt' in images_info.keys():
            im_reg = {'name': images_info['image_name'][i],
                      'roi_boxes': images_info['roi'][i],
                      'roi_features': feat_neg,
                      'gt_boxes': images_info['gt'][i],
                      'gt_features': feat_pos}
        else:
            im_reg = {'name': images_info['image_name'][i],
                      'roi_boxes': images_info['roi'][i],
                      'roi_features': feat_neg}
        pickle.dump(im_reg, out_obj)
        _t.toc()
        print 'Done in {}'.format(_t.average_time)
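# Note: all of the snippets here drive a small tic()/toc() Timer utility (the
# interface used by py-faster-rcnn-style tools). The sketch below shows the
# *assumed* interface for reference only; a few later snippets use variants
# with start()/stop(), reset()/lap(), or remain(), which this sketch omits.
import time

class Timer(object):
    """Minimal wall-clock timer matching the tic/toc usage in these snippets."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.diff = 0.
        self.average_time = 0.

    def tic(self):
        # Start (or restart) timing
        self.start_time = time.time()

    def toc(self, average=True):
        # Stop timing, accumulate totals, and return the average or last interval
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff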
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def get_pos_examples(self):
    counts = self._get_pos_counts()
    for i in xrange(len(counts)):
        self.trainers[i].alloc_pos(counts[i])

    _t = Timer()
    roidb = self.imdb.roidb
    num_images = len(roidb)
    for i in xrange(num_images):
        im = cv2.imread(self.imdb.image_path_at(i))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        gt_inds = np.where(roidb[i]['gt_classes'] > 0)[0]
        gt_boxes = roidb[i]['boxes'][gt_inds]
        _t.tic()
        scores, boxes = im_detect(self.net, im, gt_boxes)
        _t.toc()
        feat = self.net.blobs[self.layer].data
        for j in xrange(1, self.imdb.num_classes):
            cls_inds = np.where(roidb[i]['gt_classes'][gt_inds] == j)[0]
            if len(cls_inds) > 0:
                cls_feat = feat[cls_inds, :]
                self.trainers[j].append_pos(cls_feat)
        print('get_pos_examples: {:d}/{:d} {:.3f}s'
              .format(i + 1, len(roidb), _t.average_time))
def demo(sess, net, im_file, result_dir, viz=False, oriented=False):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = helper.read_rgb_img(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes, resized_im_shape, im_scale = im_detect(sess, net, im)
    timer.toc()

    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    img_name = im_file.split('/')[-1]
    draw_rpn_boxes(im, img_name, boxes, scores[:, np.newaxis], im_scale, True, result_dir)
    draw_rpn_boxes(im, img_name, boxes, scores[:, np.newaxis], im_scale, False, result_dir)

    # Run TextDetector to merge small boxes.
    # The input to line_detector must be the boxes on the *scaled* image: if the
    # boxes were mapped back to the original image first, that image might be so
    # large that each anchor's width becomes huge and MAX_HORIZONTAL_GAP is
    # effectively too small.
    line_detector = TextDetector(oriented)
    # text_lines point order: left-top, right-top, left-bottom, right-bottom
    text_lines = line_detector.detect(boxes, scores[:, np.newaxis], resized_im_shape)
    print("Image %s, detect %d text lines in %.3fs" % (im_file, len(text_lines), timer.diff))

    if len(text_lines) != 0:
        text_lines = recover_scale(text_lines, im_scale)
        save_result(im, img_name, text_lines, result_dir)

    # Visualize detections
    if viz:
        vis_detections(im, CLASSES[1], text_lines)
def load_model_h5(weight_file):
    darknet = DarkNet()
    model = SimpleNet(darknet)
    model.load_weights(weight_file)
    sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

    timer = Timer()
    timer.tic()
    model.compile(optimizer=sgd, loss='categorical_crossentropy')
    timer.toc()
    print 'Total compile time is {:.3f}s'.format(timer.total_time)

    # Print layer shapes and weight statistics
    for i in xrange(len(model.layers)):
        print model.layers[i]
        print model.layers[i].input_shape, model.layers[i].output_shape
        weights = model.layers[i].get_weights()
        if weights is not None and len(weights) > 0:
            print weights[0].shape, weights[0].max(), weights[0].min()
    return model
def demo(net, image_name, classes):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo',
                            image_name + '_boxes.mat')
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, CONF_THRESH)
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def train_with_hard_negatives(self):
    _t = Timer()
    roidb = self.imdb.roidb
    num_images = len(roidb)
    for i in xrange(num_images):
        im = cv2.imread(self.imdb.image_path_at(i))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        _t.tic()
        scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
        _t.toc()
        feat = self.net.blobs[self.layer].data
        for j in xrange(1, self.imdb.num_classes):
            hard_inds = \
                np.where((scores[:, j] > self.hard_thresh) &
                         (roidb[i]['gt_overlaps'][:, j].toarray().ravel() <
                          self.neg_iou_thresh))[0]
            if len(hard_inds) > 0:
                hard_feat = feat[hard_inds, :].copy()
                new_w_b = \
                    self.trainers[j].append_neg_and_retrain(feat=hard_feat)
                if new_w_b is not None:
                    self.update_net(j, new_w_b[0], new_w_b[1])
        print(('train_with_hard_negatives: '
               '{:d}/{:d} {:.3f}s').format(i + 1, len(roidb),
                                           _t.average_time))
def detection_to_file(target_path, v_num, file_list, detect, total_frames,
                      current_frames, max_proposal=100, thresh=0):
    timer = Timer()
    w = open("{}/{}.txt".format(target_path, v_num), "w")
    for file_index, file_path in enumerate(file_list):
        file_name = file_path.split("/")[-1]
        set_num, v_num, frame_num = file_name[:-4].split("_")

        timer.tic()
        dets = detect(file_path)
        timer.toc()
        print('Detection Time:{:.3f}s {}/{} images'.format(
            timer.average_time, current_frames + file_index + 1, total_frames))

        inds = np.where(dets[:, -1] >= thresh)[0]
        for i in inds:
            bbox = dets[i, :4]
            score = dets[i, -1]
            x = bbox[0]
            y = bbox[1]
            width = bbox[2] - x
            length = bbox[3] - y
            w.write("{},{},{},{},{},{}\n".format(
                frame_num, x, y, width, length, score * 100))
    w.close()
    print("Evaluation file {} has been written".format(w.name))
    return file_index + 1
def Detect(net, image_path):
    """Detect object classes in an image assuming the whole image is an object."""
    # Load the image
    im = cv2.imread(image_path)
    h, w, c = im.shape

    # TODO: Run selective search first

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, np.array([[0, 0, w, h]]))
    timer.toc()
    scores = scores[0]

    # Get the top 6 predictions
    pred_classes = [CLASSES[idx] for idx in ((-scores).argsort()[:6]).tolist()]
    conf = [(-1) * prob for prob in np.sort(-scores)[:6].tolist()]

    img_blob = {}
    img_blob['image_path'] = image_path
    img_blob['pred'] = {'text': pred_classes, 'conf': conf}
    img_blob['rcnn_time'] = timer.total_time
    return img_blob
def demo(sess, net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(sess, net, im)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Write detections for each class to a text file
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    output_path = os.path.join(cfg.DATA_DIR, 'test_output')
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        name = image_name.split('.')[0] + '.txt'
        with open(os.path.join(output_path, name), 'a') as f:
            for item in dets:
                f.write(str(item[0]) + '\t' + str(item[1]) + '\t' + str(item[2]) +
                        '\t' + str(item[3]) + '\t' + str(item[4]) + '\n')
def demo(net, im_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image as grayscale
    gim = cv2.imread(im_file, flags=cv2.CV_LOAD_IMAGE_GRAYSCALE)
    # Convert to a 3-channel image by repeating the gray channel
    im = cv2.cvtColor(gim, cv2.COLOR_GRAY2BGR)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def test_net_on_dataset(
        args, dataset_name, proposal_file, output_dir,
        multi_gpu=False, gpu_id=0):
    """Run inference on a dataset."""
    dataset = JsonDataset(dataset_name)
    test_timer = Timer()
    test_timer.tic()
    if multi_gpu:
        num_images = len(dataset.get_roidb())
        all_boxes, all_segms, all_keyps = multi_gpu_test_net_on_dataset(
            args, dataset_name, proposal_file, num_images, output_dir
        )
    else:
        all_boxes, all_segms, all_keyps = test_net(
            args, dataset_name, proposal_file, output_dir, gpu_id=gpu_id
        )
    test_timer.toc()
    logger.info('Total inference time: {:.3f}s'.format(test_timer.average_time))
    results = task_evaluation.evaluate_all(
        dataset, all_boxes, all_segms, all_keyps, output_dir
    )
    return results
def _get_feature_scale(self, num_images=100):
    TARGET_NORM = 20.0  # Magic value from traditional R-CNN
    _t = Timer()
    roidb = self.imdb.roidb
    total_norm = 0.0
    count = 0.0
    inds = npr.choice(
        range(self.imdb.num_images), size=num_images, replace=False
    )
    for i_, i in enumerate(inds):
        im = cv2.imread(self.imdb.image_path_at(i))
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        _t.tic()
        scores, boxes = im_detect(self.net, im, roidb[i]['boxes'])
        _t.toc()
        feat = self.net.blobs[self.layer].data
        total_norm += np.sqrt((feat ** 2).sum(axis=1)).sum()
        count += feat.shape[0]
        print('{}/{}: avg feature norm: {:.3f}'.format(
            i_ + 1, num_images, total_norm / count))
    return TARGET_NORM * 1.0 / (total_norm / count)
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class and collect them for the JSON output
    CONF_THRESH = 0.7
    NMS_THRESH = 0.3
    json_data_list = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        bbox, score = vis_detections(im, cls, dets, thresh=CONF_THRESH)
        if score:
            json_data_list.append({"class": cls, 'bbox': bbox, 'score': score})
    if len(json_data_list):
        with open("result/" + image_name + ".json", "w") as f:
            json.dump(json_data_list, f, indent=2)
def demo(net, image_name):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = os.path.join(cfg.ROOT_DIR, 'data', 'demo', image_name)
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        order = cls_scores.argsort()[::-1]
        sorted_dets = dets[order, :]
        keep = nms(dets, NMS_THRESH)
        with open('/home/xyy/Desktop/doing/Object Detection/py-faster-rcnn/test_python.txt', 'w') as f:
            dets = dets[keep, :]
            for i in dets:
                for j in i:
                    f.write(str(j) + ' ')
                f.write('\n')
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def demo(net, im, return_boxes):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    classes = {}
    for cls_ind, cls in enumerate(CLASSES[1:]):
        try:
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes,
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            bboxes = vis_detections(im, cls, dets, return_boxes,
                                    thresh=CONF_THRESH)
            classes[cls] = bboxes
        except Exception as e:
            continue
    if not return_boxes:
        cv2.imshow("image", im)
    return classes
def imdb_proposals(net, imdb):
    """Generate RPN proposals on all images in an imdb."""
    _t = Timer()
    imdb_boxes = [[] for _ in xrange(imdb.num_images)]
    for i in xrange(imdb.num_images):
        im = None
        if cfg.TRAIN.FORMAT == 'pickle':
            with open(imdb.image_path_at(i), 'rb') as f:
                im = cPickle.load(f)
        else:
            im = cv2.imread(imdb.image_path_at(i))
        _t.tic()
        imdb_boxes[i], scores = im_proposals(net, im)
        _t.toc()
        print 'im_proposals: {:d}/{:d} {:.3f}s' \
            .format(i + 1, imdb.num_images, _t.average_time)
        if 0:
            dets = np.hstack((imdb_boxes[i], scores))
            _vis_proposals(im, dets[:3, :], thresh=0.9)
            plt.show()
    return imdb_boxes
def detect_person(net, im, cls_ind=1, cls='person', CONF_THRESH=0.8):
    """Detect object classes in an image using pre-computed object proposals."""
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Keep only detections of the requested class
    NMS_THRESH = 0.3
    cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = nms(dets, NMS_THRESH)
    # Filter by confidence threshold as well
    keep = [ind for ind in keep if cls_scores[ind] > CONF_THRESH]
    if len(keep) > 1:
        sizes = np.zeros((len(keep),))
        for ind, curr_ind in enumerate(keep):
            bbox = dets[curr_ind, :4]
            sizes[ind] = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
        # Retain only the biggest bounding box
        keep = keep[np.argmax(sizes)]
    dets = dets[keep, :]
    return (dets.reshape(1, -1), cls_scores[keep])
def demo(net, imagePathName, scoreThreshold):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im = cv2.imread(imagePathName)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    debug('Object detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    # Visualize detections for each class
    path, imageFilename = os.path.split(imagePathName)
    catDir = os.path.split(path)[-1]
    imageName = catDir + '/' + imageFilename
    for i, cls in enumerate(CLASSES[1:]):
        i += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * i:4 * (i + 1)]
        cls_scores = scores[:, i]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESHOLD)
        dets = dets[keep, :]
        vis_detections(im, cls, imageName, dets, scoreThreshold)
def train_model(self, max_iters):
    """Network training loop."""
    last_snapshot_iter = -1
    train_result = {}
    timer = Timer()
    while self.solver.iter < max_iters:
        # Make one SGD update
        timer.tic()
        self.solver.step(1)
        timer.toc()

        # Accumulate accuracy (fg/bg) statistics
        tmp_result = self.check_error()
        train_result = self.expandTrainResult(train_result, tmp_result)

        if self.solver.iter % (100 * self.solver_param.display) == 0:
            self.show_status(self.solver.iter, train_result)
            train_result = {}
            print 'speed: {:.3f}s / iter'.format(timer.average_time)

        if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = self.solver.iter
            self.snapshot()

    if last_snapshot_iter != self.solver.iter:
        self.snapshot()
def detect_objects(imgpath):
    """Detect object classes in an image using pre-computed object proposals."""
    print("in detect object")
    # Load the demo image
    im_file = os.path.join(imgpath)
    im = cv2.imread(im_file)
    print("read image")

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    print("im_detect")
    scores, boxes = im_detect(app.config['net'], im)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Collect detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    results = dict()
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        results[cls] = detect_positions(im, cls, dets, thresh=CONF_THRESH)
    return results
def train_SVM(setting, y):
    print "train SVM"
    # SVM options (unused defaults kept for reference):
    # svm_kernel = 'rbf'; svm_C = 1.0; svm_loss = 'squared_hinge'
    # svm_penalty = 'l2'; svm_multi_class = 'ovr'; svm_random_state = 0
    filePath = os.path.join(setting['DST_MODEL_DIR'], "svm_trained.pkl")
    try:
        clf = joblib.load(filePath)
        print "using trained model"
    except:
        print "building svm model"
        X = loadDesc(setting)
        X = X.astype('float')
        timer = Timer()
        timer.tic()
        clf = OneVsRestClassifier(LinearSVC(random_state=0)).fit(X, y)
        timer.toc()
        print timer.total_time
        joblib.dump(clf, filePath)
    return clf
def tattoo_detection(net, image_name, args):
    """Detect object classes in an image using pre-computed object proposals."""
    im_in = cv2.imread(image_name)
    if im_in is None:
        print('cannot open %s for read' % image_name)
        exit(-1)

    rows, cols = im_in.shape[:2]
    print([rows, cols])

    # Resize so that the longer side equals args.longdim
    scale = 1.0
    if rows >= cols:
        scale = float(args.longdim) / float(rows)
        im = cv2.resize(im_in, (int(0.5 + float(cols) * scale), args.longdim))
    else:
        scale = float(args.longdim) / float(cols)
        im = cv2.resize(im_in, (args.longdim, int(0.5 + float(rows) * scale)))

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im)
    timer.toc()
    seconds = '%.3f' % timer.total_time
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    max_scores = scores.max(axis=0)
    print(max_scores)
    print(boxes.shape)

    # Visualize detections for each class
    CONF_THRESH = args.threshold
    NMS_THRESH = args.nms_thresh
    tattoo_dets = []
    for cls_ind, cls in enumerate(CLASSES[1:]):
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        inds = np.where(dets[:, -1] >= CONF_THRESH)[0]
        dets_filter = dets[inds]
        vis_detections(im, cls, dets_filter, thresh=CONF_THRESH)
        if cls == 'tattoo' and len(dets_filter) > 0:
            plt.savefig(os.path.join(
                args.output,
                os.path.splitext(os.path.basename(image_name))[0] + '_det.png'))
            tattoo_dets = dets_filter
        if args.inspect == 'v':
            plt.show()
        plt.clf()
    return tattoo_dets, max_scores, seconds, scale
def demoRest(net, image_name, classes, box_file, obj_proposals, im_file, im):
    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for each class
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        print 'All {} detections with p({} | box) >= {:.1f}'.format(cls, cls, CONF_THRESH)
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
def train_whole_model(self, tester=None): ''' test the performance using all the features may be memory consuming. ''' self.comm.barrier() mpi.rootprint('*'*46) mpi.rootprint('*'*15+'whole featureset'+'*'*15) mpi.rootprint('*'*46) if tester is not None: # normalize the test data with the stats of the training data tester.normalize_data(self.mLocal, self.stdLocal) timer = Timer() timer.reset() if self.maxGraftDim != self.nMetabins*self.nCodes: mpi.rootprint('Please initialize with maxGraftDim=nMetabins*nCodes') return self.nSelFeats = 0 self.isSelected[:] = False mpi.rootprint('Generating Features...') for code in range(self.nCodes): for metabin in range(self.nMetabins): self.append_feature(code, metabin) if tester is not None: tester.append_feature(code, metabin) mpi.rootprint('Feature generation took {} secs'.format(timer.lap())) mpi.rootprint('Training...') loss = self.retrain_model(None) mpi.rootprint('Training took {} secs'.format(timer.lap())) mpi.rootprint('Training accuracy: {}'.format(self.compute_current_accuracy())) if tester is not None: mpi.rootprint('Current Testing accuracy: {}'.format(tester.compute_test_accuracy(self.weights, self.b)))
def train_model(self, max_iters): #display = self.solver_param.display #40 #test_iter = 1 #test_interval = 1 #_accuracy = 0 #accuracy = 0 timer = Timer() while self.solver.iter < max_iters: #print self.solver.iter #make one SGD update timer.tic() self.solver.step(1) timer.toc() """ _train_loss += self.solver.net.blobs['euclidean_loss'].data if (self.solver.iter-1) % display == 0: train_loss[(self.solver.iter-1) // display] = _train_loss / display _train_loss = 0 """ if self.solver.iter % (self.solver_param.display) == 0: print ('speed {:.3f}s / iter').format(timer.average_time) """ if self.solver.iter % test_interval == 0: for test_it in range(test_iter): self.solver.test_nets[0].forward() _accuracy += self.solver.test_nets[0].blobs['loss3/top-5'].data accuracy = _accuracy / test_iter f.write(str(self.solver.iter) + ' ' + str(accuracy) + '\n') _accuracy = 0 """ """
def detect(net, image_set, image_name, output_file):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load pre-computed Selective Search object proposals
    box_file = os.path.join(coco_root, 'boxes_full', image_set, image_name + '.mat')
    if not os.path.exists(box_file):
        print 'File does not exist', box_file
        return
    obj_proposals = sio.loadmat(box_file)['boxes']

    # Load the demo image
    im_file = os.path.join(coco_root, 'images', image_set, image_name + '.jpg')
    im = cv2.imread(im_file)

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print ('Detection took {:.3f}s for '
           '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    np.savez(output_file, scores=scores, boxes=boxes)
def demo(sess, net, image_name): """Detect object classes in an image using pre-computed object proposals.""" # Load the demo image im_file = os.path.join(cfg.DATA_DIR, 'demo', image_name) #im_file = os.path.join('/home/corgi/Lab/label/pos_frame/ACCV/training/000001/',image_name) im = cv2.imread(im_file) # Detect all object classes and regress object bounds timer = Timer() timer.tic() scores, boxes = im_detect(sess, net, im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format(timer.total_time, boxes.shape[0]) # Visualize detections for each class im = im[:, :, (2, 1, 0)] fig, ax = plt.subplots(figsize=(12, 12)) ax.imshow(im, aspect='equal') CONF_THRESH = 0.8 NMS_THRESH = 0.3 for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def detect_bboxes(net, im_names, subset_classes): """Detect object classes in an image using pre-computed object proposals.""" df = cnn_utils.create_bbox_data_frame(with_object_index=False) for im_name in im_names: print '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' print 'Demo for {}'.format(im_name) # Load the input image. im_file = os.path.join(FLAGS.data_dir, 'images', im_name) im = cv2.imread(im_file) im_size_x = im.shape[1] im_size_y = im.shape[0] # Detect all object classes and regress object bounds. timer = Timer() timer.tic() scores, boxes = im_detect(net, im) timer.toc() print ('Detection took {:.3f}s for ' '{:d} object proposals').format( timer.total_time, boxes.shape[0]) # Detect for each class for subset_cls_ind in range(len(class_names_to_be_detected)): cls = class_names_to_be_detected[subset_cls_ind] try: cls_ind = CLASSES.index(cls) except: print('error: class does not exist in training data: ' '{0}'.format(cls)) exit(-1) cls_boxes = boxes[:, 4*cls_ind:4*(cls_ind + 1)] cls_scores = scores[:, cls_ind] dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, FLAGS.nms_thresh) dets = dets[keep, :] inds = np.where(dets[:, -1] >= FLAGS.conf_thresh)[0] if len(inds) > 0: print ('{} {}(s) are detected.'.format(len(inds), cls)) for i in inds: # ['image_name', 'class_index', 'x1', 'y1', 'x2', 'y2', 'score'] x1 = dets[i, 0] y1 = dets[i, 1] x2 = dets[i, 2] y2 = dets[i, 3] score = dets[i, -1] if FLAGS.ignore_bbox_on_boundary: # Ignore bounding boxes on the frame boundary. if x1 <= 0 or x2 >= (im_size_x - 1) or \ y1 <= 0 or y2 >= (im_size_y - 1): continue # Append a row. df.loc[len(df)] = [ im_name, subset_cls_ind, x1, y1, x2, y2, score] return df
def loadDesc(setting):
    print "Load Desc..."
    timer = Timer()
    featureDstDir = setting['featureDstDir']

    sortedList = sorted([f for f in os.listdir(featureDstDir)])
    descPath = np.array([os.path.join(featureDstDir, x) for x in sortedList])

    X = []
    cnt = 0
    size = len(descPath)
    timer.tic()
    for path in descPath:
        feature = readCSV(path)
        X.append(feature)
        print "%d / %d file loaded" % (cnt, size)
        cnt = cnt + 1
    timer.toc()

    X = np.array(X)
    X = np.reshape(X, X.shape[0:2])
    return X
def test_net(net, imdb, max_per_image=400, thresh=0.03, vis=False): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} if not cfg.TEST.HAS_RPN: roidb = imdb.roidb for i in xrange(num_images): # filter out any ground truth boxes if cfg.TEST.HAS_RPN: box_proposals = None else: # The roidb may contain ground-truth rois (for example, if the roidb # comes from the training or val split). We only want to evaluate # detection on the *non*-ground-truth rois. We select those the rois # that have the gt_classes field set to 0, which means there's no # ground truth. box_proposals = roidb[i]['boxes'][roidb[i]['gt_classes'] == 0] im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, box_proposals) _t['im_detect'].toc() _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] if cfg.TEST.AGNOSTIC: cls_boxes = boxes[inds, 4:8] else: cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] # # ----- set numpy decimal precision ------ # float_formatter = lambda x: "%.5f" % x # np.set_printoptions(formatter={'float_kind':float_formatter}) # # cls_dets[:,:-1] = np.round(cls_dets[:,:-1], decimals=0) # --- test --- # print 'cls_dets:' # --- test --- # print cls_dets # --- test --- # # ---------------------------------------- if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Evaluating detections' imdb.evaluate_detections(all_boxes, output_dir)
def test_net(sess, net, imdb, weights_filename, max_per_image=100, thresh=0.05): np.random.seed(cfg.RNG_SEED) """Test a Fast R-CNN network on an image database.""" # num_images = len(imdb.image_index) # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) # all_boxes = [[[] for _ in range(num_images)] # for _ in range(imdb.num_classes)] # # output_dir = get_output_dir(imdb, weights_filename) # timers _t = {'im_detect': Timer(), 'misc': Timer()} # testFile = open('/netscratch/siddiqui/Datasets/ComplexBackground/data/ImageSets/test.txt') testFile = open( '/netscratch/siddiqui/Datasets/ComplexBackground/data_of_bgs_gray/data/ImageSets/test.txt' ) imageNames = testFile.readlines() counter = 0 reject_classes = [] imagesOutputDir = '/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output-images/' os.system("rm -rf " + imagesOutputDir) os.system("mkdir " + imagesOutputDir) fileAlreadyProcessed = False if os.path.isfile( "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output.txt" ): f = open( "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output.txt", "r") processedFiles = f.readlines() f.close() #print (processedFiles) if len(processedFiles) != 0: print("Resuming processing") lastProcessedFile = processedFiles[-1] lastProcessedFile = lastProcessedFile.split(';')[0] fileAlreadyProcessed = True print("Last processed file: %s" % lastProcessedFile) fileIndex = 0 videoScores = {} scoreFile = open( "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output-image.txt", "w") for im_name in imageNames: im_name = im_name.strip() # Skip all names already processed if fileAlreadyProcessed: fileIndex += 1 if im_name == lastProcessedFile: print("Resuming processing from file (%d): %s" % (fileIndex, im_name)) fileAlreadyProcessed = False continue rejectExample = False for r_class in reject_classes: if r_class in im_name: rejectExample = True break if rejectExample: continue # im_path = '/netscratch/siddiqui/Datasets/ComplexBackground/data/Images/' + im_name + '.png' # annot_file = '/netscratch/siddiqui/Datasets/ComplexBackground/data/Annotations/' + im_name + '.xml' im_path = '/netscratch/siddiqui/Datasets/ComplexBackground/data_of_bgs_gray/data/Images/' + im_name + '.png' annot_file = '/netscratch/siddiqui/Datasets/ComplexBackground/data_of_bgs_gray/data/Annotations/' + im_name + '.xml' video_name = im_name[:im_name.rfind('_')] if video_name not in videoScores: # True Positives, False Positives, False Negatives videoScores[video_name] = [0, 0, 0] im = cv2.imread(im_path) if im is None: print("Error loading file: %s" % im_path) continue overlay = im.copy() _t['im_detect'].tic() scores, boxes = im_detect(sess, net, im) _t['im_detect'].toc() _t['misc'].tic() # Visualize detections for each class CONF_THRESH = 0.5 NMS_THRESH = 0.3 with open(annot_file, 'r') as fd: doc = xmltodict.parse(fd.read()) # Load GT bboxes gtBBoxes = [] for xmlAttribName, xmlData in doc['annotation'].items(): # print (xmlAttribName) if isinstance(xmlData, list): for obj in xmlData: # If multiple objects bbox = obj['bndbox'] gtBBoxes.append([ int(bbox['xmin']), int(bbox['ymin']), int(bbox['xmax']), int(bbox['ymax']) ]) else: # If only one object bbox = xmlData['bndbox'] gtBBoxes.append([ int(bbox['xmin']), int(bbox['ymin']), int(bbox['xmax']), int(bbox['ymax']) ]) bboxes = [] for cls_ind, cls in enumerate(CLASSES[1:]): cls_ind += 1 # because we skipped background cls = CLASSES[cls_ind] cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 
1)] cls_scores = scores[:, cls_ind] dets = np.hstack( (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) keep = nms(dets, NMS_THRESH) dets = dets[keep, :] if SAVE_SINGLE_IMAGE: inds = np.where(dets[:, -1] >= CONF_THRESH)[0] for i in inds: bbox = dets[i, :4] score = dets[i, -1] bboxes.append( [bbox[0], bbox[1], bbox[2], bbox[3], score, cls]) cv2.rectangle( overlay, (bbox[0], bbox[1]), (bbox[2], bbox[3]), CLASSES_COLORS[cls_ind], 3) # Negative thinkness results in filled rect else: vis_detections(im, cls, dets, thresh=CONF_THRESH) for gtBBox in gtBBoxes: cv2.rectangle(overlay, (gtBBox[0], gtBBox[1]), (gtBBox[2], gtBBox[3]), CLASSES_COLORS[2], 3) # Negative thinkness results in filled rect if SAVE_SINGLE_IMAGE: if True: #len(bboxes) > 0: # (3) blend with the original: opacity = 0.5 cv2.addWeighted(overlay, opacity, im, 1 - opacity, 0, im) # out_im_path = '/netscratch/siddiqui/Bosch/data/faster-rcnn/output-defected-io/' + img_name + '.jpg' # out_im_path = '/netscratch/siddiqui/TableDetection/output-images/' + im_name.split('/')[-1] out_im_path = imagesOutputDir + im_name + '.png' cv2.imwrite(out_im_path, im) print("Writing output image for file (%d): %s" % (fileIndex, im_name)) f = open( "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/output.txt", "a+") f.write(im_name + ';' + str(len(bboxes)) + ';' + str(bboxes) + "\n") f.close() else: # Close previous plots before moving onto the next image plt.show() plt.close('all') # Compute F measure based on bounding boxes truePositives = 0 falsePositives = 0 falseNegatives = 0 matchedGTBBox = [0] * len(gtBBoxes) # Iterate over all the predicted bboxes for predictedBBox in bboxes: bboxMatchedIdx = -1 # Iterate over all the GT bboxes for gtBBoxIdx in range(len(gtBBoxes)): gtBBox = gtBBoxes[gtBBoxIdx] if USE_IOU: # Compute IoU iou = bbox_intersection_over_union(gtBBox, predictedBBox) if (iou > IOU_THRESHOLD): if (matchedGTBBox[gtBBoxIdx] == 0): bboxMatchedIdx = gtBBoxIdx break else: # Compute IoM iom = bbox_intersection_over_min(gtBBox, predictedBBox) # if ((iom > IOM_THRESHOLD) and (not matchedGTBBox[bboxMatchedIdx])): # bboxMatchedIdx = gtBBoxIdx # break if (iom > IOM_THRESHOLD): if (matchedGTBBox[gtBBoxIdx] == 0): bboxMatchedIdx = gtBBoxIdx break if (bboxMatchedIdx != -1): truePositives += 1 matchedGTBBox[bboxMatchedIdx] = 1 else: falsePositives += 1 # All the unmatched bboxes are false negatives falseNegatives = len(matchedGTBBox) - sum(matchedGTBBox) # Print final statistics for the frame print("True positives: %d" % truePositives) print("False positives: %d" % falsePositives) print("False negatives: %d" % falseNegatives) videoScores[video_name][0] += truePositives videoScores[video_name][1] += falsePositives videoScores[video_name][2] += falseNegatives # Compute F-Score if ((truePositives == 0) and (falseNegatives == 0) and (falsePositives == 0)): assert ((len(gtBBoxes) == 0) and (len(bboxes) == 0)) recall = 100.0 precision = 100.0 else: if ((truePositives == 0) and (falseNegatives == 0)): recall = 0.0 else: recall = (truePositives / float(truePositives + falseNegatives)) * 100 if ((truePositives == 0) and (falsePositives == 0)): precision = 0.0 else: precision = (truePositives / float(truePositives + falsePositives)) * 100 if ((precision == 0.0) and (recall == 0.0)): fMeasure = 0.0 else: fMeasure = 2 * ((precision * recall) / (precision + recall)) print("Recall: %f" % recall) print("Precision: %f" % precision) print("F-Measure: %f" % fMeasure) scoreFile.write(im_name + ';' + str([ len(bboxes), len(gtBBoxes), truePositives, 
falsePositives, falseNegatives, recall, precision, fMeasure ]) + '\n') fileIndex += 1 print("-------------------------------------------") # Write video scores to file videoScoresFileName = "/netscratch/siddiqui/Datasets/ComplexBackground/faster-rcnn/video.txt" averageFMeasure = 0 videoScoresFile = open(videoScoresFileName, 'w') for videoName, videoScore in videoScores.items(): print(videoName) recall = (videoScore[0] / float(videoScore[0] + videoScore[2])) * 100 precision = (videoScore[0] / float(videoScore[0] + videoScore[1])) * 100 fMeasure = 2 * ((precision * recall) / (precision + recall)) videoScoresFile.write(videoName + ";" + str(videoScore + [recall, precision, fMeasure]) + '\n') print("Recall: %f" % recall) print("Precision: %f" % precision) print("F-Measure: %f" % fMeasure) averageFMeasure += fMeasure print("-------------------------------------------") averageFMeasure = averageFMeasure / len(videoScores) print("Average F-Measure: %f" % averageFMeasure) videoScoresFile.write('Average F-Measure: ' + str(averageFMeasure) + '\n') videoScoresFile.close() scoreFile.close()
def demo(net, image_name, gt_boxes, result_dir, conf=0.75):
    """Detect object classes in an image using pre-computed object proposals."""
    # Load the demo image
    im_file = image_name
    im = cv2.imread(im_file)
    print "src image : ", im.shape
    im_height = im.shape[0]
    im_width = im.shape[1]

    # Detect all object classes and regress (rotated) object bounds
    timer = Timer()
    timer.tic()
    # scores : (300, 2)
    # boxes  : (300, 10)
    scores, boxes = r_im_detect(net, im)
    print __file__, '==>scores : ', scores.shape
    print __file__, '==>boxes : ', boxes.shape
    print __file__, "==>gt_margin: ", cfg.TEST.GT_MARGIN, cfg.TRAIN.GT_MARGIN
    print __file__, "==>img_padding: ", cfg.IMG_PADDING
    timer.toc()
    print('Detection took {:.3f}s for '
          '{:d} object proposals').format(timer.total_time, boxes.shape[0])

    # Visualize detections for the text class
    CONF_THRESH = conf
    NMS_THRESH = 0.3
    cls_ind = 1  # skip the background class
    cls_boxes = boxes[:, 5 * cls_ind:5 * (cls_ind + 1)]
    cls_scores = scores[:, cls_ind]
    # Stack each box's score next to its coordinates
    dets = np.hstack((cls_boxes,
                      cls_scores[:, np.newaxis])).astype(np.float32)
    keep = rotate_gpu_nms(dets, NMS_THRESH)
    dets = dets[keep, :]
    print __file__, '==>dets : ', dets.shape
    print dets[:, 5]

    # Undo the ground-truth margin applied during training/testing
    dets[:, 2] = dets[:, 2] / cfg.TEST.GT_MARGIN
    dets[:, 3] = dets[:, 3] / cfg.TEST.GT_MARGIN

    # For ICDAR13, write_result_ICDAR2013 (and post_merge) can be used instead.
    results = write_result_ICDAR(im_file, dets, CONF_THRESH, result_dir,
                                 im_height, im_width)
    return results
cfg = cfg_mnet net = RetinaFace(cfg=cfg, phase='test') net = load_model(net, args.trained_model, args.cpu) net.eval() print('Finished loading model!') device = torch.device("cpu" if args.cpu else "cuda") net = net.to(device) # testing dataset testset_folder = os.path.join(args.dataset_folder, "images") testset_list = os.path.join(args.dataset_folder, "wider_val.txt") with open(testset_list, 'r') as fr: test_dataset = fr.read().split() num_images = len(test_dataset) _t = {'forward_pass': Timer(), 'misc': Timer()} # testing begin for i, img_name in enumerate(test_dataset): image_path = os.path.join(testset_folder, img_name) img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) img = np.float32(img_raw) # testing scale target_size = 1600 max_size = 2150 im_shape = img.shape im_size_min = np.min(im_shape[0:2]) im_size_max = np.max(im_shape[0:2]) resize = float(target_size) / float(im_size_min) # prevent bigger axis from being more than max_size:
def detection_to_file(target_path, v_num, file_list, detect, total_frames,
                      current_frames, max_proposal=100, thresh=0):
    timer = Timer()
    w = open("{}/{}.txt".format(target_path, v_num), "w")
    for file_index, file_path in enumerate(file_list):
        file_name = file_path.split("/")[-1]
        set_num, v_num, frame_num = file_name[:-4].split("_")
        frame_num = str(int(frame_num) + 1)
        im = cv2.imread(file_path)

        timer = Timer()
        timer.tic()
        _t = {'im_preproc': Timer(), 'im_net': Timer(),
              'im_postproc': Timer(), 'misc': Timer()}
        scores, sub_scores, boxes = im_detect_hierarchy(net, im, _t)
        timer.toc()
        print('Detection Time:{:.3f}s on {} {}/{} images'.format(
            timer.average_time, file_name,
            current_frames + file_index + 1, total_frames))

        NMS_THRESH = 0.3
        for cls_ind, cls in enumerate(CLASSES_main[1:]):
            if cls != "car":
                continue
            cls_ind += 1  # because we skipped background
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            tmp_SS = sub_scores[keep, :]

            thresh = 0
            inds = np.where(dets[:, -1] > thresh)[0]
            for i in inds:
                bbox = dets[i, :4]
                score = dets[i, -1] * 100
                if score < 50:
                    continue
                sub_ind = np.argmax(tmp_SS[i])
                sub_score = tmp_SS[i][sub_ind] * 100
                sub_label = CLASSES_sub[sub_ind]
                if sub_label == "__background__" or sub_label == "not-target":
                    continue
                x = bbox[0]
                y = bbox[1]
                width = bbox[2] - bbox[0]
                height = bbox[3] - bbox[1]
                label = sub_label
                w.write("{},{},{},{},{},{},{}\n".format(
                    frame_num, x, y, width, height, sub_score, label))
    w.close()
    print("Evaluation file {} has been written".format(w.name))
    return file_index + 1
def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30): ''' Processes the video sequence given and provides the output of tracking result (write the results in video file) It uses JDE model for getting information about the online targets present. Parameters ---------- opt : Namespace Contains information passed as commandline arguments. dataloader : LoadVideo Instance of LoadVideo class used for fetching the image sequence and associated data. data_type : String Type of dataset corresponding(similar) to the given video. result_filename : String The name(path) of the file for storing results. save_dir : String Path to the folder for storing the frames containing bounding box information (Result frames). show_image : bool Option for shhowing individial frames during run-time. frame_rate : int Frame-rate of the given video. Returns ------- (Returns are not significant here) frame_id : int Sequence number of the last sequence ''' width, height = dataloader.w, dataloader.h if save_dir: mkdir_if_missing(save_dir) tracker = JDETracker(opt, frame_rate=frame_rate) timer = Timer() results = [] frame_id = 0 # for selected object tracking global click_pos global is_selected selected_id = None # set video output writer counter = 0 encode = 0x00000021 output_video = cv2.VideoWriter( os.path.join(save_dir, f'result_{counter}.mp4'), encode, 5, (width, height), True) # start tracking for path, img, img0 in dataloader: if frame_id % 100 == 0: logger.info('Processing frame {} ({:.2f} fps)'.format( frame_id, 1. / max(1e-5, timer.average_time))) output_video.release() # Call MP4Box to divide new mp4 file output_video = cv2.VideoWriter( os.path.join(save_dir, f'result_{counter}.mp4'), encode, 5, (width, height), True) counter += 1 # run tracking timer.tic() blob = torch.from_numpy(img).cuda().unsqueeze(0) online_targets = tracker.update(blob, img0) online_tlwhs = [] online_ids = [] for t in online_targets: tlwh = t.tlwh tid = t.track_id vertical = tlwh[2] / tlwh[3] > 1.6 if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: online_tlwhs.append(tlwh) online_ids.append(tid) timer.toc() # save results results.append((frame_id + 1, online_tlwhs, online_ids)) if show_image or save_dir is not None: # get visualization result and some control flags for selected object tracking online_im, click_pos, selected_id, is_selected = vis.plot_tracking( img0, online_tlwhs, online_ids, frame_id=frame_id, fps=1. / timer.average_time, selected_id=selected_id, click_pos=click_pos, is_selected=is_selected) if show_image: # bind mouse event linstener cv2.setMouseCallback("online_im", on_click) cv2.imshow('online_im', online_im) if save_dir is not None: cv2.imwrite( os.path.join(save_dir, 'frame', '{:05d}.jpg'.format(frame_id)), online_im) output_video.write(online_im) frame_id += 1 output_video.release() # save results write_results(result_filename, results, data_type) return frame_id, timer.average_time, timer.calls
MIN_COUNT = 2
TARGET_FEAT = 'advertiser_id'
N_CLS = 10

TRAIN_SEQ_FILE = 'data/train_preliminary/train_seq.pkl'
TEST_SEQ_FILE = 'data/test/test_seq.pkl'
LABEL_FILE = 'data/train_preliminary/user.csv'
TRAIN_GENDER_FEAT = f'data/train_feat/train_tfidf_gender_feat_{TARGET_FEAT}.pkl'
TEST_GENDER_FEAT = f'data/test_feat/test_tfidf_gender_feat_{TARGET_FEAT}.pkl'
TRAIN_AGE_FEAT = f'data/train_feat/train_tfidf_age_feat_{TARGET_FEAT}.pkl'
TEST_AGE_FEAT = f'data/test_feat/test_tfidf_age_feat_{TARGET_FEAT}.pkl'

dtype = {'user_id': 'int32', 'age': 'uint8', 'gender': 'uint8'}
timer = Timer()

# -------------------------------------------------------------------------
print('Loading data and preprocessing...')
timer.start()
train = pd.read_pickle(TRAIN_SEQ_FILE)
test = pd.read_pickle(TEST_SEQ_FILE)
user = pd.read_csv(LABEL_FILE, dtype=dtype)
label_gender = user.gender.values - 1
label_age = user.age.values - 1

# Concatenate train and test into one dataframe
concated_data = pd.concat([train[TARGET_FEAT], test[TARGET_FEAT]]) \
    .reset_index(level=0, drop=True) \
    .tolist()
timer.stop()
def train(self):
    """Training loop."""
    train_timer = Timer()
    load_timer = Timer()

    # Load the validation data
    val_images, val_labels = self.data.getValid()
    val_feed_dict = {self.net.images: val_images, self.net.labels: val_labels}

    # Iterative training
    for step in range(1, self.max_iter + 1):
        load_timer.tic()
        # Fetch a batch_size worth of images and their labels
        train_images, train_labels = self.data.getTrain()
        load_timer.toc()

        # Run one training step
        train_feed_dict = {self.net.images: train_images, self.net.labels: train_labels}
        train_timer.tic()
        summary_str, _ = self.sess.run([self.summary_op, self.train_op],
                                       feed_dict=train_feed_dict)
        train_timer.toc()

        if step % self.summary_iter == 0:
            # Record the summary information
            self.writer.add_summary(summary_str, step)

            if step % (self.summary_iter * 10) == 0:
                # Log the current training and validation loss
                [loss_train] = self.sess.run([self.net.total_loss], feed_dict=train_feed_dict)
                [loss_val] = self.sess.run([self.net.total_loss], feed_dict=val_feed_dict)
                log_str = ('{} Epoch: {}, Step: {}, Learning rate: {}, '
                           'Loss-train: {:5.3f}, Loss-val: {:5.3f}, '
                           'Speed: {:.3f}s/iter, Load: {:.3f}s/iter, Remain: {}').format(
                    datetime.datetime.now().strftime('%m-%d %H:%M:%S'),
                    self.data.epoch,
                    int(step),
                    round(self.learning_rate.eval(session=self.sess), 6),
                    loss_train,
                    loss_val,
                    train_timer.average_time,
                    load_timer.average_time,
                    train_timer.remain(step, self.max_iter))
                print(log_str)

        if step % self.save_iter == 0:
            # Save a checkpoint for later evaluation
            print('{} Saving checkpoint file to: {}'.format(
                datetime.datetime.now().strftime('%m-%d %H:%M:%S'), self.output_dir))
            self.saver.save(self.sess, self.ckpt_file, global_step=self.global_step)
            print("save done!!!")
class JsonDatasetRel(object): """A class representing a COCO json dataset.""" def __init__(self, name): assert name in DATASETS.keys(), \ 'Unknown dataset name: {}'.format(name) assert os.path.exists(DATASETS[name][IM_DIR]), \ 'Image directory \'{}\' not found'.format(DATASETS[name][IM_DIR]) assert os.path.exists(DATASETS[name][ANN_FN]), \ 'Annotation file \'{}\' not found'.format(DATASETS[name][ANN_FN]) logger.debug('Creating: {}'.format(name)) self.name = name self.image_directory = DATASETS[name][IM_DIR] self.image_prefix = ('' if IM_PREFIX not in DATASETS[name] else DATASETS[name][IM_PREFIX]) self.COCO = COCO(DATASETS[name][ANN_FN]) self.debug_timer = Timer() # Set up dataset classes category_ids = self.COCO.getCatIds() categories = [c['name'] for c in self.COCO.loadCats(category_ids)] self.category_to_id_map = dict(zip(categories, category_ids)) self.classes = ['__background__'] + categories self.num_classes = len(self.classes) self.json_category_id_to_contiguous_id = { v: i + 1 for i, v in enumerate(self.COCO.getCatIds()) } self.contiguous_category_id_to_json_id = { v: k for k, v in self.json_category_id_to_contiguous_id.items() } self._init_keypoints() assert ANN_FN2 in DATASETS[name] and ANN_FN3 in DATASETS[name] with open(DATASETS[name][ANN_FN2]) as f: self.rel_anns = json.load(f) with open(DATASETS[name][ANN_FN3]) as f: prd_categories = json.load(f) self.obj_classes = self.classes[1:] # excludes background for now self.num_obj_classes = len(self.obj_classes) # self.prd_classes = ['__background__'] + prd_categories self.prd_classes = prd_categories # excludes background for now self.num_prd_classes = len(self.prd_classes) @property def cache_path(self): cache_path = os.path.abspath(os.path.join(cfg.DATA_DIR, 'cache')) if not os.path.exists(cache_path): os.makedirs(cache_path) return cache_path @property def valid_cached_keys(self): """ Can load following key-ed values from the cached roidb file 'image'(image path) and 'flipped' values are already filled on _prep_roidb_entry, so we don't need to overwrite it again. """ keys = [ 'dataset_name', 'boxes', 'segms', 'gt_classes', 'seg_areas', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'sbj_gt_boxes', 'sbj_gt_classes', 'obj_gt_boxes', 'obj_gt_classes', 'prd_gt_classes', 'sbj_gt_overlaps', 'obj_gt_overlaps', 'prd_gt_overlaps', 'pair_to_gt_ind_map' ] if self.keypoints is not None: keys += ['gt_keypoints', 'has_visible_keypoints'] return keys def get_roidb(self, gt=False, proposal_file=None, min_proposal_size=2, proposal_limit=-1, crowd_filter_thresh=0): """Return an roidb corresponding to the json dataset. Optionally: - include ground truth boxes in the roidb - add proposals specified in a proposals file - filter proposals based on a minimum side length - filter proposals that intersect with crowd regions """ assert gt is True or crowd_filter_thresh == 0, \ 'Crowd filter threshold must be 0 if ground-truth annotations ' \ 'are not included.' 
image_ids = self.COCO.getImgIds() image_ids.sort() if cfg.DEBUG: roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))[:100] else: roidb = copy.deepcopy(self.COCO.loadImgs(image_ids)) new_roidb = [] for entry in roidb: # In OpenImages_v4, the detection-annotated images are more than relationship # annotated images, hence the need to check if entry['file_name'] in self.rel_anns: self._prep_roidb_entry(entry) new_roidb.append(entry) roidb = new_roidb if gt: # Include ground-truth object annotations cache_filepath = os.path.join(self.cache_path, self.name + '_rel_gt_roidb.pkl') if os.path.exists(cache_filepath) and not cfg.DEBUG: self.debug_timer.tic() self._add_gt_from_cache(roidb, cache_filepath) logger.debug('_add_gt_from_cache took {:.3f}s'.format( self.debug_timer.toc(average=False))) else: self.debug_timer.tic() for entry in roidb: self._add_gt_annotations(entry) logger.debug('_add_gt_annotations took {:.3f}s'.format( self.debug_timer.toc(average=False))) if not cfg.DEBUG: with open(cache_filepath, 'wb') as fp: pickle.dump(roidb, fp, pickle.HIGHEST_PROTOCOL) logger.info('Cache ground truth roidb to %s', cache_filepath) if proposal_file is not None: # Include proposals from a file self.debug_timer.tic() self._add_proposals_from_file(roidb, proposal_file, min_proposal_size, proposal_limit, crowd_filter_thresh) logger.debug('_add_proposals_from_file took {:.3f}s'.format( self.debug_timer.toc(average=False))) _add_class_assignments(roidb) return roidb def _prep_roidb_entry(self, entry): """Adds empty metadata fields to an roidb entry.""" # Reference back to the parent dataset entry['dataset'] = self # Make file_name an abs path im_path = os.path.join(self.image_directory, self.image_prefix + entry['file_name']) assert os.path.exists(im_path), 'Image \'{}\' not found'.format( im_path) entry['image'] = im_path entry['flipped'] = False entry['has_visible_keypoints'] = False # Empty placeholders entry['boxes'] = np.empty((0, 4), dtype=np.float32) entry['segms'] = [] entry['gt_classes'] = np.empty((0), dtype=np.int32) entry['seg_areas'] = np.empty((0), dtype=np.float32) entry['gt_overlaps'] = scipy.sparse.csr_matrix( np.empty((0, self.num_classes), dtype=np.float32)) entry['is_crowd'] = np.empty((0), dtype=np.bool) # 'box_to_gt_ind_map': Shape is (#rois). 
Maps from each roi to the index # in the list of rois that satisfy np.where(entry['gt_classes'] > 0) entry['box_to_gt_ind_map'] = np.empty((0), dtype=np.int32) if self.keypoints is not None: entry['gt_keypoints'] = np.empty((0, 3, self.num_keypoints), dtype=np.int32) # Remove unwanted fields that come from the json file (if they exist) for k in ['date_captured', 'url', 'license']: if k in entry: del entry[k] entry['dataset_name'] = '' # add relationship annotations # sbj entry['sbj_gt_boxes'] = np.empty((0, 4), dtype=np.float32) entry['sbj_gt_classes'] = np.empty((0), dtype=np.int32) entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix( np.empty((0, self.num_obj_classes), dtype=np.float32)) # entry['sbj_box_to_gt_ind_map'] = np.empty((0), dtype=np.int32) # obj entry['obj_gt_boxes'] = np.empty((0, 4), dtype=np.float32) entry['obj_gt_classes'] = np.empty((0), dtype=np.int32) entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix( np.empty((0, self.num_obj_classes), dtype=np.float32)) # entry['obj_box_to_gt_ind_map'] = np.empty((0), dtype=np.int32) # prd entry['prd_gt_classes'] = np.empty((0), dtype=np.int32) entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix( np.empty((0, self.num_prd_classes), dtype=np.float32)) entry['pair_to_gt_ind_map'] = np.empty((0), dtype=np.int32) def _add_gt_annotations(self, entry): """Add ground truth annotation metadata to an roidb entry.""" ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None) objs = self.COCO.loadAnns(ann_ids) # Sanitize bboxes -- some are invalid valid_objs = [] valid_segms = [] width = entry['width'] height = entry['height'] for obj in objs: if obj['area'] < cfg.TRAIN.GT_MIN_AREA: continue if 'ignore' in obj and obj['ignore'] == 1: continue # Convert form (x1, y1, w, h) to (x1, y1, x2, y2) x1, y1, x2, y2 = box_utils.xywh_to_xyxy(obj['bbox']) x1, y1, x2, y2 = box_utils.clip_xyxy_to_image( x1, y1, x2, y2, height, width) # Require non-zero seg area and more than 1x1 box size if obj['area'] > 0 and x2 > x1 and y2 > y1: obj['clean_bbox'] = [x1, y1, x2, y2] valid_objs.append(obj) # valid_segms.append(obj['segmentation']) num_valid_objs = len(valid_objs) boxes = np.zeros((num_valid_objs, 4), dtype=entry['boxes'].dtype) gt_classes = np.zeros((num_valid_objs), dtype=entry['gt_classes'].dtype) gt_overlaps = np.zeros((num_valid_objs, self.num_classes), dtype=entry['gt_overlaps'].dtype) seg_areas = np.zeros((num_valid_objs), dtype=entry['seg_areas'].dtype) is_crowd = np.zeros((num_valid_objs), dtype=entry['is_crowd'].dtype) box_to_gt_ind_map = np.zeros((num_valid_objs), dtype=entry['box_to_gt_ind_map'].dtype) if self.keypoints is not None: gt_keypoints = np.zeros((num_valid_objs, 3, self.num_keypoints), dtype=entry['gt_keypoints'].dtype) im_has_visible_keypoints = False for ix, obj in enumerate(valid_objs): cls = self.json_category_id_to_contiguous_id[obj['category_id']] boxes[ix, :] = obj['clean_bbox'] gt_classes[ix] = cls seg_areas[ix] = obj['area'] is_crowd[ix] = obj['iscrowd'] box_to_gt_ind_map[ix] = ix if self.keypoints is not None: gt_keypoints[ix, :, :] = self._get_gt_keypoints(obj) if np.sum(gt_keypoints[ix, 2, :]) > 0: im_has_visible_keypoints = True if obj['iscrowd']: # Set overlap to -1 for all classes for crowd objects # so they will be excluded during training gt_overlaps[ix, :] = -1.0 else: gt_overlaps[ix, cls] = 1.0 entry['boxes'] = np.append(entry['boxes'], boxes, axis=0) entry['segms'].extend(valid_segms) entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes) entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas) 
entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(), gt_overlaps, axis=0) entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps']) entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd) entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'], box_to_gt_ind_map) if self.keypoints is not None: entry['gt_keypoints'] = np.append(entry['gt_keypoints'], gt_keypoints, axis=0) entry['has_visible_keypoints'] = im_has_visible_keypoints entry['dataset_name'] = self.name # add relationship annotations im_rels = self.rel_anns[entry['file_name']] sbj_gt_boxes = np.zeros((len(im_rels), 4), dtype=entry['sbj_gt_boxes'].dtype) obj_gt_boxes = np.zeros((len(im_rels), 4), dtype=entry['obj_gt_boxes'].dtype) sbj_gt_classes = np.zeros(len(im_rels), dtype=entry['sbj_gt_classes'].dtype) obj_gt_classes = np.zeros(len(im_rels), dtype=entry['obj_gt_classes'].dtype) prd_gt_classes = np.zeros(len(im_rels), dtype=entry['prd_gt_classes'].dtype) for ix, rel in enumerate(im_rels): # sbj sbj_gt_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2( rel['subject']['bbox']) sbj_gt_boxes[ix] = sbj_gt_box sbj_gt_classes[ix] = rel['subject'][ 'category'] # excludes background # obj obj_gt_box = box_utils_rel.y1y2x1x2_to_x1y1x2y2( rel['object']['bbox']) obj_gt_boxes[ix] = obj_gt_box obj_gt_classes[ix] = rel['object'][ 'category'] # excludes background # prd prd_gt_classes[ix] = rel['predicate'] # exclude background entry['sbj_gt_boxes'] = np.append(entry['sbj_gt_boxes'], sbj_gt_boxes, axis=0) entry['obj_gt_boxes'] = np.append(entry['obj_gt_boxes'], obj_gt_boxes, axis=0) entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'], sbj_gt_classes) entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'], obj_gt_classes) entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'], prd_gt_classes) # misc sbj_gt_overlaps = np.zeros((len(im_rels), self.num_obj_classes), dtype=entry['sbj_gt_overlaps'].dtype) for ix in range(len(im_rels)): sbj_cls = sbj_gt_classes[ix] sbj_gt_overlaps[ix, sbj_cls] = 1.0 entry['sbj_gt_overlaps'] = np.append( entry['sbj_gt_overlaps'].toarray(), sbj_gt_overlaps, axis=0) entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix( entry['sbj_gt_overlaps']) obj_gt_overlaps = np.zeros((len(im_rels), self.num_obj_classes), dtype=entry['obj_gt_overlaps'].dtype) for ix in range(len(im_rels)): obj_cls = obj_gt_classes[ix] obj_gt_overlaps[ix, obj_cls] = 1.0 entry['obj_gt_overlaps'] = np.append( entry['obj_gt_overlaps'].toarray(), obj_gt_overlaps, axis=0) entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix( entry['obj_gt_overlaps']) prd_gt_overlaps = np.zeros((len(im_rels), self.num_prd_classes), dtype=entry['prd_gt_overlaps'].dtype) pair_to_gt_ind_map = np.zeros((len(im_rels)), dtype=entry['pair_to_gt_ind_map'].dtype) for ix in range(len(im_rels)): prd_cls = prd_gt_classes[ix] prd_gt_overlaps[ix, prd_cls] = 1.0 pair_to_gt_ind_map[ix] = ix entry['prd_gt_overlaps'] = np.append( entry['prd_gt_overlaps'].toarray(), prd_gt_overlaps, axis=0) entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix( entry['prd_gt_overlaps']) entry['pair_to_gt_ind_map'] = np.append(entry['pair_to_gt_ind_map'], pair_to_gt_ind_map) for k in ['file_name']: if k in entry: del entry[k] def _add_gt_from_cache(self, roidb, cache_filepath): """Add ground truth annotation metadata from cached file.""" logger.info('Loading cached gt_roidb from %s', cache_filepath) with open(cache_filepath, 'rb') as fp: cached_roidb = pickle.load(fp) assert len(roidb) == len(cached_roidb) for entry, cached_entry in zip(roidb, cached_roidb): values = 
[cached_entry[key] for key in self.valid_cached_keys] dataset_name, boxes, segms, gt_classes, seg_areas, gt_overlaps, is_crowd, box_to_gt_ind_map, \ sbj_gt_boxes, sbj_gt_classes, obj_gt_boxes, obj_gt_classes, prd_gt_classes, \ sbj_gt_overlaps, obj_gt_overlaps, prd_gt_overlaps, pair_to_gt_ind_map = values[:len(self.valid_cached_keys)] if self.keypoints is not None: gt_keypoints, has_visible_keypoints = values[ len(self.valid_cached_keys):] entry['dataset_name'] = dataset_name entry['boxes'] = np.append(entry['boxes'], boxes, axis=0) entry['segms'].extend(segms) entry['gt_classes'] = np.append(entry['gt_classes'], gt_classes) entry['seg_areas'] = np.append(entry['seg_areas'], seg_areas) entry['gt_overlaps'] = scipy.sparse.csr_matrix(gt_overlaps) entry['is_crowd'] = np.append(entry['is_crowd'], is_crowd) entry['box_to_gt_ind_map'] = np.append(entry['box_to_gt_ind_map'], box_to_gt_ind_map) if self.keypoints is not None: entry['gt_keypoints'] = np.append(entry['gt_keypoints'], gt_keypoints, axis=0) entry['has_visible_keypoints'] = has_visible_keypoints # add relationship annotations entry['sbj_gt_boxes'] = np.append(entry['sbj_gt_boxes'], sbj_gt_boxes, axis=0) entry['sbj_gt_classes'] = np.append(entry['sbj_gt_classes'], sbj_gt_classes) entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(sbj_gt_overlaps) entry['obj_gt_boxes'] = np.append(entry['obj_gt_boxes'], obj_gt_boxes, axis=0) entry['obj_gt_classes'] = np.append(entry['obj_gt_classes'], obj_gt_classes) entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(obj_gt_overlaps) entry['prd_gt_classes'] = np.append(entry['prd_gt_classes'], prd_gt_classes) entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(prd_gt_overlaps) entry['pair_to_gt_ind_map'] = np.append( entry['pair_to_gt_ind_map'], pair_to_gt_ind_map) def _add_proposals_from_file(self, roidb, proposal_file, min_proposal_size, top_k, crowd_thresh): """Add proposals from a proposals file to an roidb.""" logger.info('Loading proposals from: {}'.format(proposal_file)) with open(proposal_file, 'r') as f: proposals = pickle.load(f) id_field = 'indexes' if 'indexes' in proposals else 'ids' # compat fix _sort_proposals(proposals, id_field) box_list = [] for i, entry in enumerate(roidb): if i % 2500 == 0: logger.info(' {:d}/{:d}'.format(i + 1, len(roidb))) boxes = proposals['boxes'][i] # Sanity check that these boxes are for the correct image id assert entry['id'] == proposals[id_field][i] # Remove duplicate boxes and very small boxes and then take top k boxes = box_utils.clip_boxes_to_image(boxes, entry['height'], entry['width']) keep = box_utils.unique_boxes(boxes) boxes = boxes[keep, :] keep = box_utils.filter_small_boxes(boxes, min_proposal_size) boxes = boxes[keep, :] if top_k > 0: boxes = boxes[:top_k, :] box_list.append(boxes) _merge_proposal_boxes_into_roidb(roidb, box_list) if crowd_thresh > 0: _filter_crowd_proposals(roidb, crowd_thresh) def _init_keypoints(self): """Initialize COCO keypoint information.""" self.keypoints = None self.keypoint_flip_map = None self.keypoints_to_id_map = None self.num_keypoints = 0 # Thus far only the 'person' category has keypoints if 'person' in self.category_to_id_map: cat_info = self.COCO.loadCats([self.category_to_id_map['person']]) else: return # Check if the annotations contain keypoint data or not if 'keypoints' in cat_info[0]: keypoints = cat_info[0]['keypoints'] self.keypoints_to_id_map = dict( zip(keypoints, range(len(keypoints)))) self.keypoints = keypoints self.num_keypoints = len(keypoints) if cfg.KRCNN.NUM_KEYPOINTS != -1: assert 
cfg.KRCNN.NUM_KEYPOINTS == self.num_keypoints, \ "number of keypoints should be equal across datasets" else: cfg.KRCNN.NUM_KEYPOINTS = self.num_keypoints self.keypoint_flip_map = { 'left_eye': 'right_eye', 'left_ear': 'right_ear', 'left_shoulder': 'right_shoulder', 'left_elbow': 'right_elbow', 'left_wrist': 'right_wrist', 'left_hip': 'right_hip', 'left_knee': 'right_knee', 'left_ankle': 'right_ankle' } def _get_gt_keypoints(self, obj): """Return ground truth keypoints.""" if 'keypoints' not in obj: return None kp = np.array(obj['keypoints']) x = kp[0::3] # 0-indexed x coordinates y = kp[1::3] # 0-indexed y coordinates # 0: not labeled; 1: labeled, not inside mask; # 2: labeled and inside mask v = kp[2::3] num_keypoints = len(obj['keypoints']) // 3 # integer division so the assert also holds on Python 3 assert num_keypoints == self.num_keypoints gt_kps = np.ones((3, self.num_keypoints), dtype=np.int32) for i in range(self.num_keypoints): gt_kps[0, i] = x[i] gt_kps[1, i] = y[i] gt_kps[2, i] = v[i] return gt_kps
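A minimal, self-contained sketch of the parsing that _get_gt_keypoints performs above, assuming COCO-style flat [x1, y1, v1, x2, y2, v2, ...] annotations; the helper name and sample values below are illustrative only.

import numpy as np

def parse_coco_keypoints(flat_kps, num_keypoints):
    """Turn a flat COCO keypoint list into a (3, K) array of x, y, visibility."""
    assert len(flat_kps) // 3 == num_keypoints
    kp = np.array(flat_kps)
    gt_kps = np.zeros((3, num_keypoints), dtype=np.int32)
    gt_kps[0, :] = kp[0::3]  # x coordinates
    gt_kps[1, :] = kp[1::3]  # y coordinates
    gt_kps[2, :] = kp[2::3]  # visibility: 0 = not labeled, 1 = labeled but occluded, 2 = visible
    return gt_kps

# one keypoint at (10, 20) marked visible, one unlabeled
print(parse_coco_keypoints([10, 20, 2, 0, 0, 0], 2))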
def train_model(self, max_iters): """Network training loop.""" last_snapshot_iter = -1 timer = Timer() model_paths = [] net = self.solver.net #def gen_data(t=0): rpn_loss_cls = 0 rpn_loss_bbox = 0 frcn_loss_cls = 0 frcn_loss_bbox = 0 accuarcy=0 while self.solver.iter < max_iters: # Make one SGD update t = self.solver.iter timer.tic() self.solver.step(1) timer.toc() rpn_loss_cls += net.blobs['rpn_cls_loss'].data rpn_loss_bbox += net.blobs['rpn_loss_bbox'].data frcn_loss_cls += net.blobs['loss_cls'].data frcn_loss_bbox += net.blobs['loss_bbox'].data accuarcy+=net.blobs['accuarcy'].data if self.solver.iter % (10 * self.solver_param.display) == 0: print 'speed: {:.3f}s / iter'.format(timer.average_time) if self.solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: last_snapshot_iter = self.solver.iter model_paths.append(self.snapshot()) if self.solver.iter % cfg.TRAIN.DRAW_ITERS == 0: #yield t, rpn_loss_cls / cfg.TRAIN.DRAW_ITERS ,rpn_loss_bbox / cfg.TRAIN.DRAW_ITERS, frcn_loss_cls / cfg.TRAIN.DRAW_ITERS ,frcn_loss_bbox / cfg.TRAIN.DRAW_ITERS,accuarcy / cfg.TRAIN.DRAW_ITERS rpn_loss_cls = 0 rpn_loss_bbox = 0 frcn_loss_cls = 0 frcn_loss_bbox = 0 accuarcy=0 if self.solver.iter==max_iters: time.sleep(5) #plt.close(fig) ''' def init1(): ax1.set_ylim(0,1) ax1.set_xlim(0,100) ax2.set_ylim(0,1) ax2.set_xlim(0,100) ax3.set_ylim(0,1) ax3.set_xlim(0,100) ax4.set_ylim(0,1) ax4.set_xlim(0,100) ax5.set_ylim(0,1) ax5.set_xlim(0,100) del xdata[:] del ydata1[:] del ydata2[:] del ydata3[:] del ydata4[:] del ydata5[:] line.set_data(xdata,ydata1) line2.set_data(xdata,ydata2) line3.set_data(xdata,ydata3) line4.set_data(xdata,ydata4) line5.set_data(xdata,ydata5) return line,line2,line3,line4,line5 fig = plt.figure() ax1 = fig.add_subplot(5,1,1) ax1.set_title("RPN cls loss") ax2 = fig.add_subplot(5,1,2) ax2.set_title("RPN bbox loss") ax3 = fig.add_subplot(5,1,3) ax3.set_title("FRCN cls loss") ax4 = fig.add_subplot(5,1,4) ax4.set_title("FRCN bbox loss") ax5 = fig.add_subplot(5,1,5) ax5.set_title("ACCUARCY") line, = ax1.plot([], [], lw=1) line2, = ax2.plot([], [], lw=1) line3, = ax3.plot([], [], lw=1) line4, = ax4.plot([], [], lw=1) line5, = ax5.plot([], [], lw=1) ax1.grid() ax2.grid() ax3.grid() ax4.grid() ax5.grid() xdata, ydata1,ydata2,ydata3,ydata4,ydata5 =[], [], [], [], [], [] def run1(data): t,y1,y2,y3,y4,y5 = data xdata.append(t) ydata1.append(y1) ydata2.append(y2) ydata3.append(y3) ydata4.append(y4) ydata5.append(y5) xmin, xmax = ax1.get_xlim() if t >= xmax: ax1.set_xlim(xmin,2*xmax) ax1.figure.canvas.draw() ax2.set_xlim(xmin,2*xmax) ax2.figure.canvas.draw() ax3.set_xlim(xmin,2*xmax) ax3.figure.canvas.draw() ax4.set_xlim(xmin,2*xmax) ax4.figure.canvas.draw() ax5.set_xlim(xmin,2*xmax) ax5.figure.canvas.draw() line.set_data(xdata,ydata1) line2.set_data(xdata,ydata2) line3.set_data(xdata,ydata3) line4.set_data(xdata,ydata4) line5.set_data(xdata,ydata5) return line, line2, line3, line4,line5 ani = animation.FuncAnimation(fig, run1, gen_data, blit=False, interval=10, repeat=False, init_func=init1) plt.show() ''' if last_snapshot_iter != self.solver.iter: model_paths.append(self.snapshot()) return model_paths
def test_net(net, imdb): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) # heuristic: keep an average of 40 detections per class per images prior # to NMS max_per_set = 40 * num_images # heuristic: keep at most 100 detection per class per image prior to NMS max_per_image = 100 # detection thresold for each class (this is adaptively set based on the # max_per_set constraint) thresh = -np.inf * np.ones(imdb.num_classes) # top_scores will hold one minheap of scores per class (used to enforce # the max_per_set constraint) top_scores = [[] for _ in xrange(imdb.num_classes)] # all detections are collected into: # all_boxes[cls][image] = N x 5 array of detections in # (x1, y1, x2, y2, score) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, net) if not os.path.exists(output_dir): os.makedirs(output_dir) # timers _t = {'im_detect': Timer(), 'misc': Timer()} roidb = imdb.roidb for i in xrange(num_images): im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() scores, boxes = im_detect(net, im, roidb[i]['boxes']) _t['im_detect'].toc() _t['misc'].tic() for j in xrange(1, imdb.num_classes): inds = np.where((scores[:, j] > thresh[j]) & (roidb[i]['gt_classes'] == 0))[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j * 4:(j + 1) * 4] top_inds = np.argsort(-cls_scores)[:max_per_image] cls_scores = cls_scores[top_inds] cls_boxes = cls_boxes[top_inds, :] # push new scores onto the minheap for val in cls_scores: heapq.heappush(top_scores[j], val) # if we've collected more than the max number of detection, # then pop items off the minheap and update the class threshold if len(top_scores[j]) > max_per_set: while len(top_scores[j]) > max_per_set: heapq.heappop(top_scores[j]) thresh[j] = top_scores[j][0] all_boxes[j][i] = \ np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) if 0: keep = nms(all_boxes[j][i], 0.3) vis_detections(im, imdb.classes[j], all_boxes[j][i][keep, :]) _t['misc'].toc() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time) for j in xrange(1, imdb.num_classes): for i in xrange(num_images): inds = np.where(all_boxes[j][i][:, -1] > thresh[j])[0] all_boxes[j][i] = all_boxes[j][i][inds, :] det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL) print 'Applying NMS to all detections' nms_dets = apply_nms(all_boxes, cfg.TEST.NMS) print 'Evaluating detections' imdb.evaluate_detections(nms_dets, output_dir)
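The max_per_set bookkeeping above hinges on one min-heap per class: once more than max_per_set scores have been collected, the heap is popped back down to size and the smallest surviving score becomes the new class threshold. A minimal sketch of that idea with made-up scores:

import heapq

max_per_set = 5
top_scores = []          # min-heap of the best scores seen so far
thresh = float('-inf')   # adaptive detection threshold

for score in [0.9, 0.2, 0.8, 0.95, 0.1, 0.7, 0.85, 0.3]:
    heapq.heappush(top_scores, score)
    if len(top_scores) > max_per_set:
        while len(top_scores) > max_per_set:
            heapq.heappop(top_scores)  # drop the weakest score
        thresh = top_scores[0]         # smallest score among the kept top-k

print(sorted(top_scores), thresh)      # five best scores; thresh equals their minimum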
caffe.set_mode_cpu() else: caffe.set_mode_gpu() caffe.set_device(args.gpu_id) cfg.GPU_ID = args.gpu_id net = caffe.Net(prototxt, caffemodel, caffe.TEST) print '\n\nLoaded network {:s}'.format(caffemodel) # Warmup on a dummy image im = 128 * np.ones((300, 500, 3), dtype=np.uint8) for i in xrange(2): _, _ = im_detect(net, im) images = sorted(glob.glob(os.path.join(args.frames_dir, '*'))) print("Processing {}: {} files... ".format(args.frames_dir, len(images))), sys.stdout.flush() if not os.path.isdir(args.save_dir): os.makedirs(args.save_dir) timer = Timer() timer.tic() for image in images: if args.debug: print("Processing file {}".format(image)) detection(net, image) timer.toc() print "{:.2f} min, {:.2f} fps".format( (timer.total_time) / 60., 1. * len(images) / (timer.total_time))
def main(): opt = TrainOptions() args = opt.initialize() os.environ["CUDA_VISIBLE_DEVICES"] = args.GPU _t = {'iter time': Timer()} model_name = args.source + '_to_' + args.target if not os.path.exists(args.snapshot_dir): os.makedirs(args.snapshot_dir) os.makedirs(os.path.join(args.snapshot_dir, 'logs')) opt.print_options(args) sourceloader, targetloader = CreateSrcDataLoader( args), CreateTrgDataLoader(args) sourceloader_iter, targetloader_iter = iter(sourceloader), iter( targetloader) pseudotrgloader = CreatePseudoTrgLoader(args) pseudoloader_iter = iter(pseudotrgloader) model, optimizer = CreateModel(args) start_iter = 0 if args.restore_from is not None: start_iter = int(args.restore_from.rsplit('/', 1)[1].rsplit('_')[1]) if args.restore_optim_from is not None: optimizer.load_state_dict(torch.load(args.restore_optim_from)) for state in optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() cudnn.enabled = True cudnn.benchmark = True model.train() model.cuda() wandb.watch(model, log='gradient', log_freq=1) # losses to log loss = ['loss_seg_src', 'loss_seg_psu'] loss_train = 0.0 loss_val = 0.0 loss_pseudo = 0.0 loss_train_list = [] loss_val_list = [] loss_pseudo_list = [] mean_img = torch.zeros(1, 1) class_weights = Variable(CS_weights).cuda() _t['iter time'].tic() for i in range(start_iter, args.num_steps): model.adjust_learning_rate(args, optimizer, i) # adjust learning rate optimizer.zero_grad() # zero grad src_img, src_lbl, _, _ = sourceloader_iter.next() # new batch source trg_img, trg_lbl, _, _ = targetloader_iter.next() # new batch target psu_img, psu_lbl, _, _ = pseudoloader_iter.next() scr_img_copy = src_img.clone() if mean_img.shape[-1] < 2: B, C, H, W = src_img.shape mean_img = IMG_MEAN.repeat(B, 1, H, W) #-------------------------------------------------------------------# # 1. source to target, target to target src_in_trg = FDA_source_to_target(src_img, trg_img, L=args.LB) # src_lbl trg_in_trg = trg_img # 2. 
subtract mean src_img = src_in_trg.clone() - mean_img # src_1, trg_1, src_lbl trg_img = trg_in_trg.clone() - mean_img # trg_1, trg_0, trg_lbl psu_img = psu_img.clone() - mean_img #-------------------------------------------------------------------# # evaluate and update params ##### src_img, src_lbl = Variable(src_img).cuda(), Variable( src_lbl.long()).cuda() # to gpu src_seg_score = model(src_img, lbl=src_lbl, weight=class_weights, ita=args.ita) # forward pass loss_seg_src = model.loss_seg # get loss loss_ent_src = model.loss_ent # use pseudo label as supervision psu_img, psu_lbl = Variable(psu_img).cuda(), Variable( psu_lbl.long()).cuda() psu_seg_score = model(psu_img, lbl=psu_lbl, weight=class_weights, ita=args.ita) loss_seg_psu = model.loss_seg loss_ent_psu = model.loss_ent loss_all = loss_seg_src + (loss_seg_psu + args.entW * loss_ent_psu ) # loss of seg on src, and ent on s and t loss_all.backward() optimizer.step() loss_train += loss_seg_src.detach().cpu().numpy() loss_val += loss_seg_psu.detach().cpu().numpy() if (i + 1) % args.save_pred_every == 0: print('taking snapshot ...') torch.save( model.state_dict(), os.path.join(args.snapshot_dir, '%s_' % (args.source) + str(i + 1) + '.pth')) torch.save( optimizer.state_dict(), os.path.join(args.snapshot_dir_optim, '%s_' % (args.source) + '.pth')) wandb.log({ "src seg loss": loss_seg_src.data, "psu seg loss": loss_seg_psu.data, "learnign rate": optimizer.param_groups[0]['lr'] * 10000 }) if (i + 1) % args.print_freq == 0: _t['iter time'].toc(average=False) print('[it %d][src seg loss %.4f][psu seg loss %.4f][lr %.4f][%.2fs]' % \ (i + 1, loss_seg_src.data, loss_seg_psu.data, optimizer.param_groups[0]['lr']*10000, _t['iter time'].diff) ) sio.savemat(args.tempdata, { 'src_img': src_img.cpu().numpy(), 'trg_img': trg_img.cpu().numpy() }) loss_train /= args.print_freq loss_val /= args.print_freq loss_train_list.append(loss_train) loss_val_list.append(loss_val) sio.savemat(args.matname, { 'loss_train': loss_train_list, 'loss_val': loss_val_list }) loss_train = 0.0 loss_val = 0.0 if i + 1 > args.num_steps_stop: print('finish training') break _t['iter time'].tic()
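For context, FDA_source_to_target used above comes from Fourier Domain Adaptation: the low-frequency band of the source image's amplitude spectrum is replaced with the target's while the source phase is kept, so the source content adopts the target's low-level style. A rough single-channel numpy sketch of that idea; the project's actual implementation (batched torch tensors, the exact meaning of the L/LB window parameter) may differ.

import numpy as np

def fda_source_to_target(src, trg, beta=0.05):
    """Swap the centered low-frequency amplitude of src with that of trg."""
    fft_src, fft_trg = np.fft.fft2(src), np.fft.fft2(trg)
    amp_src, pha_src = np.abs(fft_src), np.angle(fft_src)
    amp_trg = np.abs(fft_trg)

    # replace a small centered window of the shifted amplitude spectrum
    amp_src, amp_trg = np.fft.fftshift(amp_src), np.fft.fftshift(amp_trg)
    h, w = src.shape
    b = int(np.floor(min(h, w) * beta))
    ch, cw = h // 2, w // 2
    amp_src[ch - b:ch + b, cw - b:cw + b] = amp_trg[ch - b:ch + b, cw - b:cw + b]
    amp_src = np.fft.ifftshift(amp_src)

    # recombine with the source phase and invert the FFT
    return np.real(np.fft.ifft2(amp_src * np.exp(1j * pha_src)))

src, trg = np.random.rand(64, 64), np.random.rand(64, 64)
print(fda_source_to_target(src, trg).shape)  # (64, 64)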
def train_model(self, sess, max_iters): # Build data layers for both training and validation set self.data_layer = RoIDataLayer(self.roidb, self.imdb.num_classes) self.data_layer_val = RoIDataLayer(self.valroidb, self.imdb.num_classes, random=True) # Construct the computation graph lr, train_op = self.construct_graph(sess) # Find previous snapshots if there is any to restore from lsf, nfiles, sfiles = self.find_previous() # Initialize the variables or restore them from the last snapshot if lsf == 0: rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = self.initialize( sess) else: rate, last_snapshot_iter, stepsizes, np_paths, ss_paths = self.restore( sess, str(sfiles[-1]), str(nfiles[-1])) timer = Timer() iter = last_snapshot_iter + 1 last_summary_time = time.time() # Make sure the lists are not empty stepsizes.append(max_iters) stepsizes.reverse() next_stepsize = stepsizes.pop() while iter < max_iters + 1: # Learning rate if iter == next_stepsize + 1: # Add snapshot here before reducing the learning rate self.snapshot(sess, iter) rate *= cfg.TRAIN.GAMMA sess.run(tf.assign(lr, rate)) next_stepsize = stepsizes.pop() timer.tic() # Get training data, one batch at a time blobs = self.data_layer.forward() now = time.time() if iter == 1 or now - last_summary_time > cfg.TRAIN.SUMMARY_INTERVAL: # Compute the graph with summary rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = \ self.net.train_step_with_summary(sess, blobs, train_op) self.writer.add_summary(summary, float(iter)) # Also check the summary on the validation set # todo: wn to modify blobs_val = self.data_layer_val.forward() summary_val = self.net.get_summary(sess, blobs_val) self.valwriter.add_summary(summary_val, float(iter)) last_summary_time = now else: # Compute the graph without summary rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = \ self.net.train_step(sess, blobs, train_op) timer.toc() # Display training information if iter % (cfg.TRAIN.DISPLAY) == 0: print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n ' '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n >>> lr: %f' % \ (iter, max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, lr.eval())) print('speed: {:.3f}s / iter'.format(timer.average_time)) # Snapshotting if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: last_snapshot_iter = iter ss_path, np_path = self.snapshot(sess, iter) np_paths.append(np_path) ss_paths.append(ss_path) # Remove the old snapshots if there are too many if len(np_paths) > cfg.TRAIN.SNAPSHOT_KEPT: self.remove_snapshot(np_paths, ss_paths) iter += 1 if last_snapshot_iter != iter - 1: self.snapshot(sess, iter - 1) self.writer.close() self.valwriter.close()
def extractObjects(self, video_path): import os import cv2 import torch import numpy as np from torch.multiprocessing import Pool from darknet import Darknet19 import utils.yolo as yolo_utils import utils.network as net_utils from utils.timer import Timer import cfgs.config as cfg def preprocess(fname): # return fname image = cv2.imread(fname) im_data = np.expand_dims( yolo_utils.preprocess_test((image, None, cfg.inp_size))[0], 0) return image, im_data # hyper-parameters # npz_fname = 'models/yolo-voc.weights.npz' # h5_fname = 'models/yolo-voc.weights.h5' trained_model = cfg.trained_model # trained_model = os.path.join(cfg.train_output_dir, 'darknet19_voc07trainval_exp3_158.h5') thresh = 0.5 im_path = video_path # --- net = Darknet19() net_utils.load_net(trained_model, net) # net.load_from_npz(npz_fname) # net_utils.save_net(h5_fname, net) net.cuda() net.eval() print('load model succ...') t_det = Timer() t_total = Timer() # im_fnames = ['person.jpg'] im_fnames = sorted([ fname for fname in sorted(os.listdir(im_path)) if os.path.splitext(fname)[-1] == '.jpg' ]) im_fnames = (os.path.join(im_path, fname) for fname in im_fnames) objectDetect = [] for i, (image) in enumerate(im_fnames): t_total.tic() im_data = preprocess(image) image = im_data[0] im_data = im_data[1] im_data = net_utils.np_to_variable(im_data, is_cuda=True, volatile=True).permute( 0, 3, 1, 2) t_det.tic() bbox_pred, iou_pred, prob_pred = net(im_data) det_time = t_det.toc() # to numpy bbox_pred = bbox_pred.data.cpu().numpy() iou_pred = iou_pred.data.cpu().numpy() prob_pred = prob_pred.data.cpu().numpy() # print bbox_pred.shape, iou_pred.shape, prob_pred.shape bboxes, scores, cls_inds = yolo_utils.postprocess( bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh) objectDetect.append(','.join( set([cfg.label_names[i] for i in cls_inds]))) return objectDetect
def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30): ''' Processes the given video sequence and provides the tracking output (writes the results to file). It uses the JDE model to obtain information about the online targets present. Parameters ---------- opt : Namespace Contains information passed as command-line arguments. dataloader : LoadVideo Instance of LoadVideo class used for fetching the image sequence and associated data. data_type : String Type of dataset corresponding to the given video. result_filename : String The name (path) of the file for storing results. save_dir : String Path to the folder for storing the frames containing bounding box information (result frames). show_image : bool Option for showing individual frames during run-time. frame_rate : int Frame rate of the given video. Returns ------- (Returns are not significant here) frame_id : int Sequence number of the last sequence ''' ''' width = dataloader.vw height = dataloader.vh ''' width = 640 height = 480 ''' process = ( ffmpeg #new added re #new added preset ultrafast (try different mode if not ok) .input('pipe:', format = 'rawvideo', pix_fmt = 'rgb24', s = '{}x{}'.format(width, height), re = None) #new added #.setpts('1.7*PTS') .output('../try.m3u8', format = 'hls', pix_fmt = 'yuv420p', vcodec = 'libx264', preset = "ultrafast", hls_time = 10, hls_list_size = 2, start_number = 0, hls_flags = 'delete_segments+append_list', hls_segment_filename = '../try_%05d.ts') .overwrite_output() .run_async(pipe_stdin = True) ) ''' track_id = 0 if save_dir: mkdir_if_missing(save_dir) tracker = JDETracker(opt, frame_rate=frame_rate) timer = Timer() results = [] frame_id = 0 cv2.namedWindow('online_im') cv2.setMouseCallback('online_im', mouse_click) #ffmpeg process for path, img, img0 in dataloader: if frame_id % 20 == 0: logger.info('Processing frame {} ({:.2f} fps)'.format( frame_id, 1. / max(1e-5, timer.average_time))) # run tracking timer.tic() blob = torch.from_numpy(img).cuda().unsqueeze(0) online_targets = tracker.update(blob, img0) online_tlwhs = [] online_ids = [] for t in online_targets: tlwh = t.tlwh tid = t.track_id vertical = tlwh[2] / tlwh[3] > 1.6 if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: online_tlwhs.append(tlwh) online_ids.append(tid) timer.toc() # save results results.append((frame_id + 1, online_tlwhs, online_ids)) if show_image or save_dir is not None: online_im, track_id = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id, fps=1. / timer.average_time, single=single, mouse_x=mouse_x, mouse_y=mouse_y, track_id=track_id) if show_image: pass #cv2.imshow('online_im', online_im) #cv2.waitKey(1) #plt.imshow(online_im) #plt.show() #online_im_rgb = cv2.cvtColor(online_im, cv2.COLOR_BGR2RGB) #write_frame(process, online_im_rgb) stream(online_im) if save_dir is not None: cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im) frame_id += 1 # save results write_results(result_filename, results, data_type) # close process #close_process(process) terminate_stream() return frame_id, timer.average_time, timer.calls
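The per-frame filtering above keeps a target only if its box is larger than opt.min_box_area and its width/height ratio is at most 1.6 (very wide boxes are treated as unlikely pedestrians). A small self-contained sketch of the same rule with illustrative tlwh boxes and a made-up area threshold:

def keep_track(tlwh, min_box_area=200.0):
    """tlwh = (top-left x, top-left y, width, height)."""
    x, y, w, h = tlwh
    too_wide = w / h > 1.6             # the code above calls this check 'vertical'
    return w * h > min_box_area and not too_wide

boxes = [(10, 10, 20, 50), (5, 5, 40, 20), (0, 0, 5, 10)]
print([keep_track(b) for b in boxes])  # [True, False, False]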
def test_net(save_folder, net, detector, cuda, testset, transform, max_per_image=300, thresh=0.005): if not os.path.exists(save_folder): os.mkdir(save_folder) # dump predictions and assoc. ground truth to text file for now num_images = len(testset) num_classes = (21, 81)[args.dataset == 'COCO'] all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)] _t = {'im_detect': Timer(), 'misc': Timer()} det_file = os.path.join(save_folder, 'detections.pkl') if args.retest: f = open(det_file,'rb') all_boxes = pickle.load(f) print('Evaluating detections') testset.evaluate_detections(all_boxes, save_folder) return for i in range(num_images): img = testset.pull_image(i) x = Variable(transform(img).unsqueeze(0),volatile=True) if cuda: x = x.cuda() _t['im_detect'].tic() out = net(x=x, test=True) # forward pass arm_loc,arm_conf,odm_loc,odm_conf = out boxes, scores = detector.forward((odm_loc,odm_conf), priors,(arm_loc,arm_conf)) detect_time = _t['im_detect'].toc() boxes = boxes[0] scores=scores[0] boxes = boxes.cpu().numpy() scores = scores.cpu().numpy() # scale each detection back up to the image scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]).cpu().numpy() boxes *= scale _t['misc'].tic() for j in range(1, num_classes): inds = np.where(scores[:, j] > thresh)[0] if len(inds) == 0: all_boxes[j][i] = np.empty([0, 5], dtype=np.float32) continue c_bboxes = boxes[inds] c_scores = scores[inds, j] c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype( np.float32, copy=False) if args.dataset == 'VOC': cpu = False else: cpu = False keep = nms(c_dets, 0.45, force_cpu=cpu) keep = keep[:50] c_dets = c_dets[keep, :] all_boxes[j][i] = c_dets if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in range(1,num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in range(1, num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] nms_time = _t['misc'].toc() if i % 20 == 0: print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' .format(i + 1, num_images, detect_time, nms_time)) _t['im_detect'].clear() _t['misc'].clear() with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') if args.dataset == 'VOC': APs,mAP = testset.evaluate_detections(all_boxes, save_folder) return APs,mAP else: testset.evaluate_detections(all_boxes, save_folder)
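When an image accumulates more than max_per_image detections over all classes, the code above takes the score of the max_per_image-th best detection as a cut-off and discards everything weaker. A short numpy sketch of that per-image cap with made-up per-class scores:

import numpy as np

max_per_image = 4
# scores for classes 1 and 2 in one image (class 0 = background is skipped)
per_class = {1: np.array([0.9, 0.4, 0.6]), 2: np.array([0.8, 0.3, 0.7])}

image_scores = np.hstack(list(per_class.values()))
if len(image_scores) > max_per_image:
    image_thresh = np.sort(image_scores)[-max_per_image]   # score of the 4th best detection
    per_class = {c: s[s >= image_thresh] for c, s in per_class.items()}

print(per_class)  # keeps 0.9/0.6 for class 1 and 0.8/0.7 for class 2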
opt_param = list(net.parameters()) optimizer = torch.optim.SGD(opt_param[2:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.makedirs(output_dir) # training firstFlag = True train_loss = 0 tp, tf, fg, bg = 0., 0., 0, 0 step_cnt = 0 re_cnt = False t = Timer() t.tic() for step in range(start_step, end_step + 1): # get one batch blobs = data_layer.forward() im_data = blobs['data'] rois = blobs['rois'] im_info = blobs['im_info'] gt_vec = blobs['labels'] #gt_boxes = blobs['gt_boxes'] # forward net(im_data, rois, im_info, gt_vec) loss = net.loss train_loss += loss.item()
def test_net(net, imdb, thresh=0.05, visualize=False, no_cache=False, output_path=None): """ Testing the SSH network on a dataset :param net: The trained network :param imdb: The test imdb :param thresh: Detections with a probability less than this threshold are ignored :param visualize: Whether to visualize the detections :param no_cache: Whether to cache detections or not :param output_path: Output directory """ # Initializing the timers print('Evaluating {} on {}'.format(net.name, imdb.name)) timers = {'detect': Timer(), 'misc': Timer()} dets = [[[] for _ in xrange(len(imdb))] for _ in xrange(imdb.num_classes)] # NOTE: by default the detections for a given method is cached, set no_cache to disable caching! run_inference = True if not no_cache: output_dir = get_output_dir(imdb_name=imdb.name, net_name=net.name, output_dir=output_path) det_file = os.path.join(output_dir, 'detections.pkl') if os.path.exists(det_file) and not visualize: try: with open(det_file, 'r') as f: dets = cPickle.load(f) run_inference = False print('Loading detections from cache: {}'.format(det_file)) except: print( 'Could not load the cached detections file, detecting from scratch!' ) # Perform inference on images if necessary if run_inference: pyramid = True if len(cfg.TEST.SCALES) > 1 else False for i in xrange(len(imdb)): im_path = imdb.image_path_at(i) dets[1][i], detect_time = detect(net, im_path, thresh, visualize=visualize, timers=timers, pyramid=pyramid) print('\r{:d}/{:d} detect-time: {:.3f}s, misc-time:{:.3f}s'.format( i + 1, len(imdb), timers['detect'].average_time, timers['misc'].average_time), end='') det_file = os.path.join(output_dir, 'detections.pkl') if not no_cache: with open(det_file, 'wb') as f: cPickle.dump(dets, f, cPickle.HIGHEST_PROTOCOL) print('\n', end='') # Evaluate the detections print('Evaluating detections') imdb.evaluate_detections(all_boxes=dets, output_dir=output_dir, method_name=net.name) print('All Done!')
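The caching above follows a simple pattern: if a detections pickle already exists, load it and skip inference; otherwise run the detector and dump the results for next time. A minimal sketch of that pattern, with a hypothetical compute_detections callable standing in for the actual detector:

import os
import pickle

def load_or_compute(det_file, compute_detections):
    """Return cached detections if available, otherwise compute and cache them."""
    if os.path.exists(det_file):
        try:
            with open(det_file, 'rb') as f:
                return pickle.load(f)
        except Exception:
            pass  # unreadable cache: fall through and recompute
    dets = compute_detections()
    with open(det_file, 'wb') as f:
        pickle.dump(dets, f, pickle.HIGHEST_PROTOCOL)
    return dets

dets = load_or_compute('/tmp/detections.pkl', lambda: [[0, 0, 10, 10, 0.9]])
print(len(dets))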
from interface import compare_vector, Detect, Reid import cv2 import multiprocessing from multiprocessing import Process, Queue import threading from threading import Lock import time import zmq import sys import signal import datetime from configparser import ConfigParser # import matplotlib.pyplot as plt _t = {'detect': Timer(), 'reid': Timer()} r''' port: 11111 to receive message from system port: 11112 to send the result of recognition ''' def quit(signum, frame): zmq_process.stop() # os.kill(zmq_process.pid, signal.SIGTERM) for i in processes: os.kill(i.pid, signal.SIGTERM) sys.exit() class ZMQReID(threading.Thread):
from utils.timer import Timer import numpy as np import time caffe.set_mode_gpu() caffe.set_device(3) #cfg_from_file("/tmp/test/submit_1019.yml") cfg_from_file("/tmp/test/submit_0716.yml") prototxt = "/tmp/test/weaponModel_test.prototxt" caffemodel = "/tmp/test/weaponModel_iter_6000.caffemodel" net = caffe.Net(prototxt, caffemodel, caffe.TEST) im = cv2.imread("/tmp/test/test.jpg") _t = { 'im_preproc': Timer(), 'im_net': Timer(), 'im_postproc': Timer(), 'misc': Timer() } scores, boxes = im_detect(net, im, _t) for i in range(10): _s = time.time() scores, boxes = im_detect(net, im, _t) _e = time.time() print "time: %s" % (_e - _s) time.sleep(1)
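Timers with tic()/toc()/average_time/total_time are used throughout these snippets. The following is only an assumed minimal sketch of how such a timer behaves, not the actual source of utils.timer.Timer:

import time

class SimpleTimer(object):
    """Accumulates wall-clock time over tic()/toc() pairs."""
    def __init__(self):
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0
        self.average_time = 0.0

    def tic(self):
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time if average else self.diff

t = SimpleTimer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)
    t.toc()
print('average {:.3f}s over {} calls'.format(t.average_time, t.calls))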
def detect(net, im_path, thresh=0.05, visualize=False, timers=None, pyramid=False, visualization_folder=None): """ Main module to detect faces :param net: The trained network :param im_path: The path to the image :param thresh: Detection with a less score than thresh are ignored :param visualize: Whether to visualize the detections :param timers: Timers for calculating detect time (if None new timers would be created) :param pyramid: Whether to use pyramid during inference :param visualization_folder: If set the visualizations would be saved in this folder (if visualize=True) :return: cls_dets (bounding boxes concatenated with scores) and the timers """ if not timers: timers = {'detect': Timer(), 'misc': Timer()} im = cv2.imread(im_path) imfname = os.path.basename(im_path) sys.stdout.flush() timers['detect'].tic() if not pyramid: im_scale = _compute_scaling_factor(im.shape, cfg.TEST.SCALES[0], cfg.TEST.MAX_SIZE) im_blob = _get_image_blob(im, [im_scale]) probs, boxes = forward_net(net, im_blob[0], im_scale, False) boxes = boxes[:, 0:4] else: all_probs = [] all_boxes = [] # Compute the scaling coefficients for the pyramid base_scale = _compute_scaling_factor(im.shape, cfg.TEST.PYRAMID_BASE_SIZE[0], cfg.TEST.PYRAMID_BASE_SIZE[1]) pyramid_scales = [ float(scale) / cfg.TEST.PYRAMID_BASE_SIZE[0] * base_scale for scale in cfg.TEST.SCALES ] im_blobs = _get_image_blob(im, pyramid_scales) for i in range(len(pyramid_scales)): probs, boxes = forward_net(net, im_blobs[i], pyramid_scales[i], True) for j in xrange(len(probs)): # Do not apply M3 to the largest scale if i < len(pyramid_scales) - 1 or j < len(probs) - 1: all_boxes.append(boxes[j][:, 0:4]) all_probs.append(probs[j].copy()) probs = np.concatenate(all_probs) boxes = np.concatenate(all_boxes) timers['detect'].toc() timers['misc'].tic() inds = np.where(probs[:, 0] > thresh)[0] probs = probs[inds, 0] boxes = boxes[inds, :] dets = np.hstack((boxes, probs[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(dets, cfg.TEST.NMS_THRESH) cls_dets = dets[keep, :] if visualize: plt_name = os.path.splitext(imfname)[0] + '_detections_{}'.format( net.name) visusalize_detections(im, cls_dets, plt_name=plt_name, visualization_folder=visualization_folder) timers['misc'].toc() return cls_dets, timers
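_compute_scaling_factor above is used to pick an image scale so that the shorter side reaches the target size without the longer side exceeding a maximum; the sketch below reproduces that common Fast R-CNN-style computation under that assumption (the SSH implementation may differ in details):

def compute_scaling_factor(im_shape, target_size, max_size):
    """Scale so the shorter side becomes target_size, capped so the longer side <= max_size."""
    im_min = float(min(im_shape[0], im_shape[1]))
    im_max = float(max(im_shape[0], im_shape[1]))
    scale = target_size / im_min
    if round(scale * im_max) > max_size:
        scale = max_size / im_max
    return scale

print(compute_scaling_factor((480, 1200, 3), 600, 1000))  # capped by max_size: 1000 / 1200
print(compute_scaling_factor((600, 800, 3), 600, 2000))   # shorter side already 600 -> 1.0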
def main(): parser = argparse.ArgumentParser() parser.add_argument("--job_name", type=str, default="") parser.add_argument("--task_index", type=int, default=0) parser.add_argument('--debug', default=False, type=bool) parser.add_argument('--stop_globalstep', default=2000, type=int) parser.add_argument('--checkpoint_dir', default="checkpoint_dir",type=str) parser.add_argument('--watch_gpu',required=True ,type=int, help="watch gpu id filled Set it the same as visible gpu id") parser.add_argument('--warm_up_step',default = 20, type = int) profiler_save_steps = cfg.PROFILER_SAVE_STEP summary_save_steps = cfg.SUMMARY_SAVE_STEP FLAGS, unparsed = parser.parse_known_args() ps_hosts = cfg.PS_HOSTS.split(",") worker_hosts = cfg.WORKER_HOSTS.split(",") ps_size = len(ps_hosts) workers_size = len(worker_hosts) dispipe_dir="DisPipe_"+str(workers_size)+"workers"+str(ps_size)+"ps"+"_train_logs" if not os.path.exists(dispipe_dir): os.makedirs(dispipe_dir) inside_bsnQnM_dir = "Dis_Pipe_"+cfg.BS_NT_MUL_PREFIX logrootpath = os.path.join(dispipe_dir, inside_bsnQnM_dir) if not os.path.exists(logrootpath): os.makedirs(logrootpath) fpslog_name = "DisPipe_" +"task"+str(FLAGS.task_index) +cfg.BS_NT_MUL_PREFIX+ "_fpslog.txt" concated_path = logrootpath + "/" + fpslog_name checkpoint_dir = FLAGS.checkpoint_dir if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) gpulog_name = "DisPipe" + "_task" + str(FLAGS.task_index)+"gpu"+str(FLAGS.watch_gpu)+cfg.BS_NT_MUL_PREFIX + "_gpulog.txt" ############ ###########################gpulog################################# def start_gpulog(path, fname): # has to be called before start of training gpuinfo_path = path + "/" + fname with open(gpuinfo_path, 'w'): argument = 'timestamp,count,gpu_name,gpu_bus_id,memory.total,memory.used,utilization.gpu,utilization.memory' try: proc = subprocess.Popen( ['nvidia-smi --format=csv --query-gpu=%s %s %s %s' % (argument, ' -l', '-i '+ str(FLAGS.watch_gpu), '-f ' + gpuinfo_path)],shell=True) except KeyboardInterrupt: try: proc.kill() except OSError: pass proc.wait() return proc initial_learning_rate = cfg.LEARNING_RATE decay_steps = cfg.DECAY_STEPS decay_rate = cfg.DECAY_RATE staircase = cfg.STAIRCASE #os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU print('Start training ...') ###############################pipeline########################################### tf.reset_default_graph() image_producer = Pascal_voc('train') (image, label) = image_producer.get_one_image_label_element() image_shape = (image_producer.image_size, image_producer.image_size, 3) # possible value is a int number label_size = (image_producer.cell_size, image_producer.cell_size, 25) # possible value is 0 or 1 processed_queue = tf.FIFOQueue(capacity=int(image_producer.batch_size * cfg.MUL_QUEUE_BATCH),shapes = [image_shape, label_size],dtypes = [tf.float32, tf.float32],name = 'processed_queue') enqueue_processed_op = processed_queue.enqueue([image, label]) num_enqueue_threads = min(image_producer.num_enqueue_threads, image_producer.gt_labels_length) queue_runner = tf.train.QueueRunner(processed_queue, [enqueue_processed_op] * num_enqueue_threads) tf.train.add_queue_runner(queue_runner) (images, labels) = processed_queue.dequeue_many(image_producer.batch_size) ############################################################################## #############################Parameters####################################### cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts}) server = tf.train.Server(cluster, job_name=FLAGS.job_name, 
task_index=FLAGS.task_index) with tf.device(tf.train.replica_device_setter( worker_device="/job:worker/task:%d" % FLAGS.task_index,cluster=cluster)): yolo = YOLONet(images, labels) # print('allocate variable and tensor successfuly') global_step = tf.train.get_or_create_global_step() learning_rate = tf.train.exponential_decay( initial_learning_rate, global_step, decay_steps, decay_rate, staircase, name='learning_rate') optimizer = tf.train.GradientDescentOptimizer( learning_rate=learning_rate) train_op = slim.learning.create_train_op( yolo.total_loss, optimizer, global_step=global_step) ################################################################################ #############################loghook############################################ profiler_hook = tf.train.ProfilerHook(save_steps=profiler_save_steps, output_dir=logrootpath, show_memory=True,show_dataflow=True) summary_op = tf.summary.merge_all() summary_hook = tf.train.SummarySaverHook(save_steps=summary_save_steps, output_dir=logrootpath, summary_op=summary_op) if FLAGS.debug == True: tensors_to_log = [global_step, yolo.total_loss] def formatter(curvals): print("Global step %d, Loss %f!" % ( curvals[global_step], curvals[yolo.total_loss])) logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=100, formatter=formatter) hooks = [tf.train.StopAtStepHook(last_step=FLAGS.stop_globalstep), logging_hook, profiler_hook, summary_hook] else: hooks = [tf.train.StopAtStepHook(last_step=FLAGS.stop_globalstep), profiler_hook, summary_hook] # config.gpu_options.allocator_type = 'BFC' # config.gpu_options.per_process_gpu_memory_fraction = 0.8 config = tf.ConfigProto(allow_soft_placement = True, log_device_placement=False) config.gpu_options.allow_growth = True proc = start_gpulog(logrootpath, gpulog_name) ################################################################################ ###########################train#################################################### with tf.train.MonitoredTrainingSession(master=server.target, is_chief=(FLAGS.task_index == 0), config=config,hooks=hooks, checkpoint_dir=FLAGS.checkpoint_dir,save_checkpoint_secs=3600) as sess: coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) start_global_step_value = sess.run(global_step) timer = Timer() iters_per_toc = 20 txtForm = "Training speed:global step %d, local avg %f fps, global %f fps, loss %f" #run and log n = 0 while not sess.should_stop(): n = n + 1 if n==FLAGS.warm_up_step: start_global_step_value = sess.run(global_step) timer.tic(global_restart=True, start_global_step_value = start_global_step_value) if n % iters_per_toc ==0: timer.tic() yolo_loss, global_step_value, _ = sess.run([yolo.total_loss, global_step, train_op]) if n % iters_per_toc == 0: local_avg_fps, global_avg_fps = timer.toc(iters_per_toc, global_step_value) txtData = global_step_value, local_avg_fps, global_avg_fps, yolo_loss print(txtForm % txtData) with open(concated_path, 'a+') as log: log.write("%d,%.4f,%.4f,%.4f\n" % txtData) coord.request_stop() coord.join(threads) print('Done training.') try: proc.terminate() except OSError: pass print("Kill subprocess failed. Kill nvidia-smi mannually")
nn.Conv2d(160, anchor_num[4] * num_classes, kernel_size=3, padding=1) ] # ===================================================================================# loc_layers += [nn.Conv2d(160, anchor_num[5] * 4, kernel_size=3, padding=1)] conf_layers += [ nn.Conv2d(160, anchor_num[5] * num_classes, kernel_size=3, padding=1) ] return (loc_layers, conf_layers) def build_net(phase, size=300, num_classes=21): if phase != "test" and phase != "train": print("Error: Phase not recognized") return return MobileNetV3(phase, size, multibox(num_classes), num_classes) if __name__ == '__main__': x = torch.randn(1, 3, 300, 300) net = build_net('test') net.eval() from utils.timer import Timer _t = {'im_detect': Timer()} for i in range(300): _t['im_detect'].tic() net.forward(x) detect_time = _t['im_detect'].toc() print(detect_time)
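Each head above emits anchor_num[k] * 4 localization channels and anchor_num[k] * num_classes confidence channels per spatial location. A small PyTorch check of those output shapes for a single feature map, with illustrative numbers:

import torch
import torch.nn as nn

num_classes, anchors, channels = 21, 6, 160
loc = nn.Conv2d(channels, anchors * 4, kernel_size=3, padding=1)
conf = nn.Conv2d(channels, anchors * num_classes, kernel_size=3, padding=1)

feat = torch.randn(1, channels, 10, 10)  # one 10x10 feature map
print(loc(feat).shape)   # torch.Size([1, 24, 10, 10])  -> 6 anchors * 4 box offsets
print(conf(feat).shape)  # torch.Size([1, 126, 10, 10]) -> 6 anchors * 21 class scores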
def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False,test='01'): """Test a Fast R-CNN network on an image database.""" num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] # timers _t = {'im_detect' : Timer(), 'misc' : Timer()} # results file directory: data/test_results/ result_dir = os.path.join(cfg.DATA_DIR,'test_results') for i in xrange(num_images): # filter out any ground truth boxes box_proposals = None # generate test results file according to the rules sequence = imdb._image_index[i].split('/')[0] frame = int(imdb._image_index[i].split('/')[1].split('g')[-1]) test_file = os.path.join(result_dir, sequence.split('_')[0] + '_' + sequence.split('_')[1] + '_Det_EB.txt') #print 'test_file:', test_file f = open(test_file, 'a') im = cv2.imread(imdb.image_path_at(i)) _t['im_detect'].tic() print _t['im_detect'].start_time scores, boxes = im_detect(net, im, box_proposals) _t['misc'].tic() # skip j = 0, because it's the background class for j in xrange(1, imdb.num_classes): #print 'last number of proposal:',scores.shape inds = np.where(scores[:, j] > thresh)[0] cls_scores = scores[inds, j] cls_boxes = boxes[inds, j*4:(j+1)*4] cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \ .astype(np.float32, copy=False) keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep, :] if vis: vis_detections(im, imdb.classes[j], cls_dets) all_boxes[j][i] = cls_dets # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] _t['misc'].toc() for k in range(len(all_boxes[1][i])): temp = str(frame) + ',' + str(k + 1) + ',' + str(all_boxes[1][i][k][0]) + ',' + str( all_boxes[1][i][k][1]) + ',' + str( all_boxes[1][i][k][2] - all_boxes[1][i][k][0]) + ',' + str( all_boxes[1][i][k][3] - all_boxes[1][i][k][1]) + ',' + str(all_boxes[1][i][k][4]) + '\n' f.write(temp) f.close() print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, _t['im_detect'].average_time, _t['misc'].average_time)
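The result files above store one detection per line as frame, index, x, y, width, height, score, converting boxes from corner (x1, y1, x2, y2) to width/height form. A short sketch of that formatting with a made-up detection:

def det_to_line(frame, det_index, box_xyxy, score):
    """box_xyxy = (x1, y1, x2, y2); the file stores x, y, w, h."""
    x1, y1, x2, y2 = box_xyxy
    return '{},{},{},{},{},{},{}\n'.format(frame, det_index, x1, y1, x2 - x1, y2 - y1, score)

print(det_to_line(7, 1, (10.0, 20.0, 60.0, 120.0), 0.93))  # 7,1,10.0,20.0,50.0,100.0,0.93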
def train(self): train_timer = Timer() load_timer = Timer() sum_loss = np.zeros([cfg.MAX_ITER + 1], dtype=float) #plt.axis([0, cfg.MAX_ITER, cfg.AX_LOW, cfg.AX_HIGHT]) #plt.ion() for step in xrange(1, self.max_iter + 1): load_timer.tic() images, labels = self.data.get() load_timer.toc() feed_dict = {self.net.images: images, self.net.labels: labels} if step % self.summary_iter == 0: if step % (self.summary_iter * 10) == 0: train_timer.tic() summary_str, loss, _ = self.sess.run( [self.summary_op, self.net.total_loss, self.train_op], feed_dict=feed_dict) sum_loss[step] = loss #plt.scatter(step,loss) #plt.pause(0.1) train_timer.toc() log_str = ( '{} Epoch: {}, Step: {}, Learning rate: {},' ' Loss: {:5.3f}\nSpeed: {:.3f}s/iter,' ' Load: {:.3f}s/iter, Remain: {}').format( datetime.datetime.now().strftime('%m/%d %H:%M:%S'), self.data.epoch, int(step) + cfg.LAST_STEP, round(self.learning_rate.eval(session=self.sess), 6), loss, train_timer.average_time, load_timer.average_time, train_timer.remain(step, self.max_iter)) print(log_str) with open(self.train_process_save_txt, 'a') as f: f.writelines(log_str + '\n') else: train_timer.tic() summary_str, loss = self.sess.run( [self.summary_op, self.train_op], feed_dict=feed_dict) sum_loss[step] = loss train_timer.toc() #print('\nb') #print(summary_str) self.writer.add_summary(summary_str, step) else: train_timer.tic() loss = self.sess.run(self.train_op, feed_dict=feed_dict) sum_loss[step] = loss train_timer.toc() #print('q') if step % self.save_iter == 0: print('Saving checkpoint file to:{}-{}'.format( self.ckpt_file, step + cfg.LAST_STEP)) self.saver.save(self.sess, self.ckpt_file, global_step=self.global_step + cfg.LAST_STEP) with open(self.train_process_save_txt, 'a') as f: f.writelines('Saving checkpoint file to:{}-{}\n'.format( self.ckpt_file, step + cfg.LAST_STEP))
def train_model(self, sess, max_iters): """Network training loop.""" data_layer = get_data_layer(self.roidb, self.imdb.num_classes) part_features_fc7 = self.net.get_output('pool_5')[:self.proposal_number, :] part_features_fc71 = self.net1.get_output('pool_5')[:self.proposal_number, :] part_features_fc72 = self.net2.get_output('pool_5')[:self.proposal_number, :] part_features_fc73 = self.net3.get_output('pool_5')[:self.proposal_number, :] part_features_fc74 = self.net4.get_output('pool_5')[:self.proposal_number, :] part_features_fc75 = self.net5.get_output('pool_5')[:self.proposal_number, :] part_features_fc76 = self.net6.get_output('pool_5')[:self.proposal_number, :] part_features_fc77 = self.net7.get_output('pool_5')[:self.proposal_number, :] part_features_fc78 = self.net8.get_output('pool_5')[:self.proposal_number, :] part_features_fc79 = self.net9.get_output('pool_5')[:self.proposal_number, :] part_features_fc710 = self.net10.get_output('pool_5')[:self.proposal_number, :] part_features_fc711 = self.net11.get_output('pool_5')[:self.proposal_number, :] part_features_fc712 = self.net12.get_output('pool_5')[:self.proposal_number, :] part_features_fc713 = self.net13.get_output('pool_5')[:self.proposal_number, :] part_features_fc714 = self.net14.get_output('pool_5')[:self.proposal_number, :] part_features_fc715 = self.net15.get_output('pool_5')[:self.proposal_number, :] part_features_fc716 = self.net16.get_output('pool_5')[:self.proposal_number, :] part_features_fc717 = self.net17.get_output('pool_5')[:self.proposal_number, :] part_features_fc718 = self.net18.get_output('pool_5')[:self.proposal_number, :] part_features_fc719 = self.net19.get_output('pool_5')[:self.proposal_number, :] #print(part_features) # learning matrix 1 Matrix_L1_S1 = tf.get_variable('L1_S1', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer( stddev=1 / math.sqrt(self.feature_size * self.feature_size))) # learning matrix 2 Matrix_L1_S2 = tf.get_variable('L1_S2', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer( stddev=1 / math.sqrt(self.feature_size * self.feature_size))) # # learning matrix 3 # Matrix_L2_S1 = tf.get_variable('L2_S1', [self.feature_size, self.feature_size], initializer=tf.random_normal_initializer( # stddev=1 / math.sqrt(self.feature_size * self.feature_size))) # learning matrix 4 #Matrix_L1_S3 = tf.get_variable('L1_S3', [self.hidden_size, self.hidden_size], # initializer=tf.random_normal_initializer( # stddev=1 / math.sqrt(self.hidden_size * self.hidden_size))) ################################ #### get the region feature #### ######### max pooling ########## ################################ part_features_fc7 = tf.reduce_max(tf.reshape(part_features_fc7, [self.proposal_number, 49, 512]), axis=1) part_features_fc71 = tf.reduce_max(tf.reshape(part_features_fc71, [self.proposal_number, 49, 512]), axis=1) part_features_fc72 = tf.reduce_max(tf.reshape(part_features_fc72, [self.proposal_number, 49, 512]), axis=1) part_features_fc73 = tf.reduce_max(tf.reshape(part_features_fc73, [self.proposal_number, 49, 512]), axis=1) part_features_fc74 = tf.reduce_max(tf.reshape(part_features_fc74, [self.proposal_number, 49, 512]), axis=1) part_features_fc75 = tf.reduce_max(tf.reshape(part_features_fc75, [self.proposal_number, 49, 512]), axis=1) part_features_fc76 = tf.reduce_max(tf.reshape(part_features_fc76, [self.proposal_number, 49, 512]), axis=1) part_features_fc77 = tf.reduce_max(tf.reshape(part_features_fc77, [self.proposal_number, 49, 512]), axis=1) 
# Views 0-7 were max-pooled above; pool the 7x7 (49-bin) fc7 features of the
# remaining views down to one 512-d vector per proposal, then gather all 20 views.
part_feature_list = [part_features_fc7, part_features_fc71, part_features_fc72,
                     part_features_fc73, part_features_fc74, part_features_fc75,
                     part_features_fc76, part_features_fc77]
part_feature_list += [
    tf.reduce_max(tf.reshape(f, [self.proposal_number, 49, 512]), axis=1)
    for f in (part_features_fc78, part_features_fc79, part_features_fc710,
              part_features_fc711, part_features_fc712, part_features_fc713,
              part_features_fc714, part_features_fc715, part_features_fc716,
              part_features_fc717, part_features_fc718, part_features_fc719)]

####### get model parts #########
# (A disabled "no attention" baseline simply used uniform weights of
#  1.0 / proposal_number for every proposal instead of the learned similarity.)

##############################
#########  L1_S1  ############
##############################
# Part-level attention, applied per view: weight each proposal by the column
# mean of softmax(F * Matrix_L1_S1 * F^T) (the average attention it receives)
# and sum the weighted proposal features. The original unrolled this for
# views 0-19; view 15 mistakenly reused the view-5 similarity, fixed here.
part_sums = []
for feat in part_feature_list:
    sim = tf.nn.softmax(tf.matmul(tf.matmul(feat, Matrix_L1_S1), tf.transpose(feat)))
    weights = tf.transpose(tf.reduce_sum(sim, axis=0, keep_dims=True) / self.proposal_number)
    part_sums.append(tf.reduce_sum(tf.multiply(weights, feat), axis=0, keep_dims=True))

# concat the 20 per-view part descriptors and L2-normalize each row
view_parts = tf.nn.l2_normalize(tf.concat(part_sums, axis=0), 1)

'''L1_S2'''
# View-level attention over the 20 aggregated view descriptors.
L1_S2_Similarity = tf.nn.softmax(tf.matmul(tf.matmul(view_parts, Matrix_L1_S2), tf.transpose(view_parts)))
view_similarity = tf.transpose(tf.reduce_sum(L1_S2_Similarity, axis=0, keep_dims=True) / self.views)
view_sums = tf.reduce_sum(tf.multiply(view_similarity, view_parts), axis=0, keep_dims=True)
view_sums = tf.nn.l2_normalize(view_sums, 1)

# add the global shape descriptor back onto every view and feed the sequence to the RNN
view_sums_extend = tf.tile(view_sums, [self.views, 1])
views_input = tf.add(view_parts, view_sums_extend)
view_extend = [views_input]
view_sequence = tf.unstack(view_extend, self.rnn_steps, 1)

###### RNN Part ##########
outputs, states = self.build_RNN(view_sequence)
# (the original also reshaped to [-1, self.views, 4096] first, a no-op when hidden_size == 4096)
outputs = tf.reshape(outputs, [-1, self.views, self.hidden_size])
model_feature = tf.reduce_max(outputs, 1)

# classification layer
w_init = tf.truncated_normal_initializer(stddev=0.1)
b_init = tf.constant_initializer(0.1)
fc2_w = tf.get_variable('fc2_w', [self.hidden_size, self.classes], dtype=tf.float32, initializer=w_init)
fc2_b = tf.get_variable('fc2_b', [self.classes], dtype=tf.float32, initializer=b_init)
cls_logits = tf.matmul(model_feature, fc2_w) + fc2_b
cls_prob = tf.nn.softmax(cls_logits)

# initialize variables and restore the pretrained snapshot
self.saver = tf.train.Saver(max_to_keep=150)
sess.run(tf.global_variables_initializer())
self.saver.restore(sess, self.pretrained_model)
print('loaded: %s' % self.pretrained_model)

last_snapshot_iter = -1
timer = Timer()
sums = 0.0
class_ac_test = True   # True: 13-class / 732-model test split; False: 10-class / 3991-model split
class_acc = np.zeros(13, np.float32)
cmatrix = np.zeros([13, 13], np.float32)
if class_ac_test:
    model_num = 732
    classes_num = [100, 10, 50, 50, 100, 100, 100, 100, 20, 50, 7, 30, 15]
else:
    # NOTE: kept from the original; this 10-entry split does not match the
    # 13-class class_acc / cmatrix buffers above.
    model_num = 3991
    classes_num = [106, 515, 889, 200, 200, 465, 200, 680, 392, 344]
cnum = [[100], [10], [50], [50], [100], [100], [100], [100], [20], [50], [7], [30], [15]]

for iter in range(model_num):
    # one test model = 20 rendered views, fetched as 20 consecutive batches
    train_target = data_layer.netvlad_target()
    blob_list = [data_layer.forward() for _ in range(20)]
    # (a disabled experiment randomly rotated the view order before feeding)

    # feed each view into its own network copy
    net_list = [self.net] + [getattr(self, 'net%d' % k) for k in range(1, 20)]
    feed_dict = {}
    for view_net, view_blob in zip(net_list, blob_list):
        feed_dict[view_net.data] = view_blob['data']
        feed_dict[view_net.im_info] = view_blob['im_info']
        feed_dict[view_net.keep_prob] = 1.0

    run_options = None
    run_metadata = None
    if cfg.TRAIN.DEBUG_TIMELINE:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

    timer.tic()
    test_acc = sess.run(cls_prob, feed_dict=feed_dict,
                        options=run_options, run_metadata=run_metadata)
    timer.toc()

    pred = np.argmax(test_acc, axis=1)[0]
    gt = np.argmax(train_target)
    cmatrix[gt][pred] += 1
    if pred == gt:
        sums += 1.0
        class_acc[gt] += 1.0
    print('model id: %d' % iter, pred, gt)

print("Total accuracy: %f" % (sums / model_num))
print(cmatrix)
print(cmatrix / cnum)
fid = open('/home/liuxinhai/fine-grained/results/airplane_v20_deco.txt', 'a+')
fid.write('{:.6f}\n'.format(sums / model_num))
fid.close()
for i in range(self.classes):
    print("the %d class: %f" % (i, class_acc[i] / classes_num[i]))
print('class acc: %f' % (sum(class_acc / classes_num) / self.classes))
fid = open('/home/liuxinhai/fine-grained/results/airplane_v20_deco_class.txt', 'a+')
fid.write('{:.6f}\n'.format(sum(class_acc / classes_num) / self.classes))
fid.close()
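The two-level attention used above is easier to see outside the TensorFlow graph. The sketch below is a minimal NumPy re-implementation of the same pooling step (softmax-normalized bilinear similarity, column-averaged into per-row weights, then a weighted sum); the array names and sizes are illustrative only and are not taken from the actual model.

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def attention_pool(feats, M):
    """feats: (n, d) part/view features, M: (d, d) learned similarity matrix.
    Weights are the column means of softmax(F M F^T), i.e. the average
    attention each row receives; returns a single (1, d) descriptor."""
    sim = softmax(feats @ M @ feats.T)            # (n, n) pairwise similarity
    weights = sim.mean(axis=0, keepdims=True).T   # (n, 1) weight per row of F
    return (weights * feats).sum(axis=0, keepdims=True)

# illustrative sizes: 10 proposals per view, 20 views, 512-d features
rng = np.random.RandomState(0)
proposals = rng.randn(10, 512)
view_descriptor = attention_pool(proposals, rng.randn(512, 512) * 0.01)   # part-level (L1_S1)
views = rng.randn(20, 512)
shape_descriptor = attention_pool(views, rng.randn(512, 512) * 0.01)      # view-level (L1_S2)
print(view_descriptor.shape, shape_descriptor.shape)  # (1, 512) (1, 512)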
def test_net(net, imdb, max_per_image=300, thresh=0.5, vis=False):
    num_images = imdb.num_images

    # all detections are collected into:
    #   all_boxes[cls][image] = N x 5 array of detections in
    #   (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    # timers
    _t = {'im_detect': Timer(), 'misc': Timer()}
    det_file = os.path.join(output_dir, 'detections.pkl')
    size_index = args.image_size_index

    for i in range(num_images):
        batch = imdb.next_batch(size_index=size_index)
        ori_im = batch['origin_im'][0]
        im_data = net_utils.np_to_variable(batch['images'], is_cuda=True,
                                           volatile=True).permute(0, 3, 1, 2)

        _t['im_detect'].tic()
        bbox_pred, iou_pred, prob_pred = net(im_data)

        # to numpy
        bbox_pred = bbox_pred.data.cpu().numpy()
        iou_pred = iou_pred.data.cpu().numpy()
        prob_pred = prob_pred.data.cpu().numpy()

        bboxes, scores, cls_inds = yolo_utils.postprocess(
            bbox_pred, iou_pred, prob_pred, ori_im.shape, cfg, thresh,
            size_index)
        detect_time = _t['im_detect'].toc()

        _t['misc'].tic()

        for j in range(imdb.num_classes):
            inds = np.where(cls_inds == j)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = bboxes[inds]
            c_scores = scores[inds]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            all_boxes[j][i] = c_dets

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))  # noqa
            _t['im_detect'].clear()
            _t['misc'].clear()

        if vis:
            im2show = yolo_utils.draw_detection(ori_im, bboxes, scores,
                                                cls_inds, cfg, thr=0.1)
            if im2show.shape[0] > 1100:
                im2show = cv2.resize(
                    im2show,
                    (int(1000. * float(im2show.shape[1]) / im2show.shape[0]),
                     1000))  # noqa
            cv2.imshow('test', im2show)
            cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
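The per-image cap in test_net (keep only the max_per_image highest-scoring detections across all classes) can be exercised in isolation. This is a minimal sketch with made-up boxes, independent of the imdb/net plumbing; unlike the function above it does not special-case a background class.

import numpy as np

def cap_detections(all_boxes_for_image, max_per_image):
    """all_boxes_for_image: list (one entry per class) of (N, 5) arrays whose
    last column is the score. Keeps only the max_per_image best boxes overall."""
    scores = np.hstack([b[:, -1] for b in all_boxes_for_image if len(b)])
    if max_per_image <= 0 or len(scores) <= max_per_image:
        return all_boxes_for_image
    thresh = np.sort(scores)[-max_per_image]
    return [b[b[:, -1] >= thresh] if len(b) else b for b in all_boxes_for_image]

# two classes, five detections in total, keep the best three
dets = [np.array([[0, 0, 10, 10, 0.9], [1, 1, 5, 5, 0.2]], np.float32),
        np.array([[2, 2, 8, 8, 0.7], [3, 3, 9, 9, 0.4], [0, 0, 4, 4, 0.1]], np.float32)]
print([len(c) for c in cap_detections(dets, max_per_image=3)])  # [1, 2]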
def train_model(self, sess, max_iters):
    """Network training loop."""
    data_layer = get_data_layer(self.roidb, self.imdb.num_classes)

    # RPN
    # classification loss
    rpn_cls_score = tf.reshape(
        self.net.get_output('rpn_cls_score_reshape'), [-1, 2])
    rpn_label = tf.reshape(self.net.get_output('rpn-data')[0], [-1])
    rpn_cls_score = tf.reshape(
        tf.gather(rpn_cls_score, tf.where(tf.not_equal(rpn_label, -1))),
        [-1, 2])
    rpn_label = tf.reshape(
        tf.gather(rpn_label, tf.where(tf.not_equal(rpn_label, -1))), [-1])
    rpn_cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=rpn_cls_score, labels=rpn_label))

    # bounding box regression L1 loss
    rpn_bbox_pred = self.net.get_output('rpn_bbox_pred')
    rpn_bbox_targets = tf.transpose(
        self.net.get_output('rpn-data')[1], [0, 2, 3, 1])
    rpn_bbox_inside_weights = tf.transpose(
        self.net.get_output('rpn-data')[2], [0, 2, 3, 1])
    rpn_bbox_outside_weights = tf.transpose(
        self.net.get_output('rpn-data')[3], [0, 2, 3, 1])
    rpn_smooth_l1 = self._modified_smooth_l1(
        3.0, rpn_bbox_pred, rpn_bbox_targets,
        rpn_bbox_inside_weights, rpn_bbox_outside_weights)
    rpn_loss_box = tf.reduce_mean(
        tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1, 2, 3]))

    # R-CNN
    # classification loss
    cls_score = self.net.get_output('cls_score')
    label = tf.reshape(self.net.get_output('roi-data')[1], [-1])
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score,
                                                       labels=label))

    # bounding box regression L1 loss
    bbox_pred = self.net.get_output('bbox_pred')
    bbox_targets = self.net.get_output('roi-data')[2]
    bbox_inside_weights = self.net.get_output('roi-data')[3]
    bbox_outside_weights = self.net.get_output('roi-data')[4]
    smooth_l1 = self._modified_smooth_l1(1.0, bbox_pred, bbox_targets,
                                         bbox_inside_weights,
                                         bbox_outside_weights)
    loss_box = tf.reduce_mean(
        tf.reduce_sum(smooth_l1, reduction_indices=[1]))

    # final loss
    loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box

    # optimizer and learning rate
    global_step = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step,
                                    cfg.TRAIN.STEPSIZE, 0.1, staircase=True)
    momentum = cfg.TRAIN.MOMENTUM
    train_op = tf.train.MomentumOptimizer(lr, momentum).minimize(
        loss, global_step=global_step)

    # initialize variables
    sess.run(tf.global_variables_initializer())
    if self.pretrained_model is not None:
        print ('Loading pretrained model '
               'weights from {:s}').format(self.pretrained_model)
        self.net.load(self.pretrained_model, sess, self.saver, True)

    last_snapshot_iter = -1
    timer = Timer()
    for iter in range(max_iters):
        # get one batch
        blobs = data_layer.forward()

        # Make one SGD update
        feed_dict = {self.net.data: blobs['data'],
                     self.net.im_info: blobs['im_info'],
                     self.net.keep_prob: 0.5,
                     self.net.gt_boxes: blobs['gt_boxes']}

        run_options = None
        run_metadata = None
        if cfg.TRAIN.DEBUG_TIMELINE:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()

        timer.tic()
        rpn_loss_cls_value, rpn_loss_box_value, loss_cls_value, loss_box_value, _ = \
            sess.run([rpn_cross_entropy, rpn_loss_box, cross_entropy, loss_box, train_op],
                     feed_dict=feed_dict,
                     options=run_options,
                     run_metadata=run_metadata)
        timer.toc()

        if cfg.TRAIN.DEBUG_TIMELINE:
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file = open(
                str(long(time.time() * 1000)) + '-train-timeline.ctf.json', 'w')
            trace_file.write(
                trace.generate_chrome_trace_format(show_memory=False))
            trace_file.close()

        if (iter + 1) % (cfg.TRAIN.DISPLAY) == 0:
            print 'iter: %d / %d, total loss: %.4f, rpn_loss_cls: %.4f, ' \
                  'rpn_loss_box: %.4f, loss_cls: %.4f, loss_box: %.4f, lr: %f' % \
                  (iter + 1, max_iters,
                   rpn_loss_cls_value + rpn_loss_box_value + loss_cls_value + loss_box_value,
                   rpn_loss_cls_value, rpn_loss_box_value,
                   loss_cls_value, loss_box_value, lr.eval())
            print 'speed: {:.3f}s / iter'.format(timer.average_time)

        if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            last_snapshot_iter = iter
            self.snapshot(sess, iter)

    if last_snapshot_iter != iter:
        self.snapshot(sess, iter)
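_modified_smooth_l1 is called above but not shown. The sketch below is a minimal NumPy version of the standard Faster R-CNN smooth L1 form it presumably follows (sigma-weighted quadratic/linear switch applied to the inside-weighted difference, scaled by the outside weights); treat it as an assumption about the helper, not its actual definition.

import numpy as np

def modified_smooth_l1_np(sigma, bbox_pred, bbox_targets, inside_w, outside_w):
    """Assumed per-element form: diff = inside_w * (pred - target); the
    quadratic/linear switch happens at |diff| = 1 / sigma**2, and the result
    is scaled element-wise by outside_w."""
    sigma2 = sigma ** 2
    diff = inside_w * (bbox_pred - bbox_targets)
    abs_diff = np.abs(diff)
    smooth = np.where(abs_diff < 1.0 / sigma2,
                      0.5 * sigma2 * diff ** 2,
                      abs_diff - 0.5 / sigma2)
    return outside_w * smooth

pred = np.array([[0.5, -2.0, 0.1, 0.0]])
target = np.zeros_like(pred)
w = np.ones_like(pred)
print(modified_smooth_l1_np(3.0, pred, target, w, w))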
def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None,
             show_image=True, frame_rate=30):
    '''
    Processes the given video sequence and produces the tracking output
    (writes the results to file and, optionally, to result frames).
    It uses the JDE model to obtain the online targets.

    Parameters
    ----------
    opt : Namespace
        Contains information passed as command-line arguments.
    dataloader : LoadVideo
        Instance of the LoadVideo class used for fetching the image sequence
        and associated data.
    data_type : String
        Type of dataset corresponding (similar) to the given video.
    result_filename : String
        The name (path) of the file for storing results.
    save_dir : String
        Path to the folder for storing the frames containing bounding box
        information (result frames).
    show_image : bool
        Option for showing individual frames during run-time.
    frame_rate : int
        Frame rate of the given video.

    Returns
    -------
    frame_id : int
        Sequence number of the last processed frame.
    '''
    if save_dir:
        mkdir_if_missing(save_dir)
    tracker = JDETracker(opt, frame_rate=frame_rate)
    timer = Timer()
    results = []
    frame_id = 0
    for path, img, img0 in dataloader:
        if frame_id % 20 == 0:
            logger.info('Processing frame {} ({:.2f} fps)'.format(
                frame_id, 1. / max(1e-5, timer.average_time)))

        # run tracking
        timer.tic()
        blob = torch.from_numpy(img).cuda().unsqueeze(0)
        online_targets = tracker.update(blob, img0)
        online_tlwhs = []
        online_ids = []
        for t in online_targets:
            tlwh = t.tlwh
            tid = t.track_id
            vertical = tlwh[2] / tlwh[3] > 1.6  # wh aspect ratio
            if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
                online_tlwhs.append(tlwh)
                online_ids.append(tid)
        timer.toc()

        # save results
        results.append((frame_id + 1, online_tlwhs, online_ids))
        if show_image or save_dir is not None:
            online_im = vis.plot_tracking(img0, online_tlwhs, online_ids,
                                          frame_id=frame_id,
                                          fps=1. / timer.average_time)
        if show_image:
            cv2.imshow('online_im', online_im)
        if save_dir is not None:
            cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)),
                        online_im)
        frame_id += 1

    # save results
    write_results(result_filename, results, data_type)
    return frame_id, timer.average_time, timer.calls
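write_results itself is not shown here. For the common MOT-challenge text format (one line per track per frame: frame, id, x, y, w, h, score, -1, -1, -1) a minimal writer compatible with the (frame_id, tlwhs, ids) tuples collected above could look like the sketch below; the exact column layout and the dropped data_type handling are assumptions, not the repository's definition.

def write_results_mot(filename, results):
    """results: iterable of (frame_id, tlwhs, track_ids) as gathered in eval_seq.
    Writes one MOT-style line per track per frame (assumed format)."""
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids in results:
            for (x, y, w, h), tid in zip(tlwhs, track_ids):
                f.write('{},{},{:.2f},{:.2f},{:.2f},{:.2f},1,-1,-1,-1\n'.format(
                    frame_id, tid, x, y, w, h))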