import time

import cv2
import numpy as np
from keras.layers import Input
from PIL import Image

from frcnn import FRCNN

frcnn = FRCNN()

# Open the camera (pass a file name instead to read from a video file)
capture = cv2.VideoCapture(0)
# capture = cv2.VideoCapture("1.mp4")
fps = 0.0
while True:
    t1 = time.time()
    # read one frame
    ref, frame = capture.read()
    # convert BGR to RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # convert to a PIL Image
    frame = Image.fromarray(np.uint8(frame))
    # run detection
    frame = np.array(frcnn.detect_image(frame))
    # convert RGB back to BGR for OpenCV display
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    fps = (fps + (1. / (time.time() - t1))) / 2  # smoothed fps estimate
    print("fps= %.2f" % (fps))
    frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    # The snippet was cut off here; showing the frame and exiting on 'q'
    # is the usual continuation (assumed):
    cv2.imshow("video", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
'''
Notes on predict.py:
1. It cannot do batch prediction. If you want batch prediction, traverse a
   folder with os.listdir() and open each image with Image.open for prediction
   (see the sketch after this script).
2. To save the result, call r_image.save("img.jpg").
3. To get the box coordinates, go into the detect_image function and read the
   four values top, left, bottom, right.
4. To crop out a detected object, use the obtained top, left, bottom, right
   values to slice the original image as an array.
'''
from frcnn import FRCNN
from PIL import Image

frcnn = FRCNN()
while True:
    img = input('Input image filename:')
    try:
        image = Image.open(img)
        #-------------------------------------#
        # Convert to RGB so grayscale images
        # can also be predicted.
        #-------------------------------------#
        image = image.convert("RGB")
    except:
        print('Open Error! Try again!')
        continue
    else:
        r_image = frcnn.detect_image(image)
        r_image.show()
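Note 1 above describes batch prediction via os.listdir(); a minimal sketch of that loop, assuming placeholder img/ and img_out/ folder names:

import os
from PIL import Image
from frcnn import FRCNN

frcnn = FRCNN()
in_dir, out_dir = 'img', 'img_out'  # hypothetical folder names
os.makedirs(out_dir, exist_ok=True)
for name in os.listdir(in_dir):
    if not name.lower().endswith(('.jpg', '.jpeg', '.png')):
        continue
    image = Image.open(os.path.join(in_dir, name)).convert("RGB")
    r_image = frcnn.detect_image(image)
    r_image.save(os.path.join(out_dir, name))  # save each result, per note 2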
#-------------------------------------#
# Run prediction on all test images
#-------------------------------------#
from frcnn import FRCNN
from PIL import Image

frcnn = FRCNN()

# img = 'dataset/test/images/006486001008661.jpg'
# image = Image.open(img)
# r_image = frcnn.detect_image(image)
# r_image.save('result.jpg')

testdata_file = 'dataset/main/test.txt'
f = open(testdata_file)
lines = f.readlines()
result = open('result.txt', 'w')
for l in lines:
    l = l.strip()
    img = "dataset/test/images/%s.jpg" % l
    image = Image.open(img)
    bbox, conf, label = frcnn.detect_image(image)
    if len(bbox) == 0:
        continue
    for i in range(len(bbox)):
        score = conf[i]  # confidence
        left, top, right, bottom = bbox[i]  # box position
        result.write(l)
        # The snippet is cut off here; presumably the score and the box
        # coordinates are written after the image id on the same line.
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt

from frcnn import FRCNN
# AnchorTargetCreator, ProposalTargetCreator and n_train_post_nms come from the
# repo's target-creation utilities; the exact module path is assumed here.
from utils import AnchorTargetCreator, ProposalTargetCreator, n_train_post_nms


class trainer(nn.Module):
    def __init__(self):
        super(trainer, self).__init__()
        self.total_loss = 0.
        self.rpn_reg_loss = 0.
        self.rpn_cls_loss = 0.
        self.reg_loss = 0.
        self.cls_loss = 0.
        self.model = FRCNN('train')
        self.model.get_data_loader(shuffle=False)
        self.model.get_network()
        self.n_sample = [256, 128]  # numbers of samples for the two stages' targets
        self.at = AnchorTargetCreator(self.n_sample[0])    # generates labels for the RPN
        self.pt = ProposalTargetCreator(self.n_sample[1])  # generates labels for the classifier
        self.post_thre = n_train_post_nms  # number of RoIs kept per image

    def _fast_rcnn_loc_loss(self, pred_loc, gt_loc, gt_label, sigma=1):
        """Loss function for the regressors of the RPN and the head.

        Inputs:
            pred_loc: [n_sample, 4]
            gt_loc:   [n_sample, 4]
            gt_label: [n_sample]
        Outputs:
            loss
        """
        # only train on the positive samples
        pred_loc = pred_loc[gt_label > 0]
        gt_loc = gt_loc[gt_label > 0]

        def _smooth_l1_loss(x, t, sigma):
            sigma_squared = sigma ** 2
            regression_diff = (x - t).abs()
            regression_loss = torch.where(
                regression_diff < (1. / sigma_squared),
                0.5 * sigma_squared * regression_diff ** 2,
                regression_diff - 0.5 / sigma_squared
            )
            return regression_loss.sum()

        loc_loss = _smooth_l1_loss(pred_loc, gt_loc, sigma)
        num_pos = (gt_label > 0).sum().float()
        # normalize by the number of positives, guarding against division by zero
        loc_loss /= torch.max(num_pos, torch.ones_like(num_pos))
        return loc_loss

    def train_step(self, image, boxes, labels):
        self.optimizer.zero_grad()
        # extract the backbone features
        features = self.model.net.backbone(image.float())
        # cls: [N, 2, KHW], reg: [N, KHW, 4], rois: [N*post_thre, 4],
        # roi_inds: [N*post_thre], anchors: [KHW, 4]
        cls, reg, rois, roi_inds, anchors = self.model.net.rpn(features, image.shape[3], image.shape[2])

        # targets for the RPN
        reg_targets_rpn = torch.zeros([len(image), anchors.shape[0], anchors.shape[1]])  # [N, KHW, 4]
        cls_labels_rpn = torch.zeros([len(image), anchors.shape[0]])  # [N, KHW]
        for i in range(len(image)):  # for each image, make the RPN targets
            label = labels[i]  # [32]
            true_ind = label != -1
            box = boxes[i, true_ind, :]  # [32, 4]
            reg_target_rpn, label_rpn = self.at(box, anchors)
            reg_targets_rpn[i] = torch.from_numpy(reg_target_rpn)
            cls_labels_rpn[i] = torch.from_numpy(label_rpn)
        rpn_reg_loss = self._fast_rcnn_loc_loss(reg, reg_targets_rpn, cls_labels_rpn)
        rpn_cls_loss = F.cross_entropy(cls, cls_labels_rpn.type(torch.LongTensor), ignore_index=-1)

        # targets for the classifier
        sample_rois = torch.zeros([image.shape[0], self.n_sample[1], 4])  # [N, 128, 4]
        sample_rois_ind = torch.zeros([image.shape[0], self.n_sample[1]])  # [N, 128]
        reg_targets_classifier = torch.zeros([len(image), self.n_sample[1], 4])  # [N, n_sample, 4]
        cls_labels_classifier = torch.zeros([len(image), self.n_sample[1]])  # [N, n_sample]
        for i in range(len(image)):
            sample_rois_ind[i, :] = i
            # make the classifier targets
            roi = rois.detach()[roi_inds == i, :]  # the RoIs of one image, [600, 4]
            label = labels[i]  # [32]
            true_ind = label != -1
            box = boxes[i, true_ind, :]  # [32, 4]
            label = label[true_ind]  # [32]
            # sample_roi: [n_sample, 4], reg_target_classifier: [n_sample, 4],
            # cls_label_classifier: [n_sample]
            sample_roi, reg_target_classifier, cls_label_classifier = self.pt(roi, box, label)
            sample_rois[i, :, :] = torch.from_numpy(sample_roi)
            reg_targets_classifier[i, :, :] = torch.from_numpy(reg_target_classifier)
            cls_labels_classifier[i, :] = torch.from_numpy(cls_label_classifier)

        # flatten the RoIs and their image indices
        sample_rois = sample_rois.contiguous().view(-1, 4)
        sample_rois_ind = torch.flatten(sample_rois_ind)
        # cls_output: [N, 21, n_sample], reg_output: [N, n_sample, 21*4]
        cls_output, reg_output = self.model.net.head(features, sample_rois, sample_rois_ind,
                                                     image.shape[2], image.shape[3])

        # gather the regressed boxes of the labeled classes:
        # first build the gather indices from the class labels
        reg_ind = cls_labels_classifier.detach().unsqueeze(-1).type(torch.LongTensor) * 4  # [N, n_sample, 1]
        reg_ind = torch.cat([reg_ind, reg_ind + 1, reg_ind + 2, reg_ind + 3], dim=-1)
        # select the boxes, [N, n_sample, 4]
        reg_output = torch.gather(reg_output, dim=-1, index=reg_ind)

        # losses
        reg_loss = self._fast_rcnn_loc_loss(reg_output, reg_targets_classifier, cls_labels_classifier)
        cls_loss = F.cross_entropy(cls_output, cls_labels_classifier.type(torch.LongTensor), ignore_index=-1)
        total_loss = rpn_cls_loss + rpn_reg_loss + cls_loss + reg_loss
        print("Total loss:{}, \nrpn cls loss:{}, rpn reg loss:{}, \ncls loss:{}, reg loss:{}".format(
            total_loss, rpn_cls_loss, rpn_reg_loss, cls_loss, reg_loss))
        total_loss.backward()
        self.optimizer.step()

    def train(self, lr, n_epoch, save_folder='data/voc/model', load_path=None):
        """The code for training a Faster R-CNN."""
        if load_path:
            self.model.load_param(load_path)
        if not os.path.exists(save_folder):
            os.makedirs(save_folder)
        self.model.net.train()
        self.optimizer = opt.Adam(self.model.net.parameters(), lr=lr, weight_decay=5e-6)
        self.scheduler = opt.lr_scheduler.CosineAnnealingLR(self.optimizer, n_epoch)
        for epoch in range(n_epoch):
            print('Training epoch:{}'.format(epoch))
            for i, data in enumerate(self.model.data_loader):
                image, box, label = data['image'], data['box'].numpy(), data['label'].numpy()
                self.train_step(image, box, label)
            self.scheduler.step()
            if epoch % 10 == 0:
                torch.save(self.model.net.state_dict(),
                           os.path.join(save_folder, '{}.pth'.format(epoch)))
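A minimal usage sketch for the trainer above; the learning rate, epoch count, and resume path are illustrative assumptions, not values from the source:

if __name__ == '__main__':
    # Hypothetical driver: the hyperparameters below are placeholders.
    t = trainer()
    t.train(lr=1e-4, n_epoch=100,
            save_folder='data/voc/model',  # default from train() above
            load_path=None)                # or a .pth checkpoint to resume from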
from keras.layers import Input
from PIL import Image

from frcnn import FRCNN

frcnn = FRCNN()

raman = [
    56.25, 53.25, 40.25, 49.25, 68.25, 65.25, 57.25, 63.25, 43.25, 47.25,
    52.25, 34.25, 55.25, 51.25, 50.25, 51.25, 47.25, 49.25, 49.25, 65.25,
    47.25, 63.25, 57.25, 37.25, 50.25, 42.25, 55.25, 71.25, 39.25, 46.25,
    46.25, 53.25, 42.25, 48.25, 41.25, 83.25, 62.25, 69.25, 45.25, 38.25,
    52.25, 31.25, 52.25, 54.25, 47.25, 66.25, 60.25, 52.25, 67.25, 73.25,
    81.25, 113.25, 157.25, 271.25, 506.25, 770.25, 1099.25, 1414.25, 1630.25, 1767.25,
    1767.25, 1769.25, 1790.25, 1862.25, 1935.25, 1999.25, 2076.25, 2157.25, 2169.25, 2209.25,
    2229.25, 2273.25, 2314.25, 2339.25, 2440.25, 2473.25, 2510.25, 2510.25, 2529.25, 2515.25,
    2499.25, 2438.25, 2407.25, 2345.25, 2320.25, 2299.25, 2323.25, 2346.25, 2302.25, 2143.25,
    1973.25, 1719.25, 1509.25, 1312.25, 1204.25, 1083.25, 1010.25, 946.25, 898.25, 863.25,
    829.25, 829.25, 791.25, 819.25, 803.25, 848.25, 881.25, 922.25, 912.25, 896.25,
    938.25, 790.25, 705.25, 652.25, 637.25, 612.25, 579.25, 568.25, 527.25, 520.25,
    499.25, 494.25, 484.25, 470.25, 477.25, 469.25, 461.25, 471.25, 487.25, 489.25,
    481.25, 479.25, 476.25, 502.25, 471.25, 504.25, 506.25, 531.25, 571.25, 607.25,
    674.25, 687.25, 714.25, 750.25, 691.25, 710.25, 690.25, 681.25, 677.25, 671.25,
    626.25, 632.25, 608.25, 551.25, 524.25, 488.25, 471.25, 425.25, 426.25, 410.25,
    399.25, 395.25, 409.25, 384.25, 393.25, 406.25, 389.25, 375.25, 390.25, 361.25,
    359.25, 365.25, 353.25, 362.25, 363.25, 344.25, 364.25, 358.25, 350.25, 370.25,
    372.25, 372.25, 383.25, 370.25, 397.25, 395.25, 420.25, 414.25, 409.25, 408.25,
    445.25, 435.25, 418.25, 454.25, 426.25, 422.25, 436.25, 410.25, 413.25, 384.25,
    383.25, 379.25, 358.25, 348.25, 336.25, 351.25, 341.25, 350.25, 344.25, 341.25,
    343.25, 356.25, 326.25, 328.25, 369.25, 336.25,
    # ... (the array is truncated in the source)
import os

from glob import glob
from itertools import chain

from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split
import tensorflow as tf
# from skimage.color import rgb2gray
from tensorflow.keras import Input
from tensorflow.keras.models import Model, load_model, save_model
from tensorflow.keras import backend as K
from PIL import Image
from flask import Flask  # needed for the app below; the import was missing in the fragment

from frcnn import FRCNN

frcnn = FRCNN()

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

ALLOWED_EXTENSIONS = set(
    ['pdf', 'png', 'jpg', 'jpeg', 'PDF', 'PNG', 'JPG', 'JPEG', 'tif'])

app = Flask(__name__)
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.auto_reload = True

prediction = ' '
confidence = 0
filename = 'Image_No_Pred_MJRoBot.png'
image_name = filename
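The routes themselves are cut off in this fragment. A minimal sketch of the kind of upload-and-detect route the setup above implies; the endpoint name, the 'file' form field, the static/ save path, and the index.html template are all assumptions, not the original app's API:

from flask import request, render_template  # also needed for this sketch

@app.route('/predict', methods=['POST'])
def predict():
    # Hypothetical route: run the detector on a posted image and show the result.
    file = request.files['file']
    if file and file.filename.rsplit('.', 1)[-1] in ALLOWED_EXTENSIONS:
        image = Image.open(file.stream).convert("RGB")
        r_image = frcnn.detect_image(image)  # image with detections drawn on it
        r_image.save('static/result.png')
        return render_template('index.html', image_name='result.png')
    return 'Unsupported file type', 400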
"VOC2007/ImageSets/Main/test.txt")).read().strip().split() if not os.path.exists(map_out_path): os.makedirs(map_out_path) if not os.path.exists(os.path.join(map_out_path, 'ground-truth')): os.makedirs(os.path.join(map_out_path, 'ground-truth')) if not os.path.exists(os.path.join(map_out_path, 'detection-results')): os.makedirs(os.path.join(map_out_path, 'detection-results')) if not os.path.exists(os.path.join(map_out_path, 'images-optional')): os.makedirs(os.path.join(map_out_path, 'images-optional')) class_names, _ = get_classes(classes_path) if map_mode == 0 or map_mode == 1: print("Load model.") frcnn = FRCNN(confidence=confidence, nms_iou=nms_iou) print("Load model done.") print("Get predict result.") for image_id in tqdm(image_ids): image_path = os.path.join( VOCdevkit_path, "VOC2007/JPEGImages/" + image_id + ".jpg") image = Image.open(image_path) if map_vis: image.save( os.path.join(map_out_path, "images-optional/" + image_id + ".jpg")) frcnn.get_map_txt(image_id, image, class_names, map_out_path) print("Get predict result done.") if map_mode == 0 or map_mode == 2:
#----------------------------------------------------#
import time

import cv2
import numpy as np
import tensorflow as tf
from PIL import Image

from frcnn import FRCNN

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

if __name__ == "__main__":
    frcnn = FRCNN()
    #----------------------------------------------------------------------------------------------------------#
    # mode selects the kind of test to run:
    # 'predict'      single-image prediction. To modify the prediction step, e.g. to save
    #                the image or crop out objects, read the detailed comments below first.
    # 'video'        video detection, from the camera or a video file; see the comments below.
    # 'fps'          fps benchmark, using img/street.jpg; see the comments below.
    # 'dir_predict'  traverse a folder, detect, and save the results. By default it traverses
    #                the img folder and saves to the img_out folder; see the comments below.
    #----------------------------------------------------------------------------------------------------------#
    mode = "predict"
    #-------------------------------------------------------------------------#
    # crop decides whether detected objects are cropped out after a
    # single-image prediction.
    # crop is only effective when mode='predict'.
    #-------------------------------------------------------------------------#
    crop = False
    #----------------------------------------------------------------------------------------------------------#
    # video_path selects the video to read; video_path=0 means use the camera.
import numpy

from frcnn import FRCNN

net = FRCNN()


def setup(model_name):
    net.set_model(model_name)
    return 0


def detect_object(img, width, height):
    # img is a flat pixel buffer; reshape it into an HxWx3 uint8 image
    image = numpy.array(img, dtype='uint8').reshape((height, width, 3))
    result = net.detect_object(image)
    return result
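A minimal usage sketch for the wrapper above; the model name and the zero-filled dummy buffer are placeholders, not values from the source:

if __name__ == '__main__':
    # Hypothetical usage: load a placeholder model, then detect on a blank 640x480 frame.
    setup("frcnn_voc")  # model name is a placeholder
    buf = numpy.zeros(480 * 640 * 3, dtype='uint8')  # flat RGB buffer, row-major
    print(detect_object(buf, 640, 480))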
# The fragment starts mid-call; the argument that this default closes is
# reconstructed here as --backbone (an assumption), since args.backbone is
# used below. The --model_path and --conf definitions are also cut off.
parser.add_argument('-b', "--backbone", type=str, help='backbone network',
                    default='resnet50')
# parser.add_argument('-g', "--cuda", type=bool, help='do you have a gpu?',
#                     default=True)
parser.add_argument('-n', "--num2show", type=int, help='num img 2 show', default=1)
parser.add_argument('-r', "--root", type=str, help='root dir filled with *.jpg')
parser.add_argument('-i', "--filename", type=str, help='filename', default='')
args = parser.parse_args()

frcnn = FRCNN(args.model_path, args.backbone, args.conf)
# efficientdet = EfficientDet(args.model_path, args.version, args.conf, args.cuda)

if args.num2show == 1:
    image = Image.open(os.path.join(args.root, args.filename))
    res, cls, score = frcnn.detect_image(image)
    print(cls, score)
else:
    print('Results will be saved to temp.png')
    files = os.listdir(args.root)
    idx = [
        int(len(os.listdir(args.root)) * random.random())
        for i in range(args.num2show)
    ]
    imgs = [Image.open(os.path.join(args.root, files[id])) for id in idx]
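A hedged example invocation of this script; the script name is a placeholder, not from the source:

    python detect.py -r ./imgs -i 000001.jpg -n 1

With -n 1 it detects on the single named image and prints the classes and scores; with a larger -n it samples that many random images from the folder and, per the print above, saves the result to temp.png.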
from keras.layers import Input
from frcnn import FRCNN
from PIL import Image

frcnn = FRCNN()
while True:
    img = input('Input image filename:')
    try:
        image = Image.open(img)
    except:
        print('Open Error! Try again!')
        continue
    else:
        r_image = frcnn.detect_image(image)
        r_image.show()
# Note: unreachable as written, since the loop above never exits.
frcnn.close_session()