def efficientDet_video_inference(video_src, compound_coef=0, force_input_size=None,
                                 frame_skipping=3, threshold=0.2, out_path=None,
                                 imshow=False, display_fps=False):
    """Run EfficientDet detection and deep-sort tracking over a video.

    Args:
        video_src: Source handed to ``cv2.VideoCapture``.
        compound_coef: EfficientDet variant; selects the weight file and the
            default entry of the module-level ``input_sizes``.
        force_input_size: Network input size; ``None`` uses
            ``input_sizes[compound_coef]``.
        frame_skipping: Detection runs on frames whose index is a multiple of
            this value; ``0`` runs detection on every frame.
        threshold: Score threshold forwarded to ``postprocess``.
        out_path: If given, annotated frames are written to this video file.
        imshow: If true, show a 0.75x preview window; pressing ``q`` stops.
        display_fps: If true, overlay the detector FPS, otherwise overlay 0.
    """
    # deep-sort variables
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    model_filename = '/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, n_init=5)

    # efficientDet-pytorch variables
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Video fps", fps)

    # The writer only exists when an output path was requested; keeping it as
    # None otherwise lets the cleanup below run unconditionally.
    outp = None
    if out_path is not None:
        outp = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))

    # Some sources report an FPS below 1 (e.g. 0); skip progress reporting
    # then instead of dividing by zero.
    frames_per_report = int(fps)

    i = 0
    start = time.time()
    current_frame_fps = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_skipping == 0 or i % frame_skipping == 0:
                # frame preprocessing (running detections)
                _, framed_imgs, framed_metas, _ = preprocess_video(
                    frame, width=input_size, height=input_size)
                if use_cuda:
                    x = torch.stack([fi.cuda() for fi in framed_imgs], 0)
                else:
                    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

                # model predict
                t1 = time.time()
                with torch.no_grad():
                    _, regression, classification, anchors = model(x)
                    out = postprocess(x, anchors, regression, classification,
                                      regressBoxes, clipBoxes, threshold, iou_threshold)

                # Post processing
                out = invert_affine(framed_metas, out)
                # decoding bbox, object name and scores
                boxes, _, _ = decode_predictions(out[0])
                org_boxes = boxes.copy()
                t2 = time.time() - t1

                # feature extraction for deep sort
                boxes = [convert_bbox_to_deep_sort_format(frame.shape, b) for b in boxes]
                features = encoder(frame, boxes)
                detections = [Detection(bbox, 1.0, feature)
                              for bbox, feature in zip(boxes, features)]
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
                detections = [detections[idx] for idx in indices]
                tracker.predict()
                tracker.update(detections)

            i = i + 1
            img_show = frame.copy()
            # NOTE(review): this pairs detection j with track j by position;
            # confirm that ordering is what the labels are meant to show.
            for j in range(len(org_boxes)):
                img_show = drawBoxes(img_show, org_boxes[j], (255, 255, 0),
                                     str(tracker.tracks[j].track_id))

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                roi = frame[y1:y2, x1:x2]
                cv2.rectangle(img_show, (x1, y1), (x2, y2),
                              update_color_association(roi, track.track_id), 2)
                cv2.putText(img_show, str(track.track_id), (x1, y1), 0, 5e-3 * 100,
                            (255, 255, 0), 1)

            if display_fps:
                # t2 can be 0 on coarse clocks; report 0 rather than crash.
                current_frame_fps = 1 / t2 if t2 > 0 else 0
            else:
                current_frame_fps = 0
            cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps), (30, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

            if frames_per_report > 0 and i % frames_per_report == 0:
                print("Processed ", str(int(i / fps)), "seconds")
                print("Time taken", time.time() - start)

            if imshow:
                # Resize into a separate image: the writer was opened with the
                # full frame size, so the full-size frame must be what is saved.
                preview = cv2.resize(img_show, (0, 0), fx=0.75, fy=0.75)
                cv2.imshow('Frame', preview)
                # Press Q on keyboard to exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            if outp is not None:
                outp.write(img_show)
    finally:
        # Release the capture and writer even if a frame raised.
        cap.release()
        if outp is not None:
            outp.release()
def train(opt):
    """Train an EfficientDet model on the necklace dataset described by ``opt``.

    Reads the project configuration from ``projects/{opt.project}.yml``,
    builds the train/val loaders, optionally resumes from ``opt.load_weights``,
    then runs the train/validate loop with TensorBoard logging, periodic
    checkpoints and early stopping. On ``KeyboardInterrupt`` a checkpoint is
    saved before returning.

    Args:
        opt: Options object; this function reads ``project``, ``saved_path``,
            ``log_path``, ``batch_size``, ``num_workers``, ``data_path``,
            ``compound_coef``, ``load_weights``, ``head_only``, ``debug``,
            ``optim``, ``lr``, ``num_epochs``, ``save_interval``,
            ``val_interval``, ``es_min_delta`` and ``es_patience``.
            ``saved_path`` and ``log_path`` are rewritten in place.
    """
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }
    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }

    input_id_divided_train = divide_id('./datasets/necklace/train/image')
    file_list = split_train_already(input_id_divided_train)

    # The last entry was 1356, which is not a multiple of 128 and disagrees
    # with the 1536 used for the same slot elsewhere in this file.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]

    training_set = NeckDataset(
        root_dir=os.path.join(opt.data_path, params.project_name),
        set=params.train_set,
        transform=transforms.Compose([
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_sizes[opt.compound_coef]),
        ]),
        file_list=file_list)
    training_generator = DataLoader(training_set, **training_params)

    val_set = NeckDataset(
        root_dir=os.path.join(opt.data_path, params.project_name),
        set=params.val_set,
        transform=transforms.Compose([
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[opt.compound_coef]),
        ]),
        file_list=file_list)
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(security): eval() executes the anchor strings from the project
    # YAML; only use configuration files you trust.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        # The step is taken from the trailing "_<step>.pth" of the file name;
        # names without a numeric suffix restart from step 0.
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (TypeError, ValueError):
            last_step = 0

        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when using multiple GPUs and the per-GPU batch size is
    # below 4. It normalizes over the mini-batches of all GPUs together, which
    # keeps training stable when memory is limited, at a small speed cost.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with the loss function, to reduce the memory usage on
    # gpu0 and speed up
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # Skip whole epochs already covered by the resumed step counter.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # Skip batches of the current epoch that were already trained.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    ################just GT check#########################
                    # img_sample = imgs[0,:,:,:]
                    # annot_sample = annot[0,:,:]
                    # img_out = img_sample.numpy()
                    # img_out = np.transpose(img_out, (1,2,0))
                    # img_out = cv2.cvtColor(img_out, cv2.COLOR_RGB2BGR)
                    # annot_out = annot_sample.numpy()
                    # count, _ = annot_out.shape
                    # for i in range(count):
                    #     if annot_out[i,4] >= 0:
                    #         cv2.rectangle(img_out, (int(annot_out[i,0]),int(annot_out[i,1])), (int(annot_out[i,2]),int(annot_out[i,3])), (255,0,0), 1)
                    #
                    # cv2.imwrite("test.png", img_out*255)
                    ######################################################

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in
                        # CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, batch_idx + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss', {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    # Best effort: report the failing batch and keep training.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            # If every batch was skipped there is no loss to report; feeding
            # the mean of an empty list (NaN) would only count a bad epoch.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in val_generator:
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs, annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        # Close the writer exactly once, on every exit path.
        writer.close()
def __init__(self,
             video_src: str,
             video_output: str,
             text_output: str,
             obj_list: list,
             input_sizes: list,
             reid_cpkt: str,
             compound_coef: int,
             force_input_size=None,
             threshold=0.2,
             iou_threshold=0.2,
             use_cuda=True,
             use_float16=False,
             cudnn_fastest=True,
             cudnn_benchmark=True,
             max_dist=0.2,
             min_confidence=0.3,
             nms_max_overlap=0.5,
             max_iou_distance=0.7,
             max_age=70,
             n_init=3,
             nn_budget=100,
             selected_target=None):
    """Load the EfficientDet detector and build one tracker per target.

    Args:
        video_src: Video source; an int selects a webcam, a str a video file.
        video_output: Where the output video is written.
        text_output: Where the CSV-format text output is written.
        obj_list: Class names; its length sets the detector's class count.
        input_sizes: Input size per compound coefficient.
        reid_cpkt: Re-identification checkpoint passed to ``build_tracker``.
        compound_coef: EfficientDet variant; selects the weight file.
        force_input_size: Input size override; ``None`` uses
            ``input_sizes[compound_coef]``.
        threshold, iou_threshold: Detector thresholds, stored on the instance.
        use_cuda: Move the detector to CUDA when CUDA is available.
        use_float16: Convert the detector to half precision.
        cudnn_fastest, cudnn_benchmark: Assigned to the global ``cudnn`` flags.
        max_dist, min_confidence, nms_max_overlap, max_iou_distance, max_age,
            n_init, nn_budget: Tracker settings forwarded to ``build_tracker``.
        selected_target: Targets to track; one tracker is built per entry.
            ``None`` is treated as no targets.
    """
    # I/O
    # Video's path
    self.video_src = video_src  # set int to use webcam, set str to read from a video file
    self.video_output = video_output  # output to the specific position
    # text path
    self.text_output = text_output  # output to the file with the csv format

    # DETECTOR
    self.compound_coef = compound_coef
    self.force_input_size = force_input_size  # set None to use default size
    self.threshold = threshold
    self.iou_threshold = iou_threshold
    self.use_cuda = use_cuda
    self.use_float16 = use_float16
    cudnn.fastest = cudnn_fastest
    cudnn.benchmark = cudnn_benchmark

    # coco_name
    self.obj_list = obj_list

    # input size
    self.input_sizes = input_sizes
    self.input_size = input_sizes[self.compound_coef] if force_input_size is None else force_input_size

    # load detector model
    model = EfficientDetBackbone(compound_coef=self.compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{self.compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    # Always assign the detector: previously it was only set inside the CUDA
    # and float16 branches, leaving the attribute missing on a plain CPU run.
    detector = model
    if self.use_cuda and torch.cuda.is_available():
        detector = detector.cuda()
    if self.use_float16:
        detector = detector.half()
    self.detector = detector

    # TRACKER
    self.reid_cpkt = reid_cpkt
    self.max_dist = max_dist
    self.min_confidence = min_confidence
    self.nms_max_overlap = nms_max_overlap
    self.max_iou_distance = max_iou_distance
    self.max_age = max_age
    self.n_init = n_init
    self.nn_budget = nn_budget

    # load tracker model
    # The default of None used to fail at len(None); treat it as an empty
    # selection so the documented default is usable.
    self.selected_target = [] if selected_target is None else selected_target
    self.trackers = [
        build_tracker(reid_cpkt, max_dist, min_confidence, nms_max_overlap,
                      max_iou_distance, max_age, n_init, nn_budget, use_cuda)
        for _ in range(len(self.selected_target))
    ]

    # video frames
    self.frame_id = 0
def start_training(self):
    """Train EfficientDet using the settings stored in ``self.system_dict``.

    Builds the COCO-style train (and optional val) loaders, downloads the
    pretrained weights for compound coefficients 0-7 when the configured
    weight file is missing, optionally resumes from it, then runs the
    train/validate loop with TensorBoard logging, checkpoints and early
    stopping. On ``KeyboardInterrupt`` a checkpoint is saved before returning.

    ``system_dict["params"]["saved_path"]`` and ``["log_path"]`` are extended
    in place with the project name.
    """
    import subprocess  # local import: only needed for the weight download

    params = self.system_dict["params"]
    dataset = self.system_dict["dataset"]

    if params["num_gpus"] == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    params["saved_path"] = params["saved_path"] + "/" + params["project_name"] + "/"
    params["log_path"] = params["log_path"] + "/" + params["project_name"] + "/tensorboard/"
    os.makedirs(params["saved_path"], exist_ok=True)
    os.makedirs(params["log_path"], exist_ok=True)

    training_params = {
        'batch_size': params["batch_size"],
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': params["num_workers"],
    }
    val_params = {
        'batch_size': params["batch_size"],
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': params["num_workers"],
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    training_set = CocoDataset(
        dataset["train"]["root_dir"],
        dataset["train"]["coco_dir"],
        dataset["train"]["img_dir"],
        set_dir=dataset["train"]["set_dir"],
        transform=transforms.Compose([
            Normalizer(mean=params["mean"], std=params["std"]),
            Augmenter(),
            Resizer(input_sizes[params["compound_coef"]]),
        ]))
    training_generator = DataLoader(training_set, **training_params)

    # The validation loader only exists when validation is enabled; the
    # training loop checks the same flag before using it.
    if dataset["val"]["status"]:
        val_set = CocoDataset(
            dataset["val"]["root_dir"],
            dataset["val"]["coco_dir"],
            dataset["val"]["img_dir"],
            set_dir=dataset["val"]["set_dir"],
            transform=transforms.Compose([
                Normalizer(params["mean"], params["std"]),
                Resizer(input_sizes[params["compound_coef"]]),
            ]))
        val_generator = DataLoader(val_set, **val_params)

    print("")
    print("")

    # NOTE(security): eval() executes the anchor strings from the
    # configuration; only use settings you trust.
    model = EfficientDetBackbone(
        num_classes=len(params["obj_list"]),
        compound_coef=params["compound_coef"],
        ratios=eval(params["anchors_ratios"]),
        scales=eval(params["anchors_scales"]))

    os.makedirs("pretrained_weights", exist_ok=True)

    # Download the pretrained weights for d0-d7 when the configured file is
    # missing. The None check avoids os.path.isfile(None) raising TypeError
    # before the "is not None" branch below can handle it.
    load_weights = params["load_weights"]
    compound_coef = params["compound_coef"]
    if (compound_coef in range(8) and load_weights is not None
            and not os.path.isfile(load_weights)):
        print("Downloading weights")
        url = ("https://github.com/zylo117/Yet-Another-Efficient-Pytorch/releases/download/1.0/"
               f"efficientdet-d{int(compound_coef)}.pth")
        try:
            # Argument list instead of a shell string, so paths containing
            # spaces or shell metacharacters are passed through intact.
            subprocess.run(["wget", url, "-O", load_weights], check=False)
        except OSError as e:
            # Best effort, as before: a missing wget must not abort here.
            print(f'[Warning] Could not run wget: {e}')

    # load last weights
    if params["load_weights"] is not None:
        if params["load_weights"].endswith('.pth'):
            weights_path = params["load_weights"]
        else:
            weights_path = get_last_weights(params["saved_path"])
        # The step is taken from the trailing "_<step>.pth" of the file name;
        # names without a numeric suffix restart from step 0.
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (TypeError, ValueError):
            last_step = 0

        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    print("")
    print("")

    # freeze backbone if train head_only
    if params["head_only"]:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    print("")
    print("")

    if params["num_gpus"] > 1 and params["batch_size"] // params["num_gpus"] < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        params["log_path"] +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    model = ModelWithLoss(model, debug=params["debug"])

    if params["num_gpus"] > 0:
        model = model.cuda()
        if params["num_gpus"] > 1:
            model = CustomDataParallel(model, params["num_gpus"])
            if use_sync_bn:
                patch_replication_callback(model)

    if params["optim"] == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), params["lr"])
    else:
        optimizer = torch.optim.SGD(model.parameters(), params["lr"],
                                    momentum=0.9, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)
    checkpoint_name = f'efficientdet-d{params["compound_coef"]}_trained.pth'

    try:
        for epoch in range(params["num_epochs"]):
            # Skip whole epochs already covered by the resumed step counter.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # Skip batches of the current epoch that were already trained.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params["num_gpus"] == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in
                        # CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot,
                                               obj_list=params["obj_list"])
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, params["num_epochs"], batch_idx + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss', {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % params["save_interval"] == 0 and step > 0:
                        self.save_checkpoint(model, checkpoint_name)
                        # print('checkpoint...')

                except Exception as e:
                    # Best effort: report the failing batch and keep training.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            # If every batch was skipped there is no loss to report; feeding
            # the mean of an empty list (NaN) would only count a bad epoch.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            if epoch % params["val_interval"] == 0 and dataset["val"]["status"]:
                print("Running validation")
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in val_generator:
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params["num_gpus"] == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs, annot,
                                                   obj_list=params["obj_list"])
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, params["num_epochs"], cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss}, step)

                if loss + params["es_min_delta"] < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    self.save_checkpoint(model, checkpoint_name)

                model.train()

                # Early stopping
                if epoch - best_epoch > params["es_patience"] > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        self.save_checkpoint(model, checkpoint_name)
    finally:
        # Close the writer exactly once, on every exit path.
        writer.close()

    print("")
    print("")
    print("Training complete")
def train(args):  # noqa: C901
    """Train EfficientDet, keep the best weights and evaluate them on ``val``.

    Writes ``hyperparameters.yml`` and ``model_params.yml`` into
    ``args.model_dir``, logs to TensorBoard under ``args.tensorboard_dir``,
    validates and checkpoints every ``args.val_interval`` epochs (and on the
    last epoch), stops early when ``EarlyStopping`` says so, then copies the
    best checkpoint to ``model.pth`` and runs ``evaluate``.

    Args:
        args: Parsed options namespace; dumped to YAML via ``vars(args)``.

    Raises:
        AssertionError: If the module-level ``num_gpus`` is 0 or the total
            batch size is not divisible by ``args.num_workers``.
    """
    train_start_time = time.perf_counter()

    assert num_gpus > 0, "Found 0 cuda devices, CPU training is not supported."
    total_batch_size = args.batch_size * num_gpus
    assert total_batch_size % args.num_workers == 0, (
        f"batch_size * num_gpus ({total_batch_size}) must be divisible by num_workers "
        f"({args.num_workers}).")

    with open(os.path.join(args.model_dir, "hyperparameters.yml"), "w") as f:
        yaml.dump(vars(args), f)

    # initialization of tensorboard summary writers
    date_time = datetime.datetime.now().strftime(_STRFTIME_FORMAT)
    writer = SummaryWriter(
        os.path.join(args.tensorboard_dir, f"logs/{date_time}"))
    train_writer = SummaryWriter(
        os.path.join(args.tensorboard_dir, f"logs/{date_time}/train"))
    val_writer = SummaryWriter(
        os.path.join(args.tensorboard_dir, f"logs/{date_time}/val"))

    # Everything that logs runs inside try/finally so the three writers are
    # closed even when training or validation raises.
    try:
        # get weights path, selecting the best weights if weights == "best"
        weights_path = _get_weights_path(args.weights_dir, args.weights)

        # create the correct data structure splitting input data in train and
        # val sets
        prepare_annotations(args.data_dir, args.classes, ["train", "val"])

        torch.cuda.manual_seed(args.seed)

        train_loader = _get_train_data_loader(args)
        val_loader = _get_val_data_loader(args)

        model = EfficientDetBackbone(
            num_classes=len(args.classes),
            compound_coef=args.compound_coef,
            ratios=args.anchors_ratios,
            scales=args.anchors_scales,
        )

        _init_weights(model, weights_path)

        if args.freeze_backbone:
            logger.info("Freezing backbone")
            model.apply(_freeze_submodule_if_backbone)

        # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
        # use synchronized batch normalization when the batch size per gpu is
        # too small
        if args.batch_size < 4:
            model.apply(replace_w_sync_bn)
            use_sync_bn = True
            logger.info("Using Synchronized Batch Normalization")
        else:
            use_sync_bn = False

        # wrap the model with the loss function, to reduce the memory usage on
        # gpu0 and speed up
        model = ModelWithLoss(model)

        model = model.cuda()
        if num_gpus > 1:
            # TODO: see if there are better way to parallelize
            model = CustomDataParallel(model, num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

        steps_per_epoch = len(train_loader)
        last_step, es_baseline = _get_last_step_and_es_baseline(
            weights_path, args.resume_training)

        es = EarlyStopping(args, baseline=es_baseline,
                           best_epoch=last_step // steps_per_epoch - 1)

        optimizer = _get_optimizer(model, args)
        scheduler = _get_scheduler(optimizer, steps_per_epoch, args)

        model.train()

        logger.info(f"Starting training from step {last_step}")

        for epoch in range(args.epochs):
            # Scale the learning-rate settings at each configured milestone.
            if epoch in args.milestones:
                for group in optimizer.param_groups:
                    if args.scheduler == "onecyclelr":
                        group["max_lr"] *= args.multisteplr_gamma
                        group["min_lr"] *= args.multisteplr_gamma
                    else:
                        group["lr"] *= args.multisteplr_gamma

            # When resuming, fast-forward the scheduler through the epochs
            # that were already trained instead of re-running them.
            last_epoch = last_step // steps_per_epoch
            if epoch < last_epoch:
                if scheduler is not None:
                    for _ in range(steps_per_epoch):
                        scheduler.step()
                continue

            train_loader_iter = iter(train_loader)
            for batch_idx in range(steps_per_epoch):
                iter_start_time = time.perf_counter()
                data_start_time = time.perf_counter()
                data = next(train_loader_iter)
                data_time = time.perf_counter() - data_start_time

                # Batches of the resumed epoch that were already trained only
                # advance the scheduler.
                if batch_idx < (last_step - last_epoch * steps_per_epoch):
                    if scheduler is not None:
                        scheduler.step()
                    continue

                imgs = data["img"]
                annotations = data["annot"]

                # if only one gpu, just send it to cuda:0 elif multiple gpus,
                # send it to multiple gpus in CustomDataParallel
                if num_gpus == 1:
                    imgs = imgs.cuda()
                    annotations = annotations.cuda()

                optimizer.zero_grad()
                loss_cls, loss_box_reg = model(imgs, annotations)
                loss_cls = loss_cls.mean()
                loss_box_reg = loss_box_reg.mean()

                total_loss = loss_cls + loss_box_reg
                if total_loss == 0 or not torch.isfinite(total_loss):
                    continue

                total_loss.backward()
                if args.clip_gradients_norm > 0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.clip_gradients_norm)
                # Read the rate before stepping so the log shows the value
                # used for this update.
                lr = optimizer.param_groups[0]["lr"]
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

                date_time = datetime.datetime.now().strftime("%m/%d %H:%M:%S")
                # NOTE: despite the name, this is the wall time elapsed since
                # training started, not a remaining-time estimate.
                eta = datetime.timedelta(
                    seconds=round(time.perf_counter() - train_start_time))
                max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0
                iter_time = time.perf_counter() - iter_start_time

                logger.info(f"[{date_time} train]: "
                            f"eta: {eta} "
                            f"epoch: {epoch + 1}/{args.epochs} "
                            f"batch: {batch_idx + 1}/{steps_per_epoch} "
                            f"loss_cls: {loss_cls.item():.4f} "
                            f"loss_box_reg: {loss_box_reg.item():.4f} "
                            f"total_loss: {total_loss.item():.4f} "
                            f"time: {iter_time:.4f} "
                            f"data_time: {data_time:.4f} "
                            f"lr: {lr:.6f} "
                            f"max_mem: {max_mem_mb:.0f}M")

                writer.add_scalar("hp/lr", lr, last_step)
                if args.cycle_momentum:
                    momentum = optimizer.param_groups[0]["momentum"]
                    writer.add_scalar("hp/momentum", momentum, last_step)
                writer.add_scalar("usage/max_mem", max_mem_mb, last_step)
                writer.flush()

                train_writer.add_scalar("loss/total_loss", total_loss.item(), last_step)
                train_writer.add_scalar("loss/loss_cls", loss_cls.item(), last_step)
                train_writer.add_scalar("loss/loss_box_reg", loss_box_reg.item(), last_step)
                train_writer.add_scalar("time/time", iter_time, last_step)
                train_writer.add_scalar("time/data_time", data_time, last_step)
                train_writer.flush()

                last_step += 1

            # See https://github.com/pytorch/pytorch/issues/1355#issuecomment-658660582.
            del train_loader_iter

            if epoch % args.val_interval == 0 or epoch + 1 == args.epochs:
                total_val_loss = validate(model, val_loader, last_step - 1, epoch,
                                          args.epochs, val_writer)
                _save_model(
                    model,
                    args.checkpoints_dir,
                    args.compound_coef,
                    epoch,
                    last_step,
                    total_val_loss,
                )
                if es.step(epoch, total_val_loss):
                    break
                model.train()

        model_params = {
            "classes": args.classes,
            "compound_coef": args.compound_coef,
            "anchors_scales": args.anchors_scales,
            "anchors_ratios": args.anchors_ratios,
        }
        with open(os.path.join(args.model_dir, "model_params.yml"), "w") as f:
            yaml.dump(model_params, f)
    finally:
        writer.close()
        train_writer.close()
        val_writer.close()

    best_weights_path = _get_best_weights_path(args.checkpoints_dir)
    shutil.copyfile(best_weights_path, os.path.join(args.model_dir, "model.pth"))

    evaluate(
        args.model_dir,
        args.data_dir,
        eval_set="val",
        threshold=args.eval_threshold,
        nms_threshold=args.eval_nms_threshold,
        max_imgs=args.eval_max_imgs,
        use_float16=args.use_float16,
        device=args.eval_device,
    )
def main(i):
    """Run EfficientDet-D``i`` over a fixed range of frames and dump the detections to JSON.

    Frames ``frame1000000.jpg`` .. ``frame1099999.jpg`` are read from the
    hard-coded ``base_dir``; detections are accumulated through ``to_json``
    into one dictionary that is written to ``<base_dir>/10/res-<i>.json``.

    Args:
        i: EfficientDet compound coefficient; selects both the weight file
            ``weights/efficientdet-d{i}.pth`` and the network input size.
    """
    compound_coef = i
    force_input_size = None  # set None to use default size

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    # Empty strings are placeholders that keep list positions aligned with class ids.
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
                'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '',
                'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                'hair drier', 'toothbrush']

    out_dict = {}

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        # The input tensor is cast to float16 below, so the model must match.
        model = model.half()

    # Built once: the original re-created both objects for every frame.
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    input_dtype = torch.float16 if use_float16 else torch.float32

    base_dir = '/data/jiashenc/jackson/'
    print('Processing Det-' + str(i))

    for k in range(1000000, 1100000):
        if k % 1000 == 0:
            print(' Finish {} frames'.format(k + 1))

        img_path = os.path.join(base_dir, 'frame{}.jpg'.format(k))
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        # Reorder channels-last input to the channels-first layout the model is fed.
        x = x.to(input_dtype).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        to_json(out, out_dict)

    with open(os.path.join(base_dir, '10', 'res-{:d}.json'.format(i)), 'w') as f:
        json.dump(out_dict, f)
def batch_inference(args):
    """Run rotated-box detection on every image named in a list file and export the results.

    Reads ``args.file_list`` (first space-separated token of each line is an
    image base name), loads ``<args.img_path>/<name>.jpg``, runs the model and
    passes one row per detection to ``write_into_txt``.

    Args:
        args: Namespace providing ``compound_coef``, ``pth``, ``use_cuda``,
            ``device``, ``file_list``, ``img_path``, ``score_threshold`` and
            ``iou_threshold``.
    """
    input_size = input_sizes[args.compound_coef]
    model = EfficientDetBackbone(compound_coef=args.compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    # load pth file
    model.load_state_dict(torch.load(args.pth, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if args.use_cuda:
        model = model.cuda(device=args.device)

    # Keep only the first token of each non-blank line.
    with open(args.file_list, 'r') as f_in:
        content = [line.strip().split(' ')[0] for line in f_in if line.strip()]

    # Loop-invariant helpers, created once instead of once per image.
    regressBoxes = Rotation_BBoxTransform()
    clipBoxes = ClipBoxes()
    addBoxes = BBoxAddScores()
    file_name = ['Task1_large-vehicle.txt', 'Task1_small-vehicle.txt']

    for filebasename in tqdm(content, ncols=88):
        img_path = os.path.join(args.img_path, filebasename + '.jpg')
        try:
            ori_imgs, framed_imgs, framed_metas = eval_preprocess(
                img_path, max_size=input_size)
        except Exception:
            # The original evaluated this message and discarded it, then carried
            # on with undefined (or stale) tensors. Report and skip instead.
            print(f'{img_path.split("/")[-1]} is not in {args.img_path}')
            continue

        if args.use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, addBoxes,
                              args.score_threshold, args.iou_threshold)

        out = invert_affine(framed_metas, out)

        rois = out[0]['rois']
        class_ids = out[0]['class_ids']
        scores = out[0]['scores']

        filecontent = []
        for roi, class_id, score in zip(rois, class_ids, scores):
            xmin, ymin, xmax, ymax, theta = roi
            # OPENCV2xywh yields the four corner points of the rotated box.
            rect = OPENCV2xywh([xmin, ymin, xmax, ymax, theta])[0].tolist()
            corners = [float(rect[p][axis]) for p in range(4) for axis in range(2)]
            filecontent.append(
                [int(class_id), filebasename, float(score), *corners])

        write_into_txt(file_name, filecontent)
def test(opt):
    """Detect objects in one image, save an annotated copy plus a JSON result, and time inference.

    The image ``<opt.img_path><opt.img_id>.jpg`` is run through EfficientDet-D2
    loaded from ``opt.weights``. The annotated image is written to
    ``test/<img_id>.jpg`` and the detections (``[x, y, width, height]`` boxes)
    to ``test/<img_id>.json``.

    Args:
        opt: Namespace providing ``img_id``, ``img_path`` and ``weights``.
    """
    import os  # local import: only needed to create the output directory

    compound_coef = 2
    force_input_size = None  # set None to use default size
    img_id = opt.img_id
    img_path = opt.img_path + str(img_id) + '.jpg'

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['02010001', '02010002']

    color_list = standard_to_bgr(STANDARD_COLORS)
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(opt.weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    def display(preds, imgs, imshow=True, imwrite=False, img_id=1):
        """Draw every predicted box on its image, then show and/or save the result."""
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                continue

            imgs[i] = imgs[i].copy()
            imgs[i] = cv2.cvtColor(imgs[i], cv2.COLOR_BGR2RGB)

            for j in range(len(preds[i]['rois'])):
                # `np.int` was removed in NumPy 1.24; the builtin is the documented replacement.
                x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
                obj = obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])
                plot_one_box(imgs[i], [x1, y1, x2, y2],
                             label=obj,
                             score=score,
                             color=color_list[get_index_label(obj, obj_list)])

            if imshow:
                cv2.imshow('img', imgs[i])
                cv2.waitKey(0)

            if imwrite:
                str1 = 'test/' + str(img_id) + '.jpg'
                cv2.imwrite(str1, imgs[i])

    # cv2.imwrite does not create directories, and the JSON dump below needs it too.
    os.makedirs('test', exist_ok=True)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, imshow=False, imwrite=True, img_id=img_id)

    print('running speed test...')
    with torch.no_grad():
        print('test1: model inferring and postprocessing')
        print('inferring image for 10 times...')
        t1 = time.time()
        for _ in range(10):
            _, regression, classification, anchors = model(x)

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)
            out = invert_affine(framed_metas, out)

        tempList = []
        first = out[0]
        for j in range(len(first['class_ids'])):
            x_min, y_min, x_max, y_max = (float(v) for v in first['rois'][j][:4])
            tempList.append({
                'image_id': img_id,
                # Class index 1 is exported as category 2; every other index as category 1.
                'category_id': 2 if first['class_ids'][j] == 1 else 1,
                'score': float(first['scores'][j]),
                # Corner box converted to [x, y, width, height].
                'bbox': [x_min, y_min, x_max - x_min, y_max - y_min],
            })

        t2 = time.time()
        tact_time = (t2 - t1) / 10
        print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')

    with open("test/" + str(img_id) + ".json", "w") as f:
        json.dump(tempList, f)
    print("生成标注后的图片(" + str(img_id) + ".jpg)和json(" + str(img_id) + ".json)到test文件夹中...")
def EfficientDetNode():
    """ROS node: detect objects in a folder of frames and publish/save them once per second.

    For each image in the module-level ``path`` (sorted by the integer file
    stem) the node runs EfficientDet, publishes a ``Detection2DArray`` on
    ``/image_detections`` and writes ``<txt_path>/<frame>.txt`` containing the
    frame's time stamp followed by one ``cx cy w h id score`` line per
    detection. After the last frame a 10-iteration speed test is printed.

    Relies on module-level configuration: ``path``, ``stamp_path``,
    ``txt_path``, ``compound_coef``, ``force_input_size``, ``use_cuda``,
    ``use_float16``, ``obj_list``, ``anchor_ratios``, ``anchor_scales``,
    ``threshold`` and ``iou_threshold``.
    """
    rospy.init_node('efficient_det_node', anonymous=True)
    rospy.Subscriber('input', String, image_callback, queue_size=1)
    pub = rospy.Publisher('/image_detections', Detection2DArray, queue_size=10)
    rate = rospy.Rate(1)  # 1 Hz

    path_list = os.listdir(path)
    path_list.sort(key=lambda x: int(x.split('.')[0]))

    # Context manager so the stamp file handle is not leaked.
    with open(stamp_path) as stamp_file:
        stamp_lines = stamp_file.readlines()

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # Build the network once: the original re-created it and re-read the
    # weight file from disk for every single frame.
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{compound_coef}.pth',
                   map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Labels that map to a message id; any other label keeps the message default.
    class_ids = {'car': 0, 'person': 1, 'cyclist': 2}

    os.makedirs(txt_path, exist_ok=True)

    x = None
    framed_metas = None
    for stamp_i, filename in enumerate(path_list):
        cur_frame = filename[:-4]
        img_path = path + "/" + filename
        # The time stamp is the last 13 characters of the matching stamp line.
        cur_stamp = float(stamp_lines[stamp_i][-13:].strip('\n'))

        detection_results = Detection2DArray()

        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)

        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
            0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        display(cur_frame, out, ori_imgs, imshow=False, imwrite=True)

        for pred in out:
            for j in range(len(pred['rois'])):
                # `np.int` was removed in NumPy 1.24; use the builtin instead.
                x1, y1, x2, y2 = pred['rois'][j].astype(int)
                obj = obj_list[pred['class_ids'][j]]

                result = ObjectHypothesisWithPose()
                result.score = float(pred['scores'][j])
                if obj in class_ids:
                    result.id = class_ids[obj]

                detection_msg = Detection2D()
                detection_msg.bbox.center.x = (x1 + x2) / 2
                detection_msg.bbox.center.y = (y1 + y2) / 2
                detection_msg.bbox.size_x = x2 - x1
                detection_msg.bbox.size_y = y2 - y1

                detection_msg.results.append(result)
                detection_results.detections.append(detection_msg)
                rospy.loginfo("%d: %lf", detection_msg.results[0].id,
                              detection_msg.results[0].score)

        detection_results.header.seq = cur_frame
        rospy.loginfo(detection_results.header.stamp)
        pub.publish(detection_results)

        with open(f'{txt_path}/{cur_frame}.txt', 'w') as f:
            f.write(str(cur_stamp) + "\n")
            for detection in detection_results.detections:
                f.write(str(detection.bbox.center.x) + " ")
                f.write(str(detection.bbox.center.y) + " ")
                f.write(str(detection.bbox.size_x) + " ")
                f.write(str(detection.bbox.size_y) + " ")
                f.write(str(detection.results[0].id) + " ")
                f.write(str(detection.results[0].score) + "\n")

        rate.sleep()

    if x is None:
        # Empty input folder: there is no tensor to benchmark with.
        return

    print('running speed test...')
    with torch.no_grad():
        print('test1: model inferring and postprocessing')
        print('inferring image for 10 times...')
        t1 = time.time()
        for _ in range(10):
            _, regression, classification, anchors = model(x)

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, threshold, iou_threshold)
            out = invert_affine(framed_metas, out)

        t2 = time.time()
        tact_time = (t2 - t1) / 10
        print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
def train(args):
    """Fine-tune the eye detector from ``init_weight.pth`` and keep the best checkpoint.

    Images under ``<dataset_path>/train`` are split 80/20 into training and
    validation sets, separately for files whose path contains ``"n_"`` and all
    others. Per-epoch training and validation loss/accuracy are appended to
    ``<weight_path>/train_log.txt``; whenever the validation loss improves,
    model, optimizer and scheduler state are saved to
    ``<weight_path>/pre_trained_weight.pth``.

    Args:
        args: Namespace providing ``weight_path``, ``gpu``, ``batch_size``,
            ``num_workers``, ``compound_coef``, ``optim``, ``lr``,
            ``patience``, ``dataset_path`` and ``epoch``.

    Raises:
        AssertionError: If ``args.weight_path`` is empty.
    """
    assert args.weight_path, 'must indicate the path of initial weight'

    log_path = f'{args.weight_path}/train_log.txt'
    best_weight_path = f'{args.weight_path}/pre_trained_weight.pth'

    # Start from a clean slate: drop artefacts of any previous run.
    if os.path.exists(log_path):
        os.remove(log_path)
    if os.path.exists(best_weight_path):
        os.remove(best_weight_path)

    print("Hi")
    params = Params(f'projects/eye.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    train_params = {'batch_size': args.batch_size,
                    'shuffle': True,
                    'drop_last': True,
                    'collate_fn': collater,
                    'num_workers': args.num_workers}

    val_params = {'batch_size': args.batch_size,
                  'shuffle': False,
                  'drop_last': True,
                  'collate_fn': collater,
                  'num_workers': args.num_workers}

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    # NOTE(review): eval() executes whatever the project YAML contains; only
    # use trusted config files here.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))
    init_weights(model)

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model)
    model = model.cuda()

    if args.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), args.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=0.9, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=args.patience,
                                                           verbose=True)  # unit is epoch

    img_list = glob.glob(f"{args.dataset_path}/train/*")
    normal_img_list = []
    yellow_img_list = []
    for img in img_list:
        if "n_" in img:
            normal_img_list.append(img)
        else:
            yellow_img_list.append(img)

    random.shuffle(normal_img_list)
    random.shuffle(yellow_img_list)

    # Hold out one fifth of each group for validation.
    normal_val_num = int(len(normal_img_list) / 5)
    yellow_val_num = int(len(yellow_img_list) / 5)

    train_img_list = normal_img_list[normal_val_num:] + yellow_img_list[yellow_val_num:]
    val_img_list = normal_img_list[:normal_val_num] + yellow_img_list[:yellow_val_num]

    # Both splits come from the train folder, so they share one annotation file.
    train_anno_txt_path = f"{args.dataset_path}/train.txt"
    val_anno_txt_path = f"{args.dataset_path}/train.txt"

    train_transform = transforms.Compose([Augmenter(),
                                          randomScaleWidth(),
                                          randomBlur(),
                                          Normalizer(mean=params.mean, std=params.std),
                                          Resizer(input_sizes[args.compound_coef])])

    val_transform = transforms.Compose([Augmenter(),
                                        Normalizer(mean=params.mean, std=params.std),
                                        Resizer(input_sizes[args.compound_coef])])

    train_set = EyeDataset(train_img_list, train_anno_txt_path, train_transform)
    val_set = EyeDataset(val_img_list, val_anno_txt_path, val_transform)

    train_generator = DataLoader(train_set, **train_params)
    val_generator = DataLoader(val_set, **val_params)

    # Deserialize the checkpoint once instead of three times.
    checkpoint = torch.load(f'{args.weight_path}/init_weight.pth')
    model.model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    del checkpoint

    model.train()

    best_val_loss = 1e5

    for epoch in range(args.epoch):
        model.train()
        total_loss_ls = []
        total_correct = 0
        total = 0

        for data in train_generator:
            imgs = data['img'].cuda()
            annot = data['annot'].cuda()

            optimizer.zero_grad()

            reg_loss, cls_head_loss, cls_correct_num, total_num = model(
                imgs, annot, obj_list=params.obj_list)

            total_correct += cls_correct_num
            total += total_num

            reg_loss = reg_loss.mean()
            loss = cls_head_loss + reg_loss

            if not torch.isfinite(loss):
                # A NaN/inf batch must not enter the epoch mean that drives the
                # LR scheduler (the original recorded it before this check).
                continue

            total_loss_ls.append(loss.item())

            if loss == 0:
                continue

            loss.backward()
            optimizer.step()

        total_loss = np.mean(total_loss_ls)
        scheduler.step(total_loss)

        with open(log_path, 'a') as fp:
            fp.write(f'Epoch: {epoch}  loss: {total_loss:.6f} | acc: {total_correct / total * 100:.2f}\n')

        model.eval()
        with torch.no_grad():
            total = 0
            total_correct = 0
            total_loss_ls = []

            for data in val_generator:
                imgs = data['img'].cuda()
                annot = data['annot'].cuda()

                reg_loss, cls_head_loss, cls_correct_num, total_num = model(
                    imgs, annot, obj_list=params.obj_list)

                total += total_num
                total_correct += cls_correct_num

                reg_loss = reg_loss.mean()
                loss = cls_head_loss + reg_loss
                total_loss_ls.append(loss.item())

            total_loss = np.mean(total_loss_ls)

            with open(log_path, 'a') as fp:
                fp.write(f'Epoch: {epoch}  loss: {total_loss:.6f} | acc: {total_correct / total * 100:.2f}\n\n')

            if total_loss < best_val_loss:
                best_val_loss = total_loss
                torch.save({
                    "model_state_dict": model.model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                }, best_weight_path)
def main(args):
    """Evaluate the saved eye detector on the test split and log loss and accuracy.

    Loads ``<weight_path>/pre_trained_weight.pth``, runs every batch of
    ``<dataset_path>/test`` through the model with gradients disabled and
    appends one ``Testing loss … | acc …`` line to
    ``<weight_path>/test_log.out``.

    Args:
        args: Namespace providing ``weight_path``, ``gpu``, ``batch_size``,
            ``num_workers``, ``compound_coef`` and ``dataset_path``.

    Raises:
        AssertionError: If ``args.weight_path`` is empty.
    """
    print("Hi")

    # Validate before the path is used (the original checked it afterwards).
    assert args.weight_path, 'must indicate the path of pre-trained weight'

    log_path = f"{args.weight_path}/test_log.out"
    if os.path.exists(log_path):
        os.remove(log_path)

    params = Params(f'projects/eye.yml')

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    test_params = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': args.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    # NOTE(review): eval() executes whatever the project YAML contains; only
    # use trusted config files here.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))
    init_weights(model)

    model = ModelWithLoss(model)
    model = model.cuda()

    # Evaluation only needs the model weights; the optimizer and scheduler the
    # original built and restored here were never used. Load the file once.
    checkpoint = torch.load(f'{args.weight_path}/pre_trained_weight.pth')
    model.model.load_state_dict(checkpoint['model_state_dict'])
    del checkpoint

    test_img_list = glob.glob(f'{args.dataset_path}/test/*')
    test_anno_txt_path = f'{args.dataset_path}/test.txt'

    test_transform = transforms.Compose([
        Augmenter(),
        Normalizer(mean=params.mean, std=params.std),
        Resizer(input_sizes[args.compound_coef])
    ])

    test_set = EyeDataset(test_img_list, test_anno_txt_path, test_transform)
    test_generator = DataLoader(test_set, **test_params)

    model.eval()
    with torch.no_grad():
        total = 0
        total_correct = 0
        total_loss_ls = []

        for data in test_generator:
            imgs = data['img'].cuda()
            annot = data['annot'].cuda()

            reg_loss, cls_head_loss, cls_correct_num, total_num = model(
                imgs, annot, obj_list=params.obj_list)

            total_correct += cls_correct_num
            total += total_num

            reg_loss = reg_loss.mean()
            loss = reg_loss + cls_head_loss
            total_loss_ls.append(loss.item())

        total_loss = np.mean(total_loss_ls)

        with open(log_path, 'a') as fp:
            fp.write(
                f'Testing loss: {total_loss:.6f} | acc: {total_correct / total * 100:.2f}\n'
            )
num_classes=len(obj_list_2), ratios=anchor_ratios, scales=anchor_scales) model_2.load_state_dict( torch.load( f'/data/efdet/logs/{project}/crop/weights/{save_time2}/efficientdet-d{compound_coef}_{number}.pth', map_location='cpu')) model_1.requires_grad_(False) model_1.eval() model_2.requires_grad_(False) model_2.eval() if use_cuda: model_1 = model_1.cuda() model_2 = model_2.cuda() if use_float16: model_1 = model_1.half() model_2 = model_2.half() def display(out_1, out_2, imgs, imshow=True, showtime=0, imwrite=False): # if len(preds[i]['rois']) == 0: # if model dosen't detect object, not show image # continue for img, out_1 in zip(imgs, out_1): img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for i in range(len(out_1['rois'])): ox1, oy1, ox2, oy2 = out_1['rois'][i].astype(np.int) obj_1 = obj_list_1[out_1['class_ids'][i]]
def train_det(opt, cfg):
    """Train an EfficientDet detector with per-epoch validation, checkpointing and early stopping.

    Training data is read from ``<opt.data_path>/Train`` and validation data
    from ``<opt.data_path>/Validation``. A timestamped ``trainlogs_*``
    directory is created under ``opt.saved_path`` for checkpoints, a JSON
    snapshot of the configuration and (unless ``opt.log_path`` is given)
    TensorBoard logs. A checkpoint is written whenever the validation loss
    improves by more than ``opt.es_min_delta``; training stops once
    ``opt.es_patience`` (> 0) epochs pass without improvement. On Ctrl-C the
    current weights are saved before returning.

    Args:
        opt: Run options: ``data_path``, ``num_workers``, ``load_weights``,
            ``saved_path``, ``log_path``, ``config``, ``debug``,
            ``es_min_delta``, ``es_patience``. ``saved_path`` and ``log_path``
            are updated in place.
        cfg: Project configuration: ``batch_size``, ``mean``, ``std``,
            ``compound_coef``, ``dictionary_class_name``, ``augment_list``,
            ``anchor_ratios``, ``anchor_scales``, ``training_layer``,
            ``num_gpus``, ``optimizer``, ``learning_rate``, ``no_epochs``,
            ``only_best_weights``.
    """
    training_params = {
        'batch_size': cfg.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': cfg.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    training_set = DataGenerator(
        data_path=os.path.join(opt.data_path, 'Train'),
        class_ids=cfg.dictionary_class_name.keys(),
        transform=transforms.Compose([
            Augmenter(),
            Normalizer(mean=cfg.mean, std=cfg.std),
            Resizer(input_sizes[cfg.compound_coef])
        ]),
        pre_augments=['', *[f'{aug}_' for aug in cfg.augment_list]]
        if cfg.augment_list else None)
    training_generator = DataLoader(training_set, **training_params)

    val_set = DataGenerator(
        data_path=os.path.join(opt.data_path, 'Validation'),
        class_ids=cfg.dictionary_class_name.keys(),
        transform=transforms.Compose([
            Normalizer(mean=cfg.mean, std=cfg.std),
            Resizer(input_sizes[cfg.compound_coef])
        ]))
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes whatever the config contains; only use
    # trusted configuration files here.
    model = EfficientDetBackbone(num_classes=len(cfg.dictionary_class_name),
                                 compound_coef=cfg.compound_coef,
                                 ratios=eval(cfg.anchor_ratios),
                                 scales=eval(cfg.anchor_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)

        # Checkpoints are named "..._<step>.pth"; fall back to 0 when the file
        # name does not end in a step number.
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (ValueError, TypeError):
            last_step = 0

        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, '
                'this might be because you load a pretrained weights with different number of classes. '
                'The rest of the weights should be loaded already.')

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if cfg.training_layer.lower() == 'heads':

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if cfg.num_gpus > 1 and cfg.batch_size // cfg.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if cfg.num_gpus > 0:
        model = model.cuda()
        if cfg.num_gpus > 1:
            model = CustomDataParallel(model, cfg.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    # One if/elif chain: with two separate `if`s the trailing `else` replaced a
    # freshly built AdamW optimizer with SGD, so 'adamw' was never honoured.
    optimizer_name = cfg.optimizer.lower()
    if optimizer_name == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), cfg.learning_rate)
    elif optimizer_name == 'srsgd':
        optimizer = SRSGD(model.parameters(),
                          lr=cfg.learning_rate,
                          weight_decay=5e-4,
                          iter_count=100)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    cfg.learning_rate,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    # Setup complete, then start training
    now = datetime.datetime.now()
    opt.saved_path = opt.saved_path + f'/trainlogs_{now.strftime("%Y%m%d_%H%M%S")}'
    if opt.log_path is None:
        opt.log_path = opt.saved_path
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    # Write history
    if 'backlog' not in opt.config:
        with open(
                os.path.join(opt.saved_path,
                             f'{now.strftime("%Y%m%d%H%M%S")}.backlog.json'),
                'w') as f:
            backlog = dict(cfg.to_pascal_case())
            backlog['__metadata__'] = 'Backlog at ' + now.strftime(
                "%Y/%m/%d %H:%M:%S")
            json.dump(backlog, f)
    else:
        with open(
                os.path.join(opt.saved_path,
                             f'{now.strftime("%Y%m%d%H%M%S")}.history.json'),
                'w') as f:
            history = dict(cfg.to_pascal_case())
            history['__metadata__'] = now.strftime("%Y/%m/%d %H:%M:%S")
            json.dump(history, f)

    writer = SummaryWriter(opt.log_path + f'/tensorboard')

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)
    class_names = cfg.dictionary_class_name.keys()

    try:
        for epoch in range(cfg.no_epochs):
            # When resuming, skip the epochs already covered by `step`.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # ...and the batches of the partially finished epoch.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.set_description(
                        f'Skip {batch_idx} < {step} - {last_epoch} * {num_iter_per_epoch}'
                    )
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if cfg.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot, obj_list=class_names)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. '
                        'Total loss: {:.5f}'.format(step, epoch, cfg.no_epochs,
                                                    batch_idx + 1,
                                                    num_iter_per_epoch,
                                                    cls_loss.item(),
                                                    reg_loss.item(),
                                                    loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss}, step)
                    writer.add_scalars('Classification_loss', {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                except Exception as e:
                    # Best effort: report the failing batch and keep training.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            scheduler.step(np.mean(epoch_loss))

            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for data in val_generator:
                with torch.no_grad():
                    imgs = data['img']
                    annot = data['annot']

                    if cfg.num_gpus == 1:
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    cls_loss, reg_loss = model(imgs, annot, obj_list=class_names)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss_classification_ls.append(cls_loss.item())
                    loss_regression_ls.append(reg_loss.item())

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            progress_bar.set_description(
                'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}.'
                ' Total loss: {:1.5f}'.format(epoch, cfg.no_epochs, cls_loss,
                                              reg_loss, loss))
            writer.add_scalars('Loss', {'val': loss}, step)
            writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
            writer.add_scalars('Classification_loss', {'val': cls_loss}, step)

            # NOTE(review): the original branched on cfg.only_best_weights but
            # both branches were identical, so the flag has no effect; confirm
            # whether the "else" case was meant to save every epoch.
            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch

                save_checkpoint(
                    model,
                    f"{opt.saved_path}/det_d{cfg.compound_coef}_{epoch}_{step}.pth"
                )

            model.train()

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                    .format(epoch, best_loss))
                break
        print(
            f'[Info] Finished training. Best loss achieved {best_loss} at epoch {best_epoch}.'
        )
    except KeyboardInterrupt:
        save_checkpoint(
            model,
            f"{opt.saved_path}/d{cfg.compound_coef}_{epoch}_{step}.pth")
    finally:
        # Close the TensorBoard writer on every exit path, errors included.
        writer.close()
def effdet_detection(content, effdet):
    """Run live EfficientDet-D0 detection on the webcam and report one class to Scratch.

    Every frame is run through the detector. For each detection whose label
    equals ``content`` a horizontal position derived from the box centre is
    passed to ``effdet.send_message_to_scratch`` and the box is drawn on the
    frame. The frame is shown in a window until the stream ends or ``q`` is
    pressed.

    Args:
        content: Class label (an entry of the COCO ``obj_list``) to track.
        effdet: Object exposing ``send_message_to_scratch(value)``.
    """
    video_src = 0  # set int to use webcam, set str to read from a video file

    compound_coef = 0
    force_input_size = None  # set None to use default size

    threshold = 0.5
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    # Empty strings are placeholders that keep list positions aligned with class ids.
    obj_list = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
        'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', '', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '',
        'toilet', '', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
        'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
        '', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
        'toothbrush'
    ]

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list))
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # function for display
    def display(preds, imgs, content, effdet):
        """Annotate and return the first image; only that image is ever processed."""
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                return imgs[i]

            for j in range(len(preds[i]['rois'])):
                # `np.int` was removed in NumPy 1.24; use the builtin instead.
                (x1, y1, x2, y2) = preds[i]['rois'][j].astype(int)
                obj = obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])
                if obj == content:
                    # Send the position of the requested class's box to Scratch.
                    effdet.send_message_to_scratch(
                        (x1 + x2) * 0.5 * 0.625 - 200)
                    print((x1 + x2) * 0.5 * 0.625 - 200)
                    cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0),
                                  2)
                    cv2.putText(imgs[i], '{}, {:.3f}'.format(obj, score),
                                (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (255, 255, 0), 1)

            return imgs[i]

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # frame preprocessing
            ori_imgs, framed_imgs, framed_metas = preprocess_video(
                frame, max_size=input_size)

            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

            x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
                0, 3, 1, 2)

            # model predict
            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes, threshold,
                                  iou_threshold)

            # result
            out = invert_affine(framed_metas, out)
            img_show = display(out, ori_imgs, content, effdet)

            # show frame by frame
            cv2.imshow('frame', img_show)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if a frame raised.
        cap.release()
        cv2.destroyAllWindows()
print('BBox') coco_eval = COCOeval(coco_gt, coco_pred, 'bbox') coco_eval.params.imgIds = image_ids coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() if __name__ == '__main__': SET_NAME = params['val_set'] VAL_GT = f'datasets/{params["project_name"]}/annotations/instances_{SET_NAME}.json' VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/' MAX_IMAGES = 10000 coco_gt = COCO(VAL_GT) image_ids = coco_gt.getImgIds()[:MAX_IMAGES] if not os.path.exists(f'{SET_NAME}_bbox_results.json'): model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list)) model.load_state_dict(torch.load(weights_path)) model.requires_grad_(False) model.eval() if use_cuda: model.cuda() image_ids = evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt, model) eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json') else: eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json')
def train(opt):
    """Train an EfficientDet model on the wheat dataset of ``opt.project``.

    Reads ``projects/<opt.project>.yml``, splits ``train.csv`` into training
    and validation frames, optionally restores weights, then runs the
    train/validate loop with TensorBoard logging, periodic checkpoints,
    plateau-based learning-rate scheduling and early stopping.

    Args:
        opt: Parsed options. Attributes read here: ``project``,
            ``saved_path``, ``log_path``, ``batch_size``, ``num_workers``,
            ``compound_coef``, ``load_weights``, ``head_only``, ``debug``,
            ``optim``, ``lr``, ``num_epochs``, ``save_interval``,
            ``val_interval``, ``es_min_delta`` and ``es_patience``.
            ``saved_path`` and ``log_path`` are rewritten in place to their
            per-project sub-directories.
    """
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }
    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }

    # Network input resolution, indexed by compound coefficient.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[opt.compound_coef]

    train_df = pd.read_csv(os.path.join(params.data_dir, 'train.csv'))
    train_df, val_df = get_train_val(train_df)
    image_dir = os.path.join(params.data_dir, params.train_set)

    training_set = WheatDataset(
        dataframe=train_df,
        image_dir=image_dir,
        transforms=transforms.Compose([
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_size),
        ]))
    training_generator = DataLoader(training_set, **training_params)

    # Validation images are only normalised and resized: random augmentation
    # would make the validation loss (which drives checkpointing and early
    # stopping) noisy from one epoch to the next.
    val_set = WheatDataset(
        dataframe=val_df,
        image_dir=image_dir,
        transforms=transforms.Compose([
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_size),
        ]))
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes text taken from the project file. The
    # anchor settings are arithmetic expressions, so ast.literal_eval cannot
    # replace it; only use trusted project files.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)

        # Checkpoints are named "..._<epoch>_<step>.pth"; anything else
        # simply restarts the step counter.
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (ValueError, TypeError):
            last_step = 0

        try:
            # Load on the CPU first; the model is moved to the GPU later.
            model.load_state_dict(
                torch.load(weights_path, map_location='cpu'), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # Synchronised batch norm is used when the per-GPU batch is below 4.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # When resuming, skip the epochs that were already completed.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # ...and the batches already seen in the resumed epoch.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['image']
                    annot = data['bboxes']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, batch_idx + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss',
                                       {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    # Best effort: report the failing batch and keep going.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            # An epoch in which every batch was skipped has no mean loss;
            # stepping the scheduler with NaN would count as a "bad" epoch.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in val_generator:
                    with torch.no_grad():
                        imgs = data['image']
                        annot = data['bboxes']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        # Keep the progress made so far when the user aborts.
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        # Close the TensorBoard writer exactly once on every exit path.
        writer.close()
def getImageDetections(imagePath, weights, nms_threshold, confidenceParam, coefficient):
    """
    Run the detector and return all detections in a single list.

    Parameters
    ----------
    imagePath : str
        Path handed to ``preprocess`` to load the image(s).
    weights : str
        File name of the weights, resolved under
        ``rootDir + 'logs/' + project + '/'`` (both module-level names).
    nms_threshold : float
        Non-maximum suppression (IoU) threshold.
    confidenceParam : float
        Confidence threshold; detections scoring above it are kept.
    coefficient : int
        Compound coefficient of the EfficientDet model; it also selects the
        network input size.

    Returns
    -------
    detectionsList : list
        One ``(score, x1, y1, x2, y2)`` tuple per predicted bounding box,
        with integer pixel coordinates. Empty when nothing was detected.
    """
    compound_coef = coefficient
    force_input_size = None  # set None to use default size

    img_path = imagePath

    threshold = confidenceParam
    iou_threshold = nms_threshold

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['class_name']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    # Channels-last images -> channels-first batch in the model's precision.
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 # replace this part with your project's anchor config
                                 ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                                 scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    model.load_state_dict(torch.load(rootDir + 'logs/' + project + '/' + weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    # Map boxes from the resized/padded frame back to the original image.
    out = invert_affine(framed_metas, out)

    # Created before the loop so the function returns an empty list (rather
    # than raising on an unbound name) when nothing is detected, and so that
    # detections from every image are accumulated instead of overwritten.
    detectionsList = []
    for i in range(len(ori_imgs)):
        rois = out[i]['rois']
        if len(rois) == 0:
            continue
        scores = out[i]['scores']
        for roi, score in zip(rois, scores):
            # The ``np.int`` alias was removed in NumPy 1.24; the builtin
            # ``int`` is what it always stood for.
            (x1, y1, x2, y2) = roi.astype(int)
            detectionsList.append((float(score), x1, y1, x2, y2))

    return detectionsList
def train(opt):
    """Train an EfficientDet model on the COCO-style dataset of ``opt.config``.

    Builds the training/validation loaders from the config file, optionally
    restores weights, then runs the train/validate loop with TensorBoard
    logging, periodic checkpoints, plateau-based learning-rate scheduling
    and early stopping.

    Args:
        opt: Parsed options. Attributes read here: ``config``,
            ``batch_size``, ``num_workers``, ``compound_coef``,
            ``load_weights``, ``head_only``, ``debug``, ``optim``, ``lr``,
            ``num_epochs``, ``save_interval``, ``val_interval``,
            ``es_min_delta`` and ``es_patience``. ``saved_path`` and
            ``log_path`` are overwritten with locations under
            ``params.logdir``.
    """
    params = Params(opt.config)

    if params.num_gpus == 0:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = params.logdir
    opt.log_path = os.path.join(params.logdir, "tensorboard")
    os.makedirs(opt.saved_path, exist_ok=True)
    os.makedirs(opt.log_path, exist_ok=True)

    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": opt.num_workers,
    }

    val_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": opt.num_workers,
    }

    # Network input resolution, indexed by compound coefficient.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[opt.compound_coef]

    training_set = CocoDataset(
        image_dir=params.image_dir,
        json_path=params.train_annotations,
        transform=transforms.Compose(
            [
                Normalizer(mean=params.mean, std=params.std),
                Augmenter(),
                Resizer(input_size),
            ]
        ),
    )
    training_generator = DataLoader(training_set, **training_params)

    # Validation images default to the training image directory.
    if params.val_image_dir is None:
        params.val_image_dir = params.image_dir

    val_set = CocoDataset(
        image_dir=params.val_image_dir,
        json_path=params.val_annotations,
        transform=transforms.Compose(
            [Normalizer(mean=params.mean, std=params.std), Resizer(input_size)]
        ),
    )
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes text taken from the config file. The
    # anchor settings are arithmetic expressions, so ast.literal_eval cannot
    # replace it; only use trusted config files.
    model = EfficientDetBackbone(
        num_classes=len(params.obj_list),
        compound_coef=opt.compound_coef,
        ratios=eval(params.anchors_ratios),
        scales=eval(params.anchors_scales),
    )

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith(".pth"):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)

        # Checkpoints are named "..._<epoch>_<step>.pth"; anything else
        # simply restarts the step counter.
        try:
            last_step = int(os.path.basename(weights_path).split("_")[-1].split(".")[0])
        except (ValueError, TypeError):
            last_step = 0

        try:
            # Load on the CPU first; the model is moved to the GPU later.
            model.load_state_dict(torch.load(weights_path, map_location="cpu"), strict=False)
        except RuntimeError as e:
            print(f"[Warning] Ignoring {e}")
            print(
                "[Warning] Don't panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already."
            )

        print(
            f"[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}"
        )
    else:
        last_step = 0
        print("[Info] initializing weights...")
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ["EfficientNet", "BiFPN"]:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print("[Info] freezed backbone")

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == "adamw":
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # When resuming, skip the epochs that were already completed.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # ...and the batches already seen in the resumed epoch.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data["img"]
                    annot = data["annot"]

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        "Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}".format(
                            step,
                            epoch,
                            opt.num_epochs,
                            batch_idx + 1,
                            num_iter_per_epoch,
                            cls_loss.item(),
                            reg_loss.item(),
                            loss.item(),
                        )
                    )
                    writer.add_scalars("Loss", {"train": loss}, step)
                    writer.add_scalars("Regression_loss", {"train": reg_loss}, step)
                    writer.add_scalars("Classfication_loss", {"train": cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]["lr"]
                    writer.add_scalar("learning_rate", current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model, f"efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth"
                        )
                        print("checkpoint...")

                except Exception as e:
                    # Best effort: report the failing batch and keep going.
                    print("[Error]", traceback.format_exc())
                    print(e)
                    continue

            # An epoch in which every batch was skipped has no mean loss;
            # stepping the scheduler with NaN would count as a "bad" epoch.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in val_generator:
                    with torch.no_grad():
                        imgs = data["img"]
                        annot = data["annot"]

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    "Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}".format(
                        epoch, opt.num_epochs, cls_loss, reg_loss, loss
                    )
                )
                writer.add_scalars("Loss", {"val": loss}, step)
                writer.add_scalars("Regression_loss", {"val": reg_loss}, step)
                writer.add_scalars("Classfication_loss", {"val": cls_loss}, step)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(model, f"efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth")

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        "[Info] Stop training at epoch {}. The lowest loss achieved is {}".format(
                            epoch, best_loss
                        )
                    )
                    break
    except KeyboardInterrupt:
        # Keep the progress made so far when the user aborts.
        save_checkpoint(model, f"efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth")
    finally:
        # Close the TensorBoard writer exactly once on every exit path.
        writer.close()
def train(opt):
    '''
    Train the model, evaluating on a sample of the validation set during
    each epoch.

    Input: the namespace returned by get_args(). Attributes read here:
    project, data_path, saved_path, log_path, batch_size, num_workers,
    compound_coef, load_weights, head_only, debug, optim, lr, num_epochs,
    save_interval, es_min_delta, es_patience, max_preds_toeval,
    eval_percent_epoch, eval_sampling_percent and num_visualize_images.
    saved_path, log_path and max_preds_toeval are rewritten in place.

    Function: every ``eval_percent_epoch`` percent of an epoch the current
    batch is logged to TensorBoard as an image grid, a sample of the
    validation set is scored, the predictions are dumped to JSON for the
    mAP computation, and the best-loss checkpoint / early-stopping state is
    updated.
    '''
    params = Params(f'projects/{opt.project}.yml')

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    # evaluation json file
    # (uses the ``opt`` argument rather than a module-level ``OPT`` so the
    # function does not depend on a global being set by the caller)
    pred_folder = f'{opt.data_path}/{opt.project}/predictions'
    os.makedirs(pred_folder, exist_ok=True)
    evaluation_pred_file = f'{pred_folder}/instances_bbox_results.json'

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    # Shuffled because only a sample of the validation batches is scored.
    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers
    }

    # Network input resolution, indexed by compound coefficient.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[opt.compound_coef]
    dataset_root = os.path.join(opt.data_path, params.project_name)

    training_set = CocoDataset(
        root_dir=dataset_root,
        set=params.train_set,
        transform=torchvision.transforms.Compose([
            Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Resizer(input_size)
        ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(
        root_dir=dataset_root,
        set=params.val_set,
        transform=torchvision.transforms.Compose([
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_size)
        ]))
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes text taken from the project file. The
    # anchor settings are arithmetic expressions, so ast.literal_eval cannot
    # replace it; only use trusted project files.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)

        # Checkpoints are named "..._<epoch>_<step>.pth"; anything else
        # simply restarts the step counter.
        try:
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (ValueError, TypeError):
            last_step = 0

        try:
            # Load on the CPU first; the model is moved to the GPU later.
            model.load_state_dict(
                torch.load(weights_path, map_location='cpu'), strict=False)
        except RuntimeError as rerror:
            print(f'[Warning] Ignoring {rerror}')
            print('[Warning] Don\'t panic if you see this, '
                  'this might be because you load a pretrained weights with different number of classes.'
                  ' The rest of the weights should be loaded already.')

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(mdl):
            classname = mdl.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in mdl.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # apply sync_bn when using multiple gpu and batch_size per gpu is lower than 4
    #  useful when gpu memory is limited.
    # because when bn is disable, the training will be very unstable or slow to converge,
    # apply sync_bn can solve it,
    # by packing all mini-batch across all gpus as one batch and normalize, then send it back to all gpus.
    # but it would also slow down the training by a little bit.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # warp the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)
    num_val_iter_per_epoch = len(val_generator)

    # Limit the no.of preds to #images in val.
    # Here, I averaged the #obj to 5 for computational efficacy
    if opt.max_preds_toeval > 0:
        opt.max_preds_toeval = len(val_generator) * opt.batch_size * 5

    # Number of training iterations between two evaluations. Clamped to 1:
    # a small dataset or percentage would otherwise give a modulus of zero,
    # and the resulting ZeroDivisionError was swallowed by the per-batch
    # exception handler, silently skipping every training step.
    eval_interval = max(
        1, int(num_iter_per_epoch * (opt.eval_percent_epoch / 100)))

    # How many validation batches to score and how often to keep one for
    # the TensorBoard image grid (clamped for the same reason).
    num_validation_steps = int(num_val_iter_per_epoch *
                               (opt.eval_sampling_percent / 100))
    viz_interval = max(
        1, num_validation_steps //
        max(1, opt.num_visualize_images // opt.batch_size))

    try:
        for epoch in range(opt.num_epochs):
            # When resuming, skip the epochs that were already completed.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            stop_training = False
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for iternum, data in enumerate(progress_bar):
                # ...and the batches already seen in the resumed epoch.
                if iternum < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()

                    # The model only returns labelled images in debug mode,
                    # which is switched on for evaluation iterations.
                    is_eval_iter = iternum % eval_interval == 0
                    model.debug = is_eval_iter
                    if is_eval_iter:
                        cls_loss, reg_loss, imgs_labelled = model(
                            imgs, annot, obj_list=params.obj_list)
                    else:
                        cls_loss, reg_loss, _ = model(
                            imgs, annot, obj_list=params.obj_list)

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, iternum + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss',
                                       {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    if is_eval_iter and step > 0:
                        # create grid of images
                        imgs_labelled = np.asarray(imgs_labelled)
                        imgs_labelled = torch.from_numpy(
                            imgs_labelled)  # (N, H, W, C)
                        imgs_labelled.transpose_(1, 3)  # (N, C, H, W)
                        imgs_labelled.transpose_(2, 3)
                        img_grid = torchvision.utils.make_grid(imgs_labelled)
                        # write to tensorboard
                        writer.add_image('Training_images',
                                         img_grid,
                                         global_step=step)

                        ######################## start EVAL ########################
                        model.eval()
                        model.debug = False  # Don't print images in tensorboard now.

                        # remove json
                        if os.path.exists(evaluation_pred_file):
                            os.remove(evaluation_pred_file)

                        loss_regression_ls = []
                        loss_classification_ls = []
                        model.evalresults = []  # Empty the results for next evaluation.
                        imgs_to_viz = []

                        for valiternum, valdata in enumerate(val_generator):
                            with torch.no_grad():
                                imgs = valdata['img']
                                annot = valdata['annot']
                                resizing_imgs_scales = valdata['scale']
                                new_ws = valdata['new_w']
                                new_hs = valdata['new_h']
                                imgs_ids = valdata['img_id']

                                if params.num_gpus >= 1:
                                    imgs = imgs.cuda()
                                    annot = annot.cuda()

                                keep_for_viz = valiternum % viz_interval == 0
                                model.debug = keep_for_viz
                                cls_loss, reg_loss, val_imgs_labelled = model(
                                    imgs,
                                    annot,
                                    obj_list=params.obj_list,
                                    resizing_imgs_scales=resizing_imgs_scales,
                                    new_ws=new_ws,
                                    new_hs=new_hs,
                                    imgs_ids=imgs_ids)
                                if keep_for_viz:
                                    imgs_to_viz += list(val_imgs_labelled)

                                # Reduce like the training path does so
                                # .item() also works when several GPUs
                                # each return a loss.
                                loss_classification_ls.append(
                                    cls_loss.mean().item())
                                loss_regression_ls.append(
                                    reg_loss.mean().item())

                            if valiternum > num_validation_steps:
                                break

                        cls_loss = np.mean(loss_classification_ls)
                        reg_loss = np.mean(loss_regression_ls)
                        loss = cls_loss + reg_loss

                        print(
                            'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                            .format(epoch, opt.num_epochs, cls_loss, reg_loss,
                                    loss))
                        writer.add_scalars('Loss', {'val': loss}, step)
                        writer.add_scalars('Regression_loss',
                                           {'val': reg_loss}, step)
                        writer.add_scalars('Classfication_loss',
                                           {'val': cls_loss}, step)

                        if imgs_to_viz:
                            # create grid of images
                            val_imgs_labelled = np.asarray(imgs_to_viz)
                            val_imgs_labelled = torch.from_numpy(
                                val_imgs_labelled)  # (N, H, W, C)
                            val_imgs_labelled.transpose_(1, 3)  # (N, C, H, W)
                            val_imgs_labelled.transpose_(2, 3)
                            val_img_grid = torchvision.utils.make_grid(
                                val_imgs_labelled, nrow=2)
                            # write to tensorboard
                            writer.add_image('Eval_Images',
                                             val_img_grid,
                                             global_step=step)

                        if opt.max_preds_toeval > 0:
                            # Context manager so the predictions file is
                            # flushed and closed before it is read back.
                            with open(evaluation_pred_file, 'w') as pred_file:
                                json.dump(model.evalresults,
                                          pred_file,
                                          indent=4)
                            try:
                                val_results = calc_mAP_fin(
                                    params.project_name,
                                    params.val_set,
                                    evaluation_pred_file,
                                    val_gt=f'{opt.data_path}/{opt.project}/annotations/instances_{params.val_set}.json')

                                metricname = 'Average Precision  (AP) @[ IoU = 0.50      | area =    all | maxDets = 100 ]'
                                for catgname in val_results:
                                    evalscore = val_results[catgname][
                                        metricname]
                                    writer.add_scalars(
                                        'mAP@IoU=0.5 and area=all',
                                        {f'{catgname}': evalscore}, step)
                            except Exception as exption:
                                # mAP is informational; do not abort training.
                                print("Unable to perform evaluation", exption)

                        if loss + opt.es_min_delta < best_loss:
                            best_loss = loss
                            best_epoch = epoch

                            save_checkpoint(
                                model,
                                f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                            )

                        model.train()

                        # Early stopping: leave the batch loop here and the
                        # epoch loop right after it; a bare ``break`` only
                        # ended the current epoch and training carried on.
                        if epoch - best_epoch > opt.es_patience > 0:
                            print(
                                '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                                .format(epoch, best_loss))
                            stop_training = True
                            break
                        ########################## EVAL ############################

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as exception:
                    # Best effort: report the failing batch and keep going.
                    print('[Error]', traceback.format_exc())
                    print(exception)
                    continue

            if stop_training:
                break

            # An epoch in which every batch was skipped has no mean loss;
            # stepping the scheduler with NaN would count as a "bad" epoch.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))
    except KeyboardInterrupt:
        # Keep the progress made so far when the user aborts.
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        # Close the TensorBoard writer exactly once on every exit path.
        writer.close()
def train(opt):
    """Train the union/instance action EfficientDet model and log to TensorBoard.

    Builds the HICO-DET train/test data loaders, creates an
    ``EfficientDetBackbone`` with object, union-action and instance-action
    heads, optionally restores weights, then runs the training loop
    (gradient accumulation, one checkpoint per epoch) with periodic
    validation that drives ``ReduceLROnPlateau``.

    Args:
        opt: options object. Attributes read here: ``project``, ``saved_path``,
            ``log_path``, ``batch_size``, ``num_workers``, ``compound_coef``,
            ``load_weights``, ``head_only``, ``freeze_object_detection``,
            ``debug``, ``optim``, ``lr``, ``accumulate_batch``, ``num_epochs``,
            ``log_interval``, ``val_interval`` and ``es_min_delta``.

    Side effects:
        * ``opt.saved_path`` and ``opt.log_path`` are rewritten in place.
        * ``CUDA_VISIBLE_DEVICES`` is set to ``-1`` when ``params.num_gpus == 0``.
        * The TensorBoard log directory is deleted and recreated.
        * Checkpoints are written through ``save_checkpoint``.
        * For ``opt.project == "vcoco"`` the process exits with status -999,
          because the dataset construction for that project is commented out.
    """
    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source; spots where it is ambiguous are marked.
    params = Params(f'projects/{opt.project}.yml')

    # Head sizes per project; anything other than "vcoco" must be "hico-det".
    if opt.project == "vcoco":
        num_obj_class = 90
        num_union_action = 25
        num_inst_action = 51
    else:
        assert opt.project == "hico-det"
        num_obj_class = 90
        num_union_action = 117
        num_inst_action = 234

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
        'pin_memory': False
    }

    # Validation runs under no_grad, so it uses twice the training batch size.
    val_params = {
        'batch_size': opt.batch_size * 2,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
        'pin_memory': False
    }

    # Input resolution indexed by compound coefficient.
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    train_transform = transforms.Compose([
        Normalizer(mean=params.mean, std=params.std),
        Augmenter(),
        Resizer(input_sizes[opt.compound_coef])
    ])
    val_transform = transforms.Compose([
        Normalizer(mean=params.mean, std=params.std),
        Resizer(input_sizes[opt.compound_coef])
    ])

    if opt.project == "vcoco":
        # V-COCO dataset construction is disabled, so this branch aborts.
        # training_set = VCOCO_Dataset(root_dir="./datasets/vcoco", set=params.train_set, color_prob=1,
        #                              transform=train_transform)
        # val_set = VCOCO_Dataset(root_dir="./datasets/vcoco", set=params.val_set,
        #                         transform=val_transform)
        exit(-999)
    else:
        training_set = HICO_DET_Dataset(root_dir="data/hico_20160224_det",
                                        set="train",
                                        color_prob=1,
                                        transform=train_transform)
        val_set = HICO_DET_Dataset(root_dir="data/hico_20160224_det",
                                   set="test",
                                   transform=val_transform)

    training_generator = DataLoader(training_set, **training_params)
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes the anchor settings read from the project
    # YAML as Python code; only safe while that file is trusted.
    model = EfficientDetBackbone(num_classes=num_obj_class,
                                 num_union_classes=num_union_action,
                                 num_inst_classes=num_inst_action,
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))
    model.train()
    print("num_classes:", num_obj_class)
    print("num_union_classes:", num_union_action)
    print("instance_action_list", num_inst_action)

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            # The trailing "_<n>" of the checkpoint name is read as an epoch
            # number and converted to a step count; any parse failure
            # restarts the step counter at 0.
            # last_step = int(os.path.basename(weights_path).split('_')[-1].split('.')[0])
            # last_epoch = int(os.path.basename(weights_path).split('_')[-2].split('.')[0]) + 1
            last_epoch = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
            last_step = last_epoch * len(training_generator)
        except:
            last_step = 0

        try:
            init_weights(model)
            print(weights_path)
            model_dict = model.state_dict()
            pretrained_dict = torch.load(weights_path,
                                         map_location=torch.device('cpu'))
            # Keep keys that exist in the model as-is; keys that only exist
            # under the "instance_branch.object_" prefix are remapped to it.
            new_pretrained_dict = {}
            for k, v in pretrained_dict.items():
                if k in model_dict:
                    new_pretrained_dict[k] = v
                elif ("instance_branch.object_" + k) in model_dict:
                    new_pretrained_dict["instance_branch.object_" + k] = v
                    # print("instance_branch.object_"+k)
            ret = model.load_state_dict(new_pretrained_dict, strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        model.apply(freeze_backbone)
        freeze_bn_backbone(model)
        print('[Info] freezed backbone')

    if opt.freeze_object_detection:
        freeze_object_detection(model)
        freeze_bn_object_detection(model)
        # model.apply(freeze_object_detection)
        print('[Info] freezed object detection branch')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Synchronized BN packs the mini-batches of all GPUs into one batch for
    # normalisation, which stabilises training when the per-GPU batch is
    # small, at a small speed cost.
    # The threshold used here is a per-GPU batch size below 8.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 8:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    # if os.path.exists('nohup.out'):
    #     os.remove('nohup.out')
    #     f = open('nohup.out', 'w')
    #     f.close()
    # Start from an empty TensorBoard directory: any previous logs under
    # opt.log_path are deleted.
    if os.path.exists(opt.log_path):
        import shutil
        shutil.rmtree(opt.log_path)
        os.makedirs(opt.log_path)

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # wrap the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, dataset=opt.project, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)
                # NOTE(review): these two re-freeze calls reach through
                # model.module, so they need the data-parallel wrapper; they
                # are assumed to belong under use_sync_bn — confirm.
                if opt.head_only:
                    print('[Info] freezed SyncBN backbone')
                    freeze_bn_backbone(model.module.model)
                if opt.freeze_object_detection:
                    print('[Info] freezed SyncBN object detection')
                    freeze_bn_object_detection(model.module.model)

    # Only parameters that still require gradients are handed to the optimizer.
    if opt.optim == 'adamw':
        # optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
        optimizer = torch.optim.AdamW(
            filter(lambda p: p.requires_grad, model.parameters()), opt.lr)
    elif opt.optim == "adam":
        # optimizer = torch.optim.Adam(model.parameters(), opt.lr)
        optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), opt.lr)
    else:
        # optimizer = torch.optim.SGD(model.parameters(), opt.lr, momentum=0.9, nesterov=True)
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=2,
                                                           verbose=True,
                                                           min_lr=1e-7)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)

    # One "iteration" is one optimizer step, i.e. accumulate_batch mini-batches
    # (rounded up for the last partial group).
    num_iter_per_epoch = (len(training_generator) + opt.accumulate_batch -
                          1) // opt.accumulate_batch
    start_time = time.time()

    try:
        for epoch in range(opt.num_epochs):
            # step // num_iter_per_epoch + 1 is always >= 1, so epoch 0 never
            # runs; epochs already covered by the restored step are skipped.
            last_epoch = step // num_iter_per_epoch + 1
            if epoch < last_epoch:
                continue

            # Fixed schedule: divide the first param group's LR by 10 at
            # epochs 12 and 16.
            if epoch in [12, 16]:
                optimizer.param_groups[0][
                    'lr'] = optimizer.param_groups[0]['lr'] / 10

            epoch_loss = []
            for iter, data in enumerate(training_generator):
                try:
                    imgs = data['img']
                    annot = data['annot']

                    # torch.cuda.empty_cache()

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        for key in annot:
                            annot[key] = annot[key].cuda()

                    union_act_cls_loss, union_sub_reg_loss, union_obj_reg_loss, union_diff_reg_loss, \
                        inst_act_cls_loss, inst_obj_cls_loss, inst_obj_reg_loss = model(imgs, annot["instance"], annot["interaction"])

                    union_act_cls_loss = union_act_cls_loss.mean()
                    union_sub_reg_loss = union_sub_reg_loss.mean()
                    union_obj_reg_loss = union_obj_reg_loss.mean()
                    union_diff_reg_loss = union_diff_reg_loss.mean()

                    inst_act_cls_loss = inst_act_cls_loss.mean()
                    inst_obj_cls_loss = inst_obj_cls_loss.mean()
                    inst_obj_reg_loss = inst_obj_reg_loss.mean()

                    union_loss = union_act_cls_loss + union_sub_reg_loss + union_obj_reg_loss + union_diff_reg_loss
                    instance_loss = inst_act_cls_loss + inst_obj_cls_loss + inst_obj_reg_loss

                    # The optimised loss contains only the instance *action*
                    # term; instance_loss (with the object terms) is logged only.
                    loss = union_loss + inst_act_cls_loss

                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    # Gradient accumulation: scale each mini-batch loss and
                    # step once every accumulate_batch batches, or on the last
                    # batch of the epoch.
                    batch_loss = loss / opt.accumulate_batch
                    batch_loss.backward()
                    if (iter + 1) % opt.accumulate_batch == 0 or iter == len(
                            training_generator) - 1:
                        optimizer.step()
                        optimizer.zero_grad()
                        step += 1

                    loss = loss.item()
                    union_loss = union_loss.item()
                    instance_loss = instance_loss.item()

                    epoch_loss.append(float(loss))
                    current_lr = optimizer.param_groups[0]['lr']

                    if step % opt.log_interval == 0:
                        writer.add_scalars('Union Action Classification Loss',
                                           {'train': union_act_cls_loss}, step)
                        writer.add_scalars('Union Subject Regression Loss',
                                           {'train': union_sub_reg_loss}, step)
                        writer.add_scalars('Union Object Regression Loss',
                                           {'train': union_obj_reg_loss}, step)
                        writer.add_scalars('Union Diff Regression Loss',
                                           {'train': union_diff_reg_loss}, step)
                        writer.add_scalars(
                            'Instance Action Classification Loss',
                            {'train': inst_act_cls_loss}, step)
                        writer.add_scalars(
                            'Instance Object Classification Loss',
                            {'train': inst_obj_cls_loss}, step)
                        writer.add_scalars('Instance Regression Loss',
                                           {'train': inst_obj_reg_loss}, step)

                        writer.add_scalars('Total Loss', {'train': loss}, step)
                        writer.add_scalars('Union Loss', {'train': union_loss},
                                           step)
                        writer.add_scalars('Instance Loss',
                                           {'train': instance_loss}, step)

                        # log learning_rate
                        # NOTE(review): assumed to sit inside the log_interval
                        # block with the other scalars — confirm.
                        writer.add_scalar('learning_rate', current_lr, step)

                    # Console progress every 20 mini-batches; the timer is
                    # restarted after each report.
                    if iter % 20 == 0:
                        end_time = time.time()
                        print(
                            'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Union loss: {:.5f}. Instance loss: {:.5f}. '
                            ' Total loss: {:.5f}. Learning rate: {:.5f} Time: {:.2f}s'
                            .format(step, epoch, opt.num_epochs,
                                    (iter + 1) // opt.accumulate_batch,
                                    num_iter_per_epoch, union_loss,
                                    instance_loss, loss, current_lr,
                                    end_time - start_time))
                        start_time = time.time()

                except Exception as e:
                    # A failing batch is reported and skipped rather than
                    # aborting the run.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            # scheduler.step(np.mean(epoch_loss))

            # One checkpoint per completed epoch.
            save_checkpoint(model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}.pth')
            print('checkpoint...')

            if epoch % opt.val_interval == 0:
                # model.eval() is deliberately left disabled here, so the
                # validation pass runs with the model still in train mode.
                # model.eval()

                union_loss_ls = []
                instance_loss_ls = []

                union_act_cls_loss_ls = []
                union_obj_cls_loss_ls = []
                union_act_reg_loss_ls = []

                union_sub_reg_loss_ls = []
                union_obj_reg_loss_ls = []
                union_diff_reg_loss_ls = []

                inst_act_cls_loss_ls = []
                inst_obj_cls_loss_ls = []
                inst_obj_reg_loss_ls = []

                val_loss = []

                for iter, data in enumerate(val_generator):
                    if (iter + 1) % 50 == 0:
                        print("%d/%d" % (iter + 1, len(val_generator)))
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            for key in annot:
                                annot[key] = annot[key].cuda()

                        union_act_cls_loss, union_sub_reg_loss, union_obj_reg_loss, union_diff_reg_loss, \
                            inst_act_cls_loss, inst_obj_cls_loss, inst_obj_reg_loss = model(imgs, annot["instance"], annot["interaction"])

                        union_act_cls_loss = union_act_cls_loss.mean()
                        union_sub_reg_loss = union_sub_reg_loss.mean()
                        union_obj_reg_loss = union_obj_reg_loss.mean()
                        union_diff_reg_loss = union_diff_reg_loss.mean()

                        inst_act_cls_loss = inst_act_cls_loss.mean()
                        inst_obj_cls_loss = inst_obj_cls_loss.mean()
                        inst_obj_reg_loss = inst_obj_reg_loss.mean()

                        union_loss = union_act_cls_loss + union_sub_reg_loss + union_obj_reg_loss + union_diff_reg_loss
                        instance_loss = inst_act_cls_loss + inst_obj_cls_loss + inst_obj_reg_loss

                        loss = union_loss + inst_act_cls_loss

                        # Zero or non-finite batches are left out of the means.
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        val_loss.append(loss.item())

                        union_act_cls_loss_ls.append(union_act_cls_loss.item())
                        union_sub_reg_loss_ls.append(union_sub_reg_loss.item())
                        union_obj_reg_loss_ls.append(union_obj_reg_loss.item())
                        union_diff_reg_loss_ls.append(
                            union_diff_reg_loss.item())
                        # union_obj_cls_loss_ls.append(union_obj_cls_loss.item())
                        # union_act_reg_loss_ls.append(union_act_reg_loss.item())

                        inst_act_cls_loss_ls.append(inst_act_cls_loss.item())
                        inst_obj_cls_loss_ls.append(inst_obj_cls_loss.item())
                        inst_obj_reg_loss_ls.append(inst_obj_reg_loss.item())

                        union_loss_ls.append(union_loss.item())
                        instance_loss_ls.append(instance_loss.item())

                # Mean of each component over the accepted validation batches.
                union_loss = np.mean(union_loss_ls)
                instance_loss = np.mean(instance_loss_ls)

                union_act_cls_loss = np.mean(union_act_cls_loss_ls)
                union_sub_reg_loss = np.mean(union_sub_reg_loss_ls)
                union_obj_reg_loss = np.mean(union_obj_reg_loss_ls)
                union_diff_reg_loss = np.mean(union_diff_reg_loss_ls)

                inst_act_cls_loss = np.mean(inst_act_cls_loss_ls)
                inst_obj_cls_loss = np.mean(inst_obj_cls_loss_ls)
                inst_obj_reg_loss = np.mean(inst_obj_reg_loss_ls)

                loss = union_loss + inst_act_cls_loss

                print(
                    'Val. Epoch: {}/{}. Union loss: {:1.5f}. Instance loss: {:1.5f}. '
                    'Total loss: {:1.5f}'.format(epoch, opt.num_epochs,
                                                 union_loss, instance_loss,
                                                 loss))

                writer.add_scalars('Union Action Classification Loss',
                                   {'val': union_act_cls_loss}, step)
                writer.add_scalars('Union Subject Regression Loss',
                                   {'val': union_sub_reg_loss}, step)
                writer.add_scalars('Union Object Regression Loss',
                                   {'val': union_obj_reg_loss}, step)
                writer.add_scalars('Union Diff Regression Loss',
                                   {'val': union_diff_reg_loss}, step)

                writer.add_scalars('Instance Action Classification Loss',
                                   {'val': inst_act_cls_loss}, step)
                writer.add_scalars('Instance Object Classification Loss',
                                   {'val': inst_obj_cls_loss}, step)
                writer.add_scalars('Instance Regression Loss',
                                   {'val': inst_obj_reg_loss}, step)

                writer.add_scalars('Total Loss', {'val': loss}, step)
                writer.add_scalars('Union Loss', {'val': union_loss}, step)
                writer.add_scalars('Instance Loss', {'val': instance_loss},
                                   step)

                # Keep a separate "_best" checkpoint whenever validation loss
                # improves by more than es_min_delta.
                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{best_epoch}_best.pth'
                    )

                # model.train()
                # scheduler.step()
                # NOTE(review): val_loss only exists after a validation pass,
                # so the scheduler step and LR-floor check are assumed to live
                # inside this block — confirm.
                scheduler.step(np.mean(val_loss))
                # Stop once the scheduler has cut the LR below 1% of opt.lr.
                if optimizer.param_groups[0]['lr'] < opt.lr / 100:
                    break

                # Early stopping
                # if epoch - best_epoch > opt.es_patience > 0:
                #     print('[Info] Stop training at epoch {}. The lowest loss achieved is {}'.format(epoch, loss))
                #     break
    except KeyboardInterrupt:
        # Ctrl-C ends training without saving an extra checkpoint.
        # save_checkpoint(model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
        writer.close()
    writer.close()
def train(opt):
    """Train an EfficientDet detector on tfrecord data and log to TensorBoard.

    Builds training and validation ``TUMuchTrafficDataset`` loaders from
    tfrecord files, creates an ``EfficientDetBackbone``, optionally restores
    weights and freezes parts of the network, then runs the train/validate
    loop with step-interval checkpoints and patience-based early stopping.

    Args:
        opt: options object. Attributes read here: ``project``, ``saved_path``,
            ``log_path``, ``batch_size``, ``num_workers``, ``advprop``,
            ``data_path``, ``val_path``, ``load_backbone``, ``compound_coef``,
            ``load_weights``, ``head_only``, ``no_effnet``, ``debug``,
            ``optim``, ``lr``, ``num_epochs``, ``save_interval``,
            ``val_interval``, ``es_min_delta`` and ``es_patience``.

    Side effects:
        * ``opt.saved_path`` and ``opt.log_path`` are rewritten in place.
        * ``CUDA_VISIBLE_DEVICES`` is set to ``-1`` when ``params.num_gpus == 0``.
        * Checkpoints are written through ``save_checkpoint``.
        * The process sleeps 30 seconds after every training epoch.
    """
    # Function-local import kept from the original, hoisted out of the loop.
    import time

    def _collect_tfrecords(path):
        """Return [path] for a single .tfrecord file, else all files below it."""
        if path.endswith(".tfrecord"):
            return [path]
        return [str(x.absolute()) for x in Path(path).rglob('*.tfrecord')]

    def _make_freezer(name_fragments):
        """Build a Module.apply callback freezing matching module classes."""

        def _freeze(m):
            classname = m.__class__.__name__
            if any(ntl in classname for ntl in name_fragments):
                for param in m.parameters():
                    param.requires_grad = False

        return _freeze

    # NOTE(review): block nesting below was reconstructed from a
    # whitespace-collapsed source; ambiguous spots are marked.
    params = Params(f'projects/{opt.project}.yml')
    global_validation_it = 0

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    # NOTE(review): when CUDA is available only the CUDA generator is seeded
    # here; the CPU generator is left unseeded. Kept as in the original.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': TUMuchTrafficDataset.collater,
        'num_workers': opt.num_workers
    }

    val_params = {
        'batch_size': opt.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': TUMuchTrafficDataset.collater,
        'num_workers': opt.num_workers
    }

    if opt.advprop:
        # for models using advprop pretrained weights
        normalize = transforms.Lambda(
            lambda mem: {
                "img": (mem["img"] * 2.0 - 1.0).astype(np.float32),
                "annot": mem["annot"]
            })
    else:
        # for other models
        normalize = Normalizer(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])

    tfs = transforms.Compose([
        TopCutter(886),
        transforms.RandomApply([Negate()], p=0.1),
        transforms.RandomApply([ContrastEnhancementWithNoiseReduction()],
                               p=0.1),
        Resize(384),
        RandomCrop(384, 768),
        normalize,
        HorizontalFlip(prob=0.5),
        transforms.RandomApply([AddGaussianNoise(0, 2.55)], p=0.5),
        transforms.RandomApply([AddSaltAndPepperNoise(prob=0.0017)], p=0.5),
        ToTensor()
    ])

    training_set = TUMuchTrafficDataset(
        tfrecord_paths=_collect_tfrecords(opt.data_path), transform=tfs)
    training_generator = DataLoader(training_set, **training_params)

    # Fix: the original tested opt.data_path here, so a single-file training
    # path silently became the validation set, and a single-file val_path
    # produced an empty one.
    # NOTE(review): validation reuses the randomised training transforms.
    val_set = TUMuchTrafficDataset(
        tfrecord_paths=_collect_tfrecords(opt.val_path), transform=tfs)
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes the anchor settings read from the project
    # YAML as Python code; only safe while that file is trusted.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales),
                                 load_weights=bool(opt.load_backbone))

    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("# Params: {:08d}".format(pytorch_total_params))

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        try:
            # Checkpoints are named "..._<epoch>_<step>.pth"; resume from <step>.
            last_step = int(
                os.path.basename(weights_path).split('_')[-1].split('.')[0])
        except (TypeError, ValueError):
            # Name carries no step number: start counting from 0.
            last_step = 0

        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        model.apply(_make_freezer(['EfficientNet', 'BiFPN']))
        print('[Info] freezed backbone')

    # freeze backbone (only efficientnet) if train no_effnet
    if opt.no_effnet:
        model.apply(_make_freezer(['EfficientNet']))
        print('[Info] freezed backbone')

    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print("# Training Parameters: {:06}".format(pytorch_total_params))

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Synchronized BN normalises over the mini-batches of all GPUs together,
    # which stabilises training when the per-GPU batch is smaller than 4,
    # at a small speed cost.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M")}/')

    # wrap the model with loss function, to reduce the memory usage on gpu0 and speedup
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    # patience=1e6 means the plateau scheduler practically never reduces the LR.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=1e6,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # Skip epochs fully covered by the restored step counter.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for it, data in enumerate(progress_bar):
                # Fast-forward through batches already done in a resumed epoch.
                if it < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    # NOTE(review): assumed to run for every batch, not only
                    # in the single-GPU branch — confirm.
                    global_validation_it += 1
                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, it + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    # A failing batch is reported and skipped rather than
                    # aborting the run.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            # sleep for 30 seconds, to reduce overheating
            time.sleep(30)

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for it, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        # The original moved the tensors to the GPU twice in
                        # this loop; one transfer is sufficient.
                        if params.num_gpus == 1:
                            # if only one gpu, just send it to cuda:0
                            # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        # Only the first 12 validation batches are plotted.
                        if it < 12:
                            plot_tensorboard(imgs, annot, model, writer,
                                             global_validation_it, it, "")
                            # NOTE(review): counter assumed to advance once
                            # per plotted batch — confirm.
                            global_validation_it += 1

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)

                # Checkpoint whenever validation improves by > es_min_delta.
                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                    )

                model.train()

                # Early stopping (disabled when es_patience <= 0)
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        # Keep the current progress when the user interrupts training.
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        # Flush TensorBoard events on every exit path, including errors.
        writer.close()
def infer(self, image):
    """Run EfficientDet-D0 on one image and return rectangle annotations.

    The model is built and its weights are loaded from ``self.path`` on
    every call, and inference runs on the CPU in float32.

    Args:
        image: the input image; it is converted with ``np.array`` and its
            last axis is reversed (RGB to BGR, per the original comment).

    Returns:
        A list with one dict per detection, holding ``"confidence"`` (score
        as a string), ``"label"`` (looked up in ``self.labels`` by the
        class id as a string; ``None`` if missing), ``"points"``
        (``[x1, y1, x2, y2]`` as Python floats) and ``"type"``
        (``"rectangle"``).
    """
    img = np.array(image)
    img = img[:, :, ::-1]  # rgb 2 bgr

    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    threshold = 0.25
    iou_threshold = 0.25

    use_cuda = False
    use_float16 = False
    # These are process-wide cudnn switches, not per-call settings.
    cudnn.fastest = False
    cudnn.benchmark = False

    input_size = 512
    ori_imgs, framed_imgs, framed_metas = preprocess(img, max_size=input_size)

    if use_cuda:
        x = torch.stack(
            [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    # NHWC -> NCHW in the dtype the model will run in.
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    # NOTE(review): rebuilding and reloading the model per call is costly;
    # caching it on the instance would need a decision on where to store it.
    model = EfficientDetBackbone(compound_coef=0,
                                 num_classes=len(self.labels),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(self.path, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold, iou_threshold)

    # Map boxes from the padded/resized frame back to the original image.
    pred = invert_affine(framed_metas, out)

    results = []
    for i, _ in enumerate(ori_imgs):
        detections = pred[i]
        for j in range(len(detections['rois'])):
            # Fix: the original assigned the last coordinate to a misspelt
            # name (yb4), so ybr stayed a NumPy scalar in the result.
            xt1, yt1, xbr, ybr = (float(v) for v in detections['rois'][j])
            obj = str(detections['class_ids'][j])
            results.append({
                "confidence": str(detections['scores'][j]),
                "label": self.labels.get(obj),
                "points": [xt1, yt1, xbr, ybr],
                "type": "rectangle",
            })

    return results
if __name__ == '__main__':
    # Script entry point: make sure detection results exist for the
    # validation split, then score them against the ground truth.
    SET_NAME = params['val_set']
    VAL_GT = f'datasets/{params["project_name"]}/annotations/instances_{SET_NAME}.json'
    VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/'
    MAX_IMAGES = 10000

    coco_gt = COCO(VAL_GT)
    # Evaluate at most MAX_IMAGES images.
    image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

    results_file = f'{SET_NAME}_bbox_results.json'

    # Stored results are reused only when overriding was not requested and
    # the results file is already on disk.
    reuse_previous = (not override_prev_results) and os.path.exists(
        results_file)

    if not reuse_previous:
        state_dict = torch.load(weights_path,
                                map_location=torch.device('cpu'))

        model = EfficientDetBackbone(compound_coef=compound_coef,
                                     num_classes=len(obj_list),
                                     ratios=eval(params['anchors_ratios']),
                                     scales=eval(params['anchors_scales']))
        model.load_state_dict(state_dict)
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model.cuda(gpu)

            if use_float16:
                model.half()

        evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt, model)

    _eval(coco_gt, image_ids, results_file)
class ObjectDetectionService(PTServingBaseService):
    """Serving wrapper around an EfficientDet-D0 detector with 44 classes.

    ``inference`` is the entry point: it chains ``_preprocess``,
    ``_inference`` and ``_postprocess`` and records latency for each stage.
    """

    def __init__(self, model_name, model_path):
        """Build the detector and load its weights on the CPU.

        Args:
            model_name: name used as the prefix of the latency metric keys.
            model_path: accepted for interface compatibility but not used;
                weights are always read from ``models_best.pth`` next to
                this file.
        """
        # effdet
        self.model_name = model_name
        self.model_path = os.path.join(os.path.dirname(__file__),
                                       'models_best.pth')
        # Request key whose images are fed to the detector.
        self.input_image_key = 'images'
        self.anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        self.anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        self.compound_coef = 0
        self.threshold = 0.2
        self.iou_threshold = 0.2
        # Class names indexed by predicted class id (runtime data, returned
        # to the client unchanged).
        self.obj_list = [
            '一次性快餐盒', '书籍纸张', '充电宝', '剩饭剩菜', '包', '垃圾桶', '塑料器皿', '塑料玩具',
            '塑料衣架', '大骨头', '干电池', '快递纸袋', '插头电线', '旧衣服', '易拉罐', '枕头',
            '果皮果肉', '毛绒玩具', '污损塑料', '污损用纸', '洗护用品', '烟蒂', '牙签', '玻璃器皿',
            '砧板', '筷子', '纸盒纸箱', '花盆', '茶叶渣', '菜帮菜叶', '蛋壳', '调料瓶', '软膏',
            '过期药物', '酒瓶', '金属厨具', '金属器皿', '金属食品罐', '锅', '陶瓷器皿', '鞋',
            '食用油桶', '饮料瓶', '鱼骨'
        ]
        # Input resolution indexed by compound coefficient.
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = self.input_sizes[self.compound_coef]

        self.model = EfficientDetBackbone(compound_coef=self.compound_coef,
                                          num_classes=len(self.obj_list),
                                          ratios=self.anchor_ratios,
                                          scales=self.anchor_scales)
        self.model.load_state_dict(
            torch.load(self.model_path, map_location='cpu'))
        self.model.requires_grad_(False)
        self.model.eval()

    def _preprocess(self, data):
        """Run ``preprocess`` on every file of every request key.

        Args:
            data: mapping ``key -> {file_name: file_content}``.

        Returns:
            Mapping ``key -> [framed_imgs, framed_metas]``. Each file
            overwrites the entry of its key, so only the last file per key
            is kept.
        """
        preprocessed_data = {}
        for k, v in data.items():
            for file_name, file_content in v.items():
                ori_imgs, framed_imgs, framed_metas = preprocess(
                    file_content, max_size=self.input_size)
                preprocessed_data[k] = [framed_imgs, framed_metas]
        return preprocessed_data

    def _inference(self, data):
        """Run the detector on the preprocessed images.

        Args:
            data: output of ``_preprocess``; only ``self.input_image_key``
                is read.

        Returns:
            An ``OrderedDict`` with parallel lists ``detection_classes``
            (names from ``self.obj_list``), ``detection_scores`` (floats)
            and ``detection_boxes`` (``[x1, y1, x2, y2]`` integers),
            concatenated over all images.
        """
        framed_imgs, framed_metas = data[self.input_image_key]

        if torch.cuda.is_available():
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            # Moving an already-moved model is a no-op, so this is safe to
            # repeat on every request.
            self.model = self.model.cuda()
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        # NHWC -> NCHW float32.
        x = x.to(torch.float32).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, self.threshold,
                              self.iou_threshold)

        # Map boxes from the padded/resized frame back to the original image.
        out = invert_affine(framed_metas, out)

        result = OrderedDict()
        result['detection_classes'] = []
        result['detection_scores'] = []
        result['detection_boxes'] = []

        for detections in out:
            for j in range(len(detections['rois'])):
                # Fix: np.int was removed in NumPy 1.24; the builtin int is
                # the dtype it used to alias.
                x1, y1, x2, y2 = detections['rois'][j].astype(int)
                result['detection_boxes'].append([x1, y1, x2, y2])
                result['detection_classes'].append(
                    self.obj_list[detections['class_ids'][j]])
                result['detection_scores'].append(
                    float(detections['scores'][j]))

        return result

    def _postprocess(self, data):
        """Return the inference result unchanged."""
        return data

    def inference(self, data):
        """Run preprocess, inference and postprocess, and record latencies.

        Args:
            data: raw input from the request (map of objects).

        Returns:
            The result of ``_postprocess`` with an extra ``'latency_time'``
            entry holding the total latency as a string such as
            ``'12.3 ms'``.
        """
        pre_start_time = time.time()
        data = self._preprocess(data)
        infer_start_time = time.time()

        # Update preprocess latency metric
        pre_time_in_ms = (infer_start_time - pre_start_time) * 1000
        logger.info('preprocess time: ' + str(pre_time_in_ms) + 'ms')

        if self.model_name + '_LatencyPreprocess' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyPreprocess'].update(pre_time_in_ms)

        data = self._inference(data)
        infer_end_time = time.time()
        infer_in_ms = (infer_end_time - infer_start_time) * 1000

        logger.info('infer time: ' + str(infer_in_ms) + 'ms')
        data = self._postprocess(data)

        post_time_in_ms = (time.time() - infer_end_time) * 1000
        logger.info('postprocess time: ' + str(post_time_in_ms) + 'ms')

        overall_in_ms = pre_time_in_ms + infer_in_ms + post_time_in_ms

        # Update inference latency metric.
        # Fix: this metric used to receive the post-processing time.
        if self.model_name + '_LatencyInference' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyInference'].update(infer_in_ms)

        # Update overall latency metric.
        # Fix: the inference time used to be left out of the total, unlike
        # the logged and returned latency.
        if self.model_name + '_LatencyOverall' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyOverall'].update(overall_in_ms)

        logger.info('latency: ' + str(overall_in_ms) + 'ms')
        data['latency_time'] = str(round(overall_in_ms, 1)) + ' ms'
        return data
] # tf bilinear interpolation is different from any other's, just make do input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536] input_size = input_sizes[ compound_coef] if force_input_size is None else force_input_size # load model model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list)) model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth')) model.requires_grad_(False) model.eval() if use_cuda: model = model.cuda() if use_float16: model = model.half() # function for display def display(preds, imgs): for i in range(len(imgs)): if len(preds[i]['rois']) == 0: return imgs[i] for j in range(len(preds[i]['rois'])): (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int) cv2.rectangle(imgs[i], (x1, y1), (x2, y2), (255, 255, 0), 2) obj = obj_list[preds[i]['class_ids'][j]] score = float(preds[i]['scores'][j])
def _resume_step_from_checkpoint(weights_path):
    """Recover the global training step encoded in a checkpoint file name.

    ``train`` writes two naming schemes:

    * periodic / interrupt checkpoints:
      ``efficientdet-d{coef}_{epoch}_{step}.pth``
    * best-validation checkpoints:
      ``efficientdet-d{coef}_{epoch}_{step}_{loss}.pth``

    For the second form the trailing field is a float loss, so the step is
    the field before it. Any other name falls back to the integer prefix of
    the last ``_``-separated field.

    Args:
        weights_path: Path (or bare file name) of the checkpoint.

    Returns:
        The parsed step as an ``int``, or ``0`` when no step can be parsed
        (for example a pretrained ``efficientdet-d4.pth``).
    """
    segments = os.path.basename(weights_path).split('_')
    tail = segments[-1]
    tail_stem = os.path.splitext(tail)[0]

    # A tail that is a float but not an int is a loss suffix, not a step.
    has_loss_suffix = False
    if len(segments) > 1:
        try:
            int(tail_stem)
        except ValueError:
            try:
                float(tail_stem)
            except ValueError:
                pass
            else:
                has_loss_suffix = True

    candidate = segments[-2] if has_loss_suffix else tail.split('.')[0]
    try:
        return int(candidate)
    except ValueError:
        return 0


def train(opt):
    """Train an EfficientDet model on the TobyCustom dataset.

    Builds the train/validation loaders, creates the backbone with a
    single-class classification head, restores weights, and runs the
    train/validate loop with TensorBoard logging, periodic checkpoints,
    best-validation checkpoints and optional early stopping.

    Args:
        opt: Options object. Attributes read here: ``project``,
            ``saved_path``, ``log_path``, ``batch_size``, ``num_workers``,
            ``compound_coef``, ``head_only``, ``debug``, ``optim``, ``lr``,
            ``num_epochs``, ``save_interval``, ``val_interval``,
            ``es_min_delta`` and ``es_patience``. ``saved_path``,
            ``log_path`` and ``load_weights`` are overwritten in place.

    Returns:
        None. A ``KeyboardInterrupt`` during training is caught and a
        checkpoint is saved before returning.
    """
    params = Params(f'projects/{opt.project}.yml')
    # NOTE(review): this overrides whatever GPU count the project file
    # declares; with 4 here the ``num_gpus == 0`` / ``== 1`` paths below
    # never run. Confirm this is intended before reusing on other machines.
    params.num_gpus = 4

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }
    val_params = {
        'batch_size': opt.batch_size * 4,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }

    # Input resolution per compound coefficient (index = coefficient).
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[opt.compound_coef]
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    # Alternative (Windows) dataset location kept for reference:
    # root_train = 'D:/Etri_tracking_data/Etri_full/train_1024/'
    # side_train = 'D:/Etri_tracking_data/Etri_full/train_Sejin_1024/'
    # ground_truth_train = 'D:/Etri_tracking_data/Etri_full/train_1024.txt'
    root_train = '/home/../../data3/giangData/train_1024/'
    side_train = '/home/../../data3/giangData/train_Sejin_1024/'
    ground_truth_train = '/home/../../data3/giangData/train_1024.txt'
    training_set = TobyCustom(
        root_dir=root_train,
        side_dir=side_train,
        annot_path=ground_truth_train,
        transform=ComposeAlb([
            Flip_X(),
            Flip_Y(),
            Equalize(),
            Brightness(),
            Constrast(),
            Resizer(input_size, num_channels=3),
            Normalizer(mean=norm_mean, std=norm_std),
        ]))
    training_generator = DataLoader(training_set, **training_params)

    # root_val = 'D:/Etri_tracking_data/Etri_full/val_1024/'
    # side_val = 'D:/Etri_tracking_data/Etri_full/val_Sejin_1024/'
    # ground_truth_val = 'D:/Etri_tracking_data/Etri_full/val_1024.txt'
    root_val = '/home/../../data3/giangData/val_1024/'
    side_val = '/home/../../data3/giangData/val_Sejin_1024/'
    ground_truth_val = '/home/../../data3/giangData/val_1024.txt'
    val_set = TobyCustom(
        root_dir=root_val,
        side_dir=side_val,
        annot_path=ground_truth_val,
        transform=ComposeAlb([
            Resizer(input_size, num_channels=3),
            Normalizer(mean=norm_mean, std=norm_std),
        ]))
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): ``eval`` runs text taken from the project file; only use
    # project files you trust.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # Swap the classification head for a single-class one *before* loading
    # weights, so a checkpoint saved with this head fits.
    from efficientdet.model import Classifier
    # model.backbone_net.model._conv_stem.conv = nn.Conv2d(4, 48, kernel_size=(3, 3), stride=(2, 2), bias=False)
    # model.classifier.header.pointwise_conv.conv = nn.Conv2d(224, 9, kernel_size=(1, 1), stride=(1, 1))
    model.classifier = Classifier(
        in_channels=model.fpn_num_filters[opt.compound_coef],
        num_anchors=model.num_anchors,
        num_classes=1,
        num_layers=model.box_class_repeats[opt.compound_coef],
        pyramid_levels=model.pyramid_levels[opt.compound_coef])

    # NOTE(review): the checkpoint to resume from is hard-coded, so any
    # ``load_weights`` supplied by the caller is ignored and the
    # "initializing weights" branch below cannot be reached.
    # opt.load_weights = 'C:/Users/giang/Desktop/efficientdet-d4_107_15228_6.1788892433756875.pth'
    opt.load_weights = './../result_3channel_21/save/coco/efficientdet-d4_21_3000.pth'
    # for EfficientNetB5, please test again with B4

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        last_step = _resume_step_from_checkpoint(weights_path)

        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                "[Warning] Don't panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already."
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:

        def freeze_backbone(m):
            # Disable gradients for every module whose class name marks it
            # as part of the backbone or the feature pyramid.
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when using multiple GPUs and the batch size per GPU is
    # lower than 4. Useful when GPU memory is limited: with tiny per-GPU
    # batches plain BN makes training unstable or slow to converge. sync_bn
    # packs the mini-batches of all GPUs into one batch for normalization,
    # at the cost of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path +
        f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # Wrap the model with the loss function, to reduce the memory usage on
    # gpu0 and speed up training.
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    opt.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # Skip whole epochs already covered by the restored step.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # Skip the batches of the resumed epoch that were already
                # trained on before the checkpoint was written.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # With a single GPU the batch is moved here; with
                        # several, CustomDataParallel distributes it.
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    # Ignore batches that give no usable training signal.
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, opt.num_epochs, batch_idx + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss',
                                       {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    # Deliberate best-effort: report the failing batch and
                    # keep training on the next one.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue

            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in val_generator:
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss},
                                   step)
                writer.add_scalars('Classfication_loss', {'val': cls_loss},
                                   step)
                print('\n')

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    # Best-validation checkpoints carry the loss as an extra
                    # trailing field in the file name.
                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}_{loss}.pth'
                    )

                model.train()

                # Early stopping (only active when es_patience is positive).
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        # Close the TensorBoard writer exactly once, whatever ended the run.
        writer.close()
class EfficientDet(object):
    """Callable EfficientDet inference wrapper.

    Loads an ``EfficientDetBackbone`` once at construction time; calling the
    instance runs preprocessing, the forward pass and post-processing and
    returns per-image boxes, scores and class ids.
    """

    # Class names indexed by predicted class id. The empty strings are kept
    # so indices stay aligned (presumably unused COCO category ids).
    obj_list = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
        'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', '',
        'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
        'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
        'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
        '', 'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse',
        'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
        'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
        'teddy bear', 'hair drier', 'toothbrush'
    ]

    def __init__(self,
                 weightfile,
                 score_thresh,
                 nms_thresh,
                 is_xywh=True,
                 use_cuda=True,
                 use_float16=False,
                 compound_coef=0,
                 force_input_size=None):
        """Build the network and load its weights.

        Args:
            weightfile: Path to the ``.pth`` state dict to load.
            score_thresh: Score threshold forwarded to ``postprocess``.
            nms_thresh: IoU threshold forwarded to ``postprocess``.
            is_xywh: When true, ``__call__`` converts boxes with
                ``xyxy_to_xywh`` before returning them.
            use_cuda: Move the model and inputs to the GPU.
            use_float16: Run the model and inputs in half precision.
                (Previously accepted but ignored.)
            compound_coef: EfficientDet variant (0-7); must match the
                weights in ``weightfile``.
            force_input_size: Input size override; ``None`` uses the
                default size for ``compound_coef``.
        """
        # constants
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.use_cuda = use_cuda
        self.is_xywh = is_xywh
        self.use_float16 = use_float16

        cudnn.fastest = True
        cudnn.benchmark = True

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        if force_input_size is None:
            self.input_size = input_sizes[compound_coef]
        else:
            self.input_size = force_input_size

        # load model
        self.model = EfficientDetBackbone(compound_coef=compound_coef,
                                          num_classes=len(self.obj_list))
        # Deserialize onto the CPU first so GPU-saved weights also load on a
        # CPU-only host; the model is moved to the GPU below if requested.
        self.model.load_state_dict(torch.load(weightfile, map_location='cpu'))
        print('Loading weights from %s... Done!' % (weightfile))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()

        # Box decoding / clipping helpers used by ``postprocess``.
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

    def __call__(self, imgs):
        """Run detection on ``imgs``.

        Args:
            imgs: Input passed unchanged to ``preprocess`` (assumed to be a
                sequence of images - TODO confirm against ``preprocess``).

        Returns:
            ``(boxes, scores, class_ids)``, each a list with one entry per
            image, or ``(None, None, None)`` when post-processing yields no
            outputs. Boxes go through ``xyxy_to_xywh`` when ``is_xywh`` is
            set.
        """
        # frame preprocessing
        _, framed_imgs, framed_metas = preprocess(imgs,
                                                  max_size=self.input_size)

        if self.use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        dtype = torch.float16 if self.use_float16 else torch.float32
        # Reorder axes to (0, 3, 1, 2); presumably NHWC -> NCHW.
        x = x.to(dtype).permute(0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            out = postprocess(x, anchors, regression, classification,
                              self.regressBoxes, self.clipBoxes,
                              self.score_thresh, self.nms_thresh)

        # Map boxes back to the original image coordinates.
        out = invert_affine(framed_metas, out)

        if len(out) == 0:
            return None, None, None

        rois = [o['rois'] for o in out]
        scores = [o['scores'] for o in out]
        class_ids = [o['class_ids'] for o in out]
        if self.is_xywh:
            return xyxy_to_xywh(rois), scores, class_ids
        else:
            return rois, scores, class_ids
def train(args):
    """Run 10-fold cross-validation training on the eye dataset.

    Every fold starts from the same freshly initialised model, optimizer and
    scheduler state (snapshotted to ``init_weight.pth``). Images are split
    into two groups by file name, each group is cut into ``k`` parts, and
    fold ``i`` holds out part ``i`` of both groups for testing. Per-epoch
    losses and accuracies are appended to ``./logs/<timestamp>/cv_log.txt``,
    followed by the averages of the last-epoch test metrics over all folds.

    Args:
        args: Options object. Attributes read here: ``gpu``, ``batch_size``,
            ``num_workers``, ``compound_coef``, ``optim``, ``lr``,
            ``patience``, ``saved_path``, ``dataset_path`` and ``epoch``.

    Returns:
        None.
    """
    print("Hi")
    present_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
    params = Params('projects/eye.yml')
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Seed every RNG in play. The CPU generator matters too: it drives
    # DataLoader shuffling and any weight initialisation done on the CPU.
    torch.manual_seed(20)
    torch.cuda.manual_seed(20)
    torch.cuda.manual_seed_all(20)
    np.random.seed(20)
    random.seed(20)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    prepare_dir(args, present_time)
    log_file = f'./logs/{present_time}/cv_log.txt'

    training_params = {
        'batch_size': args.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': args.num_workers,
    }
    # NOTE(review): drop_last=True also applies to the held-out part; a part
    # smaller than one batch yields no batches and the accuracy division
    # below then fails on a zero total. Confirm fold sizes vs. batch size.
    val_params = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': args.num_workers,
    }

    # Input resolution per compound coefficient (index = coefficient).
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    # NOTE(review): ``eval`` runs text taken from the project file; only use
    # project files you trust.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=args.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # Checkpoint resuming is not used here: training always starts from
    # freshly initialised weights.
    init_weights(model)

    # Wrap the model with the loss function, to reduce the memory usage on
    # gpu0 and speed up training.
    model = ModelWithLoss(model)
    model = model.cuda()

    if args.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), args.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.lr,
                                    momentum=0.9,
                                    nesterov=True)

    # patience is counted in epochs (the scheduler is stepped once per epoch)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=args.patience, verbose=True)

    # Snapshot the pristine state so each fold can be reset to it.
    init_weight_path = f"{args.saved_path}/init_weight.pth"
    torch.save(
        {
            "model_state_dict": model.model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "scheduler_state_dict": scheduler.state_dict(),
        }, init_weight_path)

    k = 10
    train_img_list = glob.glob(f"{args.dataset_path}/train/*")
    normal_img_list = []
    yellow_img_list = []
    for img in train_img_list:
        # Decide by the entry's own name only, so an 'n_' somewhere in the
        # parent directories cannot put every image into the first group.
        if 'n_' in os.path.basename(img):
            normal_img_list.append(img)
        else:
            yellow_img_list.append(img)

    random.shuffle(normal_img_list)
    random.shuffle(yellow_img_list)

    normal_part_num = math.ceil(len(normal_img_list) / k)
    yellow_part_num = math.ceil(len(yellow_img_list) / k)

    last_acc = []
    last_loss = []

    for i in range(k):
        # Reset model / optimizer / scheduler; read the snapshot once per fold.
        init_state = torch.load(init_weight_path)
        model.model.load_state_dict(init_state["model_state_dict"])
        optimizer.load_state_dict(init_state["optimizer_state_dict"])
        scheduler.load_state_dict(init_state["scheduler_state_dict"])
        model.train()

        # Part i of each group is held out; everything else is trained on.
        normal_lo, normal_hi = i * normal_part_num, (i + 1) * normal_part_num
        yellow_lo, yellow_hi = i * yellow_part_num, (i + 1) * yellow_part_num
        sub_train_img_list = (normal_img_list[:normal_lo] +
                              normal_img_list[normal_hi:] +
                              yellow_img_list[:yellow_lo] +
                              yellow_img_list[yellow_hi:])
        sub_test_img_list = (normal_img_list[normal_lo:normal_hi] +
                             yellow_img_list[yellow_lo:yellow_hi])

        random.shuffle(sub_train_img_list)
        random.shuffle(sub_test_img_list)

        print("---")
        for img in sub_test_img_list:
            print(img)
        print("---")

        # Both splits are drawn from the train folder, so they share one
        # annotation file.
        train_anno_txt_path = f"{args.dataset_path}/train.txt"
        test_anno_txt_path = f"{args.dataset_path}/train.txt"

        train_transform = transforms.Compose([
            # Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            randomScaleWidth(),
            randomBlur(),
            randomBrightness(),
            randomHue(),
            randomSaturation(),
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[args.compound_coef]),
        ])
        test_transform = transforms.Compose([
            # Normalizer(mean=params.mean, std=params.std),
            Augmenter(),
            Normalizer(mean=params.mean, std=params.std),
            Resizer(input_sizes[args.compound_coef]),
        ])

        train_set = EyeDataset(sub_train_img_list, train_anno_txt_path,
                               train_transform)
        test_set = EyeDataset(sub_test_img_list, test_anno_txt_path,
                              test_transform)
        training_generator = DataLoader(train_set, **training_params)
        val_generator = DataLoader(test_set, **val_params)

        for epoch in range(args.epoch):
            # ---- training pass ----
            model.train()
            total_correct = 0
            total = 0
            total_loss_ls = []
            for data in training_generator:
                imgs = data['img'].cuda()
                annot = data['annot'].cuda()

                optimizer.zero_grad()
                reg_loss, cls_head_loss, cls_correct_num, total_num = model(
                    imgs, annot, obj_list=params.obj_list)
                total_correct += cls_correct_num
                total += total_num
                reg_loss = reg_loss.mean()
                loss = cls_head_loss + reg_loss
                total_loss_ls.append(loss.item())

                # The loss is still recorded above, but no update is made
                # from a zero or non-finite loss.
                if loss == 0 or not torch.isfinite(loss):
                    continue

                loss.backward()
                optimizer.step()

            total_loss = np.mean(total_loss_ls)
            scheduler.step(total_loss)

            with open(log_file, 'a') as fp:
                fp.write(f"Epoch: {i}/{epoch}/{args.epoch}\n")
                fp.write(
                    f"Training loss: {total_loss:.6f} | acc: {total_correct / total * 100:.2f}\n"
                )

            # ---- evaluation pass on the held-out part ----
            model.eval()
            with torch.no_grad():
                total = 0
                total_correct = 0
                total_loss_ls = []
                for data in val_generator:
                    imgs = data['img'].cuda()
                    annot = data['annot'].cuda()
                    reg_loss, cls_head_loss, cls_correct_num, total_num = model(
                        imgs, annot, obj_list=params.obj_list)
                    total_correct += cls_correct_num
                    total += total_num
                    reg_loss = reg_loss.mean()
                    loss = reg_loss + cls_head_loss
                    total_loss_ls.append(loss.item())

            total_loss = np.mean(total_loss_ls)
            with open(log_file, 'a') as fp:
                fp.write(
                    f"Testing loss: {total_loss:.6f} | acc: {total_correct / total * 100:.2f}\n\n"
                )

            # Only the final epoch of each fold feeds the summary.
            if epoch == args.epoch - 1:
                last_loss.append(total_loss)
                last_acc.append(total_correct / total * 100)

    with open(log_file, 'a') as fp:
        fp.write("\n===========\n\n")
        fp.write(f"Avg. loss: {np.mean(np.array(last_loss)):.2f}\n")
        fp.write(f"Avg. accuracy: {np.mean(np.array(last_acc)):.2f}\n")