def main(args=None):
    """Evaluate a trained RetinaNet checkpoint on the COCO val2017 split.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)
    parser = parser.parse_args(args)

    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))
    # NOTE(review): debug truncation — only the first 50 images are evaluated.
    # Remove this line for a full evaluation run.
    dataset_val.image_ids = dataset_val.image_ids[:50]  # TEST

    # Create the model
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=True)

    # Fix: the original tested torch.cuda.is_available() three times behind an
    # always-True `use_gpu` flag. One branch is equivalent. The state dict is
    # loaded before the DataParallel wrap because the checkpoint keys carry no
    # 'module.' prefix.
    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    coco_eval.evaluate_coco(dataset_val, retinanet)
def main(args=None):
    """Evaluate a RetinaNet checkpoint on the AIZOO test set.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    # Fix: --model_path was parsed but then ignored in favour of a hard-coded
    # path. Expose the old constants as defaults so no-arg invocations behave
    # exactly as before while the flags actually work.
    parser.add_argument('--model_path', help='Path to model', type=str,
                        default='./model_final.pt')
    parser.add_argument('--test_path', help='Path to test images directory',
                        type=str, default='./test')
    parser = parser.parse_args(args)

    model_path = parser.model_path
    test_path = parser.test_path
    dataset_test = AIZOODataset(test_path,
                                transforms=transforms.Compose(
                                    [Normalizer(), Resizer()]))

    # Create the model (3 classes; weights come from the checkpoint below).
    retinanet = model.resnet50(num_classes=3, pretrained=False)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        # NOTE(review): the GPU branch expects a fully serialized model while
        # the CPU branch expects a bare state dict — confirm which format
        # model_final.pt actually uses; only one of these can be right.
        retinanet = torch.load(model_path)
        #retinanet.load_state_dict(checkpoint.module)
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    #retinanet.freeze_bn()

    coco_eval.evaluate_coco(dataset_test, retinanet)
def main(args=None):
    """Re-save a RetinaNet state-dict checkpoint in the legacy (non-zipfile)
    serialization format, rebuilding the architecture from a config file.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--model_path', help='Path to the model file.')
    parser.add_argument('--configfile', help='Path to the config file.')
    parser.add_argument('--model_out_path', help='Path to the output model file')
    parser = parser.parse_args(args)

    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:
        # Required architecture settings.
        depth = int(configs['TRAINING']['depth'])
        num_classes = int(configs['TRAINING']['num_classes'])
        try:
            # Optional anchor settings; None falls back to model defaults.
            ratios = json.loads(configs['MODEL']['ratios'])
            scales = json.loads(configs['MODEL']['scales'])
        except Exception as e:
            print(e)
            print('USING DEFAULT RATIOS AND SCALES')
            ratios = None
            scales = None
    except Exception as e:
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        sys.exit()

    # Create the model
    if depth == 18:
        retinanet = model.resnet18(num_classes=num_classes,
                                   pretrained=False,
                                   ratios=ratios,
                                   scales=scales)
    elif depth == 50:
        retinanet = model.resnet50(num_classes=num_classes,
                                   pretrained=True,
                                   ratios=ratios,
                                   scales=scales)
    else:
        print(f"DEPTH FROM : {parser.configfile} INACCURATE. MUST BE 18 or 50")
        sys.exit(0)

    # Load the checkpoint onto whichever device is available.
    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet.load_state_dict(torch.load(parser.model_path))
    else:
        retinanet.load_state_dict(
            torch.load(parser.model_path, map_location=torch.device('cpu')))

    # _use_new_zipfile_serialization=False keeps the output loadable by
    # PyTorch versions that predate the zipfile format.
    torch.save(retinanet.state_dict(),
               parser.model_out_path,
               _use_new_zipfile_serialization=False)
def Model(self, model_name="resnet18", gpu_devices=None):
    '''
    User function: Set Model parameters

        Available Models
            resnet18
            resnet34
            resnet50
            resnet101
            resnet152

    Args:
        model_name (str): Select model from available models
        gpu_devices (list): List of GPU Device IDs to be used in training
            (defaults to [0])

    Raises:
        ValueError: If model_name is not one of the supported backbones.

    Returns:
        None
    '''
    # Avoid the mutable-default-argument pitfall: [0] was previously shared
    # across calls through the signature.
    if gpu_devices is None:
        gpu_devices = [0]

    num_classes = self.system_dict["local"]["dataset_train"].num_classes()

    # Fix: an unrecognised model_name previously left `retinanet` unbound and
    # crashed later with NameError; fail fast with a clear error instead.
    backbones = {
        "resnet18": model.resnet18,
        "resnet34": model.resnet34,
        "resnet50": model.resnet50,
        "resnet101": model.resnet101,
        "resnet152": model.resnet152,
    }
    if model_name not in backbones:
        raise ValueError(
            "Unsupported model_name '{}'; must be one of {}".format(
                model_name, sorted(backbones)))
    retinanet = backbones[model_name](num_classes=num_classes, pretrained=True)

    if self.system_dict["params"]["use_gpu"]:
        self.system_dict["params"]["gpu_devices"] = gpu_devices
        if len(self.system_dict["params"]["gpu_devices"]) == 1:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.system_dict["params"]["gpu_devices"][0])
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
                [str(id) for id in self.system_dict["params"]["gpu_devices"]])

    self.system_dict["local"][
        "device"] = 'cuda' if torch.cuda.is_available() else 'cpu'
    retinanet = retinanet.to(self.system_dict["local"]["device"])
    # DataParallel wrap; subsequent code accesses the net via .module.
    retinanet = torch.nn.DataParallel(retinanet).to(
        self.system_dict["local"]["device"])

    retinanet.training = True
    retinanet.train()
    retinanet.module.freeze_bn()

    self.system_dict["local"]["model"] = retinanet
def main(args=None):
    """Evaluate a RetinaNet checkpoint on a CSV dataset and print its metrics.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--csv_annotations_path', help='Path to CSV annotations')
    parser.add_argument('--model_path', help='Path to model', type=str)
    parser.add_argument('--images_path', help='Path to images directory', type=str)
    parser.add_argument('--class_list_path', help='Path to classlist csv', type=str)
    parser.add_argument('--iou_threshold',
                        help='IOU threshold used for evaluation',
                        type=str,
                        default='0.5')
    parser = parser.parse_args(args)

    #dataset_val = CocoDataset(parser.coco_path, set_name='val2017',transform=transforms.Compose([Normalizer(), Resizer()]))
    dataset_val = CSVDataset(parser.csv_annotations_path,
                             parser.class_list_path,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    # Create the model
    #retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    # Anchor ratios/scales come from the YAML config; None keeps model defaults.
    config = dict({"scales": None, "ratios": None})
    config = load_config("config2.yaml", config)
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=False,
                               ratios=config["ratios"],
                               scales=config["scales"])
    # load_ckpt also returns optimizer/epoch state, unused for evaluation.
    retinanet, _, _ = load_ckpt(parser.model_path, retinanet)

    use_gpu = True

    if use_gpu:
        # NOTE(review): this message prints even when CUDA is unavailable.
        print("Using GPU for validation process")
        if torch.cuda.is_available():
            retinanet = torch.nn.DataParallel(retinanet.cuda())
        else:
            retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    print(
        csv_eval.evaluate(dataset_val,
                          retinanet,
                          score_threshold=0.4,
                          iou_threshold=float(parser.iou_threshold)))
def export(
    checkpoint: str,
    output_path,
    num_classes: Optional[int] = 1,
    model_arch: Optional[str] = "resnet-50",
    input_size: Optional[Tuple[int, int]] = (512, 512),
    batch_size: Optional[int] = 1,
    verbose: Optional[bool] = False,
):
    """Load a RetinaNet checkpoint and export the network to ONNX.

    Args:
        checkpoint: Path to a saved state dict (possibly DataParallel-prefixed).
        output_path: Destination file; must end in ``.onnx``.
        num_classes: Number of object classes the head was trained with.
        model_arch: Backbone identifier, e.g. ``"resnet-50"``.
        input_size: (height, width) of the dummy tracing input.
        batch_size: Batch dimension of the dummy tracing input.
        verbose: Forwarded to ``torch.onnx.export``.

    Raises:
        NotImplementedError: If ``model_arch`` is not a supported backbone.
    """
    assert output_path.endswith(
        ".onnx"), "`output_path` must be path to the output `onnx` file"

    # Dispatch table instead of an if/elif chain; unknown arch -> error.
    builders = {
        "resnet-18": model.resnet18,
        "resnet-34": model.resnet34,
        "resnet-50": model.resnet50,
        "resnet-101": model.resnet101,
        "resnet-152": model.resnet152,
    }
    if model_arch not in builders:
        raise NotImplementedError
    net = builders[model_arch](num_classes)

    device = torch.device(
        "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    logger.info(f"using device: {device}")
    net = net.to(device)

    # Strip any 'module.' prefixes left over from DataParallel training.
    weights = remove_module(torch.load(checkpoint, map_location=device))
    net.load_state_dict(weights)
    logger.info(f"successfully loaded saved checkpoint.")

    net.eval()
    net.export = True

    # Dummy input fixes the traced input shape for the ONNX graph.
    sample = torch.randn(batch_size, 3, input_size[0], input_size[1]).to(device)

    logger.info(f"exporting to {output_path}...")
    torch.onnx.export(
        net,
        sample,
        output_path,
        opset_version=11,
        verbose=verbose,
        input_names=["input"],
        output_names=["anchors", "classification", "regression"],
    )
    logger.info("export complete")
def load_model(model_path, configfile, no_nms=False):
    """Build a RetinaNet from an INI config and load weights from model_path.

    Args:
        model_path: Path to a saved state dict.
        configfile: INI-style config with TRAINING and MODEL sections.
        no_nms: Forwarded to the model constructor; presumably disables
            non-maximum suppression in the forward pass — confirm in model.py.

    Returns:
        The network in eval mode, moved to GPU when available.
    """
    configs = configparser.ConfigParser()
    configs.read(configfile)
    try:
        depth = int(configs['TRAINING']['depth'])
        # Parsed but unused below; acts as a config-validity check only.
        input_shape = json.loads(configs['MODEL']['input_shape'])
        num_classes = int(configs['TRAINING']['num_classes'])
        try:
            # Optional anchor settings; None keeps the model defaults.
            ratios = json.loads(configs['MODEL']['ratios'])
            scales = json.loads(configs['MODEL']['scales'])
        except Exception as e:
            print(e)
            print('USING DEFAULT RATIOS AND SCALES')
            ratios = None
            scales = None
    except Exception as e:
        print(e)
        print('CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt')
        sys.exit()

    # Create the model
    if depth == 18:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=False, ratios=ratios, scales=scales, no_nms=no_nms)
    elif depth == 50:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=True, ratios=ratios, scales=scales, no_nms=no_nms)
    else:
        print(f"DEPTH FROM : {configfile} INACCURATE. MUST BE 18 or 50")
        sys.exit(0)

    # Load the checkpoint onto whichever device is available.
    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet.load_state_dict(torch.load(model_path))
    else:
        retinanet.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

    retinanet.training = False
    retinanet.eval()
    return retinanet
def Model(self, model_name="resnet18", gpu_devices=None):
    """Build the RetinaNet backbone and move it to the configured device(s).

    Args:
        model_name (str): One of resnet18/34/50/101/152.
        gpu_devices (list): GPU device IDs used when use_gpu is set
            (defaults to [0]).

    Raises:
        ValueError: If model_name is not a supported backbone.
    """
    # Avoid the mutable-default-argument pitfall: [0] was previously shared
    # across calls through the signature.
    if gpu_devices is None:
        gpu_devices = [0]

    num_classes = self.system_dict["local"]["dataset_train"].num_classes()

    # Fix: an unrecognised model_name previously left `retinanet` unbound and
    # crashed later with NameError; fail fast with a clear error instead.
    backbones = {
        "resnet18": model.resnet18,
        "resnet34": model.resnet34,
        "resnet50": model.resnet50,
        "resnet101": model.resnet101,
        "resnet152": model.resnet152,
    }
    if model_name not in backbones:
        raise ValueError(
            "Unsupported model_name '{}'; must be one of {}".format(
                model_name, sorted(backbones)))
    retinanet = backbones[model_name](num_classes=num_classes, pretrained=True)

    if self.system_dict["params"]["use_gpu"]:
        self.system_dict["params"]["gpu_devices"] = gpu_devices
        if len(self.system_dict["params"]["gpu_devices"]) == 1:
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.system_dict["params"]["gpu_devices"][0])
        else:
            os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
                [str(id) for id in self.system_dict["params"]["gpu_devices"]])

    self.system_dict["local"][
        "device"] = 'cuda' if torch.cuda.is_available() else 'cpu'
    retinanet = retinanet.to(self.system_dict["local"]["device"])
    # DataParallel wrap; subsequent code accesses the net via .module.
    retinanet = torch.nn.DataParallel(retinanet).to(
        self.system_dict["local"]["device"])

    retinanet.training = True
    retinanet.train()
    retinanet.module.freeze_bn()

    self.system_dict["local"]["model"] = retinanet
def main(args=None):
    """Run RetinaNet person detection frame-by-frame over a hard-coded video.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    # Hard-coded CLI values are injected into sys.argv before parsing.
    sys.argv.append('--coco_path')
    sys.argv.append(
        '/home/jht/github/deep-high-resolution-net.pytorch/data/coco')
    sys.argv.append('--model_path')
    sys.argv.append('coco_resnet_50_map_0_335_state_dict.pt')
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)
    parser = parser.parse_args(args)

    # 80 classes = COCO label set.
    retinanet = model.resnet50(num_classes=80, pretrained=True)

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    # Load weights before the DataParallel wrap (state-dict keys carry no
    # 'module.' prefix).
    if torch.cuda.is_available():
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    def draw_caption(image, box, caption):
        # Draw the caption twice (black then white) for a readable outline.
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    # Initialize capture and read the first frame; rval says whether a frame
    # was grabbed, frame is the captured image.
    vc = cv2.VideoCapture('/home/jht/16_2.MP4')
    rval, frame = vc.read()

    # Video fps
    fps = vc.get(cv2.CAP_PROP_FPS)
    # Total number of frames
    frame_all = vc.get(cv2.CAP_PROP_FRAME_COUNT)
    print("[INFO] 视频FPS: {}".format(fps))
    print("[INFO] 视频总帧数: {}".format(frame_all))
    print("[INFO] 视频时长: {}s".format(frame_all / fps))

    # ImageNet normalization constants, shaped (1, 1, 3) for broadcasting.
    mean = np.array([[[0.485, 0.456, 0.406]]])
    std = np.array([[[0.229, 0.224, 0.225]]])

    while rval:
        with torch.no_grad():
            st = time.time()
            # NOTE(review): rval is not re-checked before using img_bgr, so the
            # final read (end of video) passes None to cvtColor — confirm.
            rval, img_bgr = vc.read()
            img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            img_rgb = img_rgb.astype(np.float32) / 255.0
            frame = (img_rgb - mean) / std
            rows, cols, cns = frame.shape
            # Round dimensions up to multiples of 32 (backbone stride).
            # NOTE(review): pad_w derives from rows and pad_h from cols (names
            # look swapped), and the image is resized rather than padded, which
            # slightly distorts the aspect ratio — confirm intended.
            pad_w = 32 - rows % 32
            pad_h = 32 - cols % 32
            rows = rows + pad_w
            cols = cols + pad_h
            new_image = cv2.resize(frame, (cols, rows))
            img = torch.from_numpy(new_image)
            if torch.cuda.is_available():
                scores, labels, boxes = retinanet(
                    img.permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = retinanet(
                    img.permute(2, 0, 1).float().unsqueeze(dim=0))
            print('Elapsed time: {}'.format(time.time() - st))
            # Keep detections above a fixed 0.7 confidence threshold.
            idxs = np.where(scores.cpu() > 0.7)
            # Undo normalization to recover a displayable uint8 image.
            tensor = img * std + mean
            img = tensor.mul(255).clamp(0, 255).byte().cpu().numpy()
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            for j in range(idxs[0].shape[0]):
                bbox = boxes[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                print(int(labels[idxs[0][j]]))
                # Label 0 is 'person' in the COCO ordering used here.
                if int(labels[idxs[0][j]]) == 0:
                    draw_caption(img, (x1, y1, x2, y2), "person")
                    cv2.rectangle(img, (x1, y1), (x2, y2),
                                  color=(0, 0, 255),
                                  thickness=2)
            cv2.imshow('img', img)
            # waitKey(0) blocks until a key press on every frame.
            cv2.waitKey(0)
def main(args=None):
    """Two-stage training: (1) train RetinaNet detection jointly with a style
    loss, (2) freeze the detector, extract per-image feature vectors, and train
    the style-classification head on them.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)',
        default='data/train_retinanet.csv')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default='data/class_retinanet.csv')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)',
        default='data/val_retinanet.csv')
    parser.add_argument('--model_path',
                        default='coco_resnet_50_map_0_335_state_dict.pt',
                        help='Path to file containing pretrained retinanet')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs_detection',
                        help='Number of epochs for detection',
                        type=int,
                        default=50)
    parser.add_argument('--epochs_classification',
                        help='Number of epochs for classification',
                        type=int,
                        default=50)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Batch size 1 grouped by aspect ratio.
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if parser.model_path is not None:
        print('loading ', parser.model_path)
        # COCO checkpoints are bare state dicts (loaded non-strictly because
        # the class count differs); anything else is a fully pickled model.
        if 'coco' in parser.model_path:
            retinanet.load_state_dict(torch.load(parser.model_path),
                                      strict=False)
        else:
            retinanet = torch.load(parser.model_path)
        print('Pretrained model loaded!')

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    #Here training the detection
    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=4,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()
    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))
    mAP_list = []
    mAPbest = 0

    # ---- Stage 1: detection training with an auxiliary style loss ----
    for epoch_num in range(parser.epochs_detection):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                # Forward returns ([cls_loss, reg_loss], style_logits) in
                # training mode.
                if torch.cuda.is_available():
                    [classification_loss, regression_loss], style = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    [classification_loss, regression_loss
                     ], style = retinanet([data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if torch.cuda.is_available():
                    style_loss = loss_style_classif(
                        style, torch.tensor(data['style']).cuda())
                else:
                    style_loss = loss_style_classif(
                        style, torch.tensor(data['style']))
                loss = classification_loss + regression_loss + style_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                # Gradient clipping stabilizes early training.
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.4f} | Regression loss: {:1.4f} | Style loss: {:1.4f} | Running loss: {:1.4f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), float(style_loss),
                            np.mean(loss_hist)))
                del classification_loss
                del regression_loss
                del style_loss
            except Exception as e:
                # NOTE(review): broad catch keeps training alive on bad
                # batches but can hide real bugs.
                print(e)
                continue
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAPclasses, mAP, accu = csv_eval.evaluate(dataset_val, retinanet)
            mAP_list.append(mAP)
            print('mAP_list', mAP_list)
            # Keep the best-mAP snapshot for stage 2.
            if mAP > mAPbest:
                print('Saving best checkpoint')
                torch.save(retinanet, 'model_best.pt')
                mAPbest = mAP
        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')

    # Here we aggregate all the data to don't have to appy the Retinanet
    # during training.
    retinanet.load_state_dict(torch.load('model_best.pt').state_dict())
    List_feature = []
    List_target = []
    retinanet.training = False
    retinanet.eval()
    # style_inference mode makes the forward pass return feature vectors.
    retinanet.module.style_inference = True
    retinanet.module.freeze_bn()
    epoch_loss = []
    with torch.no_grad():
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    _, _, feature_vec = retinanet(data['img'].cuda().float())
                else:
                    _, _, feature_vec = retinanet(data['img'].float())
                List_feature.append(torch.squeeze(feature_vec).cpu())
                List_target.append(data['style'][0])
            except Exception as e:
                print(e)
                continue
    print('END of preparation of the data for classification of style')

    # Here begins Style training. Need to set to style_train. They are using
    # the same loader, as it was expected to train both at the same time.
    batch_size_classification = 64
    dataloader_train_style = torch.utils.data.DataLoader(
        StyleDataset(List_feature, List_target),
        batch_size=batch_size_classification)
    retinanet.load_state_dict(torch.load('model_best.pt').state_dict())
    # Here training the detection
    retinanet.module.style_inference = False
    retinanet.module.style_train(True)
    retinanet.training = True
    retinanet.train()
    # Only the style-classification head is optimized in stage 2.
    optimizer = optim.Adam(
        retinanet.module.styleClassificationModel.parameters(),
        lr=5e-3,
        weight_decay=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='max',
                                                     patience=4,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()
    retinanet.train()
    retinanet.module.freeze_bn()
    criterion = nn.CrossEntropyLoss()
    accu_list = []
    accubest = 0

    # ---- Stage 2: style-classifier training on cached features ----
    for epoch_num in range(parser.epochs_classification):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        total = 0
        correct = 0
        for iter_num, data in enumerate(dataloader_train_style):
            try:
                optimizer.zero_grad()
                inputs, targets = data
                if torch.cuda.is_available():
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = retinanet.module.styleClassificationModel(
                    inputs, 0, 0, 0, True)
                loss = criterion(outputs, targets)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                total += targets.size(0)
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum()
                print(
                    '| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                    % (epoch_num, parser.epochs_classification, iter_num + 1,
                       (len(dataloader_train_style) //
                        batch_size_classification) + 1, loss.item(),
                       100. * correct / total))
            except Exception as e:
                print(e)
                continue
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAPclasses, mAP, accu = csv_eval.evaluate(dataset_val, retinanet)
            accu_list.append(accu)
            print('mAP_list', mAP_list, 'accu_list', accu_list)
            if accu > accubest:
                print('Saving best checkpoint')
                torch.save(retinanet.module, 'model_best_classif.pt')
                accubest = accu
        scheduler.step(accu)
        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet.module, 'model_final.pt')
def main(args=None):
    """Train RetinaNet on a COCO or CSV dataset, logging train and validation
    losses each epoch and periodically saving checkpoints.

    Args:
        args: Optional argument list for argparse (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--model_save_path',
                        help='Path to save model',
                        type=str)
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    # NOTE(review): dataloader_val is only defined here, but the validation
    # loop below uses it unconditionally — a csv run without --csv_val would
    # crash with NameError. Confirm intended usage.
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # add draw tensorboard code
    # NOTE(review): the writer is created but never written to in this
    # function — confirm whether logging was meant to be added.
    writer = SummaryWriter(log_dir='./logs/416*416/', flush_secs=60)
    # if Cuda:
    #     graph_inputs = torch.from_numpy(np.random.rand(1, 3, input_shape[0], input_shape[1])).type(
    #         torch.FloatTensor).cuda()
    # else:
    #     graph_inputs = torch.from_numpy(np.random.rand(1, 3, input_shape[0], input_shape[1])).type(torch.FloatTensor)
    # writer.add_graph(model, (graph_inputs,))

    # add gap save model count variable
    n = 0
    for epoch_num in range(parser.epochs):
        n += 1
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        ### begin calculate train loss
        for iter_num, data in enumerate(dataloader_train):
            # try:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
            else:
                classification_loss, regression_loss = retinanet(
                    [data['img'].float(), data['annot']])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            if bool(loss == 0):
                continue
            loss.backward()
            # Gradient clipping stabilizes training.
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))
            print(
                'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))
            del classification_loss
            del regression_loss
            # except Exception as e:
            #     print(e)
            #     continue

        ### begin calculate valid loss
        # No optimizer.step() here: only the losses are computed/reported.
        for iter_num, data in enumerate(dataloader_val):
            # try:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
            else:
                classification_loss, regression_loss = retinanet(
                    [data['img'].float(), data['annot']])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            if bool(loss == 0):
                continue
            loss_hist.append(float(loss))
            print(
                'Epoch: {} | Iteration: {} | Valid-Classification loss: {:1.5f} | Valid-Regression loss: {:1.5f} | Running Valid loss: {:1.5f}'
                .format(epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))
            del classification_loss
            del regression_loss

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)
            print('Epoch: {} | mAP: {:.3f}'.format(epoch_num, float(mAP)))

        scheduler.step(np.mean(epoch_loss))

        # Save a checkpoint every 10 epochs.
        # NOTE(review): `mAP` is only assigned on the csv branch above; a coco
        # run would hit NameError here — confirm.
        if n % 10 == 0:
            torch.save(
                retinanet.module, parser.model_save_path + '/' +
                '{}_retinanet_{}_{:.3f}.pt'.format(parser.dataset, epoch_num,
                                                   mAP))

    retinanet.eval()
    torch.save(retinanet, parser.model_save_path + '/' + 'model_final.pt')
def __init__(self,
             detection_model_path,
             score_model_path,
             private_key,
             device_name='cpu',
             backend='pytorch',
             det_vino_device=DETECTION_VINO_DEVICE,
             score_vino_device=SCORE_VINO_DEVICE):
    """Set up the detection and scoring networks for either the PyTorch or
    the OpenVINO inference backend.

    Args:
        detection_model_path: Path to the detection model (state dict for
            pytorch; IR .xml for openvino, with the .bin alongside).
        score_model_path: Path to the scoring model (same conventions).
        private_key: Key intended for model decryption (currently unused —
            see TODO below).
        device_name: torch device string used by the pytorch backend.
        backend: 'pytorch' or 'openvino'.
        det_vino_device: OpenVINO device for the detection network.
        score_vino_device: OpenVINO device for the score network; 'MULTI'
            spreads score inference over all MYRIAD devices.
    """
    self.detection_model_path = detection_model_path
    self.score_model_path = score_model_path
    self.private_key = private_key
    self.device_name = device_name
    self.backend = backend

    # Font used for drawing captions, shipped next to this module.
    root_core_path, _ = os.path.split(os.path.abspath(__file__))
    self.font = ImageFont.truetype(
        os.path.join(root_core_path, 'resource/arial.ttf'), 24)

    # TODO : normalization
    self.detect_trans = detection_aug.Compose([
        detection_aug.Pad(),
        detection_aug.Resize(IMAGE_SIZE, IMAGE_SIZE),
        detection_aug.AutoLevel(min_level_rate=1, max_level_rate=1),
        detection_aug.AutoContrast(),
        detection_aug.Contrast(1.25),
        detection_aug.ToTensor()
    ])
    self.score_trans = transforms.Compose([
        score_aug.AutoLevel(),
        score_aug.AutoContrast(),
        score_aug.Contrast(contrast=1.2),
        score_aug.Pad(),  # pad to square
        transforms.ToPILImage(),
        transforms.Resize(IMAGE_SIZE if self.backend ==
                          'pytorch' else VINO_CROP_SIZE),
        transforms.ToTensor()
    ])

    if self.backend == 'pytorch':
        self.device = torch.device(self.device_name)
        # TODO : decrypt model with private_key
        self.detection_net = detection_model.resnet50(
            num_classes=NUM_CLASSES)
        self.score_net = score_model.resnet50(num_classes=1)
        self.detection_net.load_state_dict(
            torch.load(self.detection_model_path))
        self.score_net.load_state_dict(torch.load(self.score_model_path))
        self.detection_net.to(self.device)
        self.score_net.to(self.device)
    elif self.backend == 'openvino':
        # IR expects BGR, but our transform pipeline exports RGB
        # remember to convert model with --reverse_input_channels parameter
        # our normalization is implemented in transform
        # so do NOT specify --scale_values, --mean_values
        # after ToTensor(), we got (n, c, h, w) tensor so .numpy() should be ok
        from openvino.inference_engine import IECore
        self.ie = IECore()
        # IR weights live next to the .xml with a .bin extension.
        self.detection_model_bin = os.path.splitext(
            self.detection_model_path)[0] + '.bin'
        self.score_model_bin = os.path.splitext(
            self.score_model_path)[0] + '.bin'
        self.detection_net = self.ie.read_network(
            self.detection_model_path, self.detection_model_bin)
        self.score_net = self.ie.read_network(self.score_model_path,
                                              self.score_model_bin)
        self.detection_input_layer = next(iter(self.detection_net.inputs))
        # Sorted so output order is deterministic across runs.
        self.detection_output_layers = sorted(
            iter(self.detection_net.outputs))
        self.score_input_layer = next(iter(self.score_net.inputs))
        self.score_output_layer = next(iter(self.score_net.outputs))
        self.detection_exec_model = self.ie.load_network(
            self.detection_net, det_vino_device)
        self.score_exec_models = []
        if score_vino_device == 'MULTI':
            # Load one executable network per available MYRIAD stick.
            for dev in self.ie.available_devices:
                if 'MYRIAD' in dev:
                    self.score_exec_models.append(
                        self.ie.load_network(self.score_net, dev))
            print('det device: {}, score MYRIAD device(s): {}'.format(
                det_vino_device, len(self.score_exec_models)))
        else:
            self.score_exec_model = self.ie.load_network(
                self.score_net, score_vino_device)
            print('det device: {}, score device: {}'.format(
                det_vino_device, score_vino_device))
        self.detection_post_processor = DetectionPostProcessor(
            NMS_THRESHOLD, SCORE_THRESHOLD)
    else:
        # NOTE(review): unknown backend only prints; the instance is left
        # half-initialized rather than raising — confirm intended.
        print('unknown backend {}'.format(self.backend))
def main(args=None):
    """Train RetinaNet on a COCO- or CSV-format dataset with tqdm progress.

    Args:
        args: optional CLI argument list (defaults to ``sys.argv``).

    Side effects: writes a checkpoint per epoch plus ``model_final.pt`` into
    ``--weights_folder`` and logs per-epoch loss totals to the module-level
    tensorboard writer ``tb``.

    Fix vs. original: the per-epoch totals were accumulated as loss *tensors*
    (``total_loss = total_loss + loss``), which kept every batch's autograd
    graph alive for the whole epoch and grew GPU memory without bound.  They
    are now accumulated as plain Python floats; logged values are unchanged.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', default='csv', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', default='dataset/pascal_train.csv', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', default='dataset/classes.csv', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', default='dataset/pascal_val.csv', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--weights_folder', help='path to save weight', type=str, required=True)
    parser = parser.parse_args(args)

    if not os.path.exists(parser.weights_folder):
        os.makedirs(parser.weights_folder)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=5, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=8, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=4, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        # Accumulate as floats, not tensors: summing the loss tensors keeps
        # each batch's autograd graph alive for the whole epoch (memory leak).
        total_loss = 0.0
        total_regression_loss = 0.0
        total_classification_loss = 0.0
        with tqdm(dataloader_train, unit="batch") as tepoch:
            for data in tepoch:
                tepoch.set_description(f"Epoch {epoch_num}")
                try:
                    optimizer.zero_grad()
                    if torch.cuda.is_available():
                        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                    else:
                        classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])
                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()
                    loss = classification_loss + regression_loss
                    # Totals include zero-loss batches, matching the original order.
                    total_loss += float(loss)
                    total_regression_loss += float(regression_loss)
                    total_classification_loss += float(classification_loss)
                    if bool(loss == 0):
                        continue
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                    optimizer.step()
                    loss_hist.append(float(loss))
                    epoch_loss.append(float(loss))
                    tepoch.set_postfix(cls_loss="{:1.5f}".format(classification_loss),
                                       reg_loss="{:1.5f}".format(regression_loss))
                    # NOTE(review): throttles the tqdm refresh but costs 100 ms
                    # of wall time per batch — consider removing.
                    time.sleep(0.1)
                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

        tb.add_scalar('Training loss', total_loss, epoch_num)
        tb.add_scalar('Training regression loss', total_regression_loss, epoch_num)
        tb.add_scalar('Training accuracy loss', total_classification_loss, epoch_num)

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}/{}_retinanet_{}.pt'.format(parser.weights_folder, parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, '{}/model_final.pt'.format(parser.weights_folder))
def main(args=None):
    """Train RetinaNet on COCO / FLIR / CSV data with optional resume.

    Args:
        args: optional CLI argument list (defaults to ``sys.argv``).

    Side effects: logs via the 'Network Logger' logger (optionally to
    ``log/<network_name>.log``), saves a checkpoint per epoch under
    ``saved/`` and a final ``model_final.pt`` in the working directory.

    Fixes vs. original:
    - ``dataloader_val`` was built unconditionally and the per-epoch
      ``test(...)`` always ran, so ``--dataset csv`` without ``--csv_val``
      crashed on ``dataset_val is None``; both are now guarded.
    - the ``saved/`` checkpoint directory is created up front so the first
      ``torch.save`` cannot fail with FileNotFoundError.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    # parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--dataset_root',
        default='/root/data/VOCdevkit/',
        help='Dataset root directory path [/root/data/VOCdevkit/, /root/data/coco/, /root/data/FLIR_ADAS]')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int,
                        default=50)
    parser.add_argument('--batch_size',
                        default=16,
                        type=int,
                        help='Batch size for training')
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--lr',
                        '--learning_rate',
                        default=1e-4,
                        type=float,
                        help='initial learning rate')
    parser.add_argument('--weight_decay',
                        default=5e-4,
                        type=float,
                        help='Weight decay')
    parser.add_argument('-j',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument("--log",
                        default=False,
                        action="store_true",
                        help="Write log file.")
    parser = parser.parse_args(args)

    network_name = 'RetinaNet-Res{}'.format(parser.depth)
    net_logger = logging.getLogger('Network Logger')
    formatter = logging.Formatter(LOGGING_FORMAT)
    streamhandler = logging.StreamHandler()
    streamhandler.setFormatter(formatter)
    net_logger.addHandler(streamhandler)
    if parser.log:
        net_logger.setLevel(logging.INFO)
        filehandler = logging.FileHandler(os.path.join(
            'log', '{}.log'.format(network_name)), mode='a')
        filehandler.setFormatter(formatter)
        net_logger.addHandler(filehandler)
    net_logger.info('Network Name: {:>20}'.format(network_name))

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.dataset_root is None:
            raise ValueError('Must provide --dataset_root when training on COCO,')
        dataset_train = CocoDataset(parser.dataset_root,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.dataset_root,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'FLIR':
        if parser.dataset_root is None:
            raise ValueError('Must provide --dataset_root when training on FLIR,')
        # FLIR frames are resized slightly above their native 512x640.
        _scale = 1.2
        dataset_train = FLIRDataset(parser.dataset_root,
                                    set_name='train',
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(min_side=int(512 * _scale),
                                                max_side=int(640 * _scale),
                                                logger=net_logger)
                                    ]))
        dataset_val = FLIRDataset(parser.dataset_root,
                                  set_name='val',
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(min_side=int(512 * _scale),
                                              max_side=int(640 * _scale))
                                  ]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be FLIR, COCO or csv), exiting.')

    dataloader_train = DataLoader(dataset_train,
                                  batch_size=parser.batch_size,
                                  num_workers=parser.workers,
                                  shuffle=True,
                                  collate_fn=collater,
                                  pin_memory=True)
    # Fix: only build a validation loader when a validation set exists.
    dataloader_val = None
    if dataset_val is not None:
        dataloader_val = DataLoader(dataset_val,
                                    batch_size=1,
                                    num_workers=parser.workers,
                                    shuffle=False,
                                    collate_fn=collater,
                                    pin_memory=True)

    build_param = {'logger': net_logger}
    if parser.resume is not None:
        net_logger.info('Loading Checkpoint : {}'.format(parser.resume))
        retinanet = torch.load(parser.resume)
        # The epoch index is encoded in the checkpoint filename: ..._<epoch>.pt
        s_b = parser.resume.rindex('_')
        s_e = parser.resume.rindex('.')
        start_epoch = int(parser.resume[s_b + 1:s_e]) + 1
        net_logger.info('Continue on {} Epoch'.format(start_epoch))
    else:
        # Create the model
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True, **build_param)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True, **build_param)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True, **build_param)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                        pretrained=True, **build_param)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                        pretrained=True, **build_param)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        start_epoch = 0

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True
    net_logger.info('Weight Decay  : {}'.format(parser.weight_decay))
    net_logger.info('Learning Rate : {}'.format(parser.lr))
    optimizer = optim.Adam(retinanet.parameters(),
                           lr=parser.lr,
                           weight_decay=parser.weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()
    net_logger.info('Num Training Images: {}'.format(len(dataset_train)))

    # Fix: ensure the checkpoint directory exists before the first save.
    os.makedirs('saved', exist_ok=True)

    for epoch_num in range(start_epoch, parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                if (iter_num % 10 == 0):
                    _log = 'Epoch: {} | Iter: {} | Class loss: {:1.5f} | BBox loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist))
                    net_logger.info(_log)
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # Fix: skip evaluation entirely when no validation set was provided.
        if dataset_val is not None and (epoch_num + 1) % 1 == 0:
            test(dataset_val, retinanet, epoch_num, parser, net_logger)

        scheduler.step(np.mean(epoch_loss))
        print('Learning Rate:', str(scheduler._last_lr))
        torch.save(
            retinanet.module,
            os.path.join('saved', '{}_{}_{}.pt'.format(parser.dataset,
                                                       network_name,
                                                       epoch_num)))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
def main(args=None):
    """Train RetinaNet on COCO- or CSV-format data, writing results to
    ``--result_dir``.

    Args:
        args: optional CLI argument list (defaults to ``sys.argv``).

    Side effects: saves a checkpoint per epoch into ``--result_dir`` and a
    final ``model_final.pt`` in the working directory; runs COCO/CSV
    evaluation each epoch when a validation set is available.

    Fixes vs. original:
    - ``--result_dir`` (default "results") was never created, so the first
      per-epoch ``torch.save`` raised FileNotFoundError; it is now created
      with ``os.makedirs(..., exist_ok=True)``.
    - removed the unused locals ``IMAGE_MIN_SIDE``/``IMAGE_MAX_SIDE`` — the
      datasets run ``Resizer(passthrough=True)`` and never read them.
    """
    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network."
    )
    parser.add_argument("--dataset", help="Dataset type, must be one of csv or coco.")
    parser.add_argument("--model", default=None, help="Path to trained model")
    parser.add_argument("--coco_path", help="Path to COCO directory")
    parser.add_argument(
        "--csv_train", help="Path to file containing training annotations (see readme)"
    )
    parser.add_argument(
        "--csv_classes", help="Path to file containing class list (see readme)"
    )
    parser.add_argument(
        "--csv_val",
        help="Path to file containing validation annotations (optional, see readme)",
    )
    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50,
    )
    parser.add_argument("--epochs", help="Number of epochs", type=int, default=100)
    parser.add_argument(
        "--result_dir",
        default="results",
        help="Path to store training results",
        type=str,
    )
    parser.add_argument(
        "--batch_num", default=8, help="Number of samples in a batch", type=int
    )
    parser = parser.parse_args(args)
    print(parser)

    # parameters
    BATCH_SIZE = parser.batch_num
    # Fix: per-epoch checkpoints go under result_dir — make sure it exists.
    os.makedirs(parser.result_dir, exist_ok=True)

    # Create the data loaders
    if parser.dataset == "coco":
        if parser.coco_path is None:
            raise ValueError("Must provide --coco_path when training on COCO,")
        # TODO: parameterize arguments for Resizer, and other transform functions
        # resizer: min_side=608, max_side=1024
        dataset_train = CocoDataset(
            parser.coco_path,
            set_name="train_images_full",
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), Resizer(passthrough=True)]
            ),
        )
        dataset_val = CocoDataset(
            parser.coco_path,
            set_name="val_images_full",
            transform=transforms.Compose([Normalizer(), Resizer(passthrough=True)]),
        )
    elif parser.dataset == "csv":
        if parser.csv_train is None:
            raise ValueError("Must provide --csv_train when training on COCO,")
        if parser.csv_classes is None:
            raise ValueError("Must provide --csv_classes when training on COCO,")
        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]),
        )
        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(
                train_file=parser.csv_val,
                class_list=parser.csv_classes,
                transform=transforms.Compose([Normalizer(), Resizer()]),
            )
    else:
        raise ValueError("Dataset type not understood (must be csv or coco), exiting.")

    sampler = AspectRatioBasedSampler(
        dataset_train, batch_size=BATCH_SIZE, drop_last=False
    )
    dataloader_train = DataLoader(
        dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler
    )
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(
            dataset_val, batch_size=BATCH_SIZE, drop_last=False
        )
        dataloader_val = DataLoader(
            dataset_val, num_workers=16, collate_fn=collater, batch_sampler=sampler_val
        )

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 34:
        retinanet = model.resnet34(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 50:
        retinanet = model.resnet50(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 101:
        retinanet = model.resnet101(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    elif parser.depth == 152:
        retinanet = model.resnet152(
            num_classes=dataset_train.num_classes(), pretrained=True
        )
    else:
        raise ValueError("Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    # Resuming from a whole-model checkpoint replaces the freshly built net.
    if parser.model:
        retinanet = torch.load(parser.model)

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True
    )
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()
    print("Num training images: {}".format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        p_bar = tqdm(dataloader_train)
        for iter_num, data in enumerate(p_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data["img"].cuda().float(), data["annot"]]
                    )
                else:
                    classification_loss, regression_loss = retinanet(
                        [data["img"].float(), data["annot"]]
                    )
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                mean_loss = np.mean(loss_hist)
                p_bar.set_description(
                    f"Epoch: {epoch_num} | Iteration: {iter_num} | "
                    f"Class loss: {float(classification_loss.item()):.5f} | "
                    f"Regr loss: {float(regression_loss.item()):.5f} | "
                    f"Running loss: {mean_loss:.5f}"
                )
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == "coco":
            print("Evaluating dataset")
            coco_eval.evaluate_coco(
                dataset_val, retinanet, result_dir=parser.result_dir
            )
        elif parser.dataset == "csv" and parser.csv_val is not None:
            print("Evaluating dataset")
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        # TODO: Fix string formating mix (adopt homogeneous format)
        torch.save(
            retinanet.module,
            f"{parser.result_dir}/"
            + "{}_retinanet_{}.pt".format(parser.dataset, epoch_num),
        )

    retinanet.eval()
    torch.save(retinanet, "model_final.pt")
def main(args=None):
    """Train RetinaNet on the marathon dataset and log losses/mAP to CSV files.

    Supports three learning-rate policies (--lr_choice): a plateau scheduler,
    an epoch->lr map, or a parametric lr function.  Per-iteration and
    per-epoch losses plus validation mAP are appended to CSV files (placed
    under /kaggle/working/ when running inside a Kaggle kernel).

    Args:
        args: optional CLI argument list (defaults to ``sys.argv``).

    Returns:
        The parsed argparse namespace.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='show')
    parser.add_argument('--coco_path', help='Path to COCO directory', default='/mnt/marathon')
    parser.add_argument('--image_size', help='image size', type=int, nargs=2, default=IMAGE_SIZE)
    parser.add_argument('--limit', help='limit', type=int, nargs=2, default=(0, 0))
    parser.add_argument('--batch_size', help='batch size', type=int, default=BATCH_SIZE)
    parser.add_argument('--num_works', help='num works', type=int, default=NUM_WORKERS)
    parser.add_argument('--num_classes', help='num classes', type=int, default=3)
    parser.add_argument('--merge_val', help='merge_val', type=int, default=MERGE_VAL)
    parser.add_argument('--do_aug', help='do_aug', type=int, default=DO_AUG)
    parser.add_argument('--lr_choice', default=LR_CHOICE, choices=['lr_scheduler', 'lr_map', 'lr_fn'], type=str)
    parser.add_argument('--lr', help='lr', type=float, default=LR)
    # StoreDictKeyPair parses "k=v,k=v" style values into dicts.
    parser.add_argument("--lr_map", dest="lr_map", action=StoreDictKeyPair, default=LR_MAP)
    parser.add_argument("--lr_fn", dest="lr_fn", action=StoreDictKeyPair, default=LR_FN)
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=DEPTH)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=EPOCHS)
    parser = parser.parse_args(args)

    # Echo the effective configuration.
    print('dataset:', parser.dataset)
    print('depth:', parser.depth)
    print('epochs:', parser.epochs)
    print('image_size:', parser.image_size)
    print('batch_size:', parser.batch_size)
    print('num_works:', parser.num_works)
    print('merge_val:', parser.merge_val)
    print('do_aug:', parser.do_aug)
    print('lr_choice:', parser.lr_choice)
    print('lr:', parser.lr)
    print('lr_map:', parser.lr_map)
    print('lr_fn:', parser.lr_fn)
    print('num_classes:', parser.num_classes)
    print('limit:', parser.limit)

    # Create the data loaders
    # dataset_train, _ = torch.utils.data.random_split(dataset_train, [NUM_COCO_DATASET_TRAIN, len(dataset_train) - NUM_COCO_DATASET_TRAIN])
    # dataset_val, _ = torch.utils.data.random_split(dataset_val, [NUM_COCO_DATASET_VAL, len(dataset_val) - NUM_COCO_DATASET_VAL])
    transform_train = None
    transform_vail = None
    collate_fn = None
    if parser.do_aug:
        # Augmentation pipeline with its own collate function.
        transform_train = get_augumentation('train', parser.image_size[0], parser.image_size[1])
        transform_vail = get_augumentation('test', parser.image_size[0], parser.image_size[1])
        collate_fn = detection_collate
    else:
        transform_train = transforms.Compose([
            # Normalizer(),
            # Augmenter(),
            Resizer(*parser.image_size)])
        transform_vail = transforms.Compose([
            # Normalizer(),
            Resizer(*parser.image_size)])
        collate_fn = collater

    if parser.dataset == 'h5':
        dataset_train = H5CoCoDataset('{}/train_small.hdf5'.format(parser.coco_path), 'train_small')
        dataset_val = H5CoCoDataset('{}/test.hdf5'.format(parser.coco_path), 'test')
    else:
        dataset_train = CocoDataset(parser.coco_path, set_name='train_small', do_aug=parser.do_aug,
                                    transform=transform_train, limit_len=parser.limit[0])
        dataset_val = CocoDataset(parser.coco_path, set_name='test', do_aug=parser.do_aug,
                                  transform=transform_vail, limit_len=parser.limit[1])

    # Optionally fold the validation split into the training set.
    if parser.merge_val:
        dataset_train += dataset_val

    print('training images: {}'.format(len(dataset_train)))
    print('val images: {}'.format(len(dataset_val)))
    steps_pre_epoch = len(dataset_train) // parser.batch_size
    print('steps_pre_epoch:', steps_pre_epoch)

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    # batch_sampler drives batching; batch_size=1/shuffle=False are the values
    # DataLoader requires when a batch_sampler is supplied.
    dataloader_train = DataLoader(dataset_train, batch_size=1, num_workers=parser.num_works,
                                  shuffle=False, collate_fn=collate_fn, batch_sampler=sampler)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 101250:
        # ResNet-101 architecture initialised from ResNet-50 weights.
        retinanet = model.resnet101with50weight(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=parser.num_classes, pretrained=PRETRAINED)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=parser.num_classes, pretrained=PRETRAINED)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    # Initial learning rate depends on the chosen policy.
    if parser.lr_choice == 'lr_map':
        lr_now = lr_change_map(1, 0, parser.lr_map)
    elif parser.lr_choice == 'lr_fn':
        lr_now = float(parser.lr_fn['LR_START'])
    elif parser.lr_choice == 'lr_scheduler':
        lr_now = parser.lr

    # optimizer = optim.Adam(retinanet.parameters(), lr=lr_now)
    optimizer = optim.AdamW(retinanet.parameters(), lr=lr_now)
    # optimizer = optim.SGD(retinanet.parameters(), lr=lr_now, momentum=0.9, weight_decay=5e-4)
    # optimizer = optim.SGD(retinanet.parameters(), lr=lr_now)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=PATIENCE, factor=FACTOR, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()

    # Start each run with fresh CSV logs.
    iteration_loss_path = 'iteration_loss.csv'
    if os.path.isfile(iteration_loss_path):
        os.remove(iteration_loss_path)
    epoch_loss_path = 'epoch_loss.csv'
    if os.path.isfile(epoch_loss_path):
        os.remove(epoch_loss_path)
    eval_train_path = 'eval_train_result.csv'
    if os.path.isfile(eval_train_path):
        os.remove(eval_train_path)
    eval_val_path = 'eval_val_result.csv'
    if os.path.isfile(eval_val_path):
        os.remove(eval_val_path)

    # Kaggle kernels may only write under /kaggle/working.
    USE_KAGGLE = True if os.environ.get('KAGGLE_KERNEL_RUN_TYPE', False) else False
    if USE_KAGGLE:
        iteration_loss_path = '/kaggle/working/' + iteration_loss_path
        epoch_loss_path = '/kaggle/working/' + epoch_loss_path
        eval_val_path = '/kaggle/working/' + eval_val_path
        eval_train_path = '/kaggle/working/' + eval_train_path

    with open(epoch_loss_path, 'a+') as epoch_loss_file, \
            open(iteration_loss_path, 'a+') as iteration_loss_file, \
            open(eval_train_path, 'a+') as eval_train_file, \
            open(eval_val_path, 'a+') as eval_val_file:
        epoch_loss_file.write('epoch_num,mean_epoch_loss\n')
        iteration_loss_file.write('epoch_num,iteration,classification_loss,regression_loss,iteration_loss\n')
        eval_train_file.write('epoch_num,map50\n')
        eval_val_file.write('epoch_num,map50\n')

        for epoch_num in range(parser.epochs):
            retinanet.train()
            retinanet.module.freeze_bn()
            epoch_loss = []
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # Running mean over the last 500 iterations.
                iteration_loss = np.mean(loss_hist)
                print('\rEpoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num+1, iter_num+1, float(classification_loss), float(regression_loss), iteration_loss), end=' ' * 50)
                iteration_loss_file.write('{},{},{:1.5f},{:1.5f},{:1.5f}\n'.format(
                    epoch_num+1, epoch_num * steps_pre_epoch + (iter_num+1),
                    float(classification_loss), float(regression_loss), iteration_loss))
                iteration_loss_file.flush()
                del classification_loss
                del regression_loss

            mean_epoch_loss = np.mean(epoch_loss)
            epoch_loss_file.write('{},{:1.5f}\n'.format(epoch_num+1, mean_epoch_loss))
            epoch_loss_file.flush()

            # Apply the selected learning-rate policy once per epoch.
            if parser.lr_choice == 'lr_map':
                lr_now = lr_change_map(epoch_num+1, lr_now, parser.lr_map)
                adjust_learning_rate(optimizer, lr_now)
            elif parser.lr_choice == 'lr_fn':
                lr_now = lrfn(epoch_num+1, parser.lr_fn)
                adjust_learning_rate(optimizer, lr_now)
            elif parser.lr_choice == 'lr_scheduler':
                scheduler.step(mean_epoch_loss)

            # if parser.dataset != 'show':
            #     print('Evaluating dataset_train')
            #     coco_eval.evaluate_coco(dataset_train, retinanet, parser.dataset, parser.do_aug, eval_train_file, epoch_num)
            print('Evaluating dataset_val')
            coco_eval.evaluate_coco(dataset_val, retinanet, parser.dataset, parser.do_aug, eval_val_file, epoch_num)

    return parser
def main(args=None):
    """Train RetinaNet on HW2 / COCO / CSV data, checkpointing each epoch.

    Args:
        args: optional CLI argument list (defaults to ``sys.argv``).

    Side effects: saves a checkpoint per epoch plus ``model_final.pt`` under
    ``saved_models_3/``.

    Fixes vs. original:
    - ``pre_epoch`` was referenced in the epoch loop but never defined
      (NameError on every run); it is now initialised to 0.
    - the coco branch read ``parser.coco_path`` while the ``--coco_path``
      flag was commented out (AttributeError); the flag is restored.
    - ``saved_models_3/`` is created before checkpoints are written there.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--HW2_path', help='Path to HW2 directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int,
                        default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'HW2':
        if parser.HW2_path is None:
            raise ValueError('Must provide --HW2_path when training on HW2,')
        dataset_train = HW2Dataset(parser.HW2_path,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        # dataset_val = HW2Dataset(parser.HW2_path,
        #                          transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=8,
                                  num_workers=3,
                                  collate_fn=collater)
    # if dataset_val is not None:
    #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #     dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
        # Depth-50 resumes from a previously saved whole-model checkpoint,
        # replacing the freshly built net above.
        retinanet = torch.load('saved_models_3/HW2_retinanet_0.pt')
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    # Fix: ensure the checkpoint directory exists before the first save.
    os.makedirs('saved_models_3', exist_ok=True)
    # Fix: pre_epoch was never defined.  NOTE(review): the depth-50 branch
    # resumes from HW2_retinanet_0.pt — bump this when resuming mid-run.
    pre_epoch = 0

    for epoch_num in range(pre_epoch, parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # if parser.dataset == 'coco':
        #     print('Evaluating dataset')
        #     coco_eval.evaluate_coco(dataset_val, retinanet)
        # elif parser.dataset == 'csv' and parser.csv_val is not None:
        #     print('Evaluating dataset')
        #     mAP = csv_eval.evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet.module,
            'saved_models_3/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    # retinanet.eval()
    torch.save(retinanet, 'saved_models_3/model_final.pt')
def detect_image(video_path, model_path):
    """Run RetinaNet detection on every frame of a video and write an
    annotated copy to 'output3.avi' in the working directory.

    Args:
        video_path: path to an input video readable by OpenCV.
        model_path: path to a state_dict checkpoint for resnet50(num_classes=80).
    """
    # COCO category names, 1-based with index 0 reserved for background.
    classes = {
        0: '__background__', 1: 'person', 2: 'bicycle', 3: 'car',
        4: 'motorcycle', 5: 'airplane', 6: 'bus', 7: 'train', 8: 'truck',
        9: 'boat', 10: 'traffic light', 11: 'fire hydrant', 12: 'stop sign',
        13: 'parking meter', 14: 'bench', 15: 'bird', 16: 'cat', 17: 'dog',
        18: 'horse', 19: 'sheep', 20: 'cow', 21: 'elephant', 22: 'bear',
        23: 'zebra', 24: 'giraffe', 25: 'backpack', 26: 'umbrella',
        27: 'handbag', 28: 'tie', 29: 'suitcase', 30: 'frisbee', 31: 'skis',
        32: 'snowboard', 33: 'sports ball', 34: 'kite', 35: 'baseball bat',
        36: 'baseball glove', 37: 'skateboard', 38: 'surfboard',
        39: 'tennis racket', 40: 'bottle', 41: 'wine glass', 42: 'cup',
        43: 'fork', 44: 'knife', 45: 'spoon', 46: 'bowl', 47: 'banana',
        48: 'apple', 49: 'sandwich', 50: 'orange', 51: 'broccoli',
        52: 'carrot', 53: 'hot dog', 54: 'pizza', 55: 'donut', 56: 'cake',
        57: 'chair', 58: 'couch', 59: 'potted plant', 60: 'bed',
        61: 'dining table', 62: 'toilet', 63: 'tv', 64: 'laptop', 65: 'mouse',
        66: 'remote', 67: 'keyboard', 68: 'cell phone', 69: 'microwave',
        70: 'oven', 71: 'toaster', 72: 'sink', 73: 'refrigerator', 74: 'book',
        75: 'clock', 76: 'vase', 77: 'scissors', 78: 'teddy bear',
        79: 'hair drier', 80: 'toothbrush'
    }

    vidcap = cv2.VideoCapture(video_path)
    success, image = vidcap.read()

    # Build the detector on CPU, then move it to the GPU if one exists.
    retinanet = resnet50(num_classes=80, )
    retinanet.load_state_dict(
        torch.load(model_path, map_location=torch.device('cpu')))
    model = retinanet
    labels = dict(classes)
    if torch.cuda.is_available():
        model = model.cuda()
    model.training = False
    model.eval()

    # Writer is sized from the first (original, unresized) frame.
    rows, cols, cns = image.shape
    size = (cols, rows)
    out = cv2.VideoWriter('output3.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15,
                          size)

    while success:
        success, image = vidcap.read()
        if (not success) or (image is None):
            continue
        image_orig = image.copy()
        rows, cols, cns = image.shape
        smallest_side = min(rows, cols)

        # Rescale the image so the smallest side is min_side.
        min_side = 608
        max_side = 1024
        scale = min_side / smallest_side

        # Check if the largest side is now greater than max_side, which can
        # happen when images have a large aspect ratio.
        largest_side = max(rows, cols)
        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # Resize the image with the computed scale.
        image = cv2.resize(image,
                           (int(round(cols * scale)), int(round(rows * scale))))
        rows, cols, cns = image.shape

        # Pad each side up to a multiple of 32 (FPN stride requirement).
        # BUGFIX: the old `32 - x % 32` padded a useless extra 32-pixel band
        # when the side was already a multiple of 32.
        pad_w = (32 - rows % 32) % 32
        pad_h = (32 - cols % 32) % 32
        new_image = np.zeros(
            (rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)
        image = new_image.astype(np.float32)

        # ImageNet mean/std normalization.
        image /= 255
        image -= [0.485, 0.456, 0.406]
        image /= [0.229, 0.224, 0.225]
        image = np.expand_dims(image, 0)
        image = np.transpose(image, (0, 3, 1, 2))

        with torch.no_grad():
            image = torch.from_numpy(image)
            if torch.cuda.is_available():
                image = image.cuda()
            st = time.time()
            # BUGFIX: the tensor was already moved to the GPU above when one
            # is available; the unconditional .cuda() here crashed on
            # CPU-only hosts.
            scores, classification, transformed_anchors = model(image.float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                # Map box coordinates back to the original frame.
                x1 = int(bbox[0] / scale)
                y1 = int(bbox[1] / scale)
                x2 = int(bbox[2] / scale)
                y2 = int(bbox[3] / scale)
                # NOTE(review): `labels` is 1-based (0 = background) while the
                # detector emits 0-based class ids, so the caption shows the
                # raw id; confirm the offset before restoring name lookup via
                # labels[...].
                label_name = str(int(classification[idxs[0][j]]))
                # BUGFIX: index the score with the detection index, not the
                # loop counter (scores[j] read an unrelated detection).
                score = scores[idxs[0][j]]
                caption = '{} {:.3f}'.format(label_name, score)
                # draw_caption(img, (x1, y1, x2, y2), label_name)
                draw_caption(image_orig, (x1, y1, x2, y2), caption)
                cv2.rectangle(image_orig, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
            out.write(image_orig)
    out.release()
def main(args=None):
    """Train RetinaNet with TensorBoard logging, snapshot checkpointing and
    optional resume from a saved checkpoint.

    Args:
        args: optional list of CLI arguments (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--config', help='Config file path that contains scale and ratio values', type=str)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--init-lr', help='Initial learning rate for training process', type=float, default=1e-3)
    parser.add_argument('--batch-size', help='Number of input images per step', type=int, default=1)
    parser.add_argument('--num-workers', help='Number of worker used in dataloader', type=int, default=1)
    # For resuming training from saved checkpoint
    parser.add_argument('--resume', help='Whether to resume training from checkpoint', action='store_true')
    parser.add_argument('--saved-ckpt', help='Resume training from this checkpoint', type=str)
    parser.add_argument('--multi-gpus', help='Allow to use multi gpus for training task', action='store_true')
    parser.add_argument('--snapshots', help='Location to save training snapshots', type=str, default="snapshots")
    parser.add_argument('--log-dir', help='Location to save training logs', type=str, default="logs")
    parser.add_argument('--expr-augs', help='Allow to use use experiment augmentation methods', action='store_true')
    parser.add_argument('--aug-methods', help='(Experiment) Augmentation methods to use, separate by comma symbol', type=str,
                        default="rotate,hflip,brightness,contrast")
    parser.add_argument('--aug-prob', help='Probability of applying (experiment) augmentation in range [0.,1.]', type=float, default=0.5)
    parser = parser.parse_args(args)

    # Define transform methods; experimental augmentations are appended on demand.
    train_transforms = [Normalizer(), Resizer(), Augmenter()]
    if parser.expr_augs:
        aug_map = get_aug_map(p=parser.aug_prob)
        for aug in parser.aug_methods.split(","):
            if aug in aug_map:
                train_transforms.append(aug_map[aug])
            else:
                print(f"{aug} is not available.")

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose(train_transforms))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose(train_transforms))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=parser.num_workers,
                                  collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=parser.num_workers,
                                    collate_fn=collater, batch_sampler=sampler_val)

    # Optional anchor scales/ratios override from a config file.
    config = dict({"scales": None, "ratios": None})
    if parser.config:
        config = load_config(parser.config, config)

    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True,
                                   ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True,
                                   ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True,
                                   ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True,
                                    ratios=config["ratios"], scales=config["scales"])
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True,
                                    ratios=config["ratios"], scales=config["scales"])
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.init_lr)

    if parser.resume:
        if not parser.saved_ckpt:
            print("No saved checkpoint provided for resuming training. Exiting now...")
            return
        if not os.path.exists(parser.saved_ckpt):
            print("Invalid saved checkpoint path. Exiting now...")
            return
        # Restore last state (model weights, optimizer state, epoch counter).
        retinanet, optimizer, start_epoch = load_ckpt(parser.saved_ckpt, retinanet, optimizer)
        if parser.epochs <= start_epoch:
            print("Number of epochs must be higher than number of trained epochs of saved checkpoint.")
            return

    use_gpu = True
    if use_gpu:
        print("Using GPU for training process")
        if torch.cuda.is_available():
            if parser.multi_gpus:
                print("Using multi-gpus for training process")
                retinanet = torch.nn.DataParallel(retinanet.cuda(), device_ids=[0, 1])
            else:
                retinanet = torch.nn.DataParallel(retinanet.cuda())
        else:
            retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    # Tensorboard writer
    writer = SummaryWriter(parser.log_dir)
    # Save snapshots dir
    if not os.path.exists(parser.snapshots):
        os.makedirs(parser.snapshots)

    best_mAP = 0
    start_epoch = 0 if not parser.resume else start_epoch
    for epoch_num in range(start_epoch, parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        epoch_csf_loss = []
        epoch_reg_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    with torch.cuda.device(0):
                        classification_loss, regression_loss = retinanet(
                            [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                epoch_csf_loss.append(float(classification_loss))
                epoch_reg_loss.append(float(regression_loss))
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    '\rEpoch: {}/{} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        (epoch_num + 1), parser.epochs, (iter_num + 1), len(dataloader_train),
                        float(classification_loss), float(regression_loss), np.mean(loss_hist)), end='')
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        _epoch_loss = np.mean(epoch_loss)
        # BUGFIX: the classification average previously read epoch_reg_loss,
        # so the "classification-loss" TensorBoard curve duplicated the
        # regression curve.
        _epoch_csf_loss = np.mean(epoch_csf_loss)
        _epoch_reg_loss = np.mean(epoch_reg_loss)

        # BUGFIX: mAP was only assigned on the csv path, so the COCO path hit
        # a NameError at the best-checkpoint comparison below. Default to 0
        # (COCO runs never mark a snapshot as "best").
        mAP = 0
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
            scheduler.step(_epoch_loss)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('\nEvaluating dataset')
            APs = csv_eval.evaluate(dataset_val, retinanet)
            mAP = round(mean(APs[ap][0] for ap in APs.keys()), 5)
            print("mAP: %f" % mAP)
            writer.add_scalar("validate/mAP", mAP, epoch_num)
            # Handle lr_scheduler with mAP value
            scheduler.step(mAP)

        lr = get_lr(optimizer)
        writer.add_scalar("train/classification-loss", _epoch_csf_loss, epoch_num)
        writer.add_scalar("train/regression-loss", _epoch_reg_loss, epoch_num)
        writer.add_scalar("train/loss", _epoch_loss, epoch_num)
        writer.add_scalar("train/learning-rate", lr, epoch_num)

        # Save model file, optimizer and epoch number
        checkpoint = {
            'epoch': epoch_num,
            'state_dict': retinanet.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # Check whether this epoch's model achieves highest mAP value
        is_best = False
        if best_mAP < mAP:
            best_mAP = mAP
            is_best = True
        save_ckpt(checkpoint, is_best, parser.snapshots,
                  '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1))
        print('\n')

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
    writer.flush()
def main(args=None):
    """Evaluate a trained 2-class paps RetinaNet on the 'val_2class' split.

    Writes predicted and ground-truth detections as COCO-format JSON files
    under trained_models/eval/ and runs COCOeval on them, printing the
    standard AP summary.

    Args:
        args: optional list of CLI arguments (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description=
        'Simple paps training script for training a RetinaNet network.')
    parser.add_argument('--batch_size',
                        help='Number of batchs',
                        type=int,
                        default=0)
    parser.add_argument('--test_data',
                        help='test data file',
                        default='data/test.npy')
    parser.add_argument('--model_dir',
                        help='pretrained model dir',
                        default='trained_models/resnet50_640/model.pt')
    parser.add_argument('--threshold',
                        help='pretrained model dir',
                        type=float,
                        default=0.1)
    parser = parser.parse_args(args)

    GPU_NUM = 0  # desired GPU index
    device = torch.device(
        f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(device)  # change allocation of current GPU
    print('device', device)

    # The checkpoint was saved from a DataParallel model, so the module must
    # be wrapped before load_state_dict (keys are prefixed with "module.").
    retinanet = model.resnet50(num_classes=2, device=device)
    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=[GPU_NUM],
                                      output_device=GPU_NUM).to(device)
    retinanet.load_state_dict(torch.load(parser.model_dir))

    dataset_val = PapsDataset('data/',
                              set_name='val_2class',
                              transform=val_transforms)
    val_data_loader = DataLoader(dataset_val,
                                 batch_size=1,
                                 shuffle=False,
                                 num_workers=4,
                                 collate_fn=collate_fn)

    retinanet.eval()
    start_time = time.time()
    threshold = parser.threshold
    results = []      # predicted detections, COCO result format
    GT_results = []   # ground-truth boxes re-emitted as fake detections
    image_ids = []    # ids actually evaluated (restricts COCOeval)
    cnt = 0
    for index, data in enumerate(tqdm(val_data_loader)):
        # NOTE(review): this caps evaluation at ~100 images — looks like a
        # leftover debug limit; confirm before trusting reported metrics.
        if cnt > 100:
            break
        cnt += 1
        with torch.no_grad():
            images, tbox, tlabel, targets = data
            batch_size = len(images)
            c, h, w = images[0].shape
            images = torch.cat(images).view(-1, c, h, w).to(device)
            outputs = retinanet(images)
            scores, labels, boxes = (outputs)
            scores = scores.cpu()
            labels = labels.cpu()
            boxes = boxes.cpu()
            if boxes.shape[0] > 0:
                # change to (x, y, w, h) (MS COCO standard)
                boxes[:, 2] -= boxes[:, 0]
                boxes[:, 3] -= boxes[:, 1]
                # compute predicted labels and scores
                for box_id in range(boxes.shape[0]):
                    score = float(scores[box_id])
                    label = int(labels[box_id])
                    box = boxes[box_id, :]
                    # scores are sorted, so we can break
                    if score < threshold:
                        break
                    # append detection for each positively labeled class
                    image_result = {
                        'image_id': dataset_val.image_ids[index],
                        'category_id': dataset_val.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': box.tolist(),
                    }
                    # append detection to results
                    results.append(image_result)
            if len(tbox[0]) > 0:
                # Emit ground truth as detections with a fixed high score so
                # both files share the COCO results schema.
                for box_id in range(len(tbox[0])):
                    score = float(0.99)
                    label = (tlabel[0][box_id])
                    box = list(tbox[0][box_id])
                    # convert (x1, y1, x2, y2) -> (x, y, w, h)
                    box[2] -= box[0]
                    box[3] -= box[1]
                    image_result = {
                        'image_id': dataset_val.image_ids[index],
                        'category_id': dataset_val.label_to_coco_label(label),
                        'score': float(score),
                        'bbox': list(box),
                    }
                    GT_results.append(image_result)
            # append image to list of processed images
            image_ids.append(dataset_val.image_ids[index])
            # print progress
            print('{}/{}'.format(index, len(dataset_val)), end='\r')

    if not len(results):
        print('No object detected')
    print('GT_results', len(GT_results))
    print('pred_results', len(results))

    # write output
    json.dump(results,
              open(
                  'trained_models/eval/{}_bbox_results.json'.format(
                      dataset_val.set_name), 'w'),
              indent=4)
    # write GT
    json.dump(GT_results,
              open(
                  'trained_models/eval/{}_GTbbox_results.json'.format(
                      dataset_val.set_name), 'w'),
              indent=4)
    print('validation time :', time.time() - start_time)

    # load results in COCO evaluation tool
    coco_true = dataset_val.coco
    coco_pred = coco_true.loadRes(
        'trained_models/eval/{}_bbox_results.json'.format(
            dataset_val.set_name))
    coco_gt = coco_true.loadRes(
        'trained_models/eval/{}_GTbbox_results.json'.format(
            dataset_val.set_name))

    # run COCO evaluation against the re-loaded GT (not coco_true directly)
    coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
    coco_eval.params.imgIds = image_ids
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
def main(args=None):
    """Visualize RetinaNet detections on a validation set, one window per
    image (press any key to advance).

    Args:
        args: optional list of CLI arguments (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='coco')
    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        default='cocodataset')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--model_path',
                        help='Path to model (.pt) file.',
                        type=str,
                        default='coco_resnet_50_map_0_335_state_dict.pt')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        # BUGFIX: this branch referenced parser.csv_train, which is not a
        # defined option here (AttributeError); the validation annotations
        # passed via --csv_val are what this visualizer iterates.
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model and restore the checkpoint weights.
    # (A dead `retinanet = torch.load(parser.model_path)` that was immediately
    # overwritten, and a duplicated use_gpu/.cuda() stanza, were removed.)
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=True)
    # BUGFIX: guard .cuda() so CPU-only hosts do not crash; map_location lets
    # a GPU-saved checkpoint load either way.
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.load_state_dict(
        torch.load(parser.model_path, map_location=torch.device('cpu')))
    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        # Dark outline then light fill so text stays readable on any frame.
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            batch = data['img'].float()
            if use_gpu:
                batch = batch.cuda()
            scores, classification, transformed_anchors = retinanet(batch)
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)

            # Undo normalization to recover a displayable uint8 image.
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
import torch from retinanet import model from icecream import ic retinanet = model.resnet50(num_classes=81, pretrained=False).cuda() retinanet.eval() x = torch.rand([10, 3, 128, 128]).cuda() retinanet(x)
def main(args=None):
    """Fine-tune a COCO-pretrained RetinaNet on a new dataset by freezing the
    backbone and retraining only freshly initialized regression and
    classification heads.

    Args:
        args: optional list of CLI arguments (defaults to sys.argv).
    """

    def _str2bool(value):
        # argparse's type=bool is a trap: bool("False") is True because any
        # non-empty string is truthy. Parse common spellings explicitly so
        # "--gpu False" actually disables the GPU.
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() in ('true', '1', 'yes', 'y')

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--model', help='Path to model (.pt) file.')
    # BUGFIX: these two flags used type=bool, which made every non-empty
    # string (including "False") parse as True.
    parser.add_argument('--finetune',
                        help='if load trained retina model',
                        type=_str2bool,
                        default=False)
    parser.add_argument('--gpu', help='', type=_str2bool, default=False)
    parser.add_argument('--batch_size', help='', type=int, default=2)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    use_gpu = parser.gpu

    # Load the 80-class COCO-pretrained model, freeze its weights, then swap
    # in fresh heads sized for the target dataset.
    retinanet = model.resnet50(num_classes=80, pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))
    for param in retinanet.parameters():
        param.requires_grad = False
    retinanet.regressionModel = model.RegressionModel(256)
    retinanet.classificationModel = model.ClassificationModel(
        256, num_classes=dataset_train.num_classes())

    # Focal-loss style head initialization: zero weights and bias the
    # classifier so training starts with a low foreground prior.
    prior = 0.01
    retinanet.classificationModel.output.weight.data.fill_(0)
    retinanet.classificationModel.output.bias.data.fill_(-math.log(
        (1.0 - prior) / prior))
    retinanet.regressionModel.output.weight.data.fill_(0)
    retinanet.regressionModel.output.bias.data.fill_(0)

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if use_gpu and torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    # Only the new heads are optimized; the frozen backbone is excluded.
    optimizer = optim.Adam(
        [{
            'params': retinanet.module.regressionModel.parameters()
        }, {
            'params': retinanet.module.classificationModel.parameters()
        }], 1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if use_gpu and torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        if epoch_num % 5 == 0:
            torch.save(
                retinanet.module,
                '{}_freezinetune_{}.pt'.format(parser.dataset, epoch_num))
    retinanet.eval()
def main(args=None):
    """Train RetinaNet (with selectable backbone architectures) while logging
    metrics to a Neptune experiment; saves per-epoch snapshots and a final
    model under --output_folder/--exp_name.

    Args:
        args: optional list of CLI arguments (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument('--exp_name',
                        help='Path to folder for saving the model and log',
                        type=str)
    parser.add_argument('--output_folder',
                        help='Path to folder for saving all the experiments',
                        type=str)
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size', help='Batch size', type=int, default=2)
    parser.add_argument('--lr', help='Number of epochs', type=float,
                        default=1e-5)
    parser.add_argument('--caption',
                        help='Any thing in particular about the experiment',
                        type=str)
    parser.add_argument('--server',
                        help='seerver name',
                        type=str,
                        default='ultron')
    parser.add_argument('--detector',
                        help='detection algo',
                        type=str,
                        default='RetinaNet')
    parser.add_argument('--arch', help='model architecture', type=str)
    parser.add_argument('--pretrain', default=False, action='store_true')
    parser.add_argument('--freeze_batchnorm',
                        default=False,
                        action='store_true')
    parser = parser.parse_args(args)

    output_folder_path = os.path.join(parser.output_folder, parser.exp_name)
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Hyper-parameters and metadata recorded with the Neptune experiment.
    PARAMS = {
        'dataset': parser.dataset,
        'exp_name': parser.exp_name,
        'depth': parser.depth,
        'epochs': parser.epochs,
        'batch_size': parser.batch_size,
        'lr': parser.lr,
        'caption': parser.caption,
        'server': parser.server,
        'arch': parser.arch,
        'pretrain': parser.pretrain,
        'freeze_batchorm': parser.freeze_batchnorm
    }
    exp = neptune.create_experiment(
        name=parser.exp_name,
        params=PARAMS,
        tags=[parser.arch, parser.detector, parser.dataset, parser.server])

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')
        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on COCO,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model: backbone is selected by the (--depth, --arch) pair.
    if parser.depth == 18 and parser.arch == 'Resnet':
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 10 and parser.arch == 'Resnet':
        retinanet = model.resnet10(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 18 and parser.arch == 'BiRealNet18':
        # BiRealNet loads its ImageNet baseline from a hard-coded checkpoint
        # path when --pretrain is given.
        checkpoint_path = None
        if parser.pretrain:
            checkpoint_path = '/media/Rozhok/Bi-Real-net/pytorch_implementation/BiReal18_34/models/imagenet_baseline/checkpoint.pth.tar'
        retinanet = birealnet18(checkpoint_path,
                                num_classes=dataset_train.num_classes())
    elif parser.depth == 34 and parser.arch == 'Resnet':
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 50 and parser.arch == 'Resnet':
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=parser.pretrain)
    elif parser.depth == 101 and parser.arch == 'Resnet':
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=parser.pretrain)
    elif parser.depth == 152:
        # NOTE(review): unlike the other depths this branch does not check
        # parser.arch, so e.g. --depth 152 --arch ofa still builds resnet152
        # — confirm whether that is intended.
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=parser.pretrain)
    elif parser.arch == 'ofa':
        print("Model is ResNet50D.")
        bn_momentum = 0.1
        bn_eps = 1e-5
        retinanet = ResNet50D(
            n_classes=dataset_train.num_classes(),
            bn_param=(bn_momentum, bn_eps),
            dropout_rate=0,
            width_mult=1.0,
            depth_param=3,
            expand_ratio=0.35,
        )
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    print(retinanet)

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    # BatchNorm freezing is opt-in for this script (off by default).
    if parser.freeze_batchnorm:
        retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        exp.log_metric('Current lr', float(optimizer.param_groups[0]['lr']))
        exp.log_metric('Current epoch', int(epoch_num))
        retinanet.train()
        if parser.freeze_batchnorm:
            retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                # Gradient clipping stabilizes early training.
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                exp.log_metric('Training: Classification loss',
                               float(classification_loss))
                exp.log_metric('Training: Regression loss',
                               float(regression_loss))
                exp.log_metric('Training: Totalloss', float(loss))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val,
                                    retinanet,
                                    output_folder_path,
                                    exp=exp)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        # Scheduler steps on mean training loss (not validation metric).
        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet.module,
            os.path.join(
                output_folder_path,
                '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()
    torch.save(retinanet, os.path.join(output_folder_path, 'model_final.pt'))
def main(args=None):
    """Visualise RetinaNet detections on the COCO val2017 split.

    Loads trained weights from --model_path into a ResNet-50 RetinaNet,
    runs inference over every validation image, and displays each image
    with boxes and class labels for detections scoring > 0.5.  Blocks on
    cv2.waitKey(0) between images (press any key to advance).

    Args:
        args: optional list of CLI argument strings; None reads sys.argv.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)
    # Note: `parser` is rebound to the parsed argument namespace here.
    parser = parser.parse_args(args)

    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    # Create the model
    # NOTE(review): pretrained=True fetches ImageNet backbone weights that are
    # immediately overwritten by load_state_dict below — wasted download.
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(),
                               pretrained=True)

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    # Load the trained weights into the bare model first (the checkpoint keys
    # carry no 'module.' prefix), then wrap in DataParallel for inference.
    if torch.cuda.is_available():
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    # Put the network in inference mode; freeze_bn keeps BatchNorm statistics
    # fixed (defined on the wrapped model, hence the .module access).
    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    def draw_caption(image, box, caption):
        # Draw `caption` just above the top-left corner of `box`: thick black
        # text first, then thin white text on top, for readability on any background.
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10),
                    cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10),
                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataset_val):
        with torch.no_grad():
            st = time.time()
            # run network: image goes HWC -> CHW and gains a batch dim of 1
            if torch.cuda.is_available():
                scores, labels, boxes = retinanet(data['img'].permute(
                    2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = retinanet(data['img'].permute(
                    2, 0, 1).float().unsqueeze(dim=0))
            print('Elapsed time: {}'.format(time.time() - st))
            # Keep only confident detections (score threshold 0.5).
            idxs = np.where(scores.cpu() > 0.5)
            # Invert the Normalizer transform (x * std + mean); the constants
            # match the standard ImageNet channel statistics.
            tensor = data['img'] * np.array(
                [[[0.229, 0.224, 0.225]]]) + np.array([[[0.485, 0.456, 0.406]]
                                                       ])
            img = tensor.mul(255).clamp(0, 255).byte().cpu().numpy()
            # NOTE(review): this assumes the dataset yields channel-reversed
            # images — confirm against CocoDataset before changing.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            for j in range(idxs[0].shape[0]):
                bbox = boxes[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(labels[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255),
                              thickness=2)
                print(label_name)
            cv2.imshow('img', img)
            cv2.waitKey(0)
# -*- coding: utf-8 -*- """ Created on Fri Aug 21 12:30:54 2020 @author: worklab """ from retinanet.model import resnet50 import time import torch device = torch.device("cuda:0") detector = resnet50(13,pretrained = True) detector = detector.to(device) detector.eval() detector.freeze_bn() transfer_times = [] detect_times = [] batch_sizes = [1,2,3,5,7,10,12,16,20,24,30,40,50,60,75,90,100] for b in batch_sizes: transfer_time = 0 detect_time = 0 for i in range(0,1000): data = torch.randn([b,3,960,540]) #data = torch.randn([b,3,2000,1000]) start = time.time()
def main(args=None):
    """RegiGraph training entry point (RetinaNet + graph loss).

    Builds train/val loaders for the chosen dataset, trains for
    --num_epochs with Adam, logs losses to TensorBoard, checkpoints the
    model whenever the validation loss improves, and optionally stops
    early after --es_patience epochs without improvement.

    Args:
        args: optional list of CLI argument strings; None reads sys.argv.
    """
    parser = argparse.ArgumentParser(
        description=
        'RegiGraph Pytorch Implementation Training Script. - Ahmed Nassar (ETHZ, IRISA).'
    )
    parser.add_argument("--batch_size",
                        type=int,
                        default=4,
                        help="The number of images per batch")
    # NOTE(review): --lr is parsed but never used; the optimizer below is
    # hard-coded to lr=1e-5.
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument(
        '--dataset_root',
        default='../datasets',
        help=
        'Dataset root directory path [../datasets/VOC, ../datasets/mapillary]')
    parser.add_argument('--dataset',
                        default='Pasadena',
                        choices=['Pasadena', 'Pasadena_Aerial', 'mapillary'],
                        type=str,
                        help='Pasadena, Pasadena_Aerial or mapillary')
    # default is the string "0", converted to int by argparse via type=int.
    parser.add_argument("--overfit", type=int, default="0")
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument("--num_epochs", type=int, default=100)
    parser.add_argument("--log_path", type=str, default="tensorboard/")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--test_interval",
                        type=int,
                        default=1,
                        help="Number of epoches between testing phases")
    parser.add_argument(
        "--es_min_delta",
        type=float,
        default=0.0,
        help=
        "Early stopping's parameter: minimum change loss to qualify as an improvement"
    )
    parser.add_argument(
        "--es_patience",
        type=int,
        default=0,
        help=
        "Early stopping's parameter: number of epochs with no improvement after which training will be stopped. Set to 0 to disable this technique."
    )
    parser.add_argument("--cluster", type=int, default=0)
    opt = parser.parse_args(args)

    # Seed for reproducibility (GPU and CPU paths use the same seed).
    # NOTE(review): num_gpus is computed but never used afterwards.
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    # All three supported dataset names are served by the VOC-style loader.
    if (opt.dataset == 'Pasadena' or opt.dataset == 'mapillary'
            or opt.dataset == 'Pasadena_Aerial'):
        train_dataset = VOCDetection(root=opt.dataset_root,
                                     overfit=opt.overfit,
                                     image_sets="trainval",
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Augmenter(),
                                         Resizer()
                                     ]),
                                     dataset_name=opt.dataset)
        valid_dataset = VOCDetection(root=opt.dataset_root,
                                     overfit=opt.overfit,
                                     image_sets="val",
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]),
                                     dataset_name=opt.dataset)
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(train_dataset, batch_size=2, drop_last=False)
    training_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 4
    }
    training_generator = DataLoader(train_dataset, **training_params)

    if valid_dataset is not None:
        test_params = {
            "batch_size": opt.batch_size,
            "shuffle": False,
            "drop_last": False,
            "collate_fn": collater,
            "num_workers": 4
        }
        # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        test_generator = DataLoader(valid_dataset, **test_params)

    # Create the model (backbone depth selects the ResNet variant).
    if opt.depth == 18:
        retinanet = model.resnet18(num_classes=train_dataset.num_classes(),
                                   pretrained=True)
    elif opt.depth == 34:
        retinanet = model.resnet34(num_classes=train_dataset.num_classes(),
                                   pretrained=True)
    elif opt.depth == 50:
        retinanet = model.resnet50(num_classes=train_dataset.num_classes(),
                                   pretrained=True)
    elif opt.depth == 101:
        retinanet = model.resnet101(num_classes=train_dataset.num_classes(),
                                    pretrained=True)
    elif opt.depth == 152:
        retinanet = model.resnet152(num_classes=train_dataset.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    # Always wrap in DataParallel so the .module accesses below are valid.
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    # Fresh TensorBoard log directory on every run (old logs are deleted).
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    retinanet.training = True
    writer = SummaryWriter(opt.log_path + "regigraph_bs_" +
                           str(opt.batch_size) + "_dataset_" + opt.dataset +
                           "_backbone_" + str(opt.depth))

    # NOTE(review): learning rate is fixed at 1e-5 here, ignoring --lr.
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)  # running-loss window
    best_loss = 1e5
    best_epoch = 0
    retinanet.train()
    # Freeze BatchNorm running statistics (common with small detection batches).
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(train_dataset)))
    num_iter_per_epoch = len(training_generator)

    for epoch in range(opt.num_epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                # The model consumes [images, annotations, geo data, batch map]
                # and returns the three loss components directly.
                if torch.cuda.is_available():
                    classification_loss, regression_loss, graph_loss = retinanet(
                        [
                            data['img'].cuda().float(), data['annot'],
                            data['geo'], data['batch_map']
                        ])
                else:
                    classification_loss, regression_loss, graph_loss = retinanet(
                        [
                            data['img'].float(), data['annot'], data['geo'],
                            data['batch_map']
                        ])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                graph_loss = graph_loss.mean()
                loss = classification_loss + regression_loss + graph_loss
                # Skip degenerate batches that contribute no gradient.
                if bool(loss == 0):
                    continue
                loss.backward()
                # Clip gradients to max norm 0.1 for training stability.
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)
                # Suppress the per-iteration progress text on cluster runs.
                if opt.cluster == 0:
                    progress_bar.set_description(
                        'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Graph loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                        .format(epoch + 1, opt.num_epochs, iter + 1,
                                num_iter_per_epoch, classification_loss,
                                regression_loss, graph_loss, float(loss),
                                total_loss))
                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', regression_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classfication_loss (focal loss)',
                                  classification_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Graph_loss', graph_loss,
                                  epoch * num_iter_per_epoch + iter)
                del classification_loss
                del regression_loss
                del graph_loss
            except Exception as e:
                # Best-effort loop: log the failing batch and continue training.
                print(e)
                continue
        # LR schedule keys off the mean training loss of the epoch.
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            # Validation pass: accumulate the three loss components.
            retinanet.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            loss_graph_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        classification_loss, regression_loss, graph_loss = retinanet(
                            [
                                data['img'].cuda().float(), data['annot'],
                                data['geo'], data['batch_map']
                            ])
                    else:
                        classification_loss, regression_loss, graph_loss = retinanet(
                            [
                                data['img'].float(), data['annot'],
                                data['geo'], data['batch_map']
                            ])
                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()
                    graph_loss = graph_loss.mean()
                    loss_classification_ls.append(float(classification_loss))
                    loss_regression_ls.append(float(regression_loss))
                    loss_graph_ls.append(float(graph_loss))
            # print(len(loss_classification_ls),len(loss_regression_ls),len(loss_graph_ls))
            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            gph_loss = np.mean(loss_graph_ls)
            loss = cls_loss + reg_loss + gph_loss
            print(
                '- Val Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. * Graph loss: {:1.5f}. Total loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                        gph_loss, np.mean(loss)))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Graph_loss (graph loss)', gph_loss, epoch)
            writer.add_scalar('Test/Classfication_loss (focal loss)',
                              cls_loss, epoch)

            # Checkpoint only on improvement (es_min_delta is the margin).
            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                # mAP = csv_eval.evaluate(valid_dataset, retinanet)
                # print(mAP)
                # Save the unwrapped model so it loads without DataParallel.
                torch.save(
                    retinanet.module,
                    os.path.join(
                        opt.saved_path,
                        "regigraph_bs_" + str(opt.batch_size) + "_dataset_" +
                        opt.dataset + "_epoch_" + str(epoch + 1) +
                        "_backbone_" + str(opt.depth) + ".pth"))
            # Early stopping (disabled when es_patience == 0).
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, loss))
                break
    writer.close()
def initialize(self, opt):
    """Build all networks, losses and optimizers for this GAN model.

    Creates input tensors, a generator, two discriminators (image-level
    and person-level), a RetinaNet-based detector, and — in training
    mode — the GAN/L1 criteria and one Adam optimizer per network.
    Optionally restores saved weights when resuming or testing.

    Args:
        opt: option namespace; fields used here include isTrain,
            batchSize, input_nc/output_nc, fineSize, ngf/ndf, norm,
            no_dropout, no_lsgan, which_model_netG/netD, n_layers_D,
            continue_train, which_epoch, pool_size, lr, beta1.
    """
    BaseModel.initialize(self, opt)
    # self.opt = opt
    self.isTrain = opt.isTrain
    # define tensors
    self.input_A = self.Tensor(opt.batchSize, opt.input_nc, opt.fineSize,
                               opt.fineSize)
    self.input_B = self.Tensor(opt.batchSize, opt.output_nc, opt.fineSize,
                               opt.fineSize)
    # Standard [-1, 1] normalisation used by the GAN pipeline.
    transform_list = [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ]
    self.transform = transforms.Compose(transform_list)
    # Auxiliary detector: 2-class RetinaNet, always placed on GPU.
    self.det = model.resnet50(num_classes=2, pretrained=True).cuda()
    # load/define networks
    self.netG = networks.define_G(opt.input_nc, opt.output_nc, opt.ngf,
                                  opt.which_model_netG, opt.norm,
                                  not opt.no_dropout, self.gpu_ids)
    #self.netG = networks.define_G(3, 3,opt.ngf, "PATN", "instance", not True, "normal", #0,n_downsampling=2)
    if self.isTrain:
        # Sigmoid output is needed for vanilla GAN loss but not for LSGAN.
        # NOTE(review): the flag is inverted between the two discriminators
        # (opt.no_lsgan for the image D, `not opt.no_lsgan` for the person D)
        # — confirm this asymmetry is intentional.
        use_sigmoid = opt.no_lsgan
        self.netD_image = networks.define_image_D(
            opt.input_nc + opt.output_nc, opt.ndf, opt.which_model_netD,
            opt.n_layers_D, opt.norm, use_sigmoid, self.gpu_ids)
        use_sigmoid = not opt.no_lsgan
        self.det.training = True
        self.det.train()
        self.netD_person = networks.define_person_D_AC(
            opt.input_nc, opt.ndf, opt, use_sigmoid, self.gpu_ids)
    # Restore saved weights when testing or resuming training.
    if not self.isTrain or opt.continue_train:
        #print(opt.which_epoch)
        self.load_network(self.netG, 'G', opt.which_epoch)
        if self.isTrain:
            self.load_network(self.netD_image, 'D_image', opt.which_epoch)
            self.load_network(self.netD_person, 'D_person', opt.which_epoch)
    if self.isTrain:
        self.fake_AB_pool = ImagePool(opt.pool_size)
        self.old_lr = opt.lr
        # define loss functions
        #print('haha'+ str(opt.no_lsgan))
        # self.criterionGAN = networks.GANLoss(use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
        # NOTE(review): use_lsgan is `not opt.no_lsgan` for the image loss but
        # plain `opt.no_lsgan` for the person loss — verify this is deliberate.
        self.criterionGAN_image = networks.GANLoss(
            use_lsgan=not opt.no_lsgan, tensor=self.Tensor)
        self.criterionGAN_person = networks.GANLoss(use_lsgan=opt.no_lsgan,
                                                    tensor=self.Tensor)
        self.criterionL1 = torch.nn.L1Loss()
        # initialize optimizers (one Adam per trainable network, shared lr/beta1)
        self.optimizer_G = torch.optim.Adam(self.netG.parameters(),
                                            lr=opt.lr,
                                            betas=(opt.beta1, 0.999))
        self.optimizer_D_image = torch.optim.Adam(
            self.netD_image.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
        self.optimizer_D_person = torch.optim.Adam(
            self.netD_person.parameters(), lr=opt.lr,
            betas=(opt.beta1, 0.999))
        self.optimizer_det = torch.optim.Adam(self.det.parameters(),
                                              lr=opt.lr,
                                              betas=(opt.beta1, 0.999))
    print('---------- Networks initialized -------------')
    networks.print_network(self.netG)
    if self.isTrain:
        networks.print_network(self.netD_image)
        networks.print_network(self.netD_person)
    print('-----------------------------------------------')
transform=transforms.Compose([Normalizer(), Resizer()]))  # closes the dataset constructor call started above

# In[ ]:

# Aspect-ratio-grouped batching for train (bs=8) and val (bs=1) loaders.
sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
dataloader_train = DataLoader(dataset_train,
                              num_workers=3,
                              collate_fn=collater,
                              batch_sampler=sampler)
if dataset_val is not None:
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

# Create the model
# NOTE(review): this model is discarded immediately — it is rebound below to
# the 80-class checkpoint model. This construction appears to be dead code.
retinanet = model.resnet50(num_classes=dataset_train.num_classes())
use_gpu = True

# Initialising the checkpoint
num_classes = 8
PATH_TO_WEIGHTS = "../pretrained_weights.pt"
# Build an 80-class (COCO) model so the pretrained state dict matches, then
# swap the classification head for the 8-class task.
retinanet = retinanet_model.resnet50(80)
checkpoint = torch.load(PATH_TO_WEIGHTS)
retinanet.load_state_dict(checkpoint)
# NOTE(review): assumes the classification subnet exposes an `fc` layer with
# 720 input features — confirm against the model definition before relying on it.
retinanet.classificationModel.fc = nn.Linear(720, num_classes)
if use_gpu:
    retinanet = retinanet.cuda()
print("Model retinanet : ",retinanet)
def main(args=None):
    """Train a RetinaNet detector on COCO and evaluate after every epoch.

    Args:
        args: optional list of command-line argument strings; ``None``
            falls back to ``sys.argv[1:]``.

    Side effects: writes one checkpoint per epoch
    ('<dataset>_retinanet_<epoch>.pt') and a final 'model_final.pt'
    in the current working directory.

    Raises:
        ValueError: for an unsupported --dataset or --depth value.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default='coco')
    parser.add_argument(
        '--coco_path',
        help='Path to COCO directory',
        default='/media/zhuzhu/ec114170-f406-444f-bee7-a3dc0a86cfa2/dataset/coco')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int,
                        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    # NOTE(review): with action='store_false' and default=True, passing
    # --use-gpu actually *disables* the GPU. Kept unchanged so existing
    # command lines still work, but the flag name is misleading.
    parser.add_argument('--use-gpu',
                        help='training on cpu or gpu',
                        action='store_false',
                        default=True)
    parser.add_argument('--device-ids', help='GPU device ids', default=[0])
    # Fix: honour the `args` parameter (was parser.parse_args(), which always
    # read sys.argv and silently ignored arguments passed to main()).
    args = parser.parse_args(args)

    # ------------------------------ Create the data loaders -----------------------------
    if args.dataset == 'coco':
        if args.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(args.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(args.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    else:
        # Fix: an unsupported dataset previously fell through and crashed
        # later with a NameError on dataset_train; fail fast instead.
        raise ValueError('Dataset type not understood (only coco is supported here), exiting.')

    # Aspect-ratio-grouped batching: bs=2 for training, bs=1 for validation.
    sampler_train = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler_train)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model (backbone depth selects the ResNet variant).
    if args.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif args.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif args.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=False)
    elif args.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif args.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Fix: use CUDA only when requested *and* available. The old code called
    # .cuda() on every batch unconditionally and accessed retinanet.module
    # even when DataParallel had been skipped, crashing on CPU-only machines.
    use_cuda = args.use_gpu and torch.cuda.is_available()
    if use_cuda:
        retinanet = nn.DataParallel(retinanet, device_ids=args.device_ids).cuda()
    # `net` is the underlying model regardless of the DataParallel wrapper.
    net = retinanet.module if isinstance(retinanet, nn.DataParallel) else retinanet

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # Reduce the LR when the epoch-mean training loss plateaus.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)  # running-loss window for logging

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(args.epochs):
        retinanet.train()
        # Keep BatchNorm statistics frozen (tiny detection batches).
        net.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                imgs = data['img'].cuda().float() if use_cuda else data['img'].float()
                classification_loss, regression_loss = retinanet([imgs, data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                # Skip degenerate batches that contribute no gradient.
                if bool(loss == 0):
                    continue
                loss.backward()
                # Clip gradients to a max norm of 0.1 for training stability.
                nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                # Best-effort loop: log the failing batch and keep training.
                print(e)
                continue
        if args.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))
        # Fix: save the unwrapped model whether or not DataParallel was used
        # (retinanet.module raised AttributeError on the CPU path).
        torch.save(net, '{}_retinanet_{}.pt'.format(args.dataset, epoch_num))
    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')