def __init__(self, num_anchors=9, num_classes=80, compound_coef=0,
             load_weights=False, **kwargs):
    """Build the BiFPN neck, box/class heads, anchors and EfficientNet backbone.

    Args:
        num_anchors: Kept for interface compatibility only; the value is
            ignored and recomputed as
            ``len(self.aspect_ratios) * self.num_scales``.
        num_classes: Number of classes handed to the classification head.
        compound_coef: Index (0-7) into the per-variant tables below.
        load_weights: Forwarded unchanged to ``EfficientNet``.
        **kwargs: Forwarded unchanged to ``Anchors``.
    """
    super(EfficientDetBackbone, self).__init__()
    self.compound_coef = compound_coef

    # Per-variant settings, all indexed by compound_coef.
    # Index 7 reuses the backbone index and P3/P4/P5 channel widths of
    # index 6, so every table in this constructor covers 0-7.
    self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6]
    self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
    self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
    self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
    self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]
    self.aspect_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    self.num_scales = 3

    conv_channel_coef = {
        # the channels of P3/P4/P5.
        0: [40, 112, 320],
        1: [40, 112, 320],
        2: [48, 120, 352],
        3: [48, 136, 384],
        4: [56, 160, 448],
        5: [64, 176, 512],
        6: [72, 200, 576],
        7: [72, 200, 576],
    }

    # The caller-supplied num_anchors is deliberately overwritten here.
    num_anchors = len(self.aspect_ratios) * self.num_scales

    fpn_channels = self.fpn_num_filters[compound_coef]
    head_layers = self.box_class_repeats[compound_coef]

    self.bifpn = nn.Sequential(*[
        BiFPN(fpn_channels,
              conv_channel_coef[compound_coef],
              cell_idx == 0,  # True only for the first cell in the stack
              attention=compound_coef < 6)
        for cell_idx in range(self.fpn_cell_repeats[compound_coef])
    ])

    self.num_classes = num_classes
    self.regressor = Regressor(in_channels=fpn_channels,
                               num_anchors=num_anchors,
                               num_layers=head_layers)
    self.classifier = Classifier(in_channels=fpn_channels,
                                 num_anchors=num_anchors,
                                 num_classes=num_classes,
                                 num_layers=head_layers)

    self.anchors = Anchors(anchor_scale=self.anchor_scale[compound_coef],
                           **kwargs)

    # Initialise the neck and heads. This loop runs before the backbone is
    # attached, so the backbone's own parameters are not touched by it.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()

    self.backbone_net = EfficientNet(
        self.backbone_compound_coef[compound_coef], load_weights)
def __init__(self, num_classes=80, compound_coef=0, load_weights=False,
             onnx_export=False, **kwargs):
    """Assemble the detector for the variant selected by ``compound_coef``.

    Args:
        num_classes: Number of classes handed to the classification head.
        compound_coef: Index (0-7) into the per-variant tables below.
        load_weights: Forwarded unchanged to ``EfficientNet``.
        onnx_export: Forwarded to every ``BiFPN`` cell and to both heads.
        **kwargs: ``'ratios'`` and ``'scales'`` are read here to size the
            heads; the full mapping is then forwarded to ``Anchors``.
    """
    super(EfficientDetBackbone, self).__init__()
    self.compound_coef = compound_coef

    # Per-variant settings, all indexed by compound_coef.
    self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6]
    self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
    self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
    self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
    self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5.]

    default_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    default_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    self.aspect_ratios = kwargs.get('ratios', default_ratios)
    self.num_scales = len(kwargs.get('scales', default_scales))

    # the channels of P3/P4/P5.
    conv_channel_coef = {
        0: [40, 112, 320],
        1: [40, 112, 320],
        2: [48, 120, 352],
        3: [48, 136, 384],
        4: [56, 160, 448],
        5: [64, 176, 512],
        6: [72, 200, 576],
        7: [72, 200, 576],
    }

    anchors_per_cell = len(self.aspect_ratios) * self.num_scales
    fpn_width = self.fpn_num_filters[compound_coef]
    head_depth = self.box_class_repeats[compound_coef]
    use_attention = compound_coef < 6

    # Stack the BiFPN cells; only the first one receives a True flag.
    bifpn_cells = []
    for cell_idx in range(self.fpn_cell_repeats[compound_coef]):
        is_first_cell = cell_idx == 0
        bifpn_cells.append(
            BiFPN(fpn_width,
                  conv_channel_coef[compound_coef],
                  is_first_cell,
                  onnx_export=onnx_export,
                  attention=use_attention))
    self.bifpn = nn.Sequential(*bifpn_cells)

    self.num_classes = num_classes

    self.regressor = Regressor(in_channels=fpn_width,
                               num_anchors=anchors_per_cell,
                               num_layers=head_depth,
                               onnx_export=onnx_export)
    self.classifier = Classifier(in_channels=fpn_width,
                                 num_anchors=anchors_per_cell,
                                 num_classes=num_classes,
                                 num_layers=head_depth,
                                 onnx_export=onnx_export)

    self.anchors = Anchors(anchor_scale=self.anchor_scale[compound_coef],
                           **kwargs)

    backbone_index = self.backbone_compound_coef[compound_coef]
    self.backbone_net = EfficientNet(backbone_index, load_weights)
def reload_cls_reg(self):
    """Replace the regression and classification heads with new instances.

    Both heads are rebuilt from the settings stored on ``self`` for the
    current ``compound_coef``; the previous head modules are discarded.
    """
    coef = self.compound_coef
    # Arguments shared by both heads.
    shared_args = {
        'in_channels': self.fpn_num_filters[coef],
        'num_anchors': len(self.aspect_ratios) * self.num_scales,
        'num_layers': self.box_class_repeats[coef],
    }
    self.regressor = Regressor(**shared_args)
    self.classifier = Classifier(num_classes=self.num_classes, **shared_args)
def __init__(self, in_channels, num_anchors, num_layers, num_inst_classes,
             num_obj_classes):
    """Create the instance branch: two classifiers and one box regressor.

    Args:
        in_channels: Passed as ``in_channels`` to every head.
        num_anchors: Passed as ``num_anchors`` to every head.
        num_layers: Passed as ``num_layers`` to every head.
        num_inst_classes: ``num_classes`` of ``action_classifier``.
        num_obj_classes: ``num_classes`` of ``object_classifier``.
    """
    super(Instance_Branch, self).__init__()

    # Keep the configuration on the module.
    self.num_layers = num_layers
    self.num_anchors = num_anchors
    self.in_channels = in_channels
    self.num_inst_classes = num_inst_classes
    self.num_obj_classes = num_obj_classes

    # Arguments common to all three heads.
    head_args = dict(in_channels=in_channels,
                     num_anchors=num_anchors,
                     num_layers=num_layers)

    self.action_classifier = Classifier(num_classes=num_inst_classes,
                                        **head_args)
    self.object_classifier = Classifier(num_classes=num_obj_classes,
                                        **head_args)
    self.object_regressor = Regressor(**head_args)
def __init__(self, in_channels, num_anchors, num_layers, num_union_classes,
             num_obj_classes):
    """Create the union branch: one classifier and two box regressors.

    Args:
        in_channels: Passed as ``in_channels`` to every head.
        num_anchors: Passed as ``num_anchors`` to every head.
        num_layers: Passed as ``num_layers`` to every head.
        num_union_classes: ``num_classes`` of ``action_classifier``.
        num_obj_classes: Stored on the module; no head built here uses it.
    """
    super(Union_Branch, self).__init__()

    # Keep the configuration on the module.
    self.num_layers = num_layers
    self.num_anchors = num_anchors
    self.in_channels = in_channels
    self.num_union_classes = num_union_classes
    self.num_obj_classes = num_obj_classes

    # Arguments common to all three heads.
    head_args = dict(in_channels=in_channels,
                     num_anchors=num_anchors,
                     num_layers=num_layers)

    self.action_classifier = Classifier(num_classes=num_union_classes,
                                        **head_args)
    self.union_sub_regressor = Regressor(**head_args)
    self.union_obj_regressor = Regressor(**head_args)
def __init__(self, num_classes=80, compound_coef=0, load_weights=False,
             **kwargs):
    """Assemble the detector for the variant selected by ``compound_coef``.

    Args:
        num_classes: Number of classes handed to the classification head.
        compound_coef: Index (0-8) into the per-variant tables below.
        load_weights: Forwarded unchanged to ``EfficientNet``.
        **kwargs: ``'ratios'`` and ``'scales'`` are read here to size the
            heads; the full mapping is then forwarded to ``Anchors``.
    """
    super(EfficientDetBackbone, self).__init__()
    self.compound_coef = compound_coef

    # Per-variant settings, all indexed by compound_coef.
    self.backbone_compound_coef = [0, 1, 2, 3, 4, 5, 6, 6, 7]
    self.fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384, 384]
    self.fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8, 8]
    self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    self.box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5, 5]
    self.pyramid_levels = [5, 5, 5, 5, 5, 5, 5, 5, 6]
    # anchor_scale: the pyramid scale for the different stride levels;
    # each level scales by a factor of two.
    self.anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5., 4.]
    self.aspect_ratios = kwargs.get(
        'ratios', [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)])
    # Within one pyramid scale, anchors are further split into sub-scales
    # and aspect ratios (3 of each by default).
    self.num_scales = len(
        kwargs.get('scales', [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]))

    conv_channel_coef = {
        # the channels of P3/P4/P5.
        0: [40, 112, 320],
        1: [40, 112, 320],
        2: [48, 120, 352],
        3: [48, 136, 384],
        4: [56, 160, 448],
        5: [64, 176, 512],
        6: [72, 200, 576],
        7: [72, 200, 576],
        8: [80, 224, 640],
    }

    num_anchors = len(self.aspect_ratios) * self.num_scales
    # Kept as an attribute so code that swaps in a new head after
    # construction can read the per-location anchor count from the model.
    self.num_anchors = num_anchors

    fpn_channels = self.fpn_num_filters[compound_coef]
    head_layers = self.box_class_repeats[compound_coef]
    num_levels = self.pyramid_levels[compound_coef]

    self.bifpn = nn.Sequential(*[
        BiFPN(fpn_channels,
              conv_channel_coef[compound_coef],
              cell_idx == 0,  # True only for the first cell in the stack
              attention=compound_coef < 6,
              use_p8=compound_coef > 7)
        for cell_idx in range(self.fpn_cell_repeats[compound_coef])
    ])

    self.num_classes = num_classes
    self.regressor = Regressor(in_channels=fpn_channels,
                               num_anchors=num_anchors,
                               num_layers=head_layers,
                               pyramid_levels=num_levels)
    self.classifier = Classifier(in_channels=fpn_channels,
                                 num_anchors=num_anchors,
                                 num_classes=num_classes,
                                 num_layers=head_layers,
                                 pyramid_levels=num_levels)

    # Level indices start at 3: [3, 4, ..., num_levels + 2].
    self.anchors = Anchors(
        anchor_scale=self.anchor_scale[compound_coef],
        pyramid_levels=list(range(3, 3 + num_levels)),
        **kwargs)

    self.backbone_net = EfficientNet(
        self.backbone_compound_coef[compound_coef], load_weights)
def train(opt):
    """Train an EfficientDet model on the TobyCustom dataset.

    The function builds the training/validation loaders, creates the model
    with a single-class classification head, loads a checkpoint, and then
    runs the epoch loop with TensorBoard logging, periodic checkpoints,
    validation, best-loss checkpoints and early stopping.

    Args:
        opt: Options object. The attributes read here are ``project``,
            ``saved_path``, ``log_path``, ``batch_size``, ``num_workers``,
            ``compound_coef``, ``head_only``, ``debug``, ``optim``, ``lr``,
            ``num_epochs``, ``save_interval``, ``val_interval``,
            ``es_min_delta`` and ``es_patience``. ``saved_path`` and
            ``log_path`` are extended in place with the project name, and
            ``load_weights`` is overwritten (see the note below).
    """
    params = Params(f'projects/{opt.project}.yml')
    # NOTE(review): hard-coded override of the project config. With it in
    # place the `num_gpus == 0` and `num_gpus == 1` branches never run.
    params.num_gpus = 4

    if params.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    opt.saved_path = opt.saved_path + f'/{params.project_name}/'
    opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
    os.makedirs(opt.log_path, exist_ok=True)
    os.makedirs(opt.saved_path, exist_ok=True)

    training_params = {
        'batch_size': opt.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }
    val_params = {
        'batch_size': opt.batch_size * 4,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': opt.num_workers,
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    normalizer_mean = [0.485, 0.456, 0.406]
    normalizer_std = [0.229, 0.224, 0.225]

    # NOTE(review): dataset locations are hard-coded for one machine.
    root_train = '/home/../../data3/giangData/train_1024/'
    side_train = '/home/../../data3/giangData/train_Sejin_1024/'
    ground_truth_train = '/home/../../data3/giangData/train_1024.txt'
    training_set = TobyCustom(
        root_dir=root_train,
        side_dir=side_train,
        annot_path=ground_truth_train,
        transform=ComposeAlb([
            Flip_X(),
            Flip_Y(),
            Equalize(),
            Brightness(),
            Constrast(),
            Resizer(input_sizes[opt.compound_coef], num_channels=3),
            Normalizer(mean=normalizer_mean, std=normalizer_std),
        ]))
    training_generator = DataLoader(training_set, **training_params)

    root_val = '/home/../../data3/giangData/val_1024/'
    side_val = '/home/../../data3/giangData/val_Sejin_1024/'
    ground_truth_val = '/home/../../data3/giangData/val_1024.txt'
    val_set = TobyCustom(
        root_dir=root_val,
        side_dir=side_val,
        annot_path=ground_truth_val,
        transform=ComposeAlb([
            Resizer(input_sizes[opt.compound_coef], num_channels=3),
            Normalizer(mean=normalizer_mean, std=normalizer_std),
        ]))
    val_generator = DataLoader(val_set, **val_params)

    # NOTE(review): eval() executes whatever expression the project YAML
    # contains; only run this with trusted configuration files.
    model = EfficientDetBackbone(num_classes=len(params.obj_list),
                                 compound_coef=opt.compound_coef,
                                 ratios=eval(params.anchors_ratios),
                                 scales=eval(params.anchors_scales))

    # Swap in a single-class classification head before loading weights so
    # that a matching checkpoint can populate it.
    from efficientdet.model import Classifier
    model.classifier = Classifier(
        in_channels=model.fpn_num_filters[opt.compound_coef],
        num_anchors=model.num_anchors,
        num_classes=1,
        num_layers=model.box_class_repeats[opt.compound_coef],
        pyramid_levels=model.pyramid_levels[opt.compound_coef])

    # NOTE(review): hard-coded override of the caller's option. While it is
    # here, the get_last_weights() and fresh-initialisation paths below
    # cannot be reached.
    opt.load_weights = './../result_3channel_21/save/coco/efficientdet-d4_21_3000.pth'

    # load last weights
    if opt.load_weights is not None:
        if opt.load_weights.endswith('.pth'):
            weights_path = opt.load_weights
        else:
            weights_path = get_last_weights(opt.saved_path)
        last_step = _parse_last_step(weights_path)

        try:
            model.load_state_dict(torch.load(weights_path), strict=False)
        except RuntimeError as e:
            print(f'[Warning] Ignoring {e}')
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print(
            f'[Info] loaded weights: {os.path.basename(weights_path)}, resuming checkpoint from step: {last_step}'
        )
    else:
        last_step = 0
        print('[Info] initializing weights...')
        init_weights(model)

    # freeze backbone if train head_only
    if opt.head_only:
        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet', 'BiFPN']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] freezed backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when using multiple GPUs and the batch size per GPU is
    # lower than 4. Useful when GPU memory is limited: with so few samples
    # per device plain batch norm makes training unstable or slow to
    # converge. sync_bn normalises over the mini-batches of all GPUs as one
    # batch, at the price of slightly slower training.
    if params.num_gpus > 1 and opt.batch_size // params.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        opt.log_path + f'/{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}/')

    # Wrap the model with the loss function, to reduce the memory usage on
    # gpu0 and speed up.
    model = ModelWithLoss(model, debug=opt.debug)

    if params.num_gpus > 0:
        model = model.cuda()
        if params.num_gpus > 1:
            model = CustomDataParallel(model, params.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if opt.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(), opt.lr,
                                    momentum=0.9, nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True)

    epoch = 0
    best_loss = 1e5
    best_epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(opt.num_epochs):
            # When resuming, skip the epochs that `step` says are done.
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for batch_idx, data in enumerate(progress_bar):
                # When resuming mid-epoch, skip already-consumed batches.
                if batch_idx < step - last_epoch * num_iter_per_epoch:
                    progress_bar.update()
                    continue
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if params.num_gpus == 1:
                        # With one GPU, send the batch to cuda:0 here; with
                        # several, CustomDataParallel distributes it.
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs, annot,
                                               obj_list=params.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    progress_bar.set_description(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. '
                        'Cls loss: {:.5f}. Reg loss: {:.5f}. '
                        'Total loss: {:.5f}'.format(
                            step, epoch, opt.num_epochs, batch_idx + 1,
                            num_iter_per_epoch, cls_loss.item(),
                            reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss',
                                       {'train': reg_loss}, step)
                    writer.add_scalars('Classfication_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                    if step % opt.save_interval == 0 and step > 0:
                        save_checkpoint(
                            model,
                            f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth'
                        )
                        print('checkpoint...')

                except Exception as e:
                    # Best effort: report the failing batch and move on.
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % opt.val_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                with torch.no_grad():
                    for data in val_generator:
                        imgs = data['img']
                        annot = data['annot']

                        if params.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs, annot,
                                                   obj_list=params.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. '
                    'Regression loss: {:1.5f}. '
                    'Total loss: {:1.5f}'.format(
                        epoch, opt.num_epochs, cls_loss, reg_loss, loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classfication_loss',
                                   {'val': cls_loss}, step)
                print('\n')

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch

                    # Best checkpoints carry the validation loss as an
                    # extra trailing field in the file name.
                    save_checkpoint(
                        model,
                        f'efficientdet-d{opt.compound_coef}_{epoch}_{step}_{loss}.pth'
                    )

                model.train()

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    print(
                        '[Info] Stop training at epoch {}. The lowest loss achieved is {}'
                        .format(epoch, best_loss))
                    break
    except KeyboardInterrupt:
        # Save progress before leaving on Ctrl-C.
        save_checkpoint(
            model, f'efficientdet-d{opt.compound_coef}_{epoch}_{step}.pth')
    finally:
        # Close the writer exactly once, whichever way the loop ended.
        writer.close()


def _parse_last_step(weights_path):
    """Return the global step encoded in a checkpoint file name, or 0.

    Two name layouts are understood: ``..._{step}.pth`` and
    ``..._{step}_{loss}.pth``, where ``loss`` is a decimal number. Any other
    name, or a non-string path, yields 0.
    """
    try:
        stem = os.path.splitext(os.path.basename(weights_path))[0]
    except TypeError:
        return 0

    fields = stem.split('_')
    try:
        return int(fields[-1])
    except ValueError:
        pass

    # The last field is not an integer. If it is a decimal number, treat it
    # as a loss value and take the step from the field before it.
    try:
        float(fields[-1])
        return int(fields[-2])
    except (ValueError, IndexError):
        return 0
compound_coef] if force_input_size is None else force_input_size model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=4, ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales)) # print(model) # weights_path = './weights/efficientdet-d4.pth' weights_path = 'C:/Users/giang/Desktop/efficientdet-d4_24_3500.pth' # model.load_state_dict(torch.load(weights_path, map_location = 'cpu'), strict=False) from efficientdet.model import Classifier # model.backbone_net.model._conv_stem.conv = nn.Conv2d(4, 48, kernel_size=(3, 3), stride=(2, 2), bias=False) # model.classifier.header.pointwise_conv.conv = nn.Conv2d(224, 9, kernel_size=(1, 1), stride=(1, 1)) model.classifier = Classifier(in_channels=model.fpn_num_filters[4], num_anchors=model.num_anchors, num_classes=1, num_layers=model.box_class_repeats[4], pyramid_levels=model.pyramid_levels[4]) model.load_state_dict(torch.load(weights_path), strict=False) # model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu')) model.requires_grad_(False) model.eval() # model.train(False) if use_cuda: model = model.cuda() if use_float16: model = model.half() params.num_gpus = 1 if params.num_gpus == 0: