def __init__(self, model_cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes super(Model, self).__init__() if type(model_cfg) is dict: self.md = model_cfg # model dict else: # is *.yaml with open(model_cfg) as f: self.md = yaml.load(f, Loader=yaml.FullLoader) # model dict # Define model if nc: self.md['nc'] = nc # override yaml value self.model, self.save = parse_model(self.md, ch=[ch]) # model, savelist, ch_out # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() m.stride = torch.tensor([ 64 / x.shape[-2] for x in self.forward(torch.zeros(1, ch, 64, 64)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride # Init weights, biases torch_utils.initialize_weights(self) self._initialize_biases() # only run once torch_utils.model_info(self) print('')
def __init__(self, model_config): """ :param model_config: """ super(FlexibleModel, self).__init__() if type(model_config) is str: model_config = yaml.load(open(model_config, 'r')) model_config = Dict(model_config) backbone_type = model_config.backbone.pop('type') self.backbone = build_backbone(backbone_type, **model_config.backbone) backbone_out = self.backbone.out_shape self.fpn = build_neck('FPN', **backbone_out) fpn_out = self.fpn.out_shape fpn_out['version'] = model_config.backbone.version self.pan = build_neck('PAN', **fpn_out) # self.seghead = build_head('DecoderHead', **model_config.seghead) pan_out = self.pan.out_shape model_config.head['ch'] = pan_out self.detection = build_head('YOLOHead', **model_config.head) self.stride = self.detection.stride self._initialize_biases() initialize_weights(self)
def model_create(self, cfg): if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict self.model, self.save = parse_model(deepcopy(self.yaml), ch=[self.ch ]) # model, savelist, ch_out # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 128 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, self.ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) initialize_weights(self)
def __init__(self, cfg='yolov3.yaml', ch=3, nc=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict # Define model if nc and nc != self.yaml['nc']: logger.info('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) self.yaml['nc'] = nc # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out self.names = [str(i) for i in range(self.yaml['nc'])] # default names # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 128 # 2x min stride m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() logger.info('')
def __init__(self, cfg, ch=3): # model, input channels, number of classes super(Model, self).__init__() self.md = cfg # Define model self.model, self.save = parse_model(self.md, ch=[ch]) # model, savelist, ch_out # Init weights, biases torch_utils.initialize_weights(self) # torch_utils.model_info(self) print('')
def __init__(self, opt, hyp, cfg='yolov5s.yaml', ch=3, nc=5, tb_writer=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict self.nc = nc self.hyp = hyp self.opt = opt self.tb_writer = tb_writer #init seeds init_seeds(2 + self.opt.global_rank) # Define model if nc and nc != self.yaml['nc']: print('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) self.yaml['nc'] = nc # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 128 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) initialize_weights(self) self.info() self.config() self.run_save() self.load_model() #self.wandb_logging() self.optimizer, self.lr_scheduler = self.configure_optimizers() self.optimizer, self.lr_scheduler = self.optimizer[ 0], self.lr_scheduler[0] self.resume()
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load( f, Loader=yaml.SafeLoader) # yaml파일 내용을 딕셔너리로 읽어들임 # Define model ch = self.yaml['ch'] = self.yaml.get( 'ch', ch) # input channels:3 을 self.yaml 딕셔너리에 추가 if nc and nc != self.yaml['nc']: # num_classes가 다르면 오버라이드 logger.info( f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}" ) # 예상대로 진행되는지에 대한 확인 self.yaml['nc'] = nc # override yaml value if anchors: logger.info( f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round( anchors) # 새로 입력된 앵커박스가 있다면 yaml dict에 override self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 256 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward / 8, 16, 32 m.anchors /= m.stride.view(-1, 1, 1) # 각 스케일에 대한 비율로 anchor box 조정 check_anchor_order(m) self.stride = m.stride # 8, 16, 32 self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() logger.info('')
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): # &&& looking for parameter file self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.safe_load(f) # model dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml[ 'nc']: ## &&& loading number of classes and anchors logger.info( f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value if anchors: logger.info( f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round(anchors) # override yaml value self.model, self.save = parse_model(deepcopy( self.yaml), ch=[ch]) # model, savelist &&& here it loads model self.names = [str(i) for i in range(self.yaml['nc'])] # default names # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[ -1] # Detect() # &&&& stripped last layer, what is Detect???? its class defined above if isinstance(m, Detect): s = 256 # 2x min stride &&&& what is x below and size , what is stride https://discuss.pytorch.org/t/pytorch-tensor-stride-how-it-works/90537 m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward &&&& m.stride = tensor([ 8., 16., 32.]) m.anchors /= m.stride.view( -1, 1, 1) # &&& this perform division and scale down of anchor sizes check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # logger.info('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() logger.info('')
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.safe_load(f) # model dict # Define model if ch == 3: # If using default num channels, check model config to make sure this is the right number of channels ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: logger.info( f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value if anchors: logger.info( f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round(anchors) # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names self.inplace = self.yaml.get('inplace', True) # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 256 # 2x min stride m.inplace = self.inplace m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # logger.info('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() logger.info('')
def __init__(self, cfg='EDSR.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: logger.info( f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value if anchors: logger.info( f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round(anchors) # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names print([x.shape for x in self.forward(torch.zeros(1, ch, 128, 128))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 256 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) # if SR_mixed model -> * scale check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() logger.info('')
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes super().__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg, encoding='ascii', errors='ignore') as f: self.yaml = yaml.safe_load(f) # model dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: LOGGER.info( f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value if anchors: LOGGER.info( f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round(anchors) # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names self.inplace = self.yaml.get('inplace', True) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 256 # 2x min stride m.inplace = self.inplace m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward check_anchor_order(m) # must be in pixel-space (not grid-space) m.anchors /= m.stride.view(-1, 1, 1) self.stride = m.stride self._initialize_biases() # only run once # Init weights, biases initialize_weights(self) self.info() LOGGER.info('')
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict # Define model if nc and nc != self.yaml['nc']: print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc)) self.yaml['nc'] = nc # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist, ch_out # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 128 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[0] ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once self.emb_dim, self.nID = self.yaml['emb_dim'], self.yaml['nID'] self.classifier = nn.Linear(self.emb_dim, self.nID) if self.nID > 0 else None # Init weights, biases initialize_weights(self) self.info() print('')
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, nas=False, nas_stage=1): super(Model, self).__init__() self.nas_stage = nas_stage if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict # Define model if nc and nc != self.yaml['nc']: print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc)) self.yaml['nc'] = nc # override yaml value self.model, self.save = parse_model( deepcopy(self.yaml), ch=[ch], nas_flag=nas) # model, savelist, ch_out # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 128 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() print('')
def __init__(self, model_cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes super(Model, self).__init__() if type(model_cfg) is dict: self.md = model_cfg # model dict else: # is *.yaml import yaml # for torch hub with open(model_cfg) as f: self.md = yaml.load(f, Loader=yaml.FullLoader) # model dict # Define model if nc and nc != self.md['nc']: print('Overriding %s nc=%g with nc=%g' % (model_cfg, self.md['nc'], nc)) self.md['nc'] = nc # override yaml value self.model, self.save = parse_model(self.md, ch=[ch]) # model, savelist, ch_out # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 128 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) # Init weights, biases torch_utils.initialize_weights(self) self._initialize_biases() # only run once torch_utils.model_info(self) print('')
def __init__(self, cfg='EDSR.yaml', ch=3): # model, input channels, number of classes super(SR_Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names #print([x.shape for x in self.forward(torch.zeros(1, ch, 128, 128))]) # Init weights, biases initialize_weights(self) # self.info() logger.info('')
def __init__(self, cfg, ch=3): super(YoloV6, self).__init__() with open(cfg) as f: self.md = yaml.load(f, Loader=yaml.FullLoader) self.nc = self.md['nc'] self.anchors = self.md['anchors'] self.na = len(self.anchors[0]) // 2 # number of anchors print('model num classes is: {}'.format(self.nc)) print('model anchors is: {}'.format(self.anchors)) # divid by, fixed currently due to SpineNet backbone cd = 2 wd = 3 self.backbone = SpineNet(arch='49S', output_level=[3, 4, 5]) # PANet self.conv5 = ConvBase(256 // cd, 256 // cd) # self.conv5 = SimBottleneckCSP(1024//cd, 1024//cd, n=3//wd, shortcut=False) self.up1 = nn.Upsample(scale_factor=2) self.csp5 = SimBottleneckCSP(512 // cd, 256 // cd, n=3 // wd, shortcut=False) self.conv6 = ConvBase(256 // cd, 256 // cd) self.up2 = nn.Upsample(scale_factor=2) self.csp6 = SimBottleneckCSP(512 // cd, 256 // cd, n=3 // wd, shortcut=False) self.conv7 = ConvBase(256 // cd, 256 // cd, 3, 2) self.csp7 = SimBottleneckCSP(512 // cd, 512 // cd, n=3 // wd, shortcut=False) self.conv8 = ConvBase(512 // cd, 512 // cd, 3, 2) self.csp8 = SimBottleneckCSP(512 // cd, 1024 // cd, n=3 // wd, shortcut=False) self.detect = Detect(self.nc, self.anchors, [256 // cd, 512 // cd, 1024 // cd]) # forward to get Detect lay params dynamically s = 512 # 2x min stride self.detect.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward self.detect.anchors /= self.detect.stride.view(-1, 1, 1) check_anchor_order(self.detect) self.stride = self.detect.stride self._initialize_biases() initialize_weights(self) # for compatible, in check_anchors in train.py self.model = [self.detect]
def train(hyp, opt, device, tb_writer=None): print(f'Hyperparameters {hyp}') log_dir = tb_writer.log_dir if tb_writer else 'runs/evolve' # run directory wdir = str(Path(log_dir) / 'weights') + os.sep # weights directory os.makedirs(wdir, exist_ok=True) last = wdir + 'last.pt' best = wdir + 'best.pt' results_file = log_dir + os.sep + 'results.txt' epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.local_rank # TODO: Use DDP logging. Only the first process is allowed to log. # Save run settings with open(Path(log_dir) / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(Path(log_dir) / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Remove previous results if rank in [-1, 0]: for f in glob.glob('*_batch*.jpg') + glob.glob(results_file): os.remove(f) opt.cfg = 'cfg/prune_0.4_keep_0.01_yolov5s.cfg' opt.weights = 'weights/prune_0.4_keep_0.01_last.pt' opt.cfg = 'cfg/yolov5s_tiny.cfg' opt.weights = '' # Create model # model = Model(opt.cfg, nc=nc).to(device) model = Darknet(opt.cfg, (opt.img_size[0], opt.img_size[0])).to(device) initialize_weights(model) # cfg_model = Darknet('cfg/yolov5s.cfg', (416, 416)).to(device) # Image sizes # gs = int(max(model.stride)) # grid size (max stride) gs = int(max([8, 16, 32])) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # Optimizer nbs = 64 # nominal batch size # default DDP implementation is slow for accumulation according to: https://pytorch.org/docs/stable/notes/ddp.html # all-reduce operation is carried out during loss.backward(). # Thus, there would be redundant all-reduce communications in a accumulation procedure, # which means, the result is still right but the training speed gets slower. # TODO: If acceleration is needed, there is an implementation of allreduce_post_accumulation # in https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/run_pretraining.py accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): if v.requires_grad: if '.bias' in k: pg2.append(v) # biases elif '.weight' in k and '.BatchNorm2d' not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: (( (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.8 + 0.2 # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Load Model if weights.endswith('.pt'): model.load_state_dict(torch.load(opt.weights)['model']) start_epoch, best_fitness = 0, 0.0 # with torch_distributed_zero_first(rank): # attempt_download(weights) # start_epoch, best_fitness = 0, 0.0 # if weights.endswith('.pt'): # pytorch format # ckpt = torch.load(weights, map_location=device) # load checkpoint # # # load model # try: # exclude = ['anchor'] # exclude keys # ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items() # if k in model.state_dict() and not any(x in k for x in exclude) # and model.state_dict()[k].shape == v.shape} # model.load_state_dict(ckpt['model'], strict=False) # print('Transferred %g/%g items from %s' % (len(ckpt['model']), len(model.state_dict()), weights)) # except KeyError as e: # s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \ # "Please delete or update %s and try again, or use --weights '' to train from scratch." \ # % (weights, opt.cfg, weights, weights) # raise KeyError(s) from e # # # load optimizer # if ckpt['optimizer'] is not None: # optimizer.load_state_dict(ckpt['optimizer']) # best_fitness = ckpt['best_fitness'] # # # load results # if ckpt.get('training_results') is not None: # with open(results_file, 'w') as file: # file.write(ckpt['training_results']) # write results.txt # # # epochs # start_epoch = ckpt['epoch'] + 1 # if epochs < start_epoch: # print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % # (weights, ckpt['epoch'], epochs)) # epochs += ckpt['epoch'] # finetune additional epochs # # del ckpt # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) model.module_list = model.module.module_list model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) print('Using SyncBatchNorm()') # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[rank], output_device=rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, local_rank=rank, world_size=opt.world_size) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Testloader if rank in [-1, 0]: # local_rank is set to -1. Because only the first process is expected to do evaluation. testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images, rect=True, local_rank=-1, world_size=opt.world_size)[0] # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # Class frequency if rank in [-1, 0]: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # model._initialize_biases(cf.to(device)) plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Check anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Start training t0 = time.time() nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = ( 0, 0, 0, 0, 0, 0, 0 ) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' scheduler.last_epoch = start_epoch - 1 # do not move # scaler = amp.GradScaler(enabled=cuda) if rank in [0, -1]: print('Image sizes %g train, %g test' % (imgsz, imgsz_test)) print('Using %g dataloader workers' % dataloader.num_workers) print('Starting training for %g epochs...' % epochs) # torch.autograd.set_detect_anomaly(True) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if dataset.image_weights: # Generate indices if rank in [-1, 0]: w = model.class_weights.cpu().numpy() * ( 1 - maps)**2 # class weights image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w) dataset.indices = random.choices( range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = torch.zeros([dataset.n], dtype=torch.int) if rank == 0: indices[:] = torch.from_tensor(dataset.indices, dtype=torch.int) dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) if rank in [-1, 0]: print( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp( ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward pred = model(imgs) # Loss loss, loss_items = compute_loss_cfg(pred, targets.to(device), model) # scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if not torch.isfinite(loss): print('WARNING: non-finite loss, ending training ', loss_items) return results # Backward loss.backward() # Autocast # with amp.autocast(enabled=cuda): # # Forward # pred = model(imgs) # # Loss # loss, loss_items = compute_loss(pred, targets.to(device), model) # scaled by batch_size # if rank != -1: # loss *= opt.world_size # gradient averaged between devices in DDP mode # # if not torch.isfinite(loss): # # print('WARNING: non-finite loss, ending training ', loss_items) # # return results # # Backward # scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: # scaler.step(optimizer) # optimizer.step # scaler.update() optimizer.step() optimizer.zero_grad() if ema is not None: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if ni < 3: f = str(Path(log_dir) / ('train_batch%g.jpg' % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema is not None: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'), model=ema.ema.module if hasattr(ema.ema, 'module') else ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir) # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Tensorboard if tb_writer: tags = [ 'train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss' ] for x, tag in zip(list(mloss[:-1]) + list(results), tags): tb_writer.add_scalar(tag, x, epoch) # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema.module if hasattr(ema, 'module') else ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict() } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]): if os.path.exists(f1): os.rename(f1, f2) # rename ispt = f2.endswith('.pt') # is *.pt strip_optimizer(f2) if ispt else None # strip optimizer os.system('gsutil cp %s gs://%s/weights' % ( f2, opt.bucket)) if opt.bucket and ispt else None # upload # Finish if not opt.evolve: plot_results(save_dir=log_dir) # save as results.png print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if rank not in [-1, 0] else None torch.cuda.empty_cache() return results
def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes super(Model, self).__init__() if isinstance(cfg, dict): self.yaml = cfg # model dict else: # is *.yaml import yaml # for torch hub self.yaml_file = Path(cfg).name with open(cfg) as f: self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict # Define model ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels if nc and nc != self.yaml['nc']: logger.info( f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") self.yaml['nc'] = nc # override yaml value if anchors: logger.info( f'Overriding model.yaml anchors with anchors={anchors}') self.yaml['anchors'] = round(anchors) # override yaml value self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist self.names = [str(i) for i in range(self.yaml['nc'])] # default names # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) #upsampling's head self.center = nn.Sequential( nn.Conv2d(512, 512, kernel_size=11, padding=5, bias=False), nn.BatchNorm2d(512), nn.ReLU(inplace=True), ) self.decode1 = ResDecode(384 + 512, 256) #layer11 self.decode2 = ResDecode(256 + 256, 128) #layer8 self.decode3 = ResDecode(128 + 128, 64) #layer6 self.decode4 = ResDecode(64 + 64, 32) #layer3 self.decode5 = ResDecode(32, 16) #layer2 # self.logit = nn.Conv2d(16, 1, kernel_size=3, padding=1) #segmentation output # fc[-1].bias.data.fill_(-2.19) self.logit = nn.Sequential( nn.Conv2d(16, 256, kernel_size=3, padding=1, bias=True), nn.ReLU(inplace=True), nn.Conv2d(256, 1, kernel_size=1, stride=1, padding=0, bias=True)) self.logit[-1].bias.data.fill_(-2.19) #~upsampling's head # Build strides, anchors m = self.model[-1] # Detect() if isinstance(m, Detect): s = 256 # 2x min stride m.stride = torch.tensor([ s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s)) ]) # forward m.anchors /= m.stride.view(-1, 1, 1) check_anchor_order(m) self.stride = m.stride self._initialize_biases() # only run once # print('Strides: %s' % m.stride.tolist()) # Init weights, biases initialize_weights(self) self.info() logger.info('')
def train(hyp, opt, device, tb_writer=None, wandb=None): logger.info(f'Hyperparameters {hyp}') save_dir, epochs, batch_size, total_batch_size, weights, rank = \ Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Directories wdir = save_dir / 'weights' wdir.mkdir(parents=True, exist_ok=True) # make dir last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = save_dir / 'results.txt' # Save run settings with open(save_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(save_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check model = Darknet(opt.cfg, (opt.img_size[0], opt.img_size[0])).to(device) initialize_weights(model) distill = opt.distill if distill: t_cfg = "models/yolov5s.yaml" t_weights = "runs/train/s_hand/weights/last.pt" t_data = "data/coco_hand.yaml" ckpt = torch.load(t_weights, map_location=device) # load checkpoint with open(t_data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict nc = int(data_dict['nc']) t_model = Model(t_cfg, nc=nc).to(device) exclude = ['anchor'] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, t_model.state_dict(), exclude=exclude) # intersect t_model.load_state_dict(state_dict, strict=False) # load t_model.eval() print( '<.....................using knowledge distillation.......................>' ) print('teacher model:', t_weights, '\n') # Model # pretrained = weights.endswith('.pt') # if pretrained: # with torch_distributed_zero_first(rank): # attempt_download(weights) # download if not found locally # ckpt = torch.load(weights, map_location=device) # load checkpoint # if hyp.get('anchors'): # ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor # model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create # exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys # state_dict = ckpt['model'].float().state_dict() # to FP32 # state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect # model.load_state_dict(state_dict, strict=False) # load # logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report # else: # model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze freeze = [] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_modules(): if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): pg2.append(v.bias) # biases if isinstance(v, nn.BatchNorm2d): pg0.append(v.weight) # no decay elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): pg1.append(v.weight) # apply decay if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[ 'lrf']) + hyp['lrf'] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Logging # if wandb and wandb.run is None: # wandb_run = wandb.init(config=opt, resume="allow", # project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem, # name=save_dir.stem, # id=ckpt.get('wandb_id') if 'ckpt' in locals() else None) loggers = {'wandb': wandb} # loggers dict # Resume if weights.endswith('.pt'): model.load_state_dict(torch.load(opt.weights)['model']) start_epoch, best_fitness = 0, 0.0 # if pretrained: # # Optimizer # if ckpt['optimizer'] is not None: # optimizer.load_state_dict(ckpt['optimizer']) # best_fitness = ckpt['best_fitness'] # # # Results # if ckpt.get('training_results') is not None: # with open(results_file, 'w') as file: # file.write(ckpt['training_results']) # write results.txt # # # Epochs # start_epoch = ckpt['epoch'] + 1 # if opt.resume: # assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs) # if epochs < start_epoch: # logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % # (weights, ckpt['epoch'], epochs)) # epochs += ckpt['epoch'] # finetune additional epochs # # del ckpt, state_dict # Image sizes # gs = int(max(model.stride)) # grid size (max stride) gs = int(max([8, 16, 32])) nl = 3 imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) model.module_list = model.module.module_list model.yolo_layers = model.module.yolo_layers # move yolo layer indices to top level # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency # model._initialize_biases(cf.to(device)) # plot_labels(labels, save_dir=save_dir) plot_labels(labels, save_dir, loggers) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Anchors # if not opt.noautoanchor: # check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset hyp['obj'] *= imgsz**2 / 640.**2 * 3. / nl # scale hyp['obj'] to image size and output layers model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) # model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights model.class_weights = labels_to_class_weights( dataset.labels, nc).to(device) * nc # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0 ) # P, R, [email protected], [email protected], val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test\n' 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs)) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: # cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 # class weights cw = model.class_weights.cpu().numpy() * ( 1 - maps)**2 / nc # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices( range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses msoft_target = torch.zeros(1).to(device) if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [ hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch) ]) if 'momentum' in x: x['momentum'] = np.interp( ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss_cfg( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode if distill: soft_target = 0 reg_ratio = 0 #表示有多少target的回归是不如老师的,这时学生会跟gt再学习 # _,output_t = t_model(imgs) with torch.no_grad(): _, output_t = t_model(imgs) #soft_target = distillation_loss1(pred, output_t, model.nc, imgs.size(0)) #这里把蒸馏策略改为了二,想换回一的可以注释掉loss2,把loss1取消注释 soft_target, reg_ratio = distillation_loss2( model, targets.to(device), pred, output_t) loss += soft_target # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses msoft_target = (msoft_target * i + soft_target) / (i + 1) mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if ni < 3: f = str(save_dir / f'train_batch{ni}.jpg') # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) # if tb_writer and result is not None: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test_cfg( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=save_dir, plots=epoch == 0 or final_epoch, # plot first and last log_imgs=opt.log_imgs if wandb else 0) # Write with open(results_file, 'a') as f: f.write( s + '%10.4g' * 7 % results + '\n') # P, R, [email protected], [email protected], val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Log tags = [ 'train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2' ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard if wandb: wandb.log({tag: x}) # W&B # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # weighted combination of [P, R, [email protected], [email protected]] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict(), 'wandb_id': wandb_run.id if wandb else None } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = opt.name if opt.name.isnumeric() else '' fresults, flast, fbest = save_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt' for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]): if f1.exists(): os.rename(f1, f2) # rename if str(f2).endswith('.pt'): # is *.pt strip_optimizer(f2) # strip optimizer os.system( 'gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload # Finish if not opt.evolve: plot_results(save_dir=save_dir) # save as results.png logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if rank not in [-1, 0] else None torch.cuda.empty_cache() return results