def __init__(self, ARCH, DATA, datadir, logdir, path=None):
    """Set up everything needed to train the semantic segmentation model.

    Creates the logger, loads the dataset parser, derives per-class loss
    weights from the class content statistics, builds the Segmentator,
    moves it to the available device(s) and prepares the loss, the SGD
    optimizer and the warmup learning-rate scheduler.

    Args:
        ARCH: architecture/training configuration (nested mapping).
        DATA: dataset configuration (nested mapping).
        datadir: dataset root, passed to the parser as ``root``.
        logdir: log directory; the logger is created at ``logdir + "/tb"``.
        path: optional value forwarded to ``Segmentator`` as its last
            argument.

    Raises:
        Exception: if ``ARCH["train"]["loss"]`` is absent or is not
            ``"xentropy"``.
    """
    # keep the raw inputs on the instance
    self.ARCH = ARCH
    self.DATA = DATA
    self.datadir = datadir
    self.log = logdir
    self.path = path

    # logger plus the bookkeeping dict, every entry starting at zero
    self.tb_logger = Logger(self.log + "/tb")
    self.info = dict.fromkeys(
        (
            "train_update",
            "train_loss",
            "train_acc",
            "train_iou",
            "valid_loss",
            "valid_acc",
            "valid_iou",
            "backbone_lr",
            "decoder_lr",
            "head_lr",
            "post_lr",
        ),
        0,
    )

    # the parser is loaded from the dataset-specific parser.py
    # NOTE(review): `imp` was removed in Python 3.12; importlib is the
    # documented replacement.
    parser_file = (booger.TRAIN_PATH + '/tasks/semantic/dataset/' +
                   self.DATA["name"] + '/parser.py')
    parserModule = imp.load_source("parserModule", parser_file)
    sequences = self.DATA["split"]
    parser_args = {
        "root": self.datadir,
        "train_sequences": sequences["train"],
        "valid_sequences": sequences["valid"],
        "test_sequences": None,
        "labels": self.DATA["labels"],
        "color_map": self.DATA["color_map"],
        "learning_map": self.DATA["learning_map"],
        "learning_map_inv": self.DATA["learning_map_inv"],
        "sensor": self.ARCH["dataset"]["sensor"],
        "max_points": self.ARCH["dataset"]["max_points"],
        "batch_size": self.ARCH["train"]["batch_size"],
        "workers": self.ARCH["train"]["workers"],
        "gt": True,
        "shuffle_train": True,
    }
    self.parser = parserModule.Parser(**parser_args)

    # loss weights: inverse of the accumulated per-class content,
    # with epsilon_w added to the denominator
    eps = self.ARCH["train"]["epsilon_w"]
    class_content = torch.zeros(self.parser.get_n_classes(),
                                dtype=torch.float)
    for label, frequency in DATA["content"].items():
        # several original labels may map onto the same training class
        class_content[self.parser.to_xentropy(label)] += frequency
    self.loss_w = 1 / (class_content + eps)
    for mapped_cl in range(len(self.loss_w)):
        # classes flagged in learning_ignore get zero weight
        if DATA["learning_ignore"][mapped_cl]:
            self.loss_w[mapped_cl] = 0
    print("Loss weights from content: ", self.loss_w.data)

    # build the network without tracking gradients during construction
    with torch.no_grad():
        self.model = Segmentator(self.ARCH, self.parser.get_n_classes(),
                                 self.path)
    print(self.model)

    # device handling: defaults describe a CPU-only run
    self.gpu = False
    self.multi_gpu = False
    self.n_gpus = 0
    self.model_single = self.model
    has_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if has_cuda else "cpu")
    print("Training in device: ", self.device)
    visible_gpus = torch.cuda.device_count() if has_cuda else 0
    if visible_gpus > 0:
        cudnn.benchmark = True
        cudnn.fastest = True
        self.gpu = True
        self.n_gpus = 1
        self.model.cuda()
    if visible_gpus > 1:
        print("Let's use", visible_gpus, "GPUs!")
        self.model = nn.DataParallel(self.model)  # spread in gpus
        self.model = convert_model(self.model).cuda()  # sync batchnorm
        # unwrapped model, used below to reach the sub-modules by name
        self.model_single = self.model.module
        self.multi_gpu = True
        self.n_gpus = visible_gpus

    # loss: only "xentropy" is accepted
    if self.ARCH["train"].get("loss") != "xentropy":
        raise Exception('Loss not defined in config file')
    self.criterion = nn.NLLLoss(weight=self.loss_w).to(self.device)
    if self.n_gpus > 1:
        # loss as dataparallel too (more images in batch)
        self.criterion = nn.DataParallel(self.criterion).cuda()

    # optimizer parameter groups: CRF first (if used and trained), then
    # backbone, decoder and head, each gated by its own "train" flag
    crf_cfg = self.ARCH["post"]["CRF"]
    self.lr_group_names = []
    self.train_dicts = []
    if crf_cfg["use"] and crf_cfg["train"]:
        self.lr_group_names.append("post_lr")
        self.train_dicts.append(
            {'params': self.model_single.CRF.parameters()})
    for part, lr_name in (("backbone", "backbone_lr"),
                          ("decoder", "decoder_lr"),
                          ("head", "head_lr")):
        if self.ARCH[part]["train"]:
            self.lr_group_names.append(lr_name)
            self.train_dicts.append(
                {'params': getattr(self.model_single, part).parameters()})

    self.optimizer = optim.SGD(
        self.train_dicts,
        lr=self.ARCH["train"]["lr"],
        momentum=self.ARCH["train"]["momentum"],
        weight_decay=self.ARCH["train"]["w_decay"],
    )

    # warmup scheduler: config values are per epoch, the scheduler is
    # fed per-step quantities
    iters_per_epoch = self.parser.get_train_size()
    warmup_iters = int(self.ARCH["train"]["wup_epochs"] * iters_per_epoch)
    per_step_decay = self.ARCH["train"]["lr_decay"]**(1 / iters_per_epoch)
    self.scheduler = warmupLR(
        optimizer=self.optimizer,
        lr=self.ARCH["train"]["lr"],
        warmup_steps=warmup_iters,
        momentum=self.ARCH["train"]["momentum"],
        decay=per_step_decay,
    )
def __init__(self, config, logdir, path=None, only_eval=False,
             block_bn=False):
    """Set up everything needed to train the image segmentation model.

    Creates the logger, loads the dataset parser, fills the per-class loss
    weights from the config, builds the Segmentator from backbone/decoder/
    head configs, optionally freezes backbone and decoder, moves the model
    to the available device(s) and prepares the loss, the SGD optimizer,
    the one-shot learning-rate scheduler and the best-N model buffers.

    Args:
        config: full configuration (nested mapping), stored as ``self.CFG``.
        logdir: log directory; the logger is created at ``logdir + "/tb"``.
        path: optional value forwarded to ``Segmentator`` as its last
            argument.
        only_eval: stored on the instance; not otherwise used here.
        block_bn: stored on the instance; not otherwise used here.

    Raises:
        Exception: if ``config["train"]["loss"]`` is absent or is neither
            ``"xentropy"`` nor ``"iou"``.
    """
    # keep the raw inputs on the instance
    self.CFG = config
    self.log = logdir
    self.path = path
    self.only_eval = only_eval
    self.block_bn = block_bn

    # logger plus the bookkeeping dict, every entry starting at zero
    self.tb_logger = Logger(self.log + "/tb")
    self.info = dict.fromkeys(
        (
            "train_update",
            "train_loss",
            "train_acc",
            "train_iou",
            "valid_loss",
            "valid_acc",
            "valid_iou",
            "valid_loss_avg_models",
            "valid_acc_avg_models",
            "valid_iou_avg_models",
            "feat_lr",
            "decoder_lr",
            "head_lr",
        ),
        0,
    )

    # the parser is loaded from the dataset-specific parser.py
    # NOTE(review): `imp` was removed in Python 3.12; importlib is the
    # documented replacement.
    parser_file = (booger.TRAIN_PATH + '/tasks/segmentation/dataset/' +
                   self.CFG["dataset"]["name"] + '/parser.py')
    parserModule = imp.load_source("parserModule", parser_file)
    parser_args = {
        "img_prop": self.CFG["dataset"]["img_prop"],
        "img_means": self.CFG["dataset"]["img_means"],
        "img_stds": self.CFG["dataset"]["img_stds"],
        "classes": self.CFG["dataset"]["labels"],
        "train": True,
        "location": self.CFG["dataset"]["location"],
        "batch_size": self.CFG["train"]["batch_size"],
        "crop_prop": self.CFG["train"]["crop_prop"],
        "workers": self.CFG["dataset"]["workers"],
    }
    self.parser = parserModule.Parser(**parser_args)
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()

    # loss weights: zero unless listed in labels_w
    self.loss_w = torch.zeros(self.parser.get_n_classes(),
                              dtype=torch.float)
    for class_idx, class_weight in self.CFG["dataset"]["labels_w"].items():
        self.loss_w[class_idx] = torch.tensor(class_weight)

    # configuration objects for the three parts of the network
    bbone = self.CFG["backbone"]
    self.bbone_cfg = BackboneConfig(
        name=bbone["name"],
        os=bbone["OS"],
        h=self.data_h,
        w=self.data_w,
        d=self.data_d,
        dropout=bbone["dropout"],
        bn_d=bbone["bn_d"],
        extra=bbone["extra"],
    )
    dec = self.CFG["decoder"]
    self.decoder_cfg = DecoderConfig(
        name=dec["name"],
        dropout=dec["dropout"],
        bn_d=dec["bn_d"],
        extra=dec["extra"],
    )
    self.head_cfg = HeadConfig(
        n_class=self.parser.get_n_classes(),
        dropout=self.CFG["head"]["dropout"],
        weights=self.loss_w,
    )

    # build the network without tracking gradients during construction
    with torch.no_grad():
        self.model = Segmentator(self.bbone_cfg, self.decoder_cfg,
                                 self.head_cfg, self.path)

    # freeze backbone and/or decoder when their "train" flag is falsy;
    # the flag itself is normalised to a literal False
    for part in ("backbone", "decoder"):
        if not self.CFG[part]["train"]:
            self.CFG[part]["train"] = False
            for frozen in getattr(self.model, part).parameters():
                frozen.requires_grad = False

    # report parameter counts: overall, trainable, then per sub-module
    n_all = 0
    n_trainable = 0
    for p in self.model.parameters():
        n_all += p.numel()
        if p.requires_grad:
            n_trainable += p.numel()
    print("Total number of parameters: ", n_all)
    print("Total number of parameters requires_grad: ", n_trainable)
    per_module = [
        ("Param encoder ", self.model.backbone),
        ("Param decoder ", self.model.decoder),
        ("Param head ", self.model.head),
    ]
    counts = [(label, sum(p.numel() for p in sub.parameters()))
              for label, sub in per_module]
    for label, count in counts:
        print(label, count)

    # device handling: defaults describe a CPU-only run
    self.gpu = False
    self.multi_gpu = False
    self.n_gpus = 0
    self.model_single = self.model
    has_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if has_cuda else "cpu")
    print("Training in device: ", self.device)
    visible_gpus = torch.cuda.device_count() if has_cuda else 0
    if visible_gpus > 0:
        # cudnn.benchmark is not enabled in this trainer.
        # NOTE(review): n_gpus stays 0 on a single-GPU machine here;
        # confirm this is intended.
        self.gpu = True
        self.model.cuda()
    if visible_gpus > 1:
        print("Let's use", visible_gpus, "GPUs!")
        self.model = nn.DataParallel(self.model)  # spread in gpus
        self.model = convert_model(self.model).cuda()  # sync batchnorm
        # unwrapped model, used below to reach the sub-modules by name
        self.model_single = self.model.module
        self.multi_gpu = True
        self.n_gpus = visible_gpus

    # loss: "xentropy" or "iou", anything else is an error
    loss_name = self.CFG["train"].get("loss")
    if loss_name == "xentropy":
        self.criterion = nn.CrossEntropyLoss(weight=self.loss_w).to(
            self.device)
    elif loss_name == "iou":
        self.criterion = mIoULoss(weight=self.loss_w).to(self.device)
    else:
        raise Exception('Loss not defined in config file')
    if self.n_gpus > 1:
        # loss as dataparallel too (more images in batch)
        self.criterion = nn.DataParallel(self.criterion).cuda()

    # optimizer parameter groups: head always, then backbone and decoder
    # when their "train" flag is set
    param_groups = [{'params': self.model_single.head.parameters()}]
    param_groups.extend(
        {'params': getattr(self.model_single, part).parameters()}
        for part in ("backbone", "decoder")
        if self.CFG[part]["train"])
    self.optimizer = optim.SGD(
        param_groups,
        lr=self.CFG["train"]["max_lr"],
        momentum=self.CFG["train"]["min_momentum"],
        weight_decay=self.CFG["train"]["w_decay"],
    )

    # one-shot scheduler: config values are per epoch, the scheduler is
    # fed per-step quantities
    iters_per_epoch = self.parser.get_train_size()
    ramp_up_iters = int(self.CFG["train"]["up_epochs"] * iters_per_epoch)
    ramp_down_iters = int(self.CFG["train"]["down_epochs"] *
                          iters_per_epoch)
    per_step_decay = self.CFG["train"]["final_decay"]**(1 /
                                                        iters_per_epoch)
    self.scheduler = OneShot_LR(
        self.optimizer,
        base_lr=self.CFG["train"]["min_lr"],
        max_lr=self.CFG["train"]["max_lr"],
        step_size_up=ramp_up_iters,
        step_size_down=ramp_down_iters,
        cycle_momentum=True,
        base_momentum=self.CFG["train"]["min_momentum"],
        max_momentum=self.CFG["train"]["max_momentum"],
        post_decay=per_step_decay,
    )

    # bounded buffers holding the best avg_N backbones/decoders/heads
    self.best_n_models = self.CFG["train"]["avg_N"]
    self.best_backbones = collections.deque(maxlen=self.best_n_models)
    self.best_decoders = collections.deque(maxlen=self.best_n_models)
    self.best_heads = collections.deque(maxlen=self.best_n_models)
def __init__(self, ARCH, DATA, datadir, logdir, path=None,
             model_mode='salsanext'):
    """Set up everything needed to train the SalsaNet model.

    Loads the dataset parser, derives per-class loss weights from the
    class content statistics, builds the network and logger, moves the
    model to the available device(s), prepares the NLL and Lovasz-softmax
    losses, the SGD optimizer and the warmup scheduler, and finally
    restores training state from a checkpoint when ``path`` is given.

    Args:
        ARCH: architecture/training configuration (nested mapping).
        DATA: dataset configuration (nested mapping).
        datadir: dataset root, passed to the parser as ``root``.
        logdir: log directory; the logger is created at ``logdir + "/tb"``.
        path: optional directory; forwarded to ``SalsaNet`` and, when not
            None, the checkpoint ``path + "/SalsaNet"`` is loaded.
        model_mode: stored on the instance; not otherwise used here.
    """
    # keep the raw inputs on the instance
    self.ARCH = ARCH
    self.DATA = DATA
    self.datadir = datadir
    self.log = logdir
    self.path = path
    self.model_mode = model_mode

    # running-average meters and the epoch counter
    self.batch_time_t = AverageMeter()
    self.data_time_t = AverageMeter()
    self.batch_time_e = AverageMeter()
    self.epoch = 0

    # bookkeeping dict, every entry starting at zero
    self.info = dict.fromkeys(
        (
            "train_update",
            "train_loss",
            "train_acc",
            "train_iou",
            "valid_loss",
            "valid_acc",
            "valid_iou",
            "best_train_iou",
            "best_val_iou",
        ),
        0,
    )

    # the parser is loaded from the dataset-specific parser.py
    # NOTE(review): `imp` was removed in Python 3.12; importlib is the
    # documented replacement.
    parser_file = (booger.TRAIN_PATH + '/tasks/semantic/dataset/' +
                   self.DATA["name"] + '/parser.py')
    parserModule = imp.load_source("parserModule", parser_file)
    sequences = self.DATA["split"]
    parser_args = {
        "root": self.datadir,
        "train_sequences": sequences["train"],
        "valid_sequences": sequences["valid"],
        "test_sequences": None,
        "labels": self.DATA["labels"],
        "color_map": self.DATA["color_map"],
        "learning_map": self.DATA["learning_map"],
        "learning_map_inv": self.DATA["learning_map_inv"],
        "sensor": self.ARCH["dataset"]["sensor"],
        "max_points": self.ARCH["dataset"]["max_points"],
        "batch_size": self.ARCH["train"]["batch_size"],
        "workers": self.ARCH["train"]["workers"],
        "gt": True,
        "shuffle_train": True,
    }
    self.parser = parserModule.Parser(**parser_args)

    # loss weights: inverse of the accumulated per-class content,
    # with epsilon_w added to the denominator
    eps = self.ARCH["train"]["epsilon_w"]
    class_content = torch.zeros(self.parser.get_n_classes(),
                                dtype=torch.float)
    for label, frequency in DATA["content"].items():
        # several original labels may map onto the same training class
        class_content[self.parser.to_xentropy(label)] += frequency
    self.loss_w = 1 / (class_content + eps)
    for mapped_cl in range(len(self.loss_w)):
        # classes flagged in learning_ignore get zero weight
        if DATA["learning_ignore"][mapped_cl]:
            self.loss_w[mapped_cl] = 0
    print("Loss weights from content: ", self.loss_w.data)

    # build the network without tracking gradients during construction
    with torch.no_grad():
        self.model = SalsaNet(self.ARCH, self.parser.get_n_classes(),
                              self.path)

    # the logger also receives the freshly built model
    self.tb_logger = Logger(self.log + "/tb", self.model)

    # device handling: defaults describe a CPU-only run
    self.gpu = False
    self.multi_gpu = False
    self.n_gpus = 0
    self.model_single = self.model

    # list every trainable parameter tensor, then the grand total
    trainable_total = 0
    for name, param in self.model.named_parameters():
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
        print("{}: {:,}".format(name, param.numel()))
    print("Total of Trainable Parameters: {:,}".format(trainable_total))

    has_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if has_cuda else "cpu")
    print("Training in device: ", self.device)
    visible_gpus = torch.cuda.device_count() if has_cuda else 0
    if visible_gpus > 0:
        cudnn.benchmark = True
        cudnn.fastest = True
        self.gpu = True
        self.n_gpus = 1
        self.model.cuda()
    if visible_gpus > 1:
        print("Let's use", visible_gpus, "GPUs!")
        self.model = nn.DataParallel(self.model)  # spread in gpus
        self.model = convert_model(self.model).cuda()  # sync batchnorm
        # unwrapped model kept separately from the parallel wrapper
        self.model_single = self.model.module
        self.multi_gpu = True
        self.n_gpus = visible_gpus

    # two losses: weighted NLL and Lovasz-softmax ignoring class 0
    self.criterion = nn.NLLLoss(weight=self.loss_w).to(self.device)
    self.ls = Lovasz_softmax(ignore=0).to(self.device)
    if self.n_gpus > 1:
        # losses as dataparallel too (more images in batch)
        self.criterion = nn.DataParallel(self.criterion).cuda()
        self.ls = nn.DataParallel(self.ls).cuda()

    # a single parameter group covering the whole model
    self.optimizer = optim.SGD(
        [{'params': self.model.parameters()}],
        lr=self.ARCH["train"]["lr"],
        momentum=self.ARCH["train"]["momentum"],
        weight_decay=self.ARCH["train"]["w_decay"],
    )

    # warmup scheduler: config values are per epoch, the scheduler is
    # fed per-step quantities
    iters_per_epoch = self.parser.get_train_size()
    warmup_iters = int(self.ARCH["train"]["wup_epochs"] * iters_per_epoch)
    per_step_decay = self.ARCH["train"]["lr_decay"]**(1 / iters_per_epoch)
    self.scheduler = warmupLR(
        optimizer=self.optimizer,
        lr=self.ARCH["train"]["lr"],
        warmup_steps=warmup_iters,
        momentum=self.ARCH["train"]["momentum"],
        decay=per_step_decay,
    )

    # nothing to restore when no checkpoint directory was given
    if self.path is None:
        return

    def _keep_storage(storage, loc):
        # map_location hook: hand the storage back unchanged
        return storage

    torch.nn.Module.dump_patches = True
    w_dict = torch.load(path + "/SalsaNet", map_location=_keep_storage)
    self.model.load_state_dict(w_dict['state_dict'], strict=True)
    self.optimizer.load_state_dict(w_dict['optimizer'])
    # resume from the epoch after the stored one
    self.epoch = w_dict['epoch'] + 1
    self.scheduler.load_state_dict(w_dict['scheduler'])
    print("dict epoch:", w_dict['epoch'])
    self.info = w_dict['info']
    print("info", w_dict['info'])
def __init__(self, ARCH, DATA, datadir, logdir, path=None):
    """Set up everything needed to train the mask regression model.

    Creates the logger, loads the dataset parser, builds the
    MaskRegressor, moves it to the available device(s) and prepares the
    BCE-with-logits loss, the SGD optimizer and the warmup learning-rate
    scheduler.

    Args:
        ARCH: architecture/training configuration (nested mapping).
        DATA: dataset configuration (nested mapping).
        datadir: dataset root, passed to the parser as ``root``.
        logdir: log directory; the logger is created at ``logdir + "/tb"``.
        path: optional value forwarded to ``MaskRegressor`` as its last
            argument.

    Raises:
        Exception: if ``ARCH["train"]["loss"]`` is absent or is not
            ``"xentropy"``.
    """
    # keep the raw inputs on the instance
    self.ARCH = ARCH
    self.DATA = DATA
    self.datadir = datadir
    self.log = logdir
    self.path = path

    # logger plus the bookkeeping dict, every entry starting at zero
    self.tb_logger = Logger(self.log + "/tb")
    self.info = dict.fromkeys(
        (
            "train_update",
            "train_loss",
            "train_acc",
            "train_iou",
            "valid_loss",
            "valid_acc",
            "valid_iou",
            "backbone_lr",
            "decoder_lr",
            "head_lr",
            "post_lr",
        ),
        0,
    )

    # the parser is loaded from the dataset-specific parser.py
    # NOTE(review): `imp` was removed in Python 3.12; importlib is the
    # documented replacement.
    parser_file = (booger.TRAIN_PATH + '/tasks/mask_regression/dataset/' +
                   self.DATA["name"] + '/parser.py')
    parserModule = imp.load_source("parserModule", parser_file)
    sequences = self.DATA["split"]
    parser_args = {
        "root": self.datadir,
        "train_sequences": sequences["train"],
        "valid_sequences": sequences["valid"],
        "test_sequences": None,
        "labels": self.DATA["labels"],
        "color_map": self.DATA["color_map"],
        "learning_map": self.DATA["learning_map"],
        "learning_map_inv": self.DATA["learning_map_inv"],
        "sensor": self.ARCH["dataset"]["sensor"],
        "max_points": self.ARCH["dataset"]["max_points"],
        "batch_size": self.ARCH["train"]["batch_size"],
        "workers": self.ARCH["train"]["workers"],
        "gt": True,
        "shuffle_train": True,
    }
    self.parser = parserModule.Parser(**parser_args)

    # build the network without tracking gradients during construction
    with torch.no_grad():
        self.model = MaskRegressor(self.ARCH, self.path)

    # device handling: defaults describe a CPU-only run
    self.gpu = False
    self.multi_gpu = False
    self.n_gpus = 0
    self.model_single = self.model
    has_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if has_cuda else "cpu")
    print("Training in device: ", self.device)
    visible_gpus = torch.cuda.device_count() if has_cuda else 0
    if visible_gpus > 0:
        cudnn.benchmark = True
        cudnn.fastest = True
        self.gpu = True
        self.n_gpus = 1
        self.model.cuda()
    if visible_gpus > 1:
        print("Let's use", visible_gpus, "GPUs!")
        self.model = nn.DataParallel(self.model)  # spread in gpus
        self.model = convert_model(self.model).cuda()  # sync batchnorm
        # unwrapped model, used below to reach the sub-modules by name
        self.model_single = self.model.module
        self.multi_gpu = True
        self.n_gpus = visible_gpus

    # loss: only "xentropy" is accepted; it selects BCE-with-logits with
    # a fixed positive-class weight of 14.0
    if self.ARCH["train"].get("loss") != "xentropy":
        raise Exception('Loss not defined in config file')
    positive_weight = torch.tensor([14.0], dtype=torch.float)
    self.criterion = nn.BCEWithLogitsLoss(
        pos_weight=positive_weight).to(self.device)
    if self.n_gpus > 1:
        # loss as dataparallel too (more images in batch)
        self.criterion = nn.DataParallel(self.criterion).cuda()

    # optimizer parameter groups: backbone, decoder and head, each gated
    # by its own "train" flag
    self.lr_group_names = []
    self.train_dicts = []
    for part, lr_name in (("backbone", "backbone_lr"),
                          ("decoder", "decoder_lr"),
                          ("head", "head_lr")):
        if self.ARCH[part]["train"]:
            self.lr_group_names.append(lr_name)
            self.train_dicts.append(
                {'params': getattr(self.model_single, part).parameters()})

    self.optimizer = optim.SGD(
        self.train_dicts,
        lr=self.ARCH["train"]["lr"],
        momentum=self.ARCH["train"]["momentum"],
        weight_decay=self.ARCH["train"]["w_decay"],
    )

    # warmup scheduler: config values are per epoch, the scheduler is
    # fed per-step quantities
    iters_per_epoch = self.parser.get_train_size()
    warmup_iters = int(self.ARCH["train"]["wup_epochs"] * iters_per_epoch)
    per_step_decay = self.ARCH["train"]["lr_decay"]**(1 / iters_per_epoch)
    self.scheduler = warmupLR(
        optimizer=self.optimizer,
        lr=self.ARCH["train"]["lr"],
        warmup_steps=warmup_iters,
        momentum=self.ARCH["train"]["momentum"],
        decay=per_step_decay,
    )