Code example #1
    def __init__(self, ARCH, DATA, datadir, logdir, path=None):
        # parameters
        self.ARCH = ARCH
        self.DATA = DATA
        self.datadir = datadir
        self.log = logdir
        self.path = path

        # put logger where it belongs
        self.tb_logger = Logger(self.log + "/tb")
        self.info = {
            "train_update": 0,
            "train_loss": 0,
            "train_acc": 0,
            "train_iou": 0,
            "valid_loss": 0,
            "valid_acc": 0,
            "valid_iou": 0,
            "backbone_lr": 0,
            "decoder_lr": 0,
            "head_lr": 0,
            "post_lr": 0
        }

        # get the data
        parserModule = imp.load_source(
            "parserModule", booger.TRAIN_PATH + '/tasks/semantic/dataset/' +
            self.DATA["name"] + '/parser.py')
        self.parser = parserModule.Parser(
            root=self.datadir,
            train_sequences=self.DATA["split"]["train"],
            valid_sequences=self.DATA["split"]["valid"],
            test_sequences=None,
            labels=self.DATA["labels"],
            color_map=self.DATA["color_map"],
            learning_map=self.DATA["learning_map"],
            learning_map_inv=self.DATA["learning_map_inv"],
            sensor=self.ARCH["dataset"]["sensor"],
            max_points=self.ARCH["dataset"]["max_points"],
            batch_size=self.ARCH["train"]["batch_size"],
            workers=self.ARCH["train"]["workers"],
            gt=True,
            shuffle_train=True)

        # weights for loss (and bias)
        epsilon_w = self.ARCH["train"]["epsilon_w"]
        content = torch.zeros(self.parser.get_n_classes(), dtype=torch.float)
        for cl, freq in DATA["content"].items():
            x_cl = self.parser.to_xentropy(cl)  # map actual class to xentropy class
            content[x_cl] += freq
        self.loss_w = 1 / (content + epsilon_w)  # get weights
        for x_cl, w in enumerate(self.loss_w):  # ignore the ones necessary to ignore
            if DATA["learning_ignore"][x_cl]:
                # don't weigh
                self.loss_w[x_cl] = 0
        print("Loss weights from content: ", self.loss_w.data)

        # concatenate the encoder and the head
        with torch.no_grad():
            self.model = Segmentator(self.ARCH, self.parser.get_n_classes(),
                                     self.path)
            print(self.model)

        # GPU?
        self.gpu = False
        self.multi_gpu = False
        self.n_gpus = 0
        self.model_single = self.model
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        print("Training in device: ", self.device)
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            cudnn.benchmark = True
            cudnn.fastest = True
            self.gpu = True
            self.n_gpus = 1
            self.model.cuda()
        if torch.cuda.is_available() and torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            self.model = nn.DataParallel(self.model)  # spread in gpus
            self.model = convert_model(self.model).cuda()  # sync batchnorm
            self.model_single = self.model.module  # single model to get weight names
            self.multi_gpu = True
            self.n_gpus = torch.cuda.device_count()

        # loss
        if self.ARCH["train"].get("loss") == "xentropy":
            self.criterion = nn.NLLLoss(weight=self.loss_w).to(self.device)
        else:
            raise Exception('Loss not defined in config file')
        # loss as dataparallel too (more images in batch)
        if self.n_gpus > 1:
            self.criterion = nn.DataParallel(
                self.criterion).cuda()  # spread in gpus

        # optimizer
        if self.ARCH["post"]["CRF"]["use"] and self.ARCH["post"]["CRF"]["train"]:
            self.lr_group_names = ["post_lr"]
            self.train_dicts = [{'params': self.model_single.CRF.parameters()}]
        else:
            self.lr_group_names = []
            self.train_dicts = []
        if self.ARCH["backbone"]["train"]:
            self.lr_group_names.append("backbone_lr")
            self.train_dicts.append(
                {'params': self.model_single.backbone.parameters()})
        if self.ARCH["decoder"]["train"]:
            self.lr_group_names.append("decoder_lr")
            self.train_dicts.append(
                {'params': self.model_single.decoder.parameters()})
        if self.ARCH["head"]["train"]:
            self.lr_group_names.append("head_lr")
            self.train_dicts.append(
                {'params': self.model_single.head.parameters()})

        # Use SGD optimizer to train
        self.optimizer = optim.SGD(self.train_dicts,
                                   lr=self.ARCH["train"]["lr"],
                                   momentum=self.ARCH["train"]["momentum"],
                                   weight_decay=self.ARCH["train"]["w_decay"])

        # Use warmup learning rate
        # post decay and step sizes come in epochs and we want it in steps
        steps_per_epoch = self.parser.get_train_size()
        up_steps = int(self.ARCH["train"]["wup_epochs"] * steps_per_epoch)
        final_decay = self.ARCH["train"]["lr_decay"]**(1 / steps_per_epoch)
        self.scheduler = warmupLR(optimizer=self.optimizer,
                                  lr=self.ARCH["train"]["lr"],
                                  warmup_steps=up_steps,
                                  momentum=self.ARCH["train"]["momentum"],
                                  decay=final_decay)
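
The class weights that example #1 feeds into NLLLoss come from the per-class frequencies stored in the dataset config: each weight is the inverse of the (smoothed) frequency, and classes flagged in learning_ignore are zeroed out. Below is a minimal, self-contained sketch of that computation; the three classes, their frequencies, and the epsilon value are made up for illustration.

import torch

# hypothetical per-class frequencies (class id -> fraction of points) and ignore flags
content = {0: 0.90, 1: 0.07, 2: 0.03}
learning_ignore = {0: True, 1: False, 2: False}
epsilon_w = 0.001  # smoothing term, plays the role of ARCH["train"]["epsilon_w"]

freqs = torch.zeros(len(content), dtype=torch.float)
for cl, freq in content.items():
    freqs[cl] += freq

loss_w = 1.0 / (freqs + epsilon_w)  # rare classes get large weights
for cl, ignore in learning_ignore.items():
    if ignore:
        loss_w[cl] = 0.0  # ignored classes contribute nothing to the loss

print(loss_w)  # tensor([ 0.0000, 14.0845, 32.2581])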
Code example #2
    def __init__(self,
                 config,
                 logdir,
                 path=None,
                 only_eval=False,
                 block_bn=False):
        # parameters
        self.CFG = config
        self.log = logdir
        self.path = path
        self.only_eval = only_eval
        self.block_bn = block_bn

        # put logger where it belongs
        self.tb_logger = Logger(self.log + "/tb")
        self.info = {
            "train_update": 0,
            "train_loss": 0,
            "train_acc": 0,
            "train_iou": 0,
            "valid_loss": 0,
            "valid_acc": 0,
            "valid_iou": 0,
            "valid_loss_avg_models": 0,
            "valid_acc_avg_models": 0,
            "valid_iou_avg_models": 0,
            "feat_lr": 0,
            "decoder_lr": 0,
            "head_lr": 0
        }

        # get the data
        parserModule = imp.load_source(
            "parserModule",
            booger.TRAIN_PATH + '/tasks/segmentation/dataset/' +
            self.CFG["dataset"]["name"] + '/parser.py')
        self.parser = parserModule.Parser(
            img_prop=self.CFG["dataset"]["img_prop"],
            img_means=self.CFG["dataset"]["img_means"],
            img_stds=self.CFG["dataset"]["img_stds"],
            classes=self.CFG["dataset"]["labels"],
            train=True,
            location=self.CFG["dataset"]["location"],
            batch_size=self.CFG["train"]["batch_size"],
            crop_prop=self.CFG["train"]["crop_prop"],
            workers=self.CFG["dataset"]["workers"])

        self.data_h, self.data_w, self.data_d = self.parser.get_img_size()

        # weights for loss (and bias)
        self.loss_w = torch.zeros(self.parser.get_n_classes(),
                                  dtype=torch.float)
        for idx, w in self.CFG["dataset"]["labels_w"].items():
            self.loss_w[idx] = torch.tensor(w)

        # get architecture and build backbone (with pretrained weights)
        self.bbone_cfg = BackboneConfig(
            name=self.CFG["backbone"]["name"],
            os=self.CFG["backbone"]["OS"],
            h=self.data_h,
            w=self.data_w,
            d=self.data_d,
            dropout=self.CFG["backbone"]["dropout"],
            bn_d=self.CFG["backbone"]["bn_d"],
            extra=self.CFG["backbone"]["extra"])

        self.decoder_cfg = DecoderConfig(
            name=self.CFG["decoder"]["name"],
            dropout=self.CFG["decoder"]["dropout"],
            bn_d=self.CFG["decoder"]["bn_d"],
            extra=self.CFG["decoder"]["extra"])

        self.head_cfg = HeadConfig(n_class=self.parser.get_n_classes(),
                                   dropout=self.CFG["head"]["dropout"],
                                   weights=self.loss_w)

        # concatenate the encoder and the head
        with torch.no_grad():
            self.model = Segmentator(self.bbone_cfg, self.decoder_cfg,
                                     self.head_cfg, self.path)

        # train backbone?
        if not self.CFG["backbone"]["train"]:
            self.CFG["backbone"]["train"] = False
            for w in self.model.backbone.parameters():
                w.requires_grad = False

        # train decoder?
        if not self.CFG["decoder"]["train"]:
            self.CFG["decoder"]["train"] = False
            for w in self.model.decoder.parameters():
                w.requires_grad = False

        # print number of parameters and the ones requiring gradients
        weights_total = sum(p.numel() for p in self.model.parameters())
        weights_grad = sum(p.numel() for p in self.model.parameters()
                           if p.requires_grad)
        print("Total number of parameters: ", weights_total)
        print("Total number of parameters requires_grad: ", weights_grad)
        # breakdown by layer
        weights_enc = sum(p.numel() for p in self.model.backbone.parameters())
        weights_dec = sum(p.numel() for p in self.model.decoder.parameters())
        weights_head = sum(p.numel() for p in self.model.head.parameters())
        print("Param encoder ", weights_enc)
        print("Param decoder ", weights_dec)
        print("Param head ", weights_head)

        # GPU?
        self.gpu = False
        self.multi_gpu = False
        self.n_gpus = 0
        self.model_single = self.model
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        print("Training in device: ", self.device)
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            self.gpu = True
            # cudnn.benchmark = True
            self.model.cuda()
        if torch.cuda.is_available() and torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            self.model = nn.DataParallel(self.model)  # spread in gpus
            self.model = convert_model(self.model).cuda()  # sync batchnorm
            self.model_single = self.model.module  # single model to get weight names
            self.multi_gpu = True
            self.n_gpus = torch.cuda.device_count()

        # loss
        if self.CFG["train"].get("loss") == "xentropy":
            self.criterion = nn.CrossEntropyLoss(weight=self.loss_w).to(self.device)
        elif self.CFG["train"].get("loss") == "iou":
            self.criterion = mIoULoss(weight=self.loss_w).to(self.device)
        else:
            raise Exception('Loss not defined in config file')
        # loss as dataparallel too (more images in batch)
        if self.n_gpus > 1:
            self.criterion = nn.DataParallel(
                self.criterion).cuda()  # spread in gpus

        # optimizer
        train_dicts = [{'params': self.model_single.head.parameters()}]
        if self.CFG["backbone"]["train"]:
            train_dicts.append(
                {'params': self.model_single.backbone.parameters()})
        if self.CFG["decoder"]["train"]:
            train_dicts.append(
                {'params': self.model_single.decoder.parameters()})

        # Use SGD optimizer to train
        self.optimizer = optim.SGD(train_dicts,
                                   lr=self.CFG["train"]["max_lr"],
                                   momentum=self.CFG["train"]["min_momentum"],
                                   weight_decay=self.CFG["train"]["w_decay"])

        # Use one shot learning rate
        # post decay and step sizes come in epochs and we want it in steps
        steps_per_epoch = self.parser.get_train_size()
        up_steps = int(self.CFG["train"]["up_epochs"] * steps_per_epoch)
        down_steps = int(self.CFG["train"]["down_epochs"] * steps_per_epoch)
        final_decay = self.CFG["train"]["final_decay"]**(1 / steps_per_epoch)

        self.scheduler = OneShot_LR(
            self.optimizer,
            base_lr=self.CFG["train"]["min_lr"],
            max_lr=self.CFG["train"]["max_lr"],
            step_size_up=up_steps,
            step_size_down=down_steps,
            cycle_momentum=True,
            base_momentum=self.CFG["train"]["min_momentum"],
            max_momentum=self.CFG["train"]["max_momentum"],
            post_decay=final_decay)

        # buffer to save the best N models
        self.best_n_models = self.CFG["train"]["avg_N"]
        self.best_backbones = collections.deque(maxlen=self.best_n_models)
        self.best_decoders = collections.deque(maxlen=self.best_n_models)
        self.best_heads = collections.deque(maxlen=self.best_n_models)
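
Example #2 ends by allocating bounded deques that hold the N best backbone, decoder, and head snapshots; once a deque is full, appending a new entry silently evicts the oldest one. A minimal sketch of that pattern follows; the checkpoint file names and the hard-coded IoU sequence are purely illustrative.

import collections

best_n_models = 3  # plays the role of CFG["train"]["avg_N"]
best_backbones = collections.deque(maxlen=best_n_models)

# hypothetical validation results: remember a checkpoint whenever IoU improves
best_iou = 0.0
for epoch, iou in enumerate([0.41, 0.40, 0.45, 0.47, 0.46, 0.50]):
    if iou > best_iou:
        best_iou = iou
        best_backbones.append("backbone_epoch%d_iou%.2f.pth" % (epoch, iou))

print(list(best_backbones))
# ['backbone_epoch2_iou0.45.pth', 'backbone_epoch3_iou0.47.pth', 'backbone_epoch5_iou0.50.pth']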
Code example #3
File: trainer.py  Project: weiweizhang6338/SalsaNext
    def __init__(self,
                 ARCH,
                 DATA,
                 datadir,
                 logdir,
                 path=None,
                 model_mode='salsanext'):
        # parameters
        self.ARCH = ARCH
        self.DATA = DATA
        self.datadir = datadir
        self.log = logdir
        self.path = path
        self.model_mode = model_mode

        self.batch_time_t = AverageMeter()
        self.data_time_t = AverageMeter()
        self.batch_time_e = AverageMeter()
        self.epoch = 0

        # put logger where it belongs

        self.info = {
            "train_update": 0,
            "train_loss": 0,
            "train_acc": 0,
            "train_iou": 0,
            "valid_loss": 0,
            "valid_acc": 0,
            "valid_iou": 0,
            "best_train_iou": 0,
            "best_val_iou": 0
        }

        # get the data
        parserModule = imp.load_source(
            "parserModule", booger.TRAIN_PATH + '/tasks/semantic/dataset/' +
            self.DATA["name"] + '/parser.py')
        self.parser = parserModule.Parser(
            root=self.datadir,
            train_sequences=self.DATA["split"]["train"],
            valid_sequences=self.DATA["split"]["valid"],
            test_sequences=None,
            labels=self.DATA["labels"],
            color_map=self.DATA["color_map"],
            learning_map=self.DATA["learning_map"],
            learning_map_inv=self.DATA["learning_map_inv"],
            sensor=self.ARCH["dataset"]["sensor"],
            max_points=self.ARCH["dataset"]["max_points"],
            batch_size=self.ARCH["train"]["batch_size"],
            workers=self.ARCH["train"]["workers"],
            gt=True,
            shuffle_train=True)

        # weights for loss (and bias)
        epsilon_w = self.ARCH["train"]["epsilon_w"]
        content = torch.zeros(self.parser.get_n_classes(), dtype=torch.float)
        for cl, freq in DATA["content"].items():
            x_cl = self.parser.to_xentropy(cl)  # map actual class to xentropy class
            content[x_cl] += freq
        self.loss_w = 1 / (content + epsilon_w)  # get weights
        for x_cl, w in enumerate(self.loss_w):  # ignore the ones necessary to ignore
            if DATA["learning_ignore"][x_cl]:
                # don't weigh
                self.loss_w[x_cl] = 0
        print("Loss weights from content: ", self.loss_w.data)
        # concatenate the encoder and the head
        with torch.no_grad():
            self.model = SalsaNet(self.ARCH, self.parser.get_n_classes(),
                                  self.path)

        self.tb_logger = Logger(self.log + "/tb", self.model)

        # GPU?
        self.gpu = False
        self.multi_gpu = False
        self.n_gpus = 0
        self.model_single = self.model
        pytorch_total_params = sum(p.numel() for p in self.model.parameters()
                                   if p.requires_grad)
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                print("{}: {:,}".format(name, param.numel()))
        print("Total of Trainable Parameters: {:,}".format(pytorch_total_params))
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        print("Training in device: ", self.device)
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            cudnn.benchmark = True
            cudnn.fastest = True
            self.gpu = True
            self.n_gpus = 1
            self.model.cuda()
        if torch.cuda.is_available() and torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            self.model = nn.DataParallel(self.model)  # spread in gpus
            self.model = convert_model(self.model).cuda()  # sync batchnorm
            self.model_single = self.model.module  # single model to get weight names
            self.multi_gpu = True
            self.n_gpus = torch.cuda.device_count()

        self.criterion = nn.NLLLoss(weight=self.loss_w).to(self.device)
        self.ls = Lovasz_softmax(ignore=0).to(self.device)
        # loss as dataparallel too (more images in batch)
        if self.n_gpus > 1:
            self.criterion = nn.DataParallel(
                self.criterion).cuda()  # spread in gpus
            self.ls = nn.DataParallel(self.ls).cuda()
        self.optimizer = optim.SGD([{'params': self.model.parameters()}],
                                   lr=self.ARCH["train"]["lr"],
                                   momentum=self.ARCH["train"]["momentum"],
                                   weight_decay=self.ARCH["train"]["w_decay"])

        # Use warmup learning rate
        # post decay and step sizes come in epochs and we want it in steps
        steps_per_epoch = self.parser.get_train_size()
        up_steps = int(self.ARCH["train"]["wup_epochs"] * steps_per_epoch)
        final_decay = self.ARCH["train"]["lr_decay"]**(1 / steps_per_epoch)
        self.scheduler = warmupLR(optimizer=self.optimizer,
                                  lr=self.ARCH["train"]["lr"],
                                  warmup_steps=up_steps,
                                  momentum=self.ARCH["train"]["momentum"],
                                  decay=final_decay)

        if self.path is not None:
            torch.nn.Module.dump_patches = True
            w_dict = torch.load(path + "/SalsaNet",
                                map_location=lambda storage, loc: storage)
            self.model.load_state_dict(w_dict['state_dict'], strict=True)
            self.optimizer.load_state_dict(w_dict['optimizer'])
            self.epoch = w_dict['epoch'] + 1
            self.scheduler.load_state_dict(w_dict['scheduler'])
            print("dict epoch:", w_dict['epoch'])
            self.info = w_dict['info']
            print("info", w_dict['info'])
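
When a path is given, example #3 resumes from a single checkpoint dictionary that bundles the model, optimizer, and scheduler state together with the epoch counter and the info dict. Below is a minimal save-and-resume sketch using the same key layout; the tiny linear model, the file name, and the info values are placeholders.

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)  # stand-in for the real network
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)

# save a checkpoint with the same keys example #3 expects
torch.save({
    "state_dict": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "scheduler": scheduler.state_dict(),
    "epoch": 7,
    "info": {"valid_iou": 0.42},
}, "checkpoint.pth")

# resume: map_location keeps the load working on CPU-only machines
w_dict = torch.load("checkpoint.pth", map_location=lambda storage, loc: storage)
model.load_state_dict(w_dict["state_dict"], strict=True)
optimizer.load_state_dict(w_dict["optimizer"])
scheduler.load_state_dict(w_dict["scheduler"])
epoch = w_dict["epoch"] + 1  # continue with the epoch after the saved one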
Code example #4
    def __init__(self, ARCH, DATA, datadir, logdir, path=None):
        # parameters
        self.ARCH = ARCH
        self.DATA = DATA
        self.datadir = datadir
        self.log = logdir
        self.path = path

        # put logger where it belongs
        self.tb_logger = Logger(self.log + "/tb")
        self.info = {
            "train_update": 0,
            "train_loss": 0,
            "train_acc": 0,
            "train_iou": 0,
            "valid_loss": 0,
            "valid_acc": 0,
            "valid_iou": 0,
            "backbone_lr": 0,
            "decoder_lr": 0,
            "head_lr": 0,
            "post_lr": 0
        }

        # get the data
        parserModule = imp.load_source(
            "parserModule",
            booger.TRAIN_PATH + '/tasks/mask_regression/dataset/' +
            self.DATA["name"] + '/parser.py')
        self.parser = parserModule.Parser(
            root=self.datadir,
            train_sequences=self.DATA["split"]["train"],
            valid_sequences=self.DATA["split"]["valid"],
            test_sequences=None,
            labels=self.DATA["labels"],
            color_map=self.DATA["color_map"],
            learning_map=self.DATA["learning_map"],
            learning_map_inv=self.DATA["learning_map_inv"],
            sensor=self.ARCH["dataset"]["sensor"],
            max_points=self.ARCH["dataset"]["max_points"],
            batch_size=self.ARCH["train"]["batch_size"],
            workers=self.ARCH["train"]["workers"],
            gt=True,
            shuffle_train=True)

        # concatenate the encoder and the head
        with torch.no_grad():
            self.model = MaskRegressor(self.ARCH, self.path)

        # GPU?
        self.gpu = False
        self.multi_gpu = False
        self.n_gpus = 0
        self.model_single = self.model
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        print("Training in device: ", self.device)
        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
            cudnn.benchmark = True
            cudnn.fastest = True
            self.gpu = True
            self.n_gpus = 1
            self.model.cuda()
        if torch.cuda.is_available() and torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            self.model = nn.DataParallel(self.model)  # spread in gpus
            self.model = convert_model(self.model).cuda()  # sync batchnorm
            self.model_single = self.model.module  # single model to get weight names
            self.multi_gpu = True
            self.n_gpus = torch.cuda.device_count()

        # loss
        if self.ARCH["train"].get("loss") == "xentropy":
            w = torch.zeros(1, dtype=torch.float)
            w[0] = 14.0
            self.criterion = nn.BCEWithLogitsLoss(pos_weight=w).to(self.device)
        else:
            raise Exception('Loss not defined in config file')
        # loss as dataparallel too (more images in batch)
        if self.n_gpus > 1:
            self.criterion = nn.DataParallel(
                self.criterion).cuda()  # spread in gpus

        # optimizer
        self.lr_group_names = []
        self.train_dicts = []
        if self.ARCH["backbone"]["train"]:
            self.lr_group_names.append("backbone_lr")
            self.train_dicts.append(
                {'params': self.model_single.backbone.parameters()})
        if self.ARCH["decoder"]["train"]:
            self.lr_group_names.append("decoder_lr")
            self.train_dicts.append(
                {'params': self.model_single.decoder.parameters()})
        if self.ARCH["head"]["train"]:
            self.lr_group_names.append("head_lr")
            self.train_dicts.append(
                {'params': self.model_single.head.parameters()})

        # Use SGD optimizer to train
        self.optimizer = optim.SGD(self.train_dicts,
                                   lr=self.ARCH["train"]["lr"],
                                   momentum=self.ARCH["train"]["momentum"],
                                   weight_decay=self.ARCH["train"]["w_decay"])

        # Use warmup learning rate
        # post decay and step sizes come in epochs and we want it in steps
        steps_per_epoch = self.parser.get_train_size()
        up_steps = int(self.ARCH["train"]["wup_epochs"] * steps_per_epoch)
        final_decay = self.ARCH["train"]["lr_decay"]**(1 / steps_per_epoch)
        self.scheduler = warmupLR(optimizer=self.optimizer,
                                  lr=self.ARCH["train"]["lr"],
                                  warmup_steps=up_steps,
                                  momentum=self.ARCH["train"]["momentum"],
                                  decay=final_decay)
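
Example #4 trains a single-channel mask head with BCEWithLogitsLoss, using pos_weight=14 so the rare positive pixels weigh far more than the background. A minimal sketch of how that weighted loss is applied; the tensor shapes and the random inputs are illustrative only.

import torch
import torch.nn as nn

pos_weight = torch.tensor([14.0])  # positive pixels count 14x in the loss
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)

logits = torch.randn(2, 1, 64, 512)                   # raw network outputs, no sigmoid
target = (torch.rand(2, 1, 64, 512) > 0.95).float()   # sparse binary ground-truth mask

loss = criterion(logits, target)
print(loss.item())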