Exemple #1
0
def create_model(args):
    """
    Create the model based on configuration file

    Note that ``args`` is modified in place: the encoder and decoder names are
    replaced by their ``misc_utils.stem_string`` form and a missing
    ``'imagenet'`` entry is filled in with ``'True'``.

    :param args: the configuration parameter class defined in config.py; it is
                 indexed like a dict here (``encoder_name``, ``decoder_name``,
                 ``optimizer.aux_loss``, ``dataset.class_num``, ``use_emau``,
                 ``use_ocr``, optionally ``imagenet``, and ``sfn`` for the
                 UNet with the 'base' encoder)
    :return: the constructed model
    :raises NotImplementedError: if the decoder name is not supported
    """
    args['encoder_name'] = misc_utils.stem_string(args['encoder_name'])
    args['decoder_name'] = misc_utils.stem_string(args['decoder_name'])
    aux_loss = bool(args['optimizer']['aux_loss'])

    # TODO this is for compatible issue only, we might want to get rid of this later
    if 'imagenet' not in args:
        args['imagenet'] = 'True'

    decoder_name = args['decoder_name']
    decoders = {
        'unet': unet.UNet,
        'psp': pspnet.PSPNet,
        'pspnet': pspnet.PSPNet,
        'dlinknet': dlinknet.DLinkNet,
        'deeplabv3': deeplabv3.DeepLabV3,
    }
    # Reject unknown decoders before touching any other configuration key.
    if decoder_name not in decoders:
        raise NotImplementedError(
            'Decoder structure {} is not supported'.format(decoder_name))

    # Arguments shared by every supported model constructor.
    model_kwargs = dict(n_class=args['dataset']['class_num'],
                        encoder_name=args['encoder_name'],
                        aux_loss=aux_loss,
                        use_emau=args['use_emau'],
                        use_ocr=args['use_ocr'])

    if decoder_name == 'unet' and args['encoder_name'] == 'base':
        # The base UNet encoder is sized by sfn and is never given the
        # pretrained flag.
        model_kwargs['sfn'] = args['sfn']
    else:
        pretrained = args['imagenet']
        if isinstance(pretrained, str):
            # NOTE(review): eval() executes arbitrary code taken from the
            # configuration; only use with trusted config files. Consider
            # ast.literal_eval if the value is always a plain literal.
            pretrained = eval(pretrained)
        # Non-string values (e.g. a real boolean) are used as they are;
        # eval() would raise TypeError on them.
        model_kwargs['pretrained'] = pretrained

    return decoders[decoder_name](**model_kwargs)
Exemple #2
0
def create_loss(args, **kwargs):
    """
    Build the list of criteria named in the trainer configuration
    :param args: the configuration parameter class defined in config.py; the
                 comma separated names are read from args['trainer']['criterion_name']
    :param kwargs: extra options; kwargs['device'] is read for the 'softiou'
                   and 'focal' criteria
    :return: list of criterion objects, in the order the names are listed
    """
    trainer_cfg = args['trainer']
    requested = misc_utils.stem_string(trainer_cfg['criterion_name']).split(',')

    losses = []
    for name in requested:
        if name == 'xent':
            class_weight = eval(trainer_cfg['class_weight'])
            loss = metric_utils.CrossEntropyLoss(class_weight)
        elif name == 'iou':
            # this metric is non-differentiable
            loss = metric_utils.IoU()
        elif name == 'softiou':
            loss = metric_utils.SoftIoULoss(kwargs['device'])
        elif name == 'focal':
            loss = metric_utils.FocalLoss(kwargs['device'],
                                          gamma=trainer_cfg['gamma'],
                                          alpha=trainer_cfg['alpha'])
        elif name == 'lovasz':
            loss = metric_utils.LovaszSoftmax()
        else:
            # The message reports the full configured string, not just the
            # unrecognised entry.
            raise NotImplementedError(
                'Criterion type {} is not supported'.format(
                    trainer_cfg['criterion_name']))
        losses.append(loss)
    return losses
Exemple #3
0
def create_preproc_layer(preproc_name):
    """
    Create the preprocessing layer selected by name
    :param preproc_name: name of the layer; compared after misc_utils.stem_string
                         against 'gamma', 'affine' and 'colormap'
    :return: a new instance of the matching layer class
    :raises NotImplementedError: if the name matches none of the known layers
    """
    layer_types = {
        'gamma': GammaAdjustTransform,
        'affine': AffineTransform,
        'colormap': ColorMap,
    }
    stemmed_name = misc_utils.stem_string(preproc_name)
    layer_type = layer_types.get(stemmed_name)
    if layer_type is None:
        raise NotImplementedError(
            'Preproc layer {} not supported'.format(stemmed_name))
    return layer_type()
Exemple #4
0
def create_optimizer(optm_name, train_params, lr):
    """
    Build the optimizer selected by name
    :param optm_name: the optimizer name defined in config.py; compared after
                      misc_utils.stem_string against 'sgd' and 'adam'
    :param train_params: learning rate arrangement for the training parameters
    :param lr: learning rate
    :return: corresponding torch optim class
    :raises NotImplementedError: if the name is neither 'sgd' nor 'adam'
    """
    stemmed_name = misc_utils.stem_string(optm_name)
    if stemmed_name == 'sgd':
        # SGD uses a fixed momentum and weight decay
        return optim.SGD(train_params,
                         lr=lr,
                         momentum=0.9,
                         weight_decay=5e-4)
    if stemmed_name == 'adam':
        return optim.Adam(train_params, lr=lr)
    # The error reports the name exactly as the caller passed it
    raise NotImplementedError(
        'Optimizer name {} is not supported'.format(optm_name))
Exemple #5
0
 def __init__(self,
              n_class,
              sfn=32,
              encoder_name='base',
              pretrained=True,
              aux_loss=False,
              use_emau=False,
              use_ocr=False):
     """
     Initialize the Unet model
     :param sfn: the start filter number, following blocks have n*sfn number of filters;
                 only used with the 'base' encoder
     :param n_class: the number of class
     :param encoder_name: name of the encoder, could be 'base', 'vgg16'
     :param pretrained: if True, load the weights from pretrained model;
                        not used with the 'base' encoder
     :param aux_loss: if True, will create a classification branch for extracted features
     :param use_emau: if True or int, an EMAU will be appended at the end of the encoder;
                      an int is passed to the EMAU as its second argument, True selects 64
     :param use_ocr: if True, the OCR module will be appended at the end of the encoder
     """
     super(UNet, self).__init__()
     self.n_class = n_class
     self.encoder_name = misc_utils.stem_string(encoder_name)
     self.aux_loss = aux_loss
     self.use_emau = use_emau
     self.use_ocr = use_ocr
     if self.encoder_name == 'base':
         self.sfn = sfn
         self.encoder = UnetBaseEncoder(self.sfn)
         self.margins = [4, 16, 40, 88]
         # Filter counts from the deepest level to the shallowest:
         # 16*sfn, 8*sfn, 4*sfn, 2*sfn, sfn
         filter_nums = [self.sfn * (2**a) for a in range(4, -1, -1)]
         self.decode_in_chans = filter_nums[:-1]
         self.decode_out_chans = filter_nums[1:]
         self.lbl_margin = 92
         conv_chan = None
         pad = 0
         up_sample = 0
     else:
         self.encoder = encoders.models(self.encoder_name, pretrained,
                                        (2, 2, 2, 2, 2), True)
         self.margins = [0, 0, 0, 0]
         filter_nums = self.encoder.chans
         self.decode_in_chans = filter_nums[:-1]
         self.decode_out_chans = filter_nums[1:]
         self.lbl_margin = 0
         conv_chan = [
             d_in // 2 + d_out
             for (d_in,
                  d_out) in zip(self.decode_in_chans, self.decode_out_chans)
         ]
         pad = 1
         up_sample = 0 if 'vgg' in self.encoder_name else 2
     if self.aux_loss:
         self.cls = nn.Sequential(nn.Linear(self.decode_in_chans[0], 256),
                                  nn.ReLU(inplace=True),
                                  nn.Linear(256, self.n_class))
     else:
         self.cls = None
     if self.use_emau:
         # bool is a subclass of int, so it must be excluded explicitly;
         # otherwise use_emau=True would pass True (i.e. 1) as the EMAU
         # size instead of the default 64.
         if isinstance(self.use_emau, int) and not isinstance(
                 self.use_emau, bool):
             c = self.use_emau
         else:
             c = 64
         self.encoder.emau = emau.EMAU(self.decode_in_chans[0], c)
     if self.use_ocr:
         # NOTE(review): this reads self.encoder.chans, which the 'base'
         # branch above never reads -- confirm UnetBaseEncoder defines it.
         self.encoder.ocr = ocr.OCRModule(self.n_class,
                                          *self.encoder.chans[:2][::-1],
                                          self.encoder.chans[0])
     self.decoder = UnetDecoder(self.decode_in_chans, self.decode_out_chans,
                                self.margins, self.n_class, conv_chan, pad,
                                up_sample)
Exemple #6
0
 def __init__(self,
              ds_name,
              data_dir,
              tsfm,
              device,
              load_func=None,
              infer=False,
              ensembler=None,
              **kwargs):
     """
     Collect the image/label file lists and label settings for a dataset
     :param ds_name: dataset name; compared after misc_utils.stem_string against
                     'inria', 'deepglobe', 'deepgloberoad', 'deepglobeland',
                     'mnih' and 'spca'
     :param data_dir: directory passed to the dataset's file listing function
     :param tsfm: stored as self.tsfm
     :param device: stored as self.device
     :param load_func: used when ds_name is not one of the built-in datasets;
                       called as load_func(data_dir, **kwargs) and expected to
                       return (rgb_files, lbl_files)
     :param infer: only consulted for 'deepglobeland', where it selects
                   preprocess.get_test_images instead of preprocess.get_images
     :param ensembler: stored as self.ensembler; a BaseEnsemble() is created
                       when it is None
     :param kwargs: forwarded to the file listing function (not for
                    'deepglobe'); with load_func the keys 'truth_val',
                    'decode_func', 'encode_func' and 'class_names' are popped
                    first and used as the corresponding attributes
     :raises NotImplementedError: if ds_name is unknown and no load_func is given
     """
     ds_name = misc_utils.stem_string(ds_name)
     self.tsfm = tsfm
     self.device = device
     if ensembler is None:
         self.ensembler = BaseEnsemble()
     else:
         self.ensembler = ensembler
     # The preprocess modules are imported lazily so that only the selected
     # dataset's module is loaded.
     if ds_name == 'inria':
         from data.inria import preprocess
         self.rgb_files, self.lbl_files = preprocess.get_images(
             data_dir, **kwargs)
         self.truth_val = 255
         self.decode_func = None
         self.encode_func = None
         self.class_names = [
             'building',
         ]
     elif ds_name == 'deepglobe':
         from data.deepglobe import preprocess
         # This dataset's get_images is called without the extra kwargs.
         self.rgb_files, self.lbl_files = preprocess.get_images(data_dir)
         self.truth_val = 1
         self.decode_func = None
         self.encode_func = lambda x: x * 255
         self.class_names = [
             'building',
         ]
     elif ds_name == 'deepgloberoad':
         from data.deepgloberoad import preprocess
         self.rgb_files, self.lbl_files = preprocess.get_images(
             data_dir, **kwargs)
         self.truth_val = 255
         self.decode_func = preprocess.decode_map
         self.encode_func = None
         self.class_names = [
             'road',
         ]
     elif ds_name == 'deepglobeland':
         from data.deepglobeland import preprocess
         if not infer:
             self.rgb_files, self.lbl_files = preprocess.get_images(
                 data_dir, **kwargs)
         else:
             self.rgb_files, self.lbl_files = preprocess.get_test_images(
                 data_dir, **kwargs)
         self.truth_val = 1
         self.decode_func = preprocess.decode_map
         self.encode_func = preprocess.encode_map
         self.class_names = preprocess.CLASS_NAMES[:6]
     elif ds_name == 'mnih':
         from data.mnih import preprocess
         self.rgb_files, self.lbl_files = preprocess.get_images(
             data_dir, **kwargs)
         self.truth_val = 255
         self.decode_func = None
         self.encode_func = None
         self.class_names = [
             'road',
         ]
     elif ds_name == 'spca':
         from data.spca import preprocess
         self.rgb_files, self.lbl_files = preprocess.get_images(
             data_dir, **kwargs)
         self.truth_val = 1
         self.decode_func = None
         self.encode_func = None
         self.class_names = [
             'panel',
         ]
     elif load_func:
         # Pop the label settings so they are not forwarded to load_func.
         self.truth_val = kwargs.pop('truth_val', 1)
         self.decode_func = kwargs.pop('decode_func', None)
         self.encode_func = kwargs.pop('encode_func', None)
         self.class_names = kwargs.pop('class_names', [
             'building',
         ])
         self.rgb_files, self.lbl_files = load_func(data_dir, **kwargs)
     else:
         # Include the offending name; the message previously contained a
         # literal '{}' because it was never formatted.
         raise NotImplementedError(
             'Dataset {} is not supported'.format(ds_name))
     # Every dataset must provide exactly one label file per image.
     assert len(self.rgb_files) == len(self.lbl_files)