def __init__(self, config, num_classes, train_triplet=False):
        """

        :param config: 配置参数
        :param num_classes: 训练集的类别数;类型为int
        :param train_triplet: 是否只训练triplet损失;类型为bool
        """
        self.num_classes = num_classes

        self.model_name = config.model_name
        self.last_stride = config.last_stride
        self.num_gpus = torch.cuda.device_count()
        print('Using {} GPUs'.format(self.num_gpus))
        print('NUM_CLASS: {}'.format(self.num_classes))
        print('USE LOSS: {}'.format(config.selected_loss))

        # Load the model; if a GPU is available wrap it in DataParallel, and with more than one GPU convert BatchNorm to synchronized BN via convert_model
        self.model = get_model(self.model_name, self.num_classes, self.last_stride)
        if torch.cuda.is_available():
            self.model = torch.nn.DataParallel(self.model)
            if self.num_gpus > 1:
                self.model = convert_model(self.model)
            self.model = self.model.cuda()

        # Load hyperparameters
        self.epoch = config.epoch

        # Instantiate the Solver class that implements the various helper routines
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.solver = Solver(self.model, self.device)

        # Build the loss function
        self.criterion = Loss(self.model_name, config.selected_loss, config.margin, self.num_classes)

        # Build the optimizer
        self.optim = get_optimizer(config, self.model)

        # Build the learning-rate decay schedule
        self.scheduler = get_scheduler(config, self.optim)

        # Create the directory for saving weights
        self.model_path = os.path.join(config.save_path, config.model_name)
        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        # If only the triplet loss is trained, resume from the existing checkpoint
        if train_triplet:
            self.solver.load_checkpoint(os.path.join(self.model_path, '{}.pth'.format(self.model_name)))

        # Save the config as JSON and initialize TensorBoard
        TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S}".format(datetime.datetime.now())
        self.writer = SummaryWriter(log_dir=os.path.join(self.model_path, TIMESTAMP))
        with codecs.open(self.model_path + '/' + TIMESTAMP + '.json', 'w', "utf-8") as json_file:
            json.dump({k: v for k, v in config._get_kwargs()}, json_file, ensure_ascii=False)

        # Set the random seed; keep it fixed when splitting the training and validation sets for cross-validation
        self.seed = int(time.time())
        seed_torch(self.seed)
        with open(self.model_path + '/' + TIMESTAMP + '.pkl', 'wb') as f:
            pickle.dump({'seed': self.seed}, f, -1)
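
The constructor above records the seed and calls a seed_torch helper to pin the RNGs. A minimal sketch of such a helper, assuming it seeds Python, NumPy, and PyTorch and forces deterministic cuDNN (the repo's actual implementation may differ):

import os
import random
import numpy as np
import torch

def seed_torch(seed=2019):
    """Sketch: pin every RNG this training code touches to the given seed."""
    random.seed(seed)                         # Python RNG
    os.environ['PYTHONHASHSEED'] = str(seed)  # hash-based operations
    np.random.seed(seed)                      # NumPy RNG
    torch.manual_seed(seed)                   # CPU RNG
    torch.cuda.manual_seed_all(seed)          # all GPU RNGs
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
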
def prepare(config, train_labels_number):
    """

    Args:
        config: 配置参数
        train_labels_number: list, 某一折的[number_class0, number__class1, ...]

    Returns:
        optimizer: 优化器
        model: 模型
        criterion: 损失函数
    """
    # Build the model
    prepare_model = PrepareModel()
    model = prepare_model.create_model(
        model_type=config.model_type,
        classes_num=config.num_classes,
        drop_rate=config.drop_rate,
        pretrained=True,
        bn_to_gn=config.bn_to_gn
    )
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model)
        model = model.cuda()

    # Build the optimizer
    optimizer = prepare_model.create_optimizer(config.model_type, model, config)

    # Build the loss function
    criterion = Loss(config.model_type, config.loss_name, config.num_classes, train_labels_number, config.beta_CB,
                     config.gamma)
    return optimizer, model, criterion
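
A hedged usage sketch of prepare inside a k-fold loop; get_fold_label_counts, build_fold_loaders, and config.n_splits are placeholders for whatever this repo actually provides, not its real API:

# Hypothetical caller of prepare() for one cross-validation fold.
for fold_index in range(config.n_splits):
    # per-class sample counts for this fold, e.g. [120, 87, 45, ...]
    train_labels_number = get_fold_label_counts(fold_index)
    optimizer, model, criterion = prepare(config, train_labels_number)
    train_loader, valid_loader = build_fold_loaders(config, fold_index)
    # ... train for config.epoch epochs, validate, save the best weights ...
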
Example #3
    def __init__(self, config, fold):
        """
        Args:
            config: 配置参数
            fold: 当前为第几折
        """
        self.config = config
        self.fold = fold
        self.epoch = config.epoch
        self.num_classes = config.num_classes
        self.lr_scheduler = config.lr_scheduler
        print('USE LOSS: {}'.format(config.loss_name))

        # Build the model
        prepare_model = PrepareModel()
        self.model = prepare_model.create_local_attention_model(
            model_type=config.model_type,
            classes_num=self.num_classes,
            last_stride=2,
            droprate=0)

        # Find the most recently produced weight file
        weight_path = os.path.join('checkpoints', config.model_type)
        lists = os.listdir(weight_path)  # all files in the directory
        lists.sort(
            key=lambda fn: os.path.getmtime(weight_path + '/' + fn))  # sort by modification time
        weight_path = os.path.join(weight_path, lists[-1], 'model_best.pth')

        # Load previously trained weights
        pretrained_dict = torch.load(weight_path)['state_dict']
        model_dict = self.model.state_dict()
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items() if k in model_dict
        }  # filter out unnecessary keys
        model_dict.update(pretrained_dict)
        self.model.load_state_dict(model_dict)
        print('Successfully loaded from %s' % weight_path)

        if torch.cuda.is_available():
            self.model = torch.nn.DataParallel(self.model)
            self.model = self.model.cuda()

        # Build the optimizer
        self.optimizer = prepare_model.create_optimizer(
            config.model_type, self.model, config)

        # Build the learning-rate decay schedule
        self.exp_lr_scheduler = prepare_model.create_lr_scheduler(
            self.lr_scheduler,
            self.optimizer,
            step_size=config.lr_step_size,
            restart_step=config.restart_step,
            multi_step=config.multi_step)

        # Build the loss function
        self.criterion = Loss(config.model_type, config.loss_name,
                              self.num_classes)

        # Instantiate the Solver class that implements the various helper routines
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.solver = Solver(self.model, self.device)

        # Initialize logging
        self.writer, self.time_stamp = self.init_log()
        self.model_path = os.path.join(self.config.save_path,
                                       self.config.model_type, self.time_stamp)

        # Initialize the classification metric class
        with open("online-service/model/label_id_name.json",
                  'r',
                  encoding='utf-8') as json_file:
            self.class_names = list(json.load(json_file).values())
        self.classification_metric = ClassificationMetric(
            self.class_names, self.model_path)

        self.max_accuracy_valid = 0
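
The checkpoint lookup and partial state_dict loading in Example #3 can be folded into one helper. A sketch, assuming the checkpoints/<model_type>/<run>/model_best.pth layout used above; it keeps only the parameters whose names exist in the current model:

import os
import torch

def load_latest_partial_weights(model, model_type, root='checkpoints'):
    """Sketch: pick the newest run directory by mtime and copy over
    only the matching parameters from its model_best.pth."""
    run_root = os.path.join(root, model_type)
    runs = sorted(os.listdir(run_root),
                  key=lambda d: os.path.getmtime(os.path.join(run_root, d)))
    weight_path = os.path.join(run_root, runs[-1], 'model_best.pth')
    pretrained = torch.load(weight_path, map_location='cpu')['state_dict']
    model_dict = model.state_dict()
    model_dict.update({k: v for k, v in pretrained.items() if k in model_dict})
    model.load_state_dict(model_dict)
    return weight_path
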
Example #4
    def __init__(self, config, fold):
        """
        Args:
            config: 配置参数
            fold: 当前为第几折
        """
        self.config = config
        self.fold = fold
        self.epoch = config.epoch
        self.num_classes = config.num_classes
        self.lr_scheduler = config.lr_scheduler
        self.save_interval = 10
        self.cut_mix = config.cut_mix
        self.beta = config.beta
        self.cutmix_prob = config.cutmix_prob
        self.auto_aug = config.auto_aug

        # Multi-scale training settings
        self.image_size = config.image_size
        self.multi_scale = config.multi_scale
        self.val_multi_scale = config.val_multi_scale
        self.multi_scale_size = config.multi_scale_size
        self.multi_scale_interval = config.multi_scale_interval
        # Sparsity-training settings
        self.sparsity = config.sparsity
        self.sparsity_scale = config.sparsity_scale
        self.penalty_type = config.penalty_type
        self.selected_labels = config.selected_labels
        if self.auto_aug:
            print('@ Using AutoAugment.')
        if self.cut_mix:
            print('@ Using cut mix.')
        if self.multi_scale:
            print('@ Using multi scale training.')
        print('@ Using LOSS: {}'.format(config.loss_name))

        # Build the model
        prepare_model = PrepareModel()
        self.model = prepare_model.create_model(model_type=config.model_type,
                                                classes_num=self.num_classes,
                                                drop_rate=config.drop_rate,
                                                pretrained=True,
                                                bn_to_gn=config.bn_to_gn)
        if config.weight_path:
            self.model = prepare_model.load_chekpoint(self.model,
                                                      config.weight_path)

        # Sparsity training
        self.sparsity_train = None
        if config.sparsity:
            print('@ Using sparsity training.')
            self.sparsity_train = Sparsity(self.model,
                                           sparsity_scale=self.sparsity_scale,
                                           penalty_type=self.penalty_type)

        # L1 regularization
        self.l1_regular = config.l1_regular
        self.l1_decay = config.l1_decay
        if self.l1_regular:
            print('@ Using l1_regular')
            self.l1_reg_loss = Regularization(self.model,
                                              weight_decay=self.l1_decay,
                                              p=1)

        if torch.cuda.is_available():
            self.model = torch.nn.DataParallel(self.model)
            self.model = self.model.cuda()

        # Build the optimizer
        self.optimizer = prepare_model.create_optimizer(
            config.model_type, self.model, config)

        # Build the learning-rate decay schedule
        self.exp_lr_scheduler = prepare_model.create_lr_scheduler(
            self.lr_scheduler,
            self.optimizer,
            step_size=config.lr_step_size,
            restart_step=config.restart_step,
            multi_step=config.multi_step,
            warmup=config.warmup,
            multiplier=config.multiplier,
            warmup_epoch=config.warmup_epoch,
            delay_epoch=config.delay_epoch)

        # Build the loss function
        self.criterion = Loss(config.model_type, config.loss_name,
                              self.num_classes)

        # Instantiate the Solver class that implements the various helper routines
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.solver = Solver(self.model, self.device)

        # Initialize logging
        self.writer, self.time_stamp = self.init_log()
        self.model_path = os.path.join(self.config.save_path,
                                       self.config.model_type, self.time_stamp)

        # Initialize the classification metric class
        with open("online-service/model/label_id_name.json",
                  'r',
                  encoding='utf-8') as json_file:
            self.class_names = list(json.load(json_file).values())
        self.classification_metric = ClassificationMetric(
            self.class_names, self.model_path)

        self.max_accuracy_valid = 0
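
Example #4 enables CutMix via cut_mix, beta, and cutmix_prob, but the mixing itself happens in the training loop, which is not shown here. A minimal sketch of the standard CutMix step under that assumption (the repo's actual loop may differ):

import numpy as np
import torch

def rand_bbox(height, width, lam):
    """Random box covering roughly (1 - lam) of the image area, as in CutMix."""
    cut_ratio = np.sqrt(1. - lam)
    cut_h, cut_w = int(height * cut_ratio), int(width * cut_ratio)
    cy, cx = np.random.randint(height), np.random.randint(width)
    y1, y2 = np.clip(cy - cut_h // 2, 0, height), np.clip(cy + cut_h // 2, 0, height)
    x1, x2 = np.clip(cx - cut_w // 2, 0, width), np.clip(cx + cut_w // 2, 0, width)
    return y1, y2, x1, x2

def cutmix_batch(images, targets, beta, cutmix_prob):
    """Mix a batch with probability cutmix_prob; returns images, both label
    tensors, and the mixing coefficient lam for the weighted loss."""
    if np.random.rand() >= cutmix_prob:
        return images, targets, targets, 1.0
    lam = np.random.beta(beta, beta)
    perm = torch.randperm(images.size(0), device=images.device)
    y1, y2, x1, x2 = rand_bbox(images.size(2), images.size(3), lam)
    images[:, :, y1:y2, x1:x2] = images[perm, :, y1:y2, x1:x2]
    # recompute lam from the actual (clipped) box area
    lam = 1.0 - (y2 - y1) * (x2 - x1) / float(images.size(2) * images.size(3))
    return images, targets, targets[perm], lam

The training step would then combine the two targets as lam * criterion(outputs, targets_a) + (1 - lam) * criterion(outputs, targets_b).
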
Example #5
    def __init__(self, config, fold, train_labels_number):
        """
        Args:
            config: 配置参数
            fold: int, 当前为第几折
            train_labels_number: list, 某一折的[number_class0, number__class1, ...]
        """
        self.config = config
        self.fold = fold
        self.epoch = config.epoch
        self.num_classes = config.num_classes
        self.lr_scheduler = config.lr_scheduler
        self.save_interval = 100
        self.cut_mix = config.cut_mix
        self.beta = config.beta
        self.cutmix_prob = config.cutmix_prob

        self.image_size = config.image_size
        self.multi_scale = config.multi_scale
        self.multi_scale_size = config.multi_scale_size
        self.multi_scale_interval = config.multi_scale_interval
        if self.cut_mix:
            print('Using cut mix.')
        if self.multi_scale:
            print('Using multi scale training.')
        print('USE LOSS: {}'.format(config.loss_name))

        # Build the model
        prepare_model = PrepareModel()
        self.model = prepare_model.create_model(model_type=config.model_type,
                                                classes_num=self.num_classes,
                                                drop_rate=config.drop_rate,
                                                pretrained=True,
                                                bn_to_gn=config.bn_to_gn)
        if torch.cuda.is_available():
            self.model = torch.nn.DataParallel(self.model)
            self.model = self.model.cuda()

        # Build the optimizer
        self.optimizer = prepare_model.create_optimizer(
            config.model_type, self.model, config)

        # Build the learning-rate decay schedule
        self.exp_lr_scheduler = prepare_model.create_lr_scheduler(
            self.lr_scheduler,
            self.optimizer,
            step_size=config.lr_step_size,
            restart_step=config.restart_step,
            multi_step=config.multi_step)

        # Build the loss function
        self.criterion = Loss(config.model_type, config.loss_name,
                              self.num_classes, train_labels_number,
                              config.beta_CB, config.gamma)

        # Instantiate the Solver class that implements the various helper routines
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.solver = Solver(self.model, self.device)
        if config.restore:
            weight_path = os.path.join('checkpoints', config.model_type)
            if config.restore == 'last':
                lists = os.listdir(weight_path)  # all files in the directory
                lists.sort(key=lambda fn: os.path.getmtime(weight_path + '/' +
                                                           fn))  # sort by modification time
                weight_path = os.path.join(weight_path, lists[-1],
                                           'model_best.pth')
            else:
                weight_path = os.path.join(weight_path, config.restore,
                                           'model_best.pth')
            self.solver.load_checkpoint(weight_path)

        # Initialize logging
        self.writer, self.time_stamp = self.init_log()
        self.model_path = os.path.join(self.config.train_url,
                                       self.config.model_type, self.time_stamp)

        # Initialize the classification metric class
        with open("online-service/model/label_id_name.json",
                  'r',
                  encoding='utf-8') as json_file:
            self.class_names = list(json.load(json_file).values())
        self.classification_metric = ClassificationMetric(self.class_names,
                                                          self.model_path,
                                                          text_flag=0)

        self.max_accuracy_valid = 0
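
Example #5 passes the per-class counts plus beta_CB and gamma into Loss, which suggests a class-balanced (effective-number-of-samples) weighting as in Cui et al. 2019. A sketch of how such weights are typically derived from the counts; the repo's Loss class may combine them with focal loss differently:

import numpy as np
import torch

def class_balanced_weights(train_labels_number, beta_CB):
    """Per-class weights proportional to 1 / effective number of samples,
    normalized so they sum to the number of classes."""
    counts = np.asarray(train_labels_number, dtype=np.float64)
    effective_num = 1.0 - np.power(beta_CB, counts)
    weights = (1.0 - beta_CB) / effective_num
    weights = weights / weights.sum() * len(counts)
    return torch.tensor(weights, dtype=torch.float32)

# These weights can then feed a weighted cross-entropy or focal loss, e.g.:
# criterion = torch.nn.CrossEntropyLoss(weight=class_balanced_weights(counts, 0.9999).cuda())
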
Example #6
    def __init__(self, config, fold, train_labels_number):
        """
        Args:
            config: 配置参数
            fold: int, 当前为第几折
            train_labels_number: list, 某一折的[number_class0, number__class1, ...]
        """
        self.config = config
        self.fold = fold
        self.epoch = config.epoch
        self.num_classes = config.num_classes
        self.lr_scheduler = config.lr_scheduler
        self.cut_mix = config.cut_mix
        self.beta = config.beta
        self.cutmix_prob = config.cutmix_prob
        self.train_url = config.train_url
        self.bucket_name = config.bucket_name

        self.image_size = config.image_size
        self.multi_scale = config.multi_scale
        self.multi_scale_size = config.multi_scale_size
        self.multi_scale_interval = config.multi_scale_interval
        if self.cut_mix:
            print('Using cut mix.')
        if self.multi_scale:
            print('Using multi scale training.')
        print('USE LOSS: {}'.format(config.loss_name))

        # Copy the pre-trained weights
        print("=> using pre-trained model '{}'".format(config.model_type))
        if not mox.file.exists(
                '/home/work/.cache/torch/checkpoints/se_resnext101_32x4d-3b2fe3d8.pth'
        ):
            mox.file.copy(
                os.path.join(self.bucket_name,
                             'model_zoo/se_resnext101_32x4d-3b2fe3d8.pth'),
                '/home/work/.cache/torch/checkpoints/se_resnext101_32x4d-3b2fe3d8.pth'
            )
            print(
                'copied pre-trained model from OBS to %s' %
                (os.path.abspath(
                    '/home/work/.cache/torch/checkpoints/se_resnext101_32x4d-3b2fe3d8.pth'
                )))
        else:
            print('using existing pre-trained model at %s' % (os.path.abspath(
                '/home/work/.cache/torch/checkpoints/se_resnext101_32x4d-3b2fe3d8.pth'
            )))

        # Copy the pre-trained weights
        print("=> using pre-trained model '{}'".format(config.model_type))
        if not mox.file.exists(
                '/home/work/.cache/torch/checkpoints/efficientnet-b5-b6417697.pth'
        ):
            mox.file.copy(
                os.path.join(self.bucket_name,
                             'model_zoo/efficientnet-b5-b6417697.pth'),
                '/home/work/.cache/torch/checkpoints/efficientnet-b5-b6417697.pth'
            )
            print(
                'copied pre-trained model from OBS to %s' %
                (os.path.abspath(
                    '/home/work/.cache/torch/checkpoints/efficientnet-b5-b6417697.pth'
                )))
        else:
            print('using existing pre-trained model at %s' % (os.path.abspath(
                '/home/work/.cache/torch/checkpoints/efficientnet-b5-b6417697.pth'
            )))

        # Build the model
        prepare_model = PrepareModel()
        self.model = prepare_model.create_model(model_type=config.model_type,
                                                classes_num=self.num_classes,
                                                drop_rate=config.drop_rate,
                                                pretrained=True,
                                                bn_to_gn=config.bn_to_gn)
        self.model = torch.nn.DataParallel(self.model).cuda()

        # Build the optimizer
        self.optimizer = prepare_model.create_optimizer(
            config.model_type, self.model, config)

        # Build the learning-rate decay schedule
        self.exp_lr_scheduler = prepare_model.create_lr_scheduler(
            self.lr_scheduler,
            self.optimizer,
            step_size=config.lr_step_size,
            restart_step=config.restart_step,
            multi_step=config.multi_step)

        # Build the loss function
        self.criterion = Loss(config.model_type, config.loss_name,
                              self.num_classes, train_labels_number,
                              config.beta_CB, config.gamma)

        # Instantiate the Solver class that implements the various helper routines
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        self.solver = Solver(self.model, self.device)
        if config.restore:
            weight_path = os.path.join('checkpoints', config.model_type)
            if config.restore == 'last':
                lists = os.listdir(weight_path)  # all files in the directory
                lists.sort(key=lambda fn: os.path.getmtime(weight_path + '/' +
                                                           fn))  # sort by modification time
                weight_path = os.path.join(weight_path, lists[-1],
                                           'model_best.pth')
            else:
                weight_path = os.path.join(weight_path, config.restore,
                                           'model_best.pth')
            self.solver.load_checkpoint(weight_path)

        # Initialize logging
        self.writer, self.time_stamp = self.init_log()
        self.model_path = os.path.join(self.config.train_local,
                                       self.config.model_type, self.time_stamp)

        # Initialize the classification metric class
        with open(config.local_data_root + 'label_id_name.json',
                  'r',
                  encoding='utf-8') as json_file:
            self.class_names = list(json.load(json_file).values())
        self.classification_metric = ClassificationMetric(self.class_names,
                                                          self.model_path,
                                                          text_flag=0)

        self.max_accuracy_valid = 0
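
Several examples above configure multi_scale, multi_scale_size, and multi_scale_interval, but the resizing itself happens in the training loop, which is not shown. A hedged sketch of one common way to apply it, resizing each batch on the fly; the repo may instead rebuild its dataloader per scale:

import torch.nn.functional as F

def rescale_batch(images, step, multi_scale_size, multi_scale_interval):
    """Cycle through multi_scale_size, switching every multi_scale_interval steps,
    and resize the current batch to the selected square size."""
    size = multi_scale_size[(step // multi_scale_interval) % len(multi_scale_size)]
    if images.size(-1) != size:
        images = F.interpolate(images, size=(size, size),
                               mode='bilinear', align_corners=False)
    return images

# e.g. inside the training loop:
# images = rescale_batch(images, global_step, config.multi_scale_size, config.multi_scale_interval)
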