def __init__(self, agent_config):
        """Pretraining agent: builds the model, criterion, optimizer and logging.

        :param agent_config (dict): expects the keys
            lr, momentum, weight_decay (float): optimizer hyper-parameters
            schedule ([int]): LR milestones; the last entry is the final epoch
            model_type, model_name (str), out_dim ({task: dim}), model_weights (str)
            force_single_head (bool)
            print_freq (int)
            gpuid ([int]): first entry < 0 selects CPU
            loaders (dict): 'pretrain' -> (train_loader, test_loader)
            loss (str): 'fl' selects FocalLoss, anything else CrossEntropyLoss
            exp_name (str), save_after: used for TensorBoard dir and checkpointing
        """

        super(Network_, self).__init__()
        self.log = print  # simple stdout logger
        self.config = agent_config

        # pre-split loaders supplied by the caller under the 'pretrain' key
        self.train_loader, self.test_loader = self.config['loaders'][
            'pretrain']

        self.model = self.create_model()
        # 'fl' -> focal loss, anything else -> plain cross-entropy
        self.criterion_fn = FocalLoss(
        ) if self.config['loss'] == 'fl' else CrossEntropyLoss()

        # gpuid[0] >= 0 moves the whole agent (model + criterion) to GPU
        if agent_config['gpuid'][0] >= 0:
            self.cuda()
            self.gpu = True
        else:
            self.gpu = False

        self.exp_name = agent_config['exp_name']
        self.init_optimizer()
        self.n_iter = 0  # global step counter for logging

        # TensorBoard logging under runs/<exp_name>
        self.writer = SummaryWriter(log_dir="runs/" + self.exp_name)
        self.save_after = self.config['save_after']
# Beispiel #2
# 0
    def __init__(self, task, cuda=True):
        """Build the loss criterion for the given task.

        :param task: task id; 2 selects FocalLoss over 2-way logits,
            anything else a single-logit BCE-with-logits loss.
        :param cuda: move the loss module to GPU and compare with CUDA floats.
        """
        # Honor the caller's task id (previously hard-coded to 2, which
        # silently ignored the parameter and made the BCE branch dead code).
        self.task = task
        self.floattensor = "torch.FloatTensor"
        if self.task == 2:
            self.loss_func = FocalLoss(gamma=0.75)  #nn.CrossEntropyLoss()#
        else:
            self.loss_func = nn.BCEWithLogitsLoss()

        if cuda:
            self.loss_func = self.loss_func.cuda()
            self.floattensor = "torch.cuda.FloatTensor"
# Beispiel #3
# 0
    def __init__(self, num_labels, dropout_prob, bret_pretrainded_path):
        """
        :param num_labels: size of the classification output layer
        :param dropout_prob: unused here — TODO confirm whether dropout was
            meant to be applied before the linear head
        :param bret_pretrainded_path: unused here — name suggests a BERT
            checkpoint path; verify against callers
        """
        # Initialization
        super().__init__()

        # Network structure: a single linear head over 768-dim features
        self.fc = nn.Linear(768, num_labels)
        # self.loss_fn = torch.nn.CrossEntropyLoss(ignore_index=0)
        self.loss_fn = FocalLoss()
def train(args, model, device, train_loader, optimizer, epoch, joint=True):
    """Run one training epoch.

    :param args: namespace providing at least ``log_interval``
    :param model: network to optimize
    :param device: torch device for data/targets
    :param train_loader: iterable of (data, target) batches
    :param optimizer: stepped once per batch
    :param epoch: current epoch number (logging only)
    :param joint: if True, targets are multi-hot and a soft cross-entropy
        (-target * log_softmax) is used per head; otherwise hard labels
        with F.cross_entropy.
    """
    model.train()
    focal_loss = FocalLoss(gamma=0.2)  # currently unused (see commented line below)
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        print(data.size())  # NOTE(review): debug print left in the hot loop
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # 3-D output means multiple heads along dim 1: average loss over heads
        if len(output.size()) > 2:
            loss = 0
            for j in range(output.size(1)):
                if joint:
                    loss += torch.sum(
                        -target * F.log_softmax(output[:, j, :], -1),
                        -1)  #target has to be multihot encoding scheme
                else:
                    loss += F.cross_entropy(output[:, j, :], target)
                    #loss += focal_loss(output[:,j,:],target)
            loss /= output.size(1)
        else:
            loss = F.cross_entropy(output, target)

        # joint loss kept the batch dimension; reduce to a scalar here
        if joint: loss = loss.mean()
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
# Beispiel #5
# 0
class TaskModel(TaskModelBase):
    """Single linear classification head over 768-dim features with focal loss."""

    def __init__(self, num_labels, dropout_prob, bret_pretrainded_path):
        """
        :param num_labels: size of the classification output layer
        :param dropout_prob: unused — kept for interface compatibility (TODO confirm)
        :param bret_pretrainded_path: unused — kept for interface compatibility
        """
        # Initialization
        super().__init__()

        # Network structure: one linear projection from 768-dim encoder output
        self.fc = nn.Linear(768, num_labels)
        # self.loss_fn = torch.nn.CrossEntropyLoss(ignore_index=0)
        self.loss_fn = FocalLoss()

    def forward(self, data):
        """
        :param data: dict with "x" (features, trailing dim 768) and
            "y" (labels, or None at inference time)
        :return: dict with "loss" (None when y is None), "logits",
            and "predict" (argmax over the last dim)
        """
        x = data["x"]
        y = data["y"]

        logits = self.fc(x)

        if y is not None:
            # Call the module itself instead of .forward() so registered
            # hooks run — direct .forward() calls bypass them.
            loss = self.loss_fn(
                torch.reshape(logits, [-1, logits.shape[-1]]), y.view(-1))
        else:
            loss = None

        return {"loss": loss, "logits": logits, "predict": logits.argmax(dim=-1)}
# Beispiel #6
# 0
class Criterion:
    """Loss / accuracy / F1 helper for binary (BCE) or 2-class (focal) tasks."""

    def __init__(self, task, cuda=True):
        """
        :param task: task id; 2 selects FocalLoss over 2-way logits,
            anything else a single-logit BCE-with-logits loss.
        :param cuda: move the loss to GPU and compare in CUDA float tensors.
        """
        # Honor the caller's task id (was hard-coded to 2, which ignored the
        # parameter and made the BCE branch unreachable).
        self.task = task
        self.floattensor = "torch.FloatTensor"
        if self.task == 2:
            self.loss_func = FocalLoss(gamma=0.75)  #nn.CrossEntropyLoss()#
        else:
            self.loss_func = nn.BCEWithLogitsLoss()

        if cuda:
            self.loss_func = self.loss_func.cuda()
            self.floattensor = "torch.cuda.FloatTensor"

    def loss_compute(self, logits, y):
        """Return the scalar loss for a batch of logits against targets y."""
        batch_size = y.shape[0]
        if self.task == 2:
            return self.loss_func(logits, y.view(batch_size))
        else:
            # single-logit binary path: flatten and cast targets to float
            logits = logits.view(batch_size)
            return self.loss_func(logits,
                                  y.view(batch_size).type(self.floattensor))

    def accu_compute(self, logits, y):
        """Return mean accuracy (Python float) for the batch."""
        batch_size = y.shape[0]
        if self.task == 2:
            _, predict = torch.max(logits, dim=1)
        else:
            # NOTE(review): thresholds raw logits (not sigmoid probs) at 0.5
            predict = (logits >= 0.5)

        y = y.type(predict.dtype)

        comp = (predict.view(batch_size,
                             -1) == y.view(batch_size,
                                           -1)).type(self.floattensor)
        accu = torch.mean(comp)
        return accu.item()

    def f1_compute(self, logits, y):
        """Return the macro-averaged F1 score for the batch (computed on CPU)."""
        # everyone uses macro, but class imbalance should use micro
        batch_size = y.shape[0]
        if self.task == 2:
            _, predict = torch.max(logits, dim=1)
        else:
            predict = (logits >= 0.5)

        y = y.type(predict.dtype)

        # move to cpu for sklearn
        y = y.cpu()
        predict = predict.cpu()

        f1 = f1_score(y_true=y.view(batch_size, -1),
                      y_pred=predict.view(batch_size, -1),
                      average='macro')

        return f1
# Beispiel #7
# 0
def test():
    """Evaluate a trained LWANet on the test split using focal loss."""
    cpu = torch.device("cpu")
    model = LWANet(num_classes=num_classes, pretrained=True)
    # strip a possible 'module.' DataParallel prefix from checkpoint keys
    state = torch.load(weight_load, map_location=cpu)
    model.load_state_dict({key.replace('module.', ''): val
                           for key, val in state.items()})

    model = model.cuda(device_ids[0])
    criterion = FocalLoss(gamma=6)

    test_images = glob.glob('dataset/test/images/*.png')
    loader = DataLoader(Load_Dataset(test_images),
                        batch_size=args.batch_size,
                        num_workers=16)

    val_multi(model, criterion, loader, num_classes,
              batch_size=args.batch_size, device_ids=device_ids)
# Beispiel #8
# 0
    def __init__(self, net, num_class, classes, size, model_path, multi_task=False, resume=False, always_save=False, use_cuda=True):
        """Trainer wrapper around *net* with a focal-loss criterion.

        :param net: model to train (may be None; then no CUDA move / resume)
        :param num_class: number of classes passed to FocalLoss
        :param classes: class names kept for reporting
        :param size: input size kept for callers
        :param model_path: checkpoint path used when resume=True
        :param multi_task: flag stored for callers/subclasses
        :param resume: restore start_epoch/best_loss from model_path
        :param always_save: save checkpoints unconditionally
        :param use_cuda: move the network to GPU
        """
        self.always_save = always_save
        self.size = size
        self.classes = classes
        self.model_path = model_path
        self.resume = resume
        self.start_epoch = 0
        self.multi_task = multi_task
        self.best_loss = 1000000  # sentinel: any real loss will beat this
        self.net = net
        self.use_cuda = use_cuda
        # idiomatic identity test (was the non-idiomatic `not net is None`)
        if net is not None and use_cuda:
            self.net.cuda()

        self.criterion = FocalLoss(class_num=num_class, alpha=None, gamma=2, size_average=True)
        if self.resume and self.net is not None:
            self.start_epoch, self.best_loss = self.resume_model(self.model_path)
# Beispiel #9
# 0
    def __init__(self,
                 embedding,
                 arch='ExampleNet',
                 dropout_rate=0.2,
                 loss='BCELoss',
                 margin=0,
                 threshold=None,
                 similarity='inner_product',
                 **kwargs):
        """Predictor wrapping ExampleNet with a pretrained embedding.

        :param embedding: FloatTensor [vocab, dim] initializing nn.Embedding
        :param arch: architecture name (logged only)
        :param dropout_rate: unused here — TODO confirm
        :param loss: key selecting the criterion from the table below
        :param margin: unused here — TODO confirm
        :param threshold: unused here — TODO confirm
        :param similarity: scoring function passed to ExampleNet
        """
        super(ExamplePredictor, self).__init__(**kwargs)
        self.arch = arch
        logging.info('building ' + self.arch + '...')
        self.model = ExampleNet(embedding.size(1), similarity=similarity)
        self.embedding = torch.nn.Embedding(embedding.size(0),
                                            embedding.size(1))
        self.embedding.weight = torch.nn.Parameter(embedding)

        # use cuda
        self.model = self.model.to(self.device)
        self.embedding = self.embedding.to(self.device)

        # make optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.learning_rate)

        # NOTE: this dict is constructed eagerly, so every entry must build.
        self.loss = {
            'BCELoss': torch.nn.BCEWithLogitsLoss(),
            'FocalLoss': FocalLoss(),
            #--
            'L1Loss': torch.nn.L1Loss(),
            'SmoothL1Loss': torch.nn.SmoothL1Loss(),
            'MSELoss': torch.nn.MSELoss(),
            'CrossEntropyLoss': torch.nn.CrossEntropyLoss(),
            'NLLLoss': torch.nn.NLLLoss(),
            #'NLLLoss2d': torch.nn.NLLLoss2d(),
            'KLDivLoss': torch.nn.KLDivLoss(),
            'MarginRankingLoss': torch.nn.MarginRankingLoss(),
            'MultiMarginLoss': torch.nn.MultiMarginLoss(),
            'MultiLabelMarginLoss': torch.nn.MultiLabelMarginLoss(),
            'SoftMarginLoss': torch.nn.SoftMarginLoss(),
            'MultiLabelSoftMarginLoss': torch.nn.MultiLabelSoftMarginLoss(),
            'CosineEmbeddingLoss': torch.nn.CosineEmbeddingLoss(),
            'HingeEmbeddingLoss': torch.nn.HingeEmbeddingLoss(),
            # bugfix: torch.nn has TripletMarginLoss; the old
            # torch.nn.TripleMarginLoss raised AttributeError and crashed
            # every construction. Dict key kept for backward compatibility.
            'TripleMarginLoss': torch.nn.TripletMarginLoss()
        }[loss]
# Beispiel #10
# 0
def train():
    """Train LWANet with focal loss and Adam over the train/val file lists."""
    net = LWANet(num_classes=num_classes, pretrained=True)
    model = net.cuda(device_ids[0])
    model = nn.DataParallel(model, device_ids=device_ids)

    criterion = FocalLoss(gamma=6)
    optimizer = optim.Adam(model.parameters(), lr=lra)

    train_file, val_file = load_filename()

    loader_kwargs = dict(batch_size=args.batch_size,
                         shuffle=True,
                         num_workers=12)
    dataloaders = DataLoader(Load_Dataset(train_file),
                             **loader_kwargs)  # drop_last=True
    val_load = DataLoader(Load_Dataset(val_file), **loader_kwargs)

    train_model(model, criterion, optimizer, dataloaders, val_load,
                num_classes)
def main():
    """End-to-end pipeline: train pnasnet5large in LR-decay stages, validate,
    checkpoint the best model, and write a submission file for the test set."""
    # Fix all RNG seeds for reproducibility
    np.random.seed(21)
    torch.manual_seed(21)
    torch.cuda.manual_seed_all(21)
    random.seed(21)

    # per-epoch metrics accumulated for monitoring
    monitor = pd.DataFrame()

    # Current file name, used to name the model/result directories
    file_name = os.path.basename(__file__).split('.')[0]
    # Create folders for saved models and results
    if not os.path.exists('./model/%s' % file_name):
        os.makedirs('./model/%s' % file_name)
    if not os.path.exists('./result/%s' % file_name):
        os.makedirs('./result/%s' % file_name)
    # Create the log file if it does not exist yet
    if not os.path.exists('./result/%s.txt' % file_name):
        with open('./result/%s.txt' % file_name, 'w') as acc_file:
            pass
    with open('./result/%s.txt' % file_name, 'a') as acc_file:
        acc_file.write('\n%s %s\n' % (time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(time.time())), file_name))

    # Default image reader (PIL)
    def default_loader(path):
        """Open *path* with PIL and force 3-channel RGB."""
        return Image.open(path).convert('RGB')

    # Training-set image reader
    class TrainDataset(Dataset):
        """Dataset of (image, label) pairs listed in a DataFrame with
        'img_path' and 'label' columns."""

        def __init__(self,
                     label_list,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            self.imgs = [(row['img_path'], row['label'])
                         for _, row in label_list.iterrows()]
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            path, label = self.imgs[index]
            image = self.loader(path)
            if self.transform is not None:
                image = self.transform(image)
            return image, label

        def __len__(self):
            return len(self.imgs)

    # Validation-set image reader
    class ValDataset(Dataset):
        """Dataset of (image, label) pairs listed in a DataFrame with
        'img_path' and 'label' columns."""

        def __init__(self,
                     label_list,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            self.imgs = [(row['img_path'], row['label'])
                         for _, row in label_list.iterrows()]
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            path, label = self.imgs[index]
            image = self.loader(path)
            if self.transform is not None:
                image = self.transform(image)
            return image, label

        def __len__(self):
            return len(self.imgs)

    # Test-set image reader (no labels; yields the file path instead)
    class TestDataset(Dataset):
        """Dataset of unlabeled images; __getitem__ yields (image, path)."""

        def __init__(self,
                     label_list,
                     transform=None,
                     target_transform=None,
                     loader=default_loader):
            self.imgs = [row['img_path'] for _, row in label_list.iterrows()]
            self.transform = transform
            self.target_transform = target_transform
            self.loader = loader

        def __getitem__(self, index):
            path = self.imgs[index]
            image = self.loader(path)
            if self.transform is not None:
                image = self.transform(image)
            return image, path

        def __len__(self):
            return len(self.imgs)

    # Data augmentation: rotate by an angle randomly picked from a fixed set
    class FixedRotation(object):
        """Transform rotating an image by one angle drawn from *angles*."""
        def __init__(self, angles):
            self.angles = angles

        def __call__(self, img):
            return fixed_rotate(img, self.angles)

    def fixed_rotate(img, angles):
        """Rotate *img* by an angle drawn uniformly at random from *angles*."""
        # random.choice replaces the manual randint-index dance; same uniform pick
        return img.rotate(random.choice(list(angles)))

    # Training for one epoch
    def train(train_loader, model, criterion, optimizer, epoch):
        """Train *model* for one epoch; returns (avg accuracy, avg loss)."""
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # switch to train mode
        model.train()

        end = time.time()
        # iterate over the training batches
        for i, (images, target) in enumerate(train_loader):
            # time spent reading/augmenting images
            data_time.update(time.time() - end)
            # move the batch to GPU; `async=True` is a SyntaxError on
            # Python 3.7+ — non_blocking=True is its replacement
            image_var = torch.tensor(images).cuda(non_blocking=True)
            label = torch.tensor(target).cuda(non_blocking=True)

            # forward pass
            y_pred = model(image_var)
            # compute the loss
            loss = criterion(y_pred, label)
            losses.update(loss.item(), images.size(0))

            # top-1 accuracy
            prec, PRED_COUNT = accuracy(y_pred.data, target, topk=(1, 1))
            acc.update(prec, PRED_COUNT)

            # backprop and parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # wall-clock time per batch
            batch_time.update(time.time() - end)
            end = time.time()

            # periodic progress logging
            if i % print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                          epoch,
                          i,
                          len(train_loader),
                          batch_time=batch_time,
                          data_time=data_time,
                          loss=losses,
                          acc=acc))
        return acc.avg, losses.avg

    # Validation
    def validate(val_loader, model, criterion):
        """Evaluate on the validation set; returns (avg accuracy, avg loss)."""
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()

        # switch to evaluate mode
        model.eval()

        end = time.time()
        for i, (images, labels) in enumerate(val_loader):
            # `async=True` is a SyntaxError on Python 3.7+; use non_blocking
            image_var = torch.tensor(images).cuda(non_blocking=True)
            target = torch.tensor(labels).cuda(non_blocking=True)

            # no gradients needed for validation/testing
            with torch.no_grad():
                y_pred = model(image_var)
                loss = criterion(y_pred, target)

            # measure accuracy and record loss
            prec, PRED_COUNT = accuracy(y_pred.data, labels, topk=(1, 1))
            losses.update(loss.item(), images.size(0))
            acc.update(prec, PRED_COUNT)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('TrainVal: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuray {acc.val:.3f} ({acc.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          acc=acc))

        print(' * Accuray {acc.avg:.3f}'.format(acc=acc),
              '(Previous Best Acc: %.3f)' % best_precision,
              ' * Loss {loss.avg:.3f}'.format(loss=losses),
              'Previous Lowest Loss: %.3f)' % lowest_loss)
        return acc.avg, losses.avg

    # Inference on the test set
    def test(test_loader, model):
        """Predict the test set and write result/<file_name>/submission.csv."""
        csv_map = OrderedDict({'filename': [], 'probability': []})
        # switch to evaluate mode
        model.eval()
        for i, (images, filepath) in enumerate(tqdm(test_loader)):
            # bs, ncrops, c, h, w = images.size()
            filepath = [os.path.basename(i) for i in filepath]
            image_var = torch.tensor(images,
                                     requires_grad=False)  # for pytorch 0.4

            with torch.no_grad():
                y_pred = model(image_var)
                # softmax turns the raw scores into class probabilities
                smax = nn.Softmax(1)
                smax_out = smax(y_pred)

            # record file names and semicolon-joined per-class probabilities
            csv_map['filename'].extend(filepath)
            for output in smax_out:
                prob = ';'.join([str(i) for i in output.data.tolist()])
                csv_map['probability'].append(prob)

        result = pd.DataFrame(csv_map)
        result['probability'] = result['probability'].map(
            lambda x: [float(i) for i in x.split(';')])

        # convert to the submission format: label 0 -> 'norm', k -> 'defectk'
        sub_filename, sub_label = [], []
        for index, row in result.iterrows():
            sub_filename.append(row['filename'])
            pred_label = np.argmax(row['probability'])
            if pred_label == 0:
                sub_label.append('norm')
            else:
                sub_label.append('defect%d' % pred_label)

        # write the directly-submittable result file under result/
        submission = pd.DataFrame({
            'filename': sub_filename,
            'label': sub_label
        })
        submission.to_csv('./result/%s/submission.csv' % file_name,
                          header=None,
                          index=False)
        print('test done!')
        return

    # Save the latest checkpoint and copy it as best / lowest-loss snapshots
    def save_checkpoint(state,
                        is_best,
                        is_lowest_loss,
                        filename='./model/%s/checkpoint.pth.tar' % file_name):
        """Persist *state*; duplicate it for best-accuracy / lowest-loss epochs."""
        torch.save(state, filename)
        if is_best:
            shutil.copyfile(filename, './model/%s/model_best.pth.tar' % file_name)
        if is_lowest_loss:
            shutil.copyfile(filename, './model/%s/lowest_loss.pth.tar' % file_name)

    # Tracks how accuracy/timing evolve over an epoch
    class AverageMeter(object):
        """Keeps the latest value plus a running sum, count and average."""

        def __init__(self):
            self.reset()

        def reset(self):
            """Clear all statistics back to zero."""
            self.val = 0
            self.sum = 0
            self.count = 0
            self.avg = 0

        def update(self, val, n=1):
            """Record *val* observed *n* times and refresh the running average."""
            self.val = val
            self.count += n
            self.sum += val * n
            self.avg = self.sum / self.count

    # LR decay: lr = lr / lr_decay
    def adjust_learning_rate():
        """Divide the shared lr by lr_decay and return a fresh Adam optimizer."""
        nonlocal lr
        lr = lr / lr_decay
        return optim.Adam(model.parameters(),
                          lr,
                          weight_decay=weight_decay,
                          amsgrad=True)

    # Top-k accuracy
    def accuracy(y_pred, y_actual, topk=(1, )):
        """Computes the precision@k for the specified values of k.

        Returns (accuracy_percentage, sample_count). A sample counts as
        correct when its true label appears among the top max(topk) scores.
        """
        maxk = max(topk)
        # for prob_threshold in np.arange(0, 1, 0.01):
        PRED_COUNT = y_actual.size(0)
        PRED_CORRECT_COUNT = 0
        # pred: [batch, maxk] indices of the highest-scoring classes
        prob, pred = y_pred.topk(maxk, 1, True, True)
        # prob = np.where(prob > prob_threshold, prob, 0)
        for j in range(pred.size(0)):
            # membership test works for any maxk (the old int(pred[j])
            # comparison crashed whenever maxk > 1); identical for maxk == 1
            if int(y_actual[j]) in pred[j].tolist():
                PRED_CORRECT_COUNT += 1
        if PRED_COUNT == 0:
            return 0, 0
        return PRED_CORRECT_COUNT / PRED_COUNT * 100, PRED_COUNT

    # ------------------------- main program body -------------------------

    # select which GPUs are visible
    os.environ["CUDA_VISIBLE_DEVICES"] = '2,3'
    # keep batch size modest on small datasets; reduce it on out-of-memory
    batch_size = 6
    # loader worker processes; keep <= CPU count, ideally dividing batch
    # size (on Windows, set workers=0 if this errors)
    workers = 12

    # epochs per stage; the LR is decayed when moving to the next stage
    stage_epochs = [20, 10, 10, 10, 10]
    # initial learning rate
    lr = 1e-4
    # learning-rate decay factor (new_lr = lr / lr_decay)
    lr_decay = 5
    # L2 regularization coefficient
    weight_decay = 1e-4  # original value: 1e-4

    # training-state initialization
    stage = 0
    start_epoch = 0
    total_epochs = sum(stage_epochs)
    best_precision = 0
    lowest_loss = 100

    # print every `print_freq` steps; values before the parentheses are the
    # latest loss/acc, values inside are the running epoch averages
    print_freq = 10
    # fraction of the data held out for validation
    val_ratio = 0.12
    # evaluate-only mode (skip training)
    evaluate = False
    # resume from a checkpoint
    resume = False
    # build the model (inception_v4 alternative kept below)
    # model = model_v4.v4(num_classes=12)

    model = make_model(
        'pnasnet5large',
        pretrained=True,
        num_classes=12,
        dropout_p=0.5,
    )
    model = torch.nn.DataParallel(model).cuda()

    # optionally resume from a checkpoint
    if resume:
        checkpoint_path = './model/%s/checkpoint.pth.tar' % file_name
        if os.path.isfile(checkpoint_path):
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path)
            start_epoch = checkpoint['epoch'] + 1
            best_precision = checkpoint['best_precision']
            lowest_loss = checkpoint['lowest_loss']
            stage = checkpoint['stage']
            lr = checkpoint['lr']
            model.load_state_dict(checkpoint['state_dict'])
            # special case: the checkpoint landed exactly on a stage boundary
            if start_epoch in np.cumsum(stage_epochs)[:-1]:
                stage += 1
                optimizer = adjust_learning_rate()
                model.load_state_dict(
                    torch.load('./model/%s/model_best.pth.tar' %
                               file_name)['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            # NOTE(review): formats the boolean `resume`, not the missing path
            print("=> no checkpoint found at '{}'".format(resume))

    # read the training image list
    all_data = pd.read_csv('data/label.csv')
    # split train/validation; stratify keeps the label distribution
    train_data_list, val_data_list = train_test_split(
        all_data,
        test_size=val_ratio,
        random_state=21,
        shuffle=True,
        stratify=all_data['label'])
    # read the test image list
    test_data_list = pd.read_csv('data/test.csv')

    # normalize with ImageNet statistics (pretrained backbone)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # training transforms; the network input size is 384x384
    train_data = TrainDataset(
        train_data_list,
        transform=transforms.Compose([
            transforms.Resize((400, 400)),
            transforms.ColorJitter(0.15, 0.15, 0.15, 0.075),
            transforms.RandomHorizontalFlip(),
            transforms.RandomGrayscale(),
            # transforms.RandomRotation(20),
            FixedRotation([0, 30, 45, 60, 90, 135, 180,
                           270]),  # 45 and 135 added (ljc)
            transforms.RandomCrop(384),
            transforms.ToTensor(),
            normalize,
        ]))

    # validation transforms
    val_data = ValDataset(val_data_list,
                          transform=transforms.Compose([
                              transforms.Resize((400, 400)),
                              transforms.CenterCrop(384),
                              transforms.ToTensor(),
                              normalize,
                          ]))

    # test transforms
    test_data = TestDataset(test_data_list,
                            transform=transforms.Compose([
                                transforms.Resize((400, 400)),
                                transforms.CenterCrop(384),
                                transforms.ToTensor(),
                                normalize,
                            ]))

    # batch iterators
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=workers)
    val_loader = DataLoader(val_data,
                            batch_size=batch_size * 2,
                            shuffle=False,
                            pin_memory=False,
                            num_workers=workers)
    test_loader = DataLoader(test_data,
                             batch_size=batch_size * 2,
                             shuffle=False,
                             pin_memory=False,
                             num_workers=workers)

    # re-weight under-represented classes
    # weight = torch.Tensor([1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5])

    # loss function (cross-entropy alternatives kept below)
    #  weight added  by  ljc
    # weight = torch.Tensor([1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10])
    # criterion = nn.CrossEntropyLoss(weight=weight).cuda()  # weight
    criterion = FocalLoss(class_num=12, gamma=2)  # changed by ljc
    # criterion = nn.CrossEntropyLoss().cuda()  # weight

    # optimizer: Adam with amsgrad
    optimizer = optim.Adam(model.parameters(),
                           lr,
                           weight_decay=weight_decay,
                           amsgrad=True)

    if evaluate:
        validate(val_loader, model, criterion)
    else:
        # main training loop over all stages
        for epoch in range(start_epoch, total_epochs):
            # train for one epoch
            train_acc, train_loss = train(train_loader, model, criterion,
                                          optimizer, epoch)
            # evaluate on validation set
            precision, avg_loss = validate(val_loader, model, criterion)

            # bugfix: the dict literal used the key 'val_acc' twice, so the
            # validation accuracy column was silently overwritten by the loss
            monitor = pd.concat([
                monitor,
                pd.DataFrame({
                    'train_acc': [train_acc],
                    'train_loss': [train_loss],
                    'val_acc': [precision],
                    'val_loss': [avg_loss]
                })
            ])

            # append this epoch's precision/loss to the log file
            with open('./result/%s.txt' % file_name, 'a') as acc_file:
                acc_file.write('Epoch: %2d, Precision: %.8f, Loss: %.8f\n' %
                               (epoch, precision, avg_loss))

            # track best precision / lowest loss; save latest + best models
            is_best = precision > best_precision
            is_lowest_loss = avg_loss < lowest_loss
            best_precision = max(precision, best_precision)
            lowest_loss = min(avg_loss, lowest_loss)
            state = {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'best_precision': best_precision,
                'lowest_loss': lowest_loss,
                'stage': stage,
                'lr': lr,
            }
            save_checkpoint(state, is_best, is_lowest_loss)

            # stage transition: decay the LR and reload the best weights
            if (epoch + 1) in np.cumsum(stage_epochs)[:-1]:
                stage += 1
                optimizer = adjust_learning_rate()
                model.load_state_dict(
                    torch.load('./model/%s/model_best.pth.tar' %
                               file_name)['state_dict'])
                print('Step into next stage')
                with open('./result/%s.txt' % file_name, 'a') as acc_file:
                    acc_file.write(
                        '---------------Step into next stage----------------\n'
                    )

    # record the best offline score
    with open('./result/%s.txt' % file_name, 'a') as acc_file:
        acc_file.write('* best acc: %.8f  %s\n' %
                       (best_precision, os.path.basename(__file__)))
    with open('./result/best_acc.txt', 'a') as acc_file:
        acc_file.write(
            '%s  * best acc: %.8f  %s\n' %
            (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(
                time.time())), best_precision, os.path.basename(__file__)))

    # reload the best model and write the submission file for the test set
    best_model = torch.load('./model/%s/model_best.pth.tar' % file_name)
    model.load_state_dict(best_model['state_dict'])
    test(test_loader=test_loader, model=model)

    # free cached GPU memory
    torch.cuda.empty_cache()
# Beispiel #12
# 0
def _train_or_test(model,
                   dataloader,
                   config: Settings,
                   optimizer=None,
                   use_l1_mask=True,
                   log_writer: SummaryWriter = None,
                   step: int = 0,
                   weighting_attention=False,
                   is_valid=False):
    '''
    Run one epoch of training (optimizer given) or evaluation (optimizer
    None) for a prototype-part MIL model and return the epoch ROC-AUC.

    model: the multi-gpu model
    dataloader: yields (image, label) bags; labels are binary {0, 1}
    config: Settings carrying the loss choice and loss-term coefficients
    optimizer: if None, will be test evaluation
    use_l1_mask: restrict the last-layer L1 penalty to off-class weights
    log_writer: optional TensorBoard SummaryWriter
    step: global step used for every logged scalar/figure
    weighting_attention: scale prototype distances by normalized attention
    is_valid: tag scalars '/valid' instead of '/test' when evaluating
    '''
    is_train = optimizer is not None
    n_examples = 0
    n_correct = 0
    n_batches = 0
    total_cross_entropy = 0
    total_cluster_cost = 0
    # separation cost is meaningful only for class_specific
    total_separation_cost = 0
    total_avg_separation_cost = 0
    total_loss = 0
    conf_matrix = np.zeros((2, 2), dtype='int32')
    preds = []
    targets = []

    if config.loss_function == 'cross_entropy':
        loss_fn = torch.nn.CrossEntropyLoss()
    elif config.loss_function == 'focal':
        loss_fn = FocalLoss(alpha=0.5, gamma=2)
    else:
        raise NotImplementedError('unknown loss function: ' +
                                  config.loss_function)

    with tqdm(total=len(dataloader.dataset), unit='bag') as pbar:
        for i, (image, label) in enumerate(dataloader):
            inputs = image.cuda()  # renamed from `input` (shadowed builtin)

            # if param all_labels=True in dataloader, set label to positive if at least one in the list
            if len(label) > 1:
                label = label.max().unsqueeze(0)

            target = label.cuda()

            # torch.enable_grad() has no effect outside of no_grad()
            grad_req = torch.enable_grad() if is_train else torch.no_grad()
            with grad_req:
                output, min_distances, attention, _ = model.forward_(inputs)

                cross_entropy = loss_fn(output, target)
                if config.mil_pooling == 'loss_attention':
                    # Auxiliary instance-level loss for loss-attention
                    # pooling: every instance inherits the bag label.
                    instance_labels = target * torch.ones(
                        inputs.size(0), dtype=torch.long, device=inputs.device)
                    loss_2 = WeightCrossEntropyLoss()(model.out_c,
                                                      instance_labels, model.A)
                    cross_entropy += 2.0 * loss_2

                if config.class_specific:
                    max_dist = (model.prototype_shape[1] *
                                model.prototype_shape[2] *
                                model.prototype_shape[3])

                    # prototypes_of_correct_class is a tensor of shape batch_size * num_prototypes

                    # Rescale attention to (0.001, 1] so it can weight the
                    # per-instance prototype distances.
                    attention_detached = attention.detach().cpu()
                    weight = np.interp(
                        attention_detached,
                        (attention_detached.min(), attention_detached.max()),
                        (0.001, 1))

                    if weighting_attention:
                        tensor_weight = torch.tensor(weight).cuda()
                    else:
                        tensor_weight = torch.tensor(1).cuda()

                    # calculate cluster cost
                    prototypes_of_correct_class = torch.t(
                        model.prototype_class_identity[:, label]).cuda()
                    inverted_distances, _ = torch.max(
                        (max_dist - (min_distances * tensor_weight.T)) *
                        prototypes_of_correct_class,
                        dim=1)
                    cluster_cost = torch.mean(max_dist - inverted_distances)

                    # calculate separation cost
                    prototypes_of_wrong_class = 1 - prototypes_of_correct_class
                    inverted_distances_to_nontarget_prototypes, _ = \
                        torch.max((max_dist - (min_distances * tensor_weight.T)) * prototypes_of_wrong_class, dim=1)
                    separation_cost = torch.mean(
                        max_dist - inverted_distances_to_nontarget_prototypes)

                    # calculate avg cluster cost
                    avg_separation_cost = \
                        torch.sum((min_distances * tensor_weight.T) * prototypes_of_wrong_class, dim=1) / torch.sum(
                            prototypes_of_wrong_class,
                            dim=1)
                    avg_separation_cost = torch.mean(avg_separation_cost)

                    if use_l1_mask:
                        # Penalize only last-layer weights connecting a
                        # prototype to the wrong class.
                        l1_mask = 1 - torch.t(
                            model.prototype_class_identity).cuda()
                        l1 = (model.last_layer.weight * l1_mask).norm(p=1)
                    else:
                        l1 = model.last_layer.weight.norm(p=1)

                else:
                    min_distance, _ = torch.min(min_distances, dim=1)
                    cluster_cost = torch.mean(min_distance)
                    l1 = model.last_layer.weight.norm(p=1)

                # evaluation statistics
                _, predicted = torch.max(output.data, 1)
                n_examples += target.size(0)
                n_correct += (predicted == target).sum().item()

                pred_s = func.softmax(output, dim=-1)
                preds.append(pred_s.data.cpu().numpy())
                targets.append(target.cpu().numpy())

                conf_matrix += confusion_matrix(target.cpu().numpy(),
                                                predicted.cpu().numpy(),
                                                labels=[0, 1])

                n_batches += 1
                total_cross_entropy += cross_entropy.item()
                total_cluster_cost += cluster_cost.item()
                # BUG FIX: separation_cost / avg_separation_cost are only
                # bound on the class_specific path; the original accumulated
                # them unconditionally, raising a NameError on the first
                # batch whenever config.class_specific was False.
                if config.class_specific:
                    total_separation_cost += separation_cost.item()
                    total_avg_separation_cost += avg_separation_cost.item()

            # compute gradient and do SGD step
            if config.class_specific:
                loss = (config.coef_crs_ent * cross_entropy +
                        config.coef_clst * cluster_cost +
                        config.coef_sep * separation_cost +
                        config.coef_l1 * l1)
            else:
                loss = (config.coef_crs_ent * cross_entropy +
                        config.coef_clst * cluster_cost + config.coef_l1 * l1)
            total_loss += loss.item()
            if is_train:
                optimizer.zero_grad()
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) # gradient clipping
                optimizer.step()

            # Drop references to batch tensors promptly to reduce GPU-memory
            # pressure across iterations.
            del inputs
            del target
            del output
            del predicted
            del min_distances
            pbar.update(1)

    total_cross_entropy /= n_batches
    total_cluster_cost /= n_batches
    total_separation_cost /= n_batches
    total_loss /= n_batches

    preds = np.concatenate(preds)
    targets = np.concatenate(targets)
    auc = roc_auc_score(targets, preds[..., 1])
    pred_y = preds.argmax(1)
    precision = precision_score(targets, pred_y, zero_division=0)
    recall = recall_score(targets, pred_y, zero_division=0)
    f1 = f1_score(targets, pred_y, zero_division=0)
    fpr, tpr, threshold = roc_curve(targets, preds[..., 1])

    print('\t\taccuracy:', n_correct / n_examples)
    print('\t\tauc:', auc)
    print('\t\ttotal_loss:', total_loss)

    if is_train:
        suffix = '/train'
    else:
        if is_valid:
            suffix = '/valid'
        else:
            suffix = '/test'

    if log_writer:

        log_writer.add_scalar('total_loss' + suffix,
                              total_loss,
                              global_step=step)
        log_writer.add_scalar('cross_entropy' + suffix,
                              total_cross_entropy,
                              global_step=step)
        log_writer.add_scalar('cluster_cost' + suffix,
                              total_cluster_cost,
                              global_step=step)

        if config.class_specific:
            log_writer.add_scalar('separation_cost' + suffix,
                                  total_separation_cost,
                                  global_step=step)
            log_writer.add_scalar('avg_separation_cost' + suffix,
                                  total_avg_separation_cost / n_batches,
                                  global_step=step)

        log_writer.add_scalar('accuracy' + suffix,
                              n_correct / n_examples,
                              global_step=step)
        log_writer.add_scalar('auc' + suffix, auc, global_step=step)
        log_writer.add_scalar('precision' + suffix,
                              precision,
                              global_step=step)
        log_writer.add_scalar('recall' + suffix, recall, global_step=step)
        log_writer.add_scalar('f-score' + suffix, f1, global_step=step)
        log_writer.add_scalar('l1' + suffix,
                              model.last_layer.weight.norm(p=1).item(),
                              global_step=step)
        conf_plot = ConfusionMatrixDisplay(confusion_matrix=conf_matrix).plot(
            cmap='Blues', values_format='d')
        log_writer.add_figure('confusion_matrix' + suffix,
                              conf_plot.figure_,
                              global_step=step,
                              close=True)

        plt.figure()
        plt.title('Receiver Operating Characteristic')
        plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % auc)
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        log_writer.add_figure('roc' + suffix,
                              plt.gcf(),
                              global_step=step,
                              close=True)

    # Mean pairwise distance between prototype vectors (diversity metric).
    p = model.prototype_vectors.view(model.num_prototypes, -1).cpu()
    with torch.no_grad():
        p_avg_pair_dist = torch.mean(list_of_distances(p, p))

    if log_writer:
        log_writer.add_scalar('p_avg_pair_dist' + suffix,
                              p_avg_pair_dist,
                              global_step=step)

    return auc
Beispiel #13
0
    opt.scales = [opt.initial_scale]
    for i in range(1, opt.n_scales):
        opt.scales.append(opt.scales[-1] * opt.scale_step)
    opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
    opt.mean = get_mean(opt.norm_value, dataset=opt.mean_dataset)
    opt.std = get_std(opt.norm_value)
    print(opt)
    with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
        json.dump(vars(opt), opt_file)

    torch.manual_seed(opt.manual_seed)

    model, parameters = generate_model(opt)
    print(model)

    criterion = FocalLoss(num_class=opt.n_classes)  #nn.CrossEntropyLoss()
    if not opt.no_cuda:
        criterion = criterion.cuda()

    if opt.no_mean_norm and not opt.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not opt.std_norm:
        norm_method = Normalize(opt.mean, [1, 1, 1])
    else:
        norm_method = Normalize(opt.mean, opt.std)

    if not opt.no_train:
        assert opt.train_crop in ['random', 'corner', 'center']
        if opt.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(opt.scales, opt.sample_size)
        elif opt.train_crop == 'corner':
Beispiel #14
0
    if ntokens > 500000:
        # One Billion
        # This produces fairly even matrix mults for the buckets:
        # 0: 11723136, 1: 10854630, 2: 11270961, 3: 11219422
        splits = [4200, 35000, 180000]
    elif ntokens > 75000:
        # WikiText-103
        splits = [2800, 20000, 76000]
    print('Using', splits)
    if args.loss == 'splitcrossentropy':
        criterion = SplitCrossEntropyLoss(args.emsize,
                                          splits=splits,
                                          verbose=False)
        fn_exclude_keys += ['gamma']
    elif args.loss == 'focal':
        criterion = FocalLoss(args.gamma)
        fn_exclude_keys += ['emsize']
###
if args.cuda:
    model = model.cuda()
    criterion = criterion.cuda()
###
params = list(model.parameters()) + list(criterion.parameters())
total_params = sum(x.size()[0] *
                   x.size()[1] if len(x.size()) > 1 else x.size()[0]
                   for x in params if x.size())
print('Args:', args)
print('Model total parameters:', total_params)

## get filename
fn = 'save/' + get_name(args.__dict__,
Beispiel #15
0
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        mems=None,
        perm_mask=None,
        target_mapping=None,
        token_type_ids=None,
        input_mask=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        #use_mems=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
        **kwargs,
    ):
        r"""
        Token-classification forward pass: run the XLNet backbone, mix the
        hidden states of all layers with a learned attention module
        (``self.attn``), classify each token, and optionally compute the
        configured loss (``self.lossfct``: dice, focal, or cross-entropy).

        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in
            ``[0, ..., config.num_labels - 1]``.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.transformer(
            input_ids,
            attention_mask=attention_mask,
            mems=mems,
            perm_mask=perm_mask,
            target_mapping=target_mapping,
            token_type_ids=token_type_ids,
            input_mask=input_mask,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            #use_mems=use_mems,
            output_attentions=output_attentions,
            # Hidden states are always requested: self.attn consumes them below.
            output_hidden_states=True,
            return_dict=return_dict,
        )

        sequence_output = outputs.hidden_states  # tuple(  [batch, len, hiddenstate],)
        layers = len(sequence_output)
        batchsize, length, hidden_size = sequence_output[0].size(
            0), sequence_output[0].size(1), sequence_output[0].size(2)
        '''print(layers)
        print(batchsize)
        print(length)
        print(hidden_size)'''
        # print(sequence_output.size())
        # Stack the per-layer tuple into a single tensor ...
        sequence_output = torch.cat(sequence_output).view(
            layers, batchsize, length,
            hidden_size)  # tensor.size([layers, batch, len, hiddenstate])
        # print(sequence_output.size())
        # ... and reorder to [batch, len, layers, hiddenstate] for self.attn.
        sequence_output = sequence_output.transpose(0, 1).transpose(
            1, 2).contiguous()
        sequence_output = self.attn(sequence_output)
        if self.quick_return:
            # Caller only wants the layer-mixed token representations.
            return sequence_output
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            if self.lossfct == 'diceloss':
                # Dice loss consumes one-hot labels plus an explicit
                # validity mask derived from attention_mask.
                loss_fct = MultiDiceLoss()
                if attention_mask is not None:
                    '''print(attention_mask)
                    print(attention_mask.shape)  #torch.Size([4, 80])   batch,len'''

                    active_loss = attention_mask.view(-1) == 1
                    '''print(active_loss)
                    print(active_loss.shape)#torch.Size([320])   4*80
                    print(logits)
                    print(logits.shape)  #torch.Size([4, 80, 6])'''
                    # NOTE(review): active_loss is computed but the logits are
                    # not filtered by it (see commented masked_select below);
                    # masking appears delegated to `mask` — confirm intent.
                    active_logits = logits.view(-1, self.num_labels)
                    '''print(active_logits)
                    print(active_logits.shape)#torch.Size([320, 6])  4*80*6'''
                    #active_logits = torch.masked_select(active_logits, (active_loss == 1))
                    active_labels = labels.view(-1)  #->torch.Size([320])
                    active_labels = F.one_hot(active_labels, self.num_labels)
                    '''print(labels)
                    print(labels.shape)#torch.Size([4, 80])
                    print(active_labels)
                    print(active_labels.shape)#torch.Size([320,6])
                    print(active_logits)'''
                    mask = attention_mask.view(-1, 1)
                    mask = mask.repeat(1, self.num_labels)
                    '''print(mask)
                    print(mask.shape)#torch.Size([320, 6])'''
                    loss = loss_fct(active_logits, active_labels, mask)
                    #print(loss)
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels),
                                    labels.view(-1))
            elif self.lossfct == 'focalloss':
                loss_fct = FocalLoss()  # 'sum'
                # Only keep active parts of the loss
                if attention_mask is not None:
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = logits.view(-1, self.num_labels)
                    # Padding positions are re-labelled with the loss's
                    # ignore_index so they contribute nothing.
                    active_labels = torch.where(
                        active_loss, labels.view(-1),
                        torch.tensor(loss_fct.ignore_index).type_as(labels))
                    loss = loss_fct(active_logits,
                                    active_labels)  # 320*6,  320
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels),
                                    labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss(reduction=self.CEL_type)  #'sum'
                # Only keep active parts of the loss
                if attention_mask is not None:
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = logits.view(-1, self.num_labels)
                    active_labels = torch.where(
                        active_loss, labels.view(-1),
                        torch.tensor(loss_fct.ignore_index).type_as(labels))
                    loss = loss_fct(active_logits, active_labels)
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels),
                                    labels.view(-1))

        if not return_dict:
            output = (logits, ) + outputs[1:]
            return ((loss, ) + output) if loss is not None else output

        return XLNetForTokenClassificationOutput(
            loss=loss,
            logits=logits,
            mems=outputs.mems,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
Beispiel #16
0
def train_net(args):
    """Assemble all training components and return them.

    Despite the name, no training loop runs here: the function builds the
    train/val dataloaders, constructs DeeplabV3plus (optionally restoring a
    pretrained checkpoint and wrapping it in DataParallel), and selects the
    optimizer and loss criterion.

    :param args: namespace providing batch_size, num_workers, cuda, use_gpu,
        pretrained_model_path, mulgpu, optimizer, learning_rate, losstype.
    :return: (net, optimizer, criterion, dataloader_train, dataloader_val);
        optimizer is None (with a warning) for an unknown args.optimizer.
    :raises ValueError: for an unsupported args.losstype.
    """
    cropsize = [cfgs.crop_height, cfgs.crop_width]
    # dataset_train = CityScapes(cfgs.data_dir, cropsize=cropsize, mode='train')
    dataset_train = ContextVoc(cfgs.train_file,
                               cropsize=cropsize,
                               mode='train')
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=True)
    # dataset_val = CityScapes(cfgs.data_dir,  mode='val')
    # NOTE(review): the validation set is built with mode='train' — confirm
    # this is intentional (mode presumably controls cropping/augmentation).
    dataset_val = ContextVoc(cfgs.val_file, cropsize=cropsize, mode='train')
    dataloader_val = DataLoader(dataset_val,
                                batch_size=1,
                                shuffle=True,
                                num_workers=args.num_workers,
                                drop_last=True)
    # build net
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    if torch.cuda.is_available() and args.use_gpu:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    # model = BiSeNet(args.num_classes, args.context_path)
    net = DeeplabV3plus(cfgs).to(device)
    # net = SCAR(load_weights=True).to(device)
    if args.pretrained_model_path is not None:
        print('load model from %s ...' % args.pretrained_model_path)
        # Remap checkpoint keys, then load non-strictly so partially
        # matching checkpoints still initialise the network.
        state_dict = torch.load(args.pretrained_model_path,
                                map_location=device)
        state_dict = renamedict(state_dict)
        net.load_state_dict(state_dict, strict=False)
        # net.load_state_dict(torch.load(args.pretrained_model_path))
        print('Done!')
    if args.mulgpu:
        net = torch.nn.DataParallel(net)
    net.train()
    # build optimizer
    if args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(net.parameters(), args.learning_rate)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(),
                                    args.learning_rate,
                                    momentum=0.9,
                                    weight_decay=1e-4)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(net.parameters(), args.learning_rate)
    else:
        print('not supported optimizer \n')
        optimizer = None
    #build loss
    if args.losstype == 'dice':
        criterion = DiceLoss()
    elif args.losstype == 'crossentropy':
        criterion = torch.nn.CrossEntropyLoss()
    elif args.losstype == 'ohem':
        # Online hard-example mining: keep at least n_min hardest pixels.
        score_thres = 0.7
        n_min = args.batch_size * cfgs.crop_height * cfgs.crop_width // 16
        criterion = OhemCELoss(thresh=score_thres, n_min=n_min)
    elif args.losstype == 'focal':
        # criterion = SoftmaxFocalLoss()
        criterion = FocalLoss()
    elif args.losstype == 'multi':
        criterion = Multiloss(4)
    else:
        # BUG FIX: an unrecognised losstype previously fell through and the
        # return below crashed with a NameError on `criterion`.
        raise ValueError('not supported loss type: %s' % args.losstype)
    return net, optimizer, criterion, dataloader_train, dataloader_val
Beispiel #17
0
    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        Token-classification forward pass: run the ALBERT backbone, mix the
        hidden states of all layers with a learned attention module
        (``self.attn``), apply dropout and a classifier head, and optionally
        compute the configured loss (``self.lossfct``: dice, focal, or
        cross-entropy).

        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels -
            1]``.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.albert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            # Hidden states are always requested: self.attn consumes them below.
            output_hidden_states=True,
            return_dict=return_dict,
        )

        # outputs[2]: tuple of per-layer hidden states (present because
        # output_hidden_states=True above).
        sequence_output = outputs[2]
        layers = len(sequence_output)
        batchsize, length, hidden_size = sequence_output[0].size(
            0), sequence_output[0].size(1), sequence_output[0].size(2)

        # Stack the per-layer tuple into one tensor, then reorder to
        # [batch, len, layers, hidden] for the attention module.
        sequence_output = torch.cat(sequence_output).view(
            layers, batchsize, length, hidden_size)

        sequence_output = sequence_output.transpose(0, 1).transpose(
            1, 2).contiguous()
        sequence_output = self.attn(sequence_output)
        if self.quick_return:
            # Caller only wants the layer-mixed token representations.
            return sequence_output

        sequence_output = self.dropout(sequence_output)
        logits = self.classifier(sequence_output)

        loss = None
        if labels is not None:
            if self.lossfct == 'diceloss':
                # Dice loss consumes one-hot labels plus an explicit
                # validity mask derived from attention_mask.
                loss_fct = MultiDiceLoss()
                if attention_mask is not None:

                    active_loss = attention_mask.view(-1) == 1

                    # NOTE(review): active_loss is computed but unused here;
                    # masking is left to the `mask` argument below — confirm.
                    active_logits = logits.view(-1, self.num_labels)

                    active_labels = labels.view(-1)
                    active_labels = F.one_hot(active_labels, self.num_labels)

                    mask = attention_mask.view(-1, 1)
                    mask = mask.repeat(1, self.num_labels)

                    loss = loss_fct(active_logits, active_labels, mask)
                    #print(loss)
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels),
                                    labels.view(-1))
            elif self.lossfct == 'focalloss':
                loss_fct = FocalLoss()
                # Only keep active parts of the loss
                if attention_mask is not None:
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = logits.view(-1, self.num_labels)
                    # Padding positions are re-labelled with the loss's
                    # ignore_index so they contribute nothing.
                    active_labels = torch.where(
                        active_loss, labels.view(-1),
                        torch.tensor(loss_fct.ignore_index).type_as(labels))
                    loss = loss_fct(active_logits, active_labels)
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels),
                                    labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss(reduction=self.CEL_type)
                # Only keep active parts of the loss
                if attention_mask is not None:
                    active_loss = attention_mask.view(-1) == 1
                    active_logits = logits.view(-1, self.num_labels)
                    active_labels = torch.where(
                        active_loss, labels.view(-1),
                        torch.tensor(loss_fct.ignore_index).type_as(labels))
                    loss = loss_fct(active_logits, active_labels)
                else:
                    loss = loss_fct(logits.view(-1, self.num_labels),
                                    labels.view(-1))

        if not return_dict:
            output = (logits, ) + outputs[2:]
            return ((loss, ) + output) if loss is not None else output

        return TokenClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
class Network_(nn.Module):
    def __init__(self, agent_config):
        """Set up the pretraining network from a configuration dictionary.

        :param agent_config (dict): expects lr, momentum, weight_decay,
            schedule (list of milestone epochs; the last entry is the final
            epoch), model_type/model_name, out_dim, model_weights,
            force_single_head, print_freq, gpuid (list of ints; a negative
            first entry means CPU), loaders, loss, exp_name and save_after.
        """
        super(Network_, self).__init__()
        self.log = print
        self.config = agent_config

        # Loaders for the pretraining phase.
        pretrain_loaders = self.config['loaders']['pretrain']
        self.train_loader, self.test_loader = pretrain_loaders

        self.model = self.create_model()
        if self.config['loss'] == 'fl':
            self.criterion_fn = FocalLoss()
        else:
            self.criterion_fn = CrossEntropyLoss()

        # Move to the GPU before the optimizer is created over model params.
        self.gpu = agent_config['gpuid'][0] >= 0
        if self.gpu:
            self.cuda()

        self.exp_name = agent_config['exp_name']
        self.init_optimizer()
        self.n_iter = 0

        self.writer = SummaryWriter(log_dir="runs/" + self.exp_name)
        self.save_after = self.config['save_after']

    def init_optimizer(self):
        optimizer_arg = {
            'params': self.model.parameters(),
            'lr': self.config['lr'],
            'weight_decay': self.config['weight_decay']
        }
        if self.config['optimizer'] in ['SGD', 'RMSprop']:
            optimizer_arg['momentum'] = self.config['momentum']
            optimizer_arg['nesterov'] = self.config['nesterov']
        elif self.config['optimizer'] in ['Rprop']:
            optimizer_arg.pop('weight_decay')
        elif self.config['optimizer'] == 'amsgrad':
            optimizer_arg['amsgrad'] = True
            self.config['optimizer'] = 'Adam'

        self.optimizer = torch.optim.__dict__[self.config['optimizer']](
            **optimizer_arg)
        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer,
            milestones=self.config['schedule'],
            gamma=self.config['gamma'])

    # def freeze_layers(self):
    #       if self.config['freeze']:

    def create_model(self):
        """Instantiate the backbone and attach a fresh classifier head.

        Returns a model whose final fc layer matches the number of classes
        in the pretraining dataset.
        """
        backbone_name = self.config['model_name']
        if backbone_name == 'RESNET18':
            model = RESNET18()
        else:
            model = models.__dict__[backbone_name](
                pretrained=self.config['pretrain_in'])

        # Optionally freeze the backbone; the new fc layer created below is
        # trainable regardless (fresh parameters default to requires_grad).
        if self.config['freeze']:
            for param in model.parameters():
                param.requires_grad = False

        num_classes = len(self.train_loader.dataset.class_list)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        return model

    # Load pre-trained weights
    def load_model(self):
        if self.config['model_weights'] is not None:
            print('=> Load model weights:', self.config['model_weights'])
            model_state = torch.load(
                self.config['model_weights'],
                map_location=lambda storage, loc: storage)  # Load to CPU.
            self.model.load_state_dict(model_state)
            print('=> Load Done')
        return self.model

    def criterion(self, preds, targets):
        loss = self.criterion_fn(preds, targets)
        return loss

    def cuda(self):
        """Move the model and criterion to the configured GPU(s).

        Returns self so the call can be chained.
        """
        gpu_ids = self.config['gpuid']
        torch.cuda.set_device(gpu_ids[0])
        self.model = self.model.cuda()
        self.criterion_fn = self.criterion_fn.cuda()
        # Wrap for multi-GPU training when more than one device id is given.
        if len(gpu_ids) > 1:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=gpu_ids,
                                               output_device=gpu_ids[0])
        return self

    def forward(self, x):
        return self.model.forward(x)

    def switch_finetune(self):
        """Switch the agent from pretraining to the finetuning task.

        Swaps in the 'finetune' loaders, optionally freezes all layers except
        those named in config['freeze_layers'], rebuilds the fc head for the
        new class count, lowers the LR to config['finetune_lr'], re-creates
        the optimizer, and moves everything back to the GPU.
        """
        print('Switched to new task FINETUNING')
        self.train_loader, self.test_loader = self.config['loaders'][
            'finetune']

        # Freeze training for all "features" layers
        if self.config['finetune_freeze']:

            # Only parameters whose name contains one of the configured
            # substrings stay trainable; everything else is frozen.
            for name, param in self.model.named_parameters():
                if any(substring in name
                       for substring in self.config['freeze_layers']):
                    print(name)
                    param.requires_grad = True
                else:
                    param.requires_grad = False

        else:
            for param in self.model.parameters():
                param.requires_grad = True

        print('FINETUNING number of classes are ',
              len(self.train_loader.dataset.class_list))
        # NOTE(review): self.model.fc assumes the plain backbone; if cuda()
        # wrapped the model in DataParallel this would need
        # self.model.module.fc — confirm against the call order.
        n_inp = self.model.fc.in_features
        self.model.fc = nn.Linear(n_inp,
                                  len(self.train_loader.dataset.class_list))

        self.config['lr'] = self.config['finetune_lr']
        self.init_optimizer()
        self.cuda()

        # switch train and test loaders
        # switch model's layers or freeze them
        # change lr or criterion if required
        # change tensorboard suffixes
        return

    def accumulate_acc(self, output, target, meter):
        """Fold this batch's accuracy into ``meter`` and return it."""
        batch_size = len(target)
        meter.update(accuracy(output, target), batch_size)
        return meter

    def update_model(self, inputs, targets):
        out = self.forward(inputs)
        loss = self.criterion(out, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss.detach(), out

    def validation(self, test_loader, from_train=1):
        """Evaluate the current model on ``test_loader``.

        Tracks overall top-1 / top-5 accuracy, per-class accuracy and
        per-instance-size-bucket accuracy (logged to tensorboard under
        the ``self.str_`` prefix).

        :param test_loader: dataloader yielding (input, target) batches
        :param from_train: when truthy, return only (acc, losses);
                           otherwise also return top-5 and per-class stats
        :return: (acc, losses) or (acc, acc_5, acc_cl_1, acc_cl_5, losses)
        """
        # this might possibly change for other incremental scenario
        # This function doesn't distinguish tasks.
        batch_timer = Timer()
        acc = AverageMeter()
        losses = AverageMeter()
        acc_5 = AverageMeter()
        # One AverageMeter per class; the commented-out `*` form would have
        # shared a single meter instance across all classes.
        acc_class = [
            AverageMeter()
            for i in range(len(self.train_loader.dataset.class_list))
        ]  #[AverageMeter()] *  len(self.train_loader.dataset.class_list)
        acc_class_5 = [
            AverageMeter()
            for i in range(len(self.train_loader.dataset.class_list))
        ]
        batch_timer.tic()
        orig_mode = self.training
        self.eval()
        for i, (input, target) in enumerate(test_loader):

            # BUG FIX: the forward pass previously ran only inside
            # `if self.gpu:`, so CPU evaluation raised NameError on
            # `output` / `loss`. Evaluate on both devices.
            with torch.no_grad():
                if self.gpu:
                    input = input.cuda()
                    target = target.cuda()
                output = self.forward(input)
                loss = self.criterion(output, target)

            losses.update(loss, input.size(0))
            # Summarize the performance of all tasks, or 1 task, depends on dataloader.
            # Calculated by total number of data.

            t_acc, acc_class = accuracy(
                output, target, topk=(1, ), avg_meters=acc_class
            )  #self.accumulate_acc(output, target, acc)
            t_acc_5, acc_class_5 = accuracy(output,
                                            target,
                                            topk=(5, ),
                                            avg_meters=acc_class_5)
            acc.update(t_acc, len(target))
            acc_5.update(t_acc_5, len(target))

        class_list = self.train_loader.dataset.class_list.inverse
        acc_cl_1 = {}
        acc_cl_5 = {}

        # From per-class accuracies, build accuracies grouped by instance
        # size (class_inst_list maps a bucket name -> list of class ids).
        inst_clss_lst = self.train_loader.dataset.class_inst_list
        for ins_clss_, insts in inst_clss_lst.items():
            cls_sum = sum([acc_class[inst].sum for inst in insts])
            cls_cnt = sum([acc_class[inst].count for inst in insts])
            if cls_cnt == 0:
                # Debug trap: a bucket with zero evaluated samples would
                # otherwise divide by zero below.
                import pdb
                pdb.set_trace()
            inst_avg = cls_sum / cls_cnt

            self.writer.add_scalar(self.str_ + '/Acc_1_{}'.format(ins_clss_),
                                   inst_avg, self.n_iter)

            cls_sum_5 = sum([acc_class_5[inst].sum for inst in insts])
            cls_cnt_5 = sum([acc_class_5[inst].count for inst in insts])
            inst_avg_5 = cls_sum_5 / cls_cnt_5
            self.writer.add_scalar(self.str_ + '/Acc_5_{}'.format(ins_clss_),
                                   inst_avg_5, self.n_iter)

        # Per-class (avg, sum, count) keyed by the class's human-readable name.
        for idx, cl_ in class_list.items():
            acc_cl_1[cl_] = [
                acc_class[idx].avg, acc_class[idx].sum, acc_class[idx].count
            ]
            acc_cl_5[cl_] = [
                acc_class_5[idx].avg, acc_class_5[idx].sum,
                acc_class_5[idx].count
            ]

        # Restore whatever train/eval mode the model was in on entry.
        self.train(orig_mode)

        self.log(' * Val Acc {acc.avg:.3f}, Total time {time:.2f}'.format(
            acc=acc, time=batch_timer.toc()))
        if from_train:
            return acc, losses
        else:
            return acc, acc_5, acc_cl_1, acc_cl_5, losses

    def predict(self, inputs):
        """Return raw model outputs for ``inputs`` with the net in eval mode."""
        self.model.eval()
        return self.forward(inputs)

    def save_model(self, filename):
        """Save the model's state dict to models/<exp_name>/<filename>.pth.

        All tensors are moved to CPU first so the checkpoint loads on
        machines without a GPU.

        :param filename: checkpoint basename (extension '.pth' is appended)
        """
        dir_ = os.path.join('models', self.exp_name)
        # exist_ok avoids the check-then-create race of the previous
        # `if not os.path.exists(...)` guard.
        os.makedirs(dir_, exist_ok=True)
        model_state = self.model.state_dict()
        for key in model_state.keys():  # Always save it to cpu
            model_state[key] = model_state[key].cpu()
        print('=> Saving model to:', filename)
        torch.save(model_state, os.path.join(dir_, filename + '.pth'))
        print('=> Save Done')

    def train_(self, epochs, finetune=False):
        """Main training loop for pretraining or finetuning.

        :param epochs (int): number of epochs to run
        :param finetune (bool): when True, call switch_finetune() first and
            log under the 'finetune' tensorboard prefix instead of 'pretrain'
        """
        str_ = 'pretrain'
        self.str_ = str_

        if finetune:
            self.switch_finetune()
            str_ = 'finetune'
            self.str_ = str_

        for epoch in range(epochs):

            data_timer = Timer()
            batch_timer = Timer()
            batch_time = AverageMeter()
            data_time = AverageMeter()
            losses = AverageMeter()
            acc = AverageMeter()
            self.model.train()
            # NOTE(review): scheduler.step(epoch) before the optimizer steps
            # is the legacy (pre-1.1) PyTorch ordering -- confirm intended.
            self.scheduler.step(epoch)
            if self.config['train_between']:
                # At the first scheduled epoch, unfreeze all parameters and
                # rebuild the optimizer with a new weight decay.
                if epoch == self.config['schedule'][0]:
                    for param in self.model.parameters():
                        param.requires_grad = True
                    #self.config['lr'] = 0.01
                    self.config['weight_decay'] = 5e-4
                    self.init_optimizer()
                if self.config['switch_all']:
                    # Later switch point: unfreeze again with stronger decay.
                    if epoch == self.config['switch_all']:
                        self.config['weight_decay'] = 5e-3
                        for param in self.model.parameters():
                            param.requires_grad = True
                        self.init_optimizer()
                    #self.config['lr'] = 0.01

            for param_group in self.optimizer.param_groups:
                self.log('LR:', param_group['lr'])

            self.log('Itr\t\tTime\t\t  Data\t\t  Loss\t\tAcc')
            self.log('{0} Epoch:{1}'.format(str_, epoch))

            data_timer.tic()
            batch_timer.tic()

            for i, (input, target) in enumerate(self.train_loader):
                self.model.train()
                data_time.update(data_timer.toc())  # measure data loading time

                if self.gpu:
                    input = input.cuda()
                    target = target.cuda()

                loss, output = self.update_model(input, target)
                input = input.detach()
                target = target.detach()

                # measure accuracy and record loss
                acc = self.accumulate_acc(output, target, acc)
                losses.update(loss, input.size(0))
                batch_time.update(batch_timer.toc())  # measure elapsed time
                data_timer.toc()
                # Global iteration index used as the tensorboard x-axis.
                self.n_iter = (epoch) * len(self.train_loader) + i
                self.writer.add_scalar(str_ + '/Loss_train', losses.avg,
                                       self.n_iter)
                self.writer.add_scalar(str_ + '/Acc_train', acc.avg,
                                       self.n_iter)
                # if ((self.config['print_freq']>0) and (i % self.config['print_freq'] == 0)) or (i+1)==len(train_loader):
                self.log('[{0}/{1}]\t'
                         '{batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                         '{data_time.val:.4f} ({data_time.avg:.4f})\t'
                         '{loss.val:.3f} ({loss.avg:.3f})\t'
                         '{acc.val:.2f} ({acc.avg:.2f})'.format(
                             i,
                             len(self.train_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             acc=acc))

            # Evaluate once per epoch and log the test metrics.
            acc_v, loss_v = self.validation(self.test_loader)
            self.writer.add_scalar(str_ + '/Loss_test', loss_v.avg,
                                   self.n_iter)
            self.writer.add_scalar(str_ + '/Acc_test', acc_v.avg, self.n_iter)

            # Periodic checkpoint (epoch 0 is skipped).
            if epoch % self.save_after == 0 and epoch != 0:
                self.save_model(str_ + str(epoch))
def main(args):
  """Fine-tune a BiT model (5-class head) with focal loss.

  Resumes from a saved checkpoint if one exists under the log directory,
  trains with a OneCycleLR schedule, and saves the model whenever
  validation accuracy improves.
  """

  best_acc = -1

  logger = bit_common.setup_logger(args)
  # NOTE(review): cp/cn (label-smoothing targets) are computed but never
  # used below -- confirm whether smoothing was meant to be applied.
  cp, cn = smooth_BCE(eps=0.1)

  # Lets cuDNN benchmark conv implementations and choose the fastest.
  # Only good if sizes stay the same within the main loop!
  torch.backends.cudnn.benchmark = True

  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  logger.info(f"Going to train on {device}")

  classes = 5

  train_set, valid_set, train_loader, valid_loader = mktrainval(args, logger)
  print(len(train_loader))
  logger.info(f"Loading model from {args.model}.npz")
  model = models.KNOWN_MODELS[args.model](head_size=classes, zero_head=True)
  model.load_from(np.load(f"{args.model}.npz"))

  logger.info("Moving model onto all GPUs")
  model = torch.nn.DataParallel(model)

  # Optionally resume from a checkpoint.
  # Load it to CPU first as we'll move the model to GPU later.
  # This way, we save a little bit of GPU memory when loading.
  start_epoch = 0

  # Note: no weight-decay!
  optim = torch.optim.SGD(model.parameters(), lr=0.003, momentum=0.9)

  # Resume fine-tuning if we find a saved model.
  savename = pjoin(args.logdir, args.name, "bit.pth.tar")
  try:
    logger.info(f"Model will be saved in '{savename}'")
    checkpoint = torch.load(savename, map_location="cpu")
    logger.info(f"Found saved model to resume from at '{savename}'")

    start_epoch = checkpoint["epoch"]
    model.load_state_dict(checkpoint["model"])
    optim.load_state_dict(checkpoint["optim"])
    logger.info(f"Resumed at epoch {start_epoch}")
  except FileNotFoundError:
    logger.info("Fine-tuning from BiT")

  model = model.to(device)
  optim.zero_grad()

  model.train()
  # Mixup is disabled (mixup = -1 means the beta-sampling branch never runs).
  #mixup = bit_hyperrule.get_mixup(len(train_set))
  mixup = -1
  from focalloss import FocalLoss
  cri = FocalLoss(gamma=1.0)
  #cri = torch.nn.CrossEntropyLoss().to(device)

  logger.info("Starting training!")
  chrono = lb.Chrono()
  accum_steps = 0
  mixup_l = np.random.beta(mixup, mixup) if mixup > 0 else 1
  end = time.time()

  epoches = 20
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optim, max_lr=0.01, steps_per_epoch=1, epochs=epoches)

  with lb.Uninterrupt() as u:
      for epoch in range(start_epoch, epoches):

          pbar = enumerate(train_loader)
          pbar = tqdm.tqdm(pbar, total=len(train_loader))

          # One scheduler step per epoch (matches steps_per_epoch=1 above).
          scheduler.step()
          all_top1, all_top5 = [], []
          for param_group in optim.param_groups:
              lr = param_group["lr"]
          #for x, y in recycle(train_loader):
          for batch_id, (x, y) in pbar:
          #for batch_id, (x, y) in enumerate(train_loader):
            # measure data loading time, which is spent in the `for` statement.
            chrono._done("load", time.time() - end)

            if u.interrupted:
              break

            # Schedule sending to GPU(s)
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            # Update learning-rate, including stop training if over.
            #lr = bit_hyperrule.get_lr(step, len(train_set), args.base_lr)            
            #if lr is None:
            #  break

            if mixup > 0.0:
              x, y_a, y_b = mixup_data(x, y, mixup_l)

            # compute output
            with chrono.measure("fprop"):
              logits = model(x)
              top1, top5 = topk(logits, y, ks=(1, 5))
              all_top1.extend(top1.cpu())
              all_top5.extend(top5.cpu())
              if mixup > 0.0:
                c = mixup_criterion(cri, logits, y_a, y_b, mixup_l)
              else:
                c = cri(logits, y)
            train_loss = c.item()
            # Running epoch accuracy over all batches seen so far.
            train_acc  = np.mean(all_top1)*100.0
            # Accumulate grads
            with chrono.measure("grads"):
              (c / args.batch_split).backward()
              accum_steps += 1
            accstep = f"({accum_steps}/{args.batch_split})" if args.batch_split > 1 else ""
            s = f"epoch={epoch} batch {batch_id}{accstep}: loss={train_loss:.5f} train_acc={train_acc:.2f} lr={lr:.1e}"
            #s = f"epoch={epoch} batch {batch_id}{accstep}: loss={c.item():.5f} lr={lr:.1e}"
            pbar.set_description(s)
            #logger.info(f"[batch {batch_id}{accstep}]: loss={c_num:.5f} (lr={lr:.1e})")  # pylint: disable=logging-format-interpolation
            logger.flush()

            # Update params
            with chrono.measure("update"):
                optim.step()
                optim.zero_grad()
            # Sample new mixup ratio for next batch
            mixup_l = np.random.beta(mixup, mixup) if mixup > 0 else 1

          # Run evaluation and save the model.
          val_loss, val_acc = run_eval(model, valid_loader, device, chrono, logger, epoch)

          # NOTE(review): train_acc comes from the last training batch and
          # is unbound if the train loader yielded no batches -- verify.
          best = val_acc > best_acc
          if best:
              best_acc = val_acc
              torch.save({
                  "epoch": epoch,
                  "val_loss": val_loss,
                  "val_acc": val_acc,
                  "train_acc": train_acc,
                  "model": model.state_dict(),
                  "optim" : optim.state_dict(),
              }, savename)
          end = time.time()

  logger.info(f"Timings:\n{chrono}")
# Beispiel #20
def main_worker(index, opt):
    """Per-process entry point: train/validate/infer a video model,
    optionally under distributed data parallel.

    :param index: process/GPU index; >= 0 selects cuda:<index>
    :param opt: global options namespace (mutated with per-worker values)
    """
    # Seed all RNGs for reproducibility.
    random.seed(opt.manual_seed)
    np.random.seed(opt.manual_seed)
    torch.manual_seed(opt.manual_seed)

    if index >= 0 and opt.device.type == 'cuda':
        opt.device = torch.device(f'cuda:{index}')

    if opt.distributed:
        # Global rank = node rank * gpus-per-node + local index.
        opt.dist_rank = opt.dist_rank * opt.ngpus_per_node + index
        dist.init_process_group(backend='nccl',
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.dist_rank)
        # Split the batch size and dataloader threads across this node's GPUs.
        opt.batch_size = int(opt.batch_size / opt.ngpus_per_node)
        opt.n_threads = int(
            (opt.n_threads + opt.ngpus_per_node - 1) / opt.ngpus_per_node)
    opt.is_master_node = not opt.distributed or opt.dist_rank == 0

    model = generate_model(opt)
    if opt.batchnorm_sync:
        assert opt.distributed, 'SyncBatchNorm only supports DistributedDataParallel.'
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    if opt.pretrain_path:
        model = load_pretrained_model(model, opt.pretrain_path, opt.model,
                                      opt.n_finetune_classes)
    if opt.resume_path is not None:
        model = resume_model(opt.resume_path, opt.arch, model)
    model = make_data_parallel(model, opt.distributed, opt.device)

    # When fine-tuning a pretrained net, optimize only from ft_begin_module on.
    if opt.pretrain_path:
        parameters = get_fine_tuning_parameters(model, opt.ft_begin_module)
    else:
        parameters = model.parameters()

    if opt.is_master_node:
        print(model)

    #criterion = CrossEntropyLoss().to(opt.device)
    # ADDED for 231n
    criterion = FocalLoss().to(opt.device)

    if not opt.no_train:
        (train_loader, train_sampler, train_logger, train_batch_logger,
         optimizer, scheduler) = get_train_utils(opt, parameters)
        if opt.resume_path is not None:
            opt.begin_epoch, optimizer, scheduler = resume_train_utils(
                opt.resume_path, opt.begin_epoch, optimizer, scheduler)
            if opt.overwrite_milestones:
                scheduler.milestones = opt.multistep_milestones
    if not opt.no_val:
        val_loader, val_logger = get_val_utils(opt)

    if opt.tensorboard and opt.is_master_node:
        from torch.utils.tensorboard import SummaryWriter
        # When resuming, purge_step drops events logged past the resume epoch.
        if opt.begin_epoch == 1:
            tb_writer = SummaryWriter(log_dir=opt.result_path)
        else:
            tb_writer = SummaryWriter(log_dir=opt.result_path,
                                      purge_step=opt.begin_epoch)
    else:
        tb_writer = None

    conf_mtx_dict = {}  # ADDED for CS231n

    prev_val_loss = None
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        if not opt.no_train:
            if opt.distributed:
                # Reshuffle the sampler's shards each epoch.
                train_sampler.set_epoch(i)
            current_lr = get_lr(optimizer)
            train_epoch(i, train_loader, model, criterion, optimizer,
                        opt.device, current_lr, train_logger,
                        train_batch_logger, tb_writer, opt.distributed)

            if i % opt.checkpoint == 0 and opt.is_master_node:
                save_file_path = opt.result_path / 'save_{}.pth'.format(i)
                save_checkpoint(save_file_path, i, opt.arch, model, optimizer,
                                scheduler)

        if not opt.no_val:
            prev_val_loss = val_epoch(i, val_loader, model, criterion,
                                      opt.device, val_logger, tb_writer,
                                      opt.distributed,
                                      conf_mtx_dict)  # ADDED for CS231n

        # ADDED for 231n - uncomment if using cross entropy loss
        #if not opt.no_train and opt.lr_scheduler == 'multistep':
        #    scheduler.step()
        #elif not opt.no_train and opt.lr_scheduler == 'plateau':
        #    scheduler.step(prev_val_loss)

    if opt.inference:
        inference_loader, inference_class_names = get_inference_utils(opt)
        inference_result_path = opt.result_path / '{}.json'.format(
            opt.inference_subset)

        inference.inference(inference_loader, model, inference_result_path,
                            inference_class_names, opt.inference_no_average,
                            opt.output_topk)

    # ADDED for CS231n
    # NOTE(review): this file handle is never closed explicitly -- consider
    # a `with open(...)` block.
    conf_mtx_file = csv.writer(open("conf_mtxs.csv", "w+"))
    for key, val in conf_mtx_dict.items():
        conf_mtx_file.writerow([key, val])
# Beispiel #21
def main():
    """Epoch-based RetinaNet-101 training on VOC07+12.

    Builds the train/test dataloaders, optionally resumes from a
    checkpoint, and saves the best-test-loss model plus periodic
    checkpoints.
    """
    torch.backends.cudnn.benchmark = True
    global best_loss, args, use_gpu
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    train_imgdir = '/home/zhaoyang/data/voc0712/train'
    test_imgdir = '/home/zhaoyang/data/voc0712/test'
    train_annotation_file = '/home/zhaoyang/data/voc0712/annotation/train_annotation.txt'
    test_annotaiion_file = '/home/zhaoyang/data/voc0712/annotation/test_annotation.txt'
    normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
    train_set = VocDataset(train_imgdir,
                           train_annotation_file,
                           input_size=[600, 600],
                           transform=transforms.Compose(
                               [transforms.ToTensor(), normalizer]))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=8,
                                               shuffle=True,
                                               num_workers=8,
                                               collate_fn=train_set.collate_fn)
    test_set = VocDataset(test_imgdir,
                          test_annotaiion_file,
                          input_size=[600, 600],
                          transform=transforms.Compose(
                              [transforms.ToTensor(), normalizer]))
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=4,
                                              shuffle=True,
                                              num_workers=8,
                                              collate_fn=test_set.collate_fn)

    model = retinanet101(pretrained=True, num_classes=args.num_classes)
    # model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    optimizer = optim.SGD(model.parameters(),
                          args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=[80, 120],
                                                  gamma=0.1)
    #criterion = FocalLoss(args.alpha * torch.ones(args.num_classes, 1), args.gamma)
    criterion = FocalLoss()

    if args.resume:
        if os.path.isfile(args.resume):
            print('Loading model from {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            # BUG FIX: load_state_dict() mutates in place and does not
            # return the model/optimizer; the previous rebinding
            # (`model = model.load_state_dict(...)`) destroyed both objects.
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            args.start_epoch = checkpoint['epoch']
            best_loss = checkpoint['best_loss']
            print('Loaded model from {} (epoch {})'.format(
                args.resume, args.start_epoch))
        else:
            print('No checkpoint founded in {}'.format(args.resume))

    if use_gpu:
        model.cuda()
        criterion.cuda()

    # BUG FIX: xrange is Python 2 only; use range.
    for epoch in range(args.start_epoch, args.epochs):
        #test_loss = test_model(model, test_loader, criterion)
        #print("test loss", test_loss)
        lr_scheduler.step()
        train_model(model, train_loader, optimizer, criterion, epoch)
        if (epoch + 1) % args.test_freq == 0:
            test_loss = test_model(model, test_loader, criterion)
            if test_loss <= best_loss:
                best_loss = test_loss
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'best_loss': best_loss
                    }, epoch + 1, True)
                print('best test loss: {}'.format(best_loss))
        if (epoch + 1) % args.save_freq == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_loss': best_loss
                }, epoch + 1, False)
def run_experiment(_exp_name, _epochs, _train_manifest, _test_manifest,
                   _labels, _use_mfcc_in, _use_ivectors_in, _use_embeddings_in,
                   _use_transcripts_out, _use_accents_out, _batch_size,
                   _num_workers, _mfcc_size, _ivector_size, _embedding_size,
                   _rnn_type, _rnn_hidden_size, _nb_head_layers,
                   _nb_speech_layers, _nb_accents_layers, _bidirectional,
                   _losses_mix, _learning_rate, _lm_path, _decoder_alpha,
                   _decoder_beta, _decoder_cutoff_top_n, _decoder_beam_width,
                   _cuda, _tensorboard_path, _saved_models_path,
                   _bottleneck_size, _accent_loss):
    """Train and evaluate a MultiTask (speech-to-text + accent) model.

    Builds the train/test loaders, model, optimizer, criterion and decoder,
    runs ``_epochs`` train/test epochs, logs every metric to tensorboard,
    and serializes the model each time the tracked metric (WER when
    transcripts are enabled, accent accuracy otherwise) improves.
    """

    print(f'\n##### Running experiment {_exp_name} #####')

    # Tools to log values
    results_dict = {}
    results_dict['train_loss'] = []
    results_dict['train_loss_text'] = []
    results_dict['train_loss_accent'] = []
    results_dict['test_loss'] = []
    results_dict['test_loss_text'] = []
    results_dict['test_loss_accent'] = []
    results_dict['test_wer'] = []
    results_dict['test_accent_acc'] = []

    tb_path = Path(_tensorboard_path) / _exp_name
    makedirs(tb_path, exist_ok=True)
    tb_writer = SummaryWriter(tb_path)

    ### DATA LOADING

    # Training set
    train_dataset = MultiDataset(_train_manifest,
                                 _labels,
                                 use_mfcc_in=_use_mfcc_in,
                                 use_ivectors_in=_use_ivectors_in,
                                 use_embeddings_in=_use_embeddings_in,
                                 embedding_size=_embedding_size,
                                 use_transcripts_out=_use_transcripts_out,
                                 use_accents_out=_use_accents_out)

    train_loader = MultiDataLoader(train_dataset,
                                   batch_size=_batch_size,
                                   shuffle=True,
                                   num_workers=_num_workers)

    # Testing set
    test_dataset = MultiDataset(_test_manifest,
                                _labels,
                                use_mfcc_in=_use_mfcc_in,
                                use_ivectors_in=_use_ivectors_in,
                                use_embeddings_in=_use_embeddings_in,
                                embedding_size=_embedding_size,
                                use_transcripts_out=_use_transcripts_out,
                                use_accents_out=_use_accents_out)

    test_loader = MultiDataLoader(test_dataset,
                                  batch_size=_batch_size,
                                  shuffle=True,
                                  num_workers=_num_workers)

    ### CREATE MODEL

    model = MultiTask(use_mfcc_in=_use_mfcc_in,
                      use_ivectors_in=_use_ivectors_in,
                      use_embeddings_in=_use_embeddings_in,
                      use_transcripts_out=_use_transcripts_out,
                      use_accents_out=_use_accents_out,
                      mfcc_size=_mfcc_size,
                      ivector_size=_ivector_size,
                      embedding_size=_embedding_size,
                      rnn_type=_rnn_type,
                      labels=_labels,
                      accents_dict=train_dataset.accent_dict,
                      rnn_hidden_size=_rnn_hidden_size,
                      nb_head_layers=_nb_head_layers,
                      nb_speech_layers=_nb_speech_layers,
                      nb_accents_layers=_nb_accents_layers,
                      bidirectional=_bidirectional,
                      bottleneck_size=_bottleneck_size,
                      DEBUG=False)
    if _cuda:
        model = model.cuda()

    print(model, '\n')
    print('Model parameters counts:', MultiTask.get_param_size(model), '\n')

    ### OPTIMIZER, CRITERION, DECODER

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=_learning_rate)

    # Criterion
    if _use_accents_out:
        if _accent_loss == 'focal':
            AccLoss = FocalLoss()
        elif _accent_loss == 'CE':
            AccLoss = nn.CrossEntropyLoss()
        else:
            raise ValueError(
                f'Loss {_accent_loss} for accent_loss is unknown. Please use either "focal" or "CE".'
            )

    # NOTE(review): if both _use_transcripts_out and _use_accents_out are
    # False, AccLoss is unbound here; and the both-tasks branch hardcodes
    # FocalLoss(), ignoring _accent_loss -- confirm both are intended.
    if not _use_transcripts_out:  # only accent classification
        criterion = AccLoss
    elif not _use_accents_out:  # only text recognition
        criterion = nn.CTCLoss()
    else:  # both tasks
        criterion = (nn.CTCLoss(), FocalLoss())

    # Decoder
    if _use_transcripts_out:
        # NOTE(review): cutoff_prob is passed _decoder_cutoff_top_n (an
        # integer count, not a probability) -- verify against BeamCTCDecoder.
        decoder = BeamCTCDecoder(_labels,
                                 lm_path=_lm_path,
                                 alpha=_decoder_alpha,
                                 beta=_decoder_beta,
                                 cutoff_top_n=_decoder_cutoff_top_n,
                                 cutoff_prob=_decoder_cutoff_top_n,
                                 beam_width=_decoder_beam_width,
                                 num_processes=_num_workers)

        target_decoder = GreedyDecoder(_labels)
    else:
        decoder, target_decoder = None, None

    ### EPOCHS
    best_wer = math.inf
    best_acc = 0

    for epoch in range(1, _epochs + 1):
        ### TRAIN
        # BUG FIX: was `exp_name` (NameError -- only _exp_name is in scope).
        print(f'Epoch {epoch} training: {_exp_name}')
        train_results = train(model,
                              train_loader,
                              criterion,
                              optimizer,
                              losses_mix=_losses_mix)
        train_loss, train_loss_text, train_loss_accent = train_results

        results_dict['train_loss'].append(train_loss)
        results_dict['train_loss_text'].append(train_loss_text)
        results_dict['train_loss_accent'].append(train_loss_accent)
        print(f'Epoch {epoch} training loss: {train_loss}')

        ### TEST
        print(f'Epoch {epoch} testing')
        test_results = test(model,
                            test_loader,
                            criterion,
                            decoder,
                            target_decoder,
                            losses_mix=_losses_mix)
        test_loss, test_loss_text, test_loss_accent, test_wer, test_accent_acc = test_results

        results_dict['test_loss'].append(test_loss)
        results_dict['test_loss_text'].append(test_loss_text)
        results_dict['test_loss_accent'].append(test_loss_accent)
        results_dict['test_wer'].append(test_wer)
        results_dict['test_accent_acc'].append(test_accent_acc)
        print(f'Epoch {epoch} testing loss: {test_loss}')

        # Add values to tensorboard
        for key, results in results_dict.items():
            tb_writer.add_scalar(key, results[-1], epoch)

        #Save model if it is best
        save_new = False
        if _use_transcripts_out:
            if test_wer < best_wer:
                save_new = True
                best_wer = test_wer
        else:
            if test_accent_acc > best_acc:
                save_new = True
                best_acc = test_accent_acc

        if save_new:
            MultiTask.serialize(
                model,
                Path(_saved_models_path) / _exp_name,
                save=True,
                exp_name=_exp_name,
                optimizer=optimizer,
                epoch=epoch,
                train_losses=results_dict['train_loss'],
                test_losses=results_dict['test_loss'],
                text_train_losses=results_dict['train_loss_text'],
                text_test_losses=results_dict['test_loss_text'],
                text_wers=results_dict['test_wer'],
                accent_train_losses=results_dict['train_loss_accent'],
                accent_test_losses=results_dict['test_loss_accent'],
                accent_accuracies=results_dict['test_accent_acc'])

    # Free GPU memory before the next experiment in the sweep.
    del model
    gc.collect()
    torch.cuda.empty_cache()
# Beispiel #23
def main():
    """Step-based RetinaNet-101 training on VOC07+12 (4-GPU DataParallel).

    Like the epoch-based variant above, but runs until a global step count
    is reached; checkpoints store 'step' instead of 'epoch'.
    """
    torch.backends.cudnn.benchmark = True
    global best_loss, args, use_gpu
    global step, test_loader
    args = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    train_imgdir = '/home/zhaoyang/data/voc0712/train'
    test_imgdir = '/home/zhaoyang/data/voc0712/test'
    train_annotation_file = '/home/zhaoyang/data/voc0712/annotation/train_annotation.txt'
    test_annotaiion_file = '/home/zhaoyang/data/voc0712/annotation/test_annotation.txt'
    normalizer = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
    train_set = VocDataset(train_imgdir,
                           train_annotation_file,
                           input_size=[600, 600],
                           transform=transforms.Compose(
                               [transforms.ToTensor(), normalizer]))
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=16,
                                               shuffle=True,
                                               num_workers=8,
                                               collate_fn=train_set.collate_fn)
    test_set = VocDataset(test_imgdir,
                          test_annotaiion_file,
                          input_size=[600, 600],
                          transform=transforms.Compose(
                              [transforms.ToTensor(), normalizer]))
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=4,
                                              shuffle=True,
                                              num_workers=8,
                                              collate_fn=test_set.collate_fn)

    model = retinanet101(pretrained=True, num_classes=args.num_classes)
    model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    optimizer = optim.SGD(model.parameters(),
                          0.001,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    #lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 40], gamma=0.1)
    #criterion = FocalLoss(args.alpha * torch.ones(args.num_classes, 1), args.gamma)
    criterion = FocalLoss()

    if args.resume:
        if os.path.isfile(args.resume):
            print('Loading model from {}'.format(args.resume))
            checkpoint = torch.load(args.resume)
            # BUG FIX: load_state_dict() mutates in place and does not
            # return the model/optimizer; the previous rebinding
            # (`model = model.load_state_dict(...)`) destroyed both objects.
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            args.start_step = checkpoint['step']
            best_loss = checkpoint['best_loss']
            # BUG FIX: this checkpoint stores a step, not an epoch;
            # args.start_epoch was never set in this function.
            print('Loaded model from {} (step {})'.format(
                args.resume, args.start_step))
        else:
            print('No checkpoint founded in {}'.format(args.resume))

    if use_gpu:
        model.cuda()
        criterion.cuda()
    step = args.start_step
    # train_model is expected to advance the global `step` counter.
    while step <= args.steps:
        #lr_scheduler.step()
        train_model(model, train_loader, optimizer, criterion)
def unet_train():
    """Jointly train a shared U-Net encoder with two dataset-specific heads:
    a real-crack (RC, EdmCrack600) classifier and a sealed-crack (SC,
    EdmSealedCrack) classifier.

    One model is trained per (loss, optimizer) combination from
    ``losslist`` x ``optimlist`` for the matching entry of ``num_epochs``.
    Encoder and both heads are checkpointed to ./trained_models/ at the end
    of every epoch; a history array is dumped to ./logs/ per combination.
    """
    # NOTE(review): batch_size/num_workers were previously defined here but
    # never passed to the DataLoader (both are hard-coded to 2 below), and
    # iflog was never read — those unused locals have been dropped.
    num_epochs = [5000, 5000, 5000, 5000, 5000, 5000, 5000, 5000]
    lr = 0.0001

    losslist = ['dice']  # ['focal', 'bce', 'dice', 'lovasz']
    optimlist = ['adam']  # ['adam', 'sgd']

    SC_root_dir = '../dataset-EdmSealedCrack-512'
    train_files, val_files, test_files = myutils.organize_SC_files(SC_root_dir)

    train_RC_dataset = DatasetRealCrack('../dataset-EdmCrack600-512/A/train',
                                        transform=transform)
    train_SC_dataset = DatasetSealedCrack(files=train_files,
                                          root_dir=SC_root_dir,
                                          transform=data_Train_transforms)
    # NOTE(review): the validation datasets are constructed but never used in
    # this function; kept so any side effects of construction are preserved.
    val_RC_dataset = DatasetRealCrack('../dataset-EdmCrack600-512/A/val',
                                      transform=transform)
    val_SC_dataset = DatasetSealedCrack(files=val_files,
                                        root_dir=SC_root_dir,
                                        transform=data_Test_transforms)

    # ConcatDataset pairs the two sets: each batch yields (RC batch, SC batch).
    train_loader = torch.utils.data.DataLoader(ConcatDataset(
        train_RC_dataset, train_SC_dataset),
                                               batch_size=2,
                                               shuffle=True,
                                               num_workers=2)

    criterion = nn.BCELoss()
    focallos = FocalLoss(gamma=2)
    doubleFocalloss = focalloss.FocalLoss_2_datasets(gamma=2)

    epoidx = -1
    for los in losslist:
        for opt in optimlist:
            start = time.time()
            print(los, opt)
            # Fixed seeds so every (loss, optimizer) run starts identically.
            torch.manual_seed(77)
            torch.cuda.manual_seed(77)
            #################
            #unet = Unet_SpatialPyramidPooling(3).cuda()
            #################
            unet = Unet(3).cuda()
            SC_classifier = classifier(64, 2).cuda()
            RC_classifier = classifier(64, 2).cuda()

            ##################
            #unet = smp.Unet('resnet34', encoder_weights='imagenet').cuda()
            #unet.segmentation_head = torch.nn.Sequential().cuda()
            #SC_classifier = classifier(16, 2).cuda()
            #RC_classifier = classifier(16, 2).cuda()

            #UNCOMMENT TO KEEP TRAINING THE BEST MODEL
            prev_epoch = 0  # if loading model 58, change to prev_epoch = 58. When saving the model, it is going to be named as 59, 60, 61...
            #unet.load_state_dict(torch.load('trained_models/unet_adam_dice_58.pkl'))
            #SC_classifier.load_state_dict(torch.load('trained_models/SC_classifier_adam_dice_58.pkl'))
            #RC_classifier.load_state_dict(torch.load('trained_models/RC_classifier_adam_dice_58.pkl'))

            # NOTE(review): history is never appended to, so the .npy dump
            # below is always an empty array — presumably per-epoch losses
            # were meant to be recorded; confirm before relying on the logs.
            history = []
            if 'adam' in opt:
                optimizer = torch.optim.Adam(unet.parameters(), lr=lr)
            elif 'sgd' in opt:
                optimizer = torch.optim.SGD(unet.parameters(),
                                            lr=10 * lr,
                                            momentum=0.9)

            logging.basicConfig(filename='./logs/logger_unet.log',
                                level=logging.INFO)

            total_step = len(train_loader)
            epoidx += 1
            for epoch in range(num_epochs[epoidx]):
                totalloss = 0
                for i, (realCrack_batch,
                        sealedCrack_batch) in enumerate(train_loader):
                    SC_images = sealedCrack_batch[0].cuda()
                    SC_masks = sealedCrack_batch[1].cuda()
                    RC_images = realCrack_batch[0].cuda()
                    RC_masks = realCrack_batch[1].cuda()
                    # Shared encoder, separate per-dataset classification heads.
                    SC_encoder = unet(SC_images)
                    RC_encoder = unet(RC_images)
                    #############
                    SC_outputs = SC_classifier(SC_encoder)
                    RC_outputs = RC_classifier(RC_encoder)
                    #############
                    #Deep lab v3
                    #SC_outputs = SC_classifier(SC_encoder['out'])
                    #RC_outputs = RC_classifier(RC_encoder['out'])
                    ##############
                    # NOTE(review): the 'bce' and 'lovasz' branches reference
                    # undefined names (masks/outputs) and would raise
                    # NameError if ever selected; only 'dice' and 'focal' are
                    # usable in this two-dataset setup.
                    if 'bce' in los:
                        masks = onehot(masks)
                        loss = criterion(outputs, masks)
                    elif 'dice' in los:
                        branch_RC = {'outputs': RC_outputs, 'masks': RC_masks}
                        branch_SC = {'outputs': SC_outputs, 'masks': SC_masks}
                        loss = dice_loss_2_datasets(branch_RC, branch_SC)
                        #masks = onehot(masks)
                        #loss = dice_loss(outputs, masks)
                    elif 'lovasz' in los:
                        masks = onehot(masks)
                        loss = L.lovasz_hinge(outputs, masks)
                    elif 'focal' in los:
                        #loss = focallos(outputs, masks.long())
                        branch_RC = {
                            'outputs': RC_outputs,
                            'masks': RC_masks.long()
                        }
                        branch_SC = {
                            'outputs': SC_outputs,
                            'masks': SC_masks.long()
                        }
                        loss = doubleFocalloss(branch_RC, branch_SC)
                    # BUGFIX: accumulate a Python float, not the loss tensor —
                    # summing tensors kept every batch's autograd graph alive
                    # and grew GPU memory unboundedly over the epoch.
                    totalloss += loss.item() * RC_images.size(0)  #*2?
                    #print(RC_images.size(0))

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    if i % 10 == 0:
                        print(epoch, i)
                        print("total loss: ", totalloss)
                    if i % 1000 == 0:
                        print("Epoch:%d;     Iteration:%d;      Loss:%f" %
                              (epoch, i, loss))

                    # Checkpoint encoder and both heads at the last step of
                    # each epoch; files are numbered from prev_epoch + 1.
                    if i + 1 == total_step:  # and epoch%1==0: #and val_miou>0.85:
                        torch.save(
                            unet.state_dict(),
                            './trained_models/unet_' + opt + '_' + los + '_' +
                            str(epoch + 1 + prev_epoch) + '.pkl')
                        torch.save(
                            RC_classifier.state_dict(),
                            './trained_models/RC_classifier_' + opt + '_' +
                            los + '_' + str(epoch + 1 + prev_epoch) + '.pkl')
                        torch.save(
                            SC_classifier.state_dict(),
                            './trained_models/SC_classifier_' + opt + '_' +
                            los + '_' + str(epoch + 1 + prev_epoch) + '.pkl')
                history_np = np.array(history)
                np.save('./logs/unet_' + opt + '_' + los + '.npy', history_np)
            end = time.time()
            print((end - start) / 60)
Beispiel #25
0
def visualize_example(data_loader=None,
                      sample=None,
                      model=None,
                      classifier=None):
    '''
    Plot an image, its ground truth and (optionally) model predictions.

    Input:
      - data_loader: torch DataLoader; if given, its first batch is plotted
      - sample: dictionary specifying {image, anotation} to be plotted
        (used when data_loader is None)
      - model: torch model producing encoder features
      - classifier: head mapping the encoder features to per-pixel scores

    Output: image, ground truth, prediction (probability map) and the
    superposition of image and prediction, shown as one grid. When a model
    is supplied, also prints focal loss and precision / recall / F1.
    '''
    if (data_loader is None) and (sample is None):
        return
    focallos = FocalLoss(gamma=2)
    if model is not None:
        pass
        #model.eval()
        #classifier.eval()
    if data_loader is not None:
        dataiter = iter(data_loader)
        # BUGFIX: `dataiter.next()` relied on the Python-2-style method that
        # was removed from the DataLoader iterator; use builtin next().
        image, anotation = next(dataiter)
    else:
        image, anotation = sample['image'], sample['anotation']

    image = image.to('cuda')
    anotation = anotation.type('torch.LongTensor')
    anotation = anotation.to('cuda')
    if model is not None:
        with torch.no_grad():
            outputs = model(image)  #unsqueeze?
            outputs = classifier(outputs)
    # Undo the Normalize(mean=0.5, std=0.5) applied by the dataset transform.
    invTrans = transforms.Compose([
        transforms.Normalize(mean=[0., 0., 0.],
                             std=[1 / 0.5, 1 / 0.5, 1 / 0.5]),
        transforms.Normalize(mean=[-0.5, -0.5, -0.5], std=[1., 1., 1.]),
    ])

    image = invTrans(image[0])

    if model is not None:
        loss = focallos(outputs, anotation.long())
        #loss = criterion(outputs, torch.squeeze(anotation,1))
        #print(loss.item(), anotation.shape)

        preds = outputs.argmax(1)  #predictions in black & white

        # F1 score — (x + 1) % 2 flips a {0,1} mask, giving the negatives.
        TP = torch.sum(preds[0] * anotation[0])
        FP = torch.sum(preds[0] * ((anotation[0] + 1) % 2))
        FN = torch.sum(anotation[0] * ((preds[0] + 1) % 2))
        print(TP, FP, FN)
        p = TP / (TP + FP + 1e-20)
        r = TP / (TP + FN + 1e-20)
        F1 = 2 * (p * r) / (p + r + 1e-20)
        print("precision: ", p)
        print("recall: ", r)
        print("F1 score: ", F1)

        #preds = outputs[:,1,:,:] > 0.3
        # NOTE(review): channel 1 is taken as the positive-class score map —
        # presumably class 1 = crack; confirm against the classifier head.
        preds_prob = outputs[:, 1, :, :]  #predictions in probability
        #print(outputs.shape, outputs.argmax(1).shape, "preds shape", preds.shape)
        preds = preds.to('cpu')
        preds = preds.type('torch.FloatTensor')
        preds_PIL = transforms.ToPILImage()(preds[0])
        preds_PIL = preds_PIL.convert('RGB')
        preds_prob = preds_prob.to('cpu')
        preds_prob = preds_prob.type('torch.FloatTensor')
        preds_prob_PIL = transforms.ToPILImage()(preds_prob[0])
        preds_prob_PIL = preds_prob_PIL.convert('RGB')
    anotation = anotation.to('cpu')
    anotation = anotation.type('torch.FloatTensor')
    #print(anotation[0].dtype)
    anotation_PIL = transforms.ToPILImage()(anotation[0])
    anotation_PIL = anotation_PIL.convert('RGB')
    image = image.to('cpu')
    image_PIL = transforms.ToPILImage()(image)
    image_PIL = image_PIL.convert('RGB')

    if model is not None:
        superposition = superposition_anotation(
            transforms.ToTensor()(image_PIL),
            transforms.ToTensor()(preds_PIL))  #replace 2 channels to 0s
        img_grid = torchvision.utils.make_grid([
            transforms.ToTensor()(image_PIL),
            transforms.ToTensor()(anotation_PIL),
            transforms.ToTensor()(preds_prob_PIL), superposition
        ])
    else:
        superposition = superposition_anotation(
            transforms.ToTensor()(image_PIL),
            transforms.ToTensor()(anotation_PIL))
        img_grid = torchvision.utils.make_grid([
            transforms.ToTensor()(image_PIL),
            transforms.ToTensor()(anotation_PIL), superposition
        ])

    # show images
    matplotlib_imshow(img_grid)
Beispiel #26
0
def _train_modality(model, loader, optimizer, criterion, epoch, tag):
    """Run one training epoch for a single modality.

    :param model: the modality network (already in train mode)
    :param loader: DataLoader yielding (images, labels) batches
    :param optimizer: optimizer bound to ``model``'s parameters
    :param criterion: loss function (e.g. FocalLoss)
    :param epoch: current 0-based epoch index, used only for logging
    :param tag: modality name printed in the log line ("Color"/"Depth"/"Ir")
    """
    for step, (images, labels) in enumerate(loader):
        # make images and labels variable
        images = make_variable(images)
        labels = make_variable(labels.squeeze_())

        # zero gradients for optimizer
        optimizer.zero_grad()

        # compute loss for critic
        preds = model(images)
        loss = criterion(preds, labels)

        # optimize source classifier
        loss.backward()
        optimizer.step()

        # print step info
        if ((step + 1) % params.log_step_pre == 0):
            print("{} Epoch [{}/{}] Step [{}/{}]: loss={}".format(
                tag, epoch + 1, params.num_epochs, step + 1, len(loader),
                loss.item()))


def train_src_threemodal(model1, model2, model3, train_loader1, train_loader2,
                         train_loader3, val_loader):
    """Pre-train three modality-specific source models (color, depth, ir)
    with focal loss, evaluating and checkpointing on the schedule given by
    ``params``. Returns the three trained models.

    The three per-modality training loops were identical except for the
    model/loader/optimizer/log tag, so they are factored into
    ``_train_modality`` above.
    """
    global lr
    global best_prec1

    lr = params.base_lr

    # model1 = construct_resnet18(model1, params)
    # model2 = construct_resnet18(model2, params)
    # model3 = construct_resnet18(model3, params)
    model1 = construct_resnet34(model1, params)
    model2 = construct_resnet34(model2, params)
    model3 = construct_resnet34(model3, params)

    model1.train()
    model2.train()
    model3.train()

    optimizer1 = torch.optim.Adam(model1.parameters(),
                                  lr=params.base_lr,
                                  betas=(0.9, 0.99))
    optimizer2 = torch.optim.Adam(model2.parameters(),
                                  lr=params.base_lr,
                                  betas=(0.9, 0.99))
    optimizer3 = torch.optim.Adam(model3.parameters(),
                                  lr=params.base_lr,
                                  betas=(0.9, 0.99))
    # criterion = nn.CrossEntropyLoss().cuda()
    focalloss = FocalLoss(gamma=2)

    for epoch in range(params.start_epoch,
                       params.start_epoch + params.num_epochs):
        adjust_learning_rate(optimizer1, epoch, params.base_lr)
        adjust_learning_rate(optimizer2, epoch, params.base_lr)
        adjust_learning_rate(optimizer3, epoch, params.base_lr)
        # train each modality for one epoch
        _train_modality(model1, train_loader1, optimizer1, focalloss, epoch,
                        "Color")
        _train_modality(model2, train_loader2, optimizer2, focalloss, epoch,
                        "Depth")
        _train_modality(model3, train_loader3, optimizer3, focalloss, epoch,
                        "Ir")

        if ((epoch + 1) % params.eval_step_pre == 0):
            eval_acc(model1, model2, model3, val_loader)

        # save model parameters
        if ((epoch + 1) % params.save_step_pre == 0):
            save_model(model1, "MultiNet-color-{}.pt".format(epoch + 1))
            save_model(model2, "MultiNet-depth-{}.pt".format(epoch + 1))
            save_model(model3, "MultiNet-ir-{}.pt".format(epoch + 1))

    # # save final model
    save_model(model1, "MultiNet-color-final.pt")
    save_model(model2, "MultiNet-depth-final.pt")
    save_model(model3, "MultiNet-ir-final.pt")

    return model1, model2, model3
def main():
    """Entry point: parse CLI args, override hyper-parameters, build the
    COVNet_CSR_CBAM model with focal loss, and run the train/validate loop,
    checkpointing the best model by lowest validation loss.

    Relies on module-level names: ``parser``, ``wandb``, ``configparser``,
    model/loss/training helpers, and the ``best_prec1`` global.
    """
    global args, best_prec1
    # Lower is better, so start from a large sentinel value.
    best_prec1 = 1e6
    args = parser.parse_args()
    # Hard-coded hyper-parameters; these override anything given on the CLI.
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    # LR schedule consumed by adjust_learning_rate (step boundaries + scales).
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    # NOTE(review): time.time() is a float; torch.cuda.manual_seed below
    # truncates it to int, so the run is effectively unseeded/irreproducible.
    args.seed = time.time()
    args.print_freq = 30
    wandb.config.update(args)
    # Prefix the run name with the task (or "Default" when they coincide).
    wandb.run.name = f"Default_{wandb.run.name}" if (
        args.task == wandb.run.name) else f"{args.task}_{wandb.run.name}"

    # Data/log locations come from the [COVNet_CSR_Focal_CBAM_HIGH] section
    # of the INI file given by --config.
    conf = configparser.ConfigParser()
    conf.read(args.config)
    print(conf)
    TRAIN_DIR = conf.get("COVNet_CSR_Focal_CBAM_HIGH", "train")
    VALID_DIR = conf.get("COVNet_CSR_Focal_CBAM_HIGH", "valid")
    TEST_DIR = conf.get("COVNet_CSR_Focal_CBAM_HIGH", "test")
    LOG_DIR = conf.get("COVNet_CSR_Focal_CBAM_HIGH", "log")
    create_dir_not_exist(LOG_DIR)
    train_list = [
        os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)
    ]
    val_list = [
        os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)
    ]
    # Must be set before any CUDA call so the right devices are visible.
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(args.seed)
    model = COVNet_CSR_CBAM(3)
    model = model.cuda()
    criterion = FocalLoss(num_class=3, size_average=False).cuda()
    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 betas=(0.9, 0.999),
                                 eps=1e-08,
                                 weight_decay=args.decay)
    # Wrap model+criterion so the loss is computed inside DataParallel.
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)
        # NOTE(review): the log label says "BCELoss" but the criterion is
        # FocalLoss — the value written is the focal-loss validation metric.
        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + "  BCELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))
        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best BCELoss {BCELoss:.3f} '.format(BCELoss=best_prec1))
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.pre,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            args.task,
            epoch=epoch,
            path=os.path.join(LOG_DIR, args.task))