def _make_loader(self, batch_size=1):
    """
    Read the dataset root directories and pick out the file paths that belong
    to the training and test splits.
    :return: (train_loader, test_loader)
    """
    input_root = self.input_root
    ground_root = self.ground_root

    path_dict = {
        "img": input_root,
        "label": ground_root,
    }

    input_data_paths = os.listdir(input_root)
    training_set = [input_data_paths]
    training_len = math.ceil(self.k_fold * len(input_data_paths))
    self.joint_shuffle(training_set)

    train_img_paths = training_set[0][:training_len]
    test_img_paths = training_set[0][training_len:]

    train_dataset = KaggleDataset(path_dict, train_img_paths, ground_root)
    test_dataset = KaggleDataset(path_dict, test_img_paths, ground_root)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size)

    return train_loader, test_loader
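
# joint_shuffle() is called above but not defined in this snippet. A minimal
# sketch, assuming it applies one shared random permutation to every list it is
# given (so paired lists such as image and label paths stay aligned):
import random

def joint_shuffle(lists, seed=None):
    """Hypothetical helper: shuffle parallel lists in place with one permutation."""
    rng = random.Random(seed)
    order = list(range(len(lists[0])))
    rng.shuffle(order)
    for i, seq in enumerate(lists):
        lists[i] = [seq[j] for j in order]
# Called as joint_shuffle(training_set), this reorders training_set[0] in place
# before it is sliced into the train and test path lists.
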
def prepare_dataset(args, category=None):
    if args.mode == 'train':
        train_csv = re.sub(r'\.csv$', '', args.in_train_csv)
        dataset_train = KaggleDataset(train_csv,
                                      transform=Compose(),
                                      img_folder=args.in_train_img,
                                      category=category,
                                      resize_scale=[128, 128])
        return dataset_train

    if args.mode == 'valid':
        valid_csv = re.sub(r'\.csv$', '', args.in_valid_csv)
        dataset_valid = KaggleDataset(valid_csv,
                                      transform=Compose(),
                                      img_folder=args.in_valid_img,
                                      category=category,
                                      resize_scale=[128, 128])
        return dataset_valid

    if args.mode == 'test':
        test_csv = re.sub(r'\.csv$', '', args.in_test_csv)
        dataset_test = KaggleDataset(test_csv,
                                     transform=Compose(),
                                     img_folder=args.in_test_img,
                                     category=category,
                                     resize_scale=[128, 128])
        return dataset_test
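
# A usage sketch for prepare_dataset(), assuming an argparse-style namespace that
# carries the attributes read above; the paths are placeholders, not the
# project's real ones:
from argparse import Namespace

args = Namespace(mode='train',
                 in_train_csv='data/train.csv',
                 in_train_img='data/train_images')
train_set = prepare_dataset(args)                  # KaggleDataset built from train.csv
# train_set = prepare_dataset(args, category=...)  # optionally restrict to one category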
Example #3
def main(tocsv=False, save=False, mask=False, valid_train=False, toiou=False):
    model_name = config['param']['model']
    resize = not config['valid'].getboolean('pred_orig_size')

    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()

    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # Sets the model in evaluation mode.
    model.eval()

    epoch = load_ckpt(model)
    if epoch == 0:
        print("Aborted: checkpoint not found!")
        return

    # prepare dataset
    compose = Compose(augment=False, resize=resize)
    data_dir = 'data/stage1_train' if valid_train else 'data/stage1_test'
    dataset = KaggleDataset(data_dir, transform=compose)
    iter = predict(model, dataset, compose, resize)

    if tocsv:
        with open('result.csv', 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'EncodedPixels'])
            for uid, _, y, y_c, y_m, _, _, _, _ in iter:
                for rle in prob_to_rles(y, y_c, y_m):
                    writer.writerow([uid, ' '.join([str(i) for i in rle])])
    elif toiou and valid_train:
        with open('iou.csv', 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'IoU'])
            for uid, _, y, y_c, y_m, gt, _, _, _ in tqdm(iter):
                iou = get_iou(y, y_c, y_m, gt)
                writer.writerow([uid, iou])
    else:
        for uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m in tqdm(iter):
            if valid_train:
                show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m,
                                 save)
            elif mask:
                save_mask(uid, y, y_c, y_m)
            else:
                show(uid, x, y, y_c, y_m, save)
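
# prob_to_rles() is not shown here. In the Data Science Bowl submission format,
# EncodedPixels is a run-length encoding of one instance mask: 1-indexed
# (start, length) pairs over the mask flattened in column-major order. A minimal
# encoder sketch for a single binary mask, under that assumption:
import numpy as np

def rle_encode(mask):
    pixels = mask.flatten(order='F')                    # column-major, as Kaggle expects
    padded = np.concatenate([[0], pixels, [0]])
    runs = np.where(padded[1:] != padded[:-1])[0] + 1   # 1-indexed run boundaries
    runs[1::2] -= runs[0::2]                            # turn end positions into lengths
    return runs.tolist()                                # [start1, len1, start2, len2, ...]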
Example #4
def main(resume=True, n_epoch=None, learn_rate=None):
    model_name = config['param']['model']
    if learn_rate is None:
        learn_rate = config['param'].getfloat('learn_rate')
    width = config.getint(model_name, 'width')
    weight_map = config['param'].getboolean('weight_map')
    c = config['train']
    log_name = c.get('log_name')
    n_batch = c.getint('n_batch')
    n_worker = c.getint('n_worker')
    n_cv_epoch = c.getint('n_cv_epoch')
    if n_epoch is None:
        n_epoch = c.getint('n_epoch')
    balance_group = c.getboolean('balance_group')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = build_model(model_name)
    model = model.to(device)

    # define optimizer
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learn_rate,
        weight_decay=1e-6
        )

    # dataloader workers are forked processes, so we need an IPC Manager to keep the cache in one shared memory space
    manager = Manager()
    cache = manager.dict()
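    # manager.dict() returns a DictProxy that lives in the Manager's server process,
    # so cache entries written by forked DataLoader workers are visible to every
    # worker and to the parent; presumably KaggleDataset reads and writes this cache
    # to avoid re-decoding the same images each epoch.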
    compose = Compose()
    # prepare dataset
    if os.path.exists('data/valid'):
        # advance mode: use valid folder as CV
        train_dataset = KaggleDataset('data/train', transform=compose, cache=cache)
        valid_dataset = KaggleDataset('data/valid', transform=compose, cache=cache)
    else:
        # auto mode: split part of train dataset as CV
        train_dataset = KaggleDataset('data/train', transform=compose, cache=cache, use_filter=True)
        train_dataset, valid_dataset = train_dataset.split()
    # decide whether to balance training set
    if balance_group:
        weights, ratio = train_dataset.class_weight()
        # The length of weights equals the number of samples in one original epoch.
        # After oversampling for balance, the majority class is sampled the least.
        # Multiplying by ratio gives every sample a chance to be visited at least once per epoch.
        sampler = WeightedRandomSampler(weights, int(len(weights) * ratio))
    else:
        sampler = RandomSampler(train_dataset)
    # data loader
    train_loader = DataLoader(
        train_dataset,
        sampler=sampler,
        batch_size=n_batch,
        num_workers=n_worker,
        pin_memory=torch.cuda.is_available())
    valid_loader = DataLoader(
        valid_dataset,
        shuffle=False,
        batch_size=n_batch,
        num_workers=n_worker)

    # resume checkpoint
    start_epoch = iou_tr = iou_cv = 0
    if resume:
        start_epoch = load_ckpt(model, optimizer)
    if start_epoch == 0:
        print('Grand new training ...')

    # put model to GPU
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    # decide log directory name
    log_dir = os.path.join(
        'logs', log_name, '{}-{}'.format(model_name, width),
        'ep_{},{}-lr_{}'.format(
            start_epoch,
            n_epoch + start_epoch,
            learn_rate,
        )
    )

    with SummaryWriter(log_dir) as writer:
        if start_epoch == 0 and False:
            # dump graph only for very first training, disable by default
            dump_graph(model, writer, n_batch, width)
        print('Training started...')
        for epoch in range(start_epoch + 1, n_epoch + start_epoch + 1): # 1 base
            iou_tr = train(train_loader, model, optimizer, epoch, writer)
            if len(valid_dataset) > 0 and epoch % n_cv_epoch == 0:
                with torch.no_grad():
                    iou_cv = valid(valid_loader, model, epoch, writer, len(train_loader))
            save_ckpt(model, optimizer, epoch, iou_tr, iou_cv)
        print('Training finished...')
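
# class_weight() above is not shown. A minimal sketch of the oversampling idea it
# feeds into, assuming one integer group label per training sample: each sample is
# weighted by 1 / count(its group), and drawing len(weights) * ratio samples with
# replacement lets every sample have a chance to appear within one epoch.
import numpy as np
from torch.utils.data import WeightedRandomSampler

labels = np.array([0, 0, 0, 0, 1, 1, 2])       # hypothetical per-sample group ids
counts = np.bincount(labels)                   # samples per group: [4, 2, 1]
weights = 1.0 / counts[labels]                 # rarer groups get larger weights
ratio = counts.max() / counts.min()            # one plausible choice of ratio
sampler = WeightedRandomSampler(weights.tolist(), int(len(weights) * ratio))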
Example #5
def run_submit(args):    
        
    augment = ['null'] 
    out_dir = args.out_dir + f'/{args.model_name}'
    initial_checkpoint = args.initial_checkpoint
    batch_size = args.batch_size

    ## setup out_dir
    os.makedirs(out_dir +'/submit', exist_ok=True)

    log = Logger()
    log.open(out_dir+'/log.submit.txt',mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('\t%s\n' % COMMON_STRING)
    log.write('\n')
    log.write('\tSEED         = %u\n' % SEED)
    log.write('\t__file__     = %s\n' % __file__)
    log.write('\tout_dir      = %s\n' % out_dir)
    log.write('\n')

    log.write('submitting .... @ %s\n'%str(augment))
    log.write('initial_checkpoint  = %s\n'%initial_checkpoint)
    log.write('\n')

    if 1: #save
        log.write('** dataset setting **\n')
        files_train = [f'train_image_data_{fid}.feather' for fid in range(4)]
        data = read_data(args.data_dir, files_train)
        
        df = pd.read_csv(args.df_path)
        valid_split = np.load(args.data_dir + '/valid_b_fold1_15985.npy').tolist()
        valid_df = df[df['image_id'].isin(valid_split)]

        test_dataset = KaggleDataset(
            df       = df,
            data     = data,
            idx      = valid_df.index.values, 
            augment  = valid_augment,
        )

        log.write('\n')

        ## net
        log.write('** net setting **\n')
        if args.model_name == 'serex50':
            net = Serex50_Net().cuda()
        elif args.model_name == 'effnetb3':
            net = EfficientNet_3().cuda()
        else:
            raise NotImplementedError

        net.load_state_dict(torch.load(initial_checkpoint, map_location=lambda storage, loc: storage), strict=True)

        image_id, truth, probability = do_evaluate(net, test_dataset, batch_size,  augment)


        if 1: #save
            write_list_to_file (out_dir + '/submit/image_id.txt',image_id)
            write_pickle_to_file(out_dir + '/submit/probability.pickle', probability)
            write_pickle_to_file(out_dir + '/submit/truth.pickle', truth)

    if 1:
        image_id = read_list_from_file(out_dir + '/submit/image_id.txt')
        probability = read_pickle_from_file(out_dir + '/submit/probability.pickle')
        truth       = read_pickle_from_file(out_dir + '/submit/truth.pickle')
    num_test= len(image_id)

    if 1:
        recall, average_recall = compute_kaggle_metric(probability, truth)
        log.write('average_recall : %f\n' % (average_recall))

        for i,name in enumerate(TASK_NAME):
            log.write('%28s  %f\n'%(name,recall[i]))
        log.write('\n')
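
# compute_kaggle_metric() is not shown. The Bengali.AI competition metric is a
# weighted average of per-component macro recalls (grapheme root weighted 2,
# vowel and consonant weighted 1 each). A sketch assuming probability and truth
# are lists of three arrays, one per component (class probabilities and integer
# labels respectively):
import numpy as np
from sklearn.metrics import recall_score

def compute_kaggle_metric(probability, truth):
    recall = [recall_score(t, np.argmax(p, axis=1), average='macro')
              for p, t in zip(probability, truth)]
    average_recall = np.average(recall, weights=[2, 1, 1])
    return recall, average_recall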
Example #6
def main(tocsv=False,
         save=False,
         mask=False,
         valid_train=False,
         toiou=False,
         submit_folder=False):
    model_name = config['param']['model']
    resize = not config['valid'].getboolean('pred_orig_size')

    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()

    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # Sets the model in evaluation mode.
    model.eval()

    epoch = load_ckpt(model)
    if epoch == 0:
        print("Aborted: checkpoint not found!")
        return

    # prepare dataset
    compose = Compose(augment=False, resize=resize)
    #data_dir = 'data/stage1_train' if valid_train else 'data/stage1_test'
    #data_dir = 'data/stage1_train' if valid_train else '../bowl_classifier/stage2_test'
    data_dir = 'data/stage1_train' if valid_train else config['param'][
        'CSV_PATH']
    print(data_dir)
    data_dir = re.sub(r'\.csv', '', data_dir)
    dataset = KaggleDataset(data_dir,
                            transform=compose,
                            img_folder=config['param']['img_folder'])
    iter = predict(model, dataset, compose, resize)

    if tocsv:
        if valid_train:
            print('Saving %s/train_result.csv... Done!' % submit_folder)
            with open('%s/train_result.csv' % submit_folder, 'w') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'EncodedPixels'])
                for uid, _, y, y_c, y_m, _, _, _, _ in iter:
                    for rle in prob_to_rles(y, y_c, y_m):
                        writer.writerow([uid, ' '.join([str(i) for i in rle])])
        else:
            print('Saving %s/test_result.csv... Done!' % submit_folder)
            with open('%s/test_result.csv' % submit_folder, 'w') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'EncodedPixels'])
                for uid, _, y, y_c, y_m, _, _, _, _ in iter:
                    for rle in prob_to_rles(y, y_c, y_m):
                        writer.writerow([uid, ' '.join([str(i) for i in rle])])

    elif toiou and valid_train:
        print('Saving %s/iou_train.csv...Done!' % submit_folder)
        with open('%s/iou_train.csv' % submit_folder, 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['ImageId', 'IoU'])
            for uid, _, y, y_c, y_m, gt, _, _, _ in tqdm(iter):
                iou = get_iou(y, y_c, y_m, gt)
                writer.writerow([uid, iou])
    else:
        for uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m in tqdm(iter):
            if valid_train:
                show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m,
                                 save)
            elif mask:
                save_mask(uid, y, y_c, y_m)
            else:
                show(uid, x, y, y_c, y_m, save)

    if valid_train:
        data_dir = 'data/stage1_valid'
        if not os.path.exists(data_dir):
            print(
                '%s does not exist.  It will not generate %s/iou_valid.csv\nBye bye!'
                % (data_dir, submit_folder))
        else:
            dataset = KaggleDataset(data_dir, transform=compose)
            iter = predict(model, dataset, compose, resize)
            if toiou and valid_train:
                print('Saving %s/iou_valid.csv... Done!' % submit_folder)
                with open('%s/iou_valid.csv' % submit_folder, 'w') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerow(['ImageId', 'IoU'])
                    for uid, _, y, y_c, y_m, gt, _, _, _ in tqdm(iter):
                        iou = get_iou(y, y_c, y_m, gt)
                        writer.writerow([uid, iou])
Example #7
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu
        device = torch.device("cuda", int(gpu))
        pos_weights = pos_weights.to(device)

    if not os.path.exists(csv_output):
        os.mkdir(csv_output)

    # TODO: replace these with the real variables before use
    model = RecognizeModel()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    writer = tensorboard.SummaryWriter(training_log_dir)
    picker = DataPicker(path_dict["img"], path_dict["label"], k_fold)
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weights)
    Dataset = KaggleDataset(path_dict)
    DataLoader = None
    iter_test_vals = []
    iter_test_accuracy = []

    model.to(device)

    if not os.path.exists(model_output):
        os.mkdir(model_output)

    for iter in range(iteration):
        train_loader, test_loader = picker.get_loader(batch_size=10)
        temp_test_vals = []
        temp_test_accuracy = []
        start = 0
Example #8
def main(args):

    wandb.init(project="kaggle_cassava_leaf_disease_classification")
    wandb.run.name = args.config
    config = importlib.import_module(f"stage1.{args.config}")
    wandb.save(f"configs/{args.config}.py")
    os.makedirs(f"./result/{args.config}", exist_ok=True)
    config.fold_num = args.fold_num
    print(config.fold_num)

    if config.use_prev_data:
        df = pd.read_csv("./data/split_df.csv")
        # # df = df_[~df_.image_id.isin(invalid_ids)].copy()

        # df_20 = df.loc[(df["source"] == 2020)].copy().reset_index(drop=True)
        # df_20["data_dir"] = "train_images"

        df_20 = df.loc[(df["source"] == 2020)].copy().reset_index(drop=True)
        df_20["data_dir"] = "train_images"

        df_19_0 = df.loc[(df["source"] == 2019)
                         & (df["label"] == 0)].copy().reset_index(drop=True)
        df_19_0["data_dir"] = "train/cbb/"

        df_19_2 = df.loc[(df["source"] == 2019)
                         & (df["label"] == 2)].copy().reset_index(drop=True)
        df_19_2["data_dir"] = "train/cgm/"

        df_19_4 = df.loc[(df["source"] == 2019)
                         & (df["label"] == 4)].copy().reset_index(drop=True)
        df_19_4["data_dir"] = "train/healthy/"

        df = pd.concat([df_20, df_19_0, df_19_2, df_19_4],
                       axis=0).reset_index(drop=True)
        # df = pd.concat([df_20, df_19_0, df_19_2, df_19_4], axis=0).reset_index(drop=True)
    else:
        df = pd.read_csv("./data/train.csv")

        # df = df_[~df_.image_id.isin(invalid_ids)].copy()

    df["kfold"] = -1
    df = df.sample(frac=1).reset_index(drop=True)
    y = df["label"].values
    skf = StratifiedKFold(n_splits=5)

    for fold_num, (train_index, val_index) in enumerate(skf.split(X=df, y=y)):
        df.loc[df.iloc[val_index].index, "kfold"] = fold_num

    train_df = df.loc[df["kfold"] != args.fold_num].reset_index(
        drop=True).copy()
    valid_df = df.loc[df["kfold"] == args.fold_num].reset_index(
        drop=True).copy()

    sampler = None
    if config.upsampling:
        target = train_df.label
        # use float counts so the fractional down-weighting below is not truncated
        class_sample_count = np.unique(target, return_counts=True)[1].astype(np.float64)

        class_sample_count[0] *= 1
        class_sample_count[1] *= 1
        class_sample_count[2] *= 1
        class_sample_count[3] *= 0.7
        class_sample_count[4] *= 1

        weight = 1. / class_sample_count
        samples_weight = weight[target]
        samples_weight = torch.from_numpy(samples_weight)

        sampler = torch.utils.data.WeightedRandomSampler(
            samples_weight, len(samples_weight))

    print("finish data setting")
    print(train_df.head())
    print(valid_df.head())

    train_dataset = KaggleDataset(
        df=train_df,
        transforms=config.train_transforms,
        preprocessing=config.preprocessing,
        mode="train",
    )

    validation_dataset = KaggleDataset(
        df=valid_df,
        transforms=config.valid_transforms,
        preprocessing=config.preprocessing,
        mode="val",
    )

    train_loader = DataLoader(
        train_dataset,
        sampler=sampler,
        batch_size=config.batch_size,
        pin_memory=True,
        num_workers=4,
    )

    valid_loader = DataLoader(
        validation_dataset,
        batch_size=config.batch_size,
        pin_memory=True,
        num_workers=4,
    )

    print("model setting")

    if config.resume_dir is None:
        if "efficientnet" in config.net_type:
            net = MODEL_LIST["effcientnet"](net_type=config.net_type,
                                            pretrained=True,
                                            bn=config.bn)
        elif "vit" in config.net_type:
            net = MODEL_LIST["vit"](net_type=config.net_type, pretrained=True)
        elif "res" in config.net_type:
            net = MODEL_LIST["resnet"](net_type=config.net_type,
                                       pretrained=True)
        elif "hrnet" in config.net_type:
            net = MODEL_LIST["hrnet"](net_type=config.net_type,
                                      pretrained=True)
    else:
        net = MODEL_LIST["pretrained_enet"](
            net_type=config.net_type,
            pretrained_path=
            f"./result/{config.resume_dir}/{config.resume_dir}_fold_{config.fold_num}/{config.resume_dir}_fold_{config.fold_num}_last-checkpoint.bin"
        )

    # if torch.cuda.device_count() > 1:
    #     net = torch.nn.DataParallel(net, device_ids=[0,1,2,3])
    #     config.lr = config.lr * torch.cuda.device_count()

    net = net.to(device)

    wandb.watch(net, log="all")

    runner = PyTorchTrainer(model=net,
                            device=device,
                            config=config,
                            fold_num=args.fold_num)
    if config.resume:
        print("load model")

        runner.load(
            f"./result/{config.dir}/{config.dir}_fold_{config.fold_num}/{config.dir}_fold_{config.fold_num}_last-checkpoint.bin"
        )

    runner.fit(train_loader=train_loader, validation_loader=valid_loader)
Example #9
def main(ckpt, tocsv=False, save=False, mask=False, target='test', toiou=False):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load one or more checkpoint
    models = []
    for fn in ckpt or [None]:
        # load model
        model = load_ckpt(filepath=fn)
        if not model:
            print("Aborted: checkpoint {} not found!".format(fn))
            return
        # Sets the model in evaluation mode.
        model.eval()
        # put model to GPU
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)
        model = model.to(device)
        # append to model list
        models.append(model)

    resize = not config['valid'].getboolean('pred_orig_size')
    compose = Compose(augment=False, resize=resize)
    # decide which dataset to pick sample
    data_dir = os.path.join('data', target)
    if target == 'test':
        dataset = KaggleDataset(data_dir, transform=compose)
    elif os.path.exists('data/valid'):
        # advance mode: use valid folder as CV
        dataset = KaggleDataset(data_dir, transform=compose)
    else:
        # auto mode: split part of train dataset as CV
        dataset = KaggleDataset('data/train', transform=compose, use_filter=True)
        if target == 'train':
            dataset, _ = dataset.split()
        elif target == 'valid':
            _, dataset = dataset.split()

    # iterate dataset and inference each sample
    ious = []
    writer = csvfile = None
    for data in tqdm(dataset):
        with torch.no_grad():
            uid, y, y_c, y_m = inference(data, models, resize)
            x, gt, gt_s, gt_c, gt_m = unpack_data(data, compose, resize)

        if tocsv:
            if writer is None:
                csvfile = open('result.csv', 'w')
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'EncodedPixels'])
            for rle in prob_to_rles(y, y_c, y_m):
                writer.writerow([uid, ' '.join([str(i) for i in rle])])
        elif toiou:
            assert target != 'test'
            if writer is None:
                csvfile = open('iou.csv', 'w')
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'IoU'])
            iou = get_iou(y, y_c, y_m, gt)
            writer.writerow([uid, iou])
            ious.append(iou)
        elif mask:
            save_mask(uid, y, y_c, y_m)
        elif target == 'test':
            show(uid, x, y, y_c, y_m, save)
        else: # train or valid
            show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m, save)

    # end of for-loop
    if csvfile is not None:
        csvfile.close()
    if toiou:
        print('\nIoU Metrics:\n mean: {0:.4f}\t std: {1:.4f}\t max: {2:.4f}\t min: {3:.4f}\t count: {4}\n'
            .format(np.mean(ious), np.std(ious), np.max(ious), np.min(ious), len(ious)))
Example #10
                                map_location=lambda storage, loc: storage)
        net2.load_state_dict(state_dict, strict=True)

        moving_average(net, net2, 1. / (i + 2))

    ## dataset ----------------------------------------
    files_train = [f'train_image_data_{fid}.feather' for fid in range(4)]
    data = read_data(args.data_dir, files_train)

    df = pd.read_csv(args.df_path)
    train_split = np.load(args.data_dir + '/train_b_fold1_184855.npy').tolist()
    train_df = df[df['image_id'].isin(train_split)]

    train_dataset = KaggleDataset(
        df=df,
        data=data,
        idx=train_df.index.values,
        augment=train_augment if args.use_gridmask else valid_augment,
    )

    train_loader = DataLoader(train_dataset,
                              sampler=RandomSampler(train_dataset),
                              batch_size=args.batch_size,
                              drop_last=True,
                              num_workers=4,
                              pin_memory=True,
                              collate_fn=null_collate)

    net.cuda()
    bn_update(train_loader, net)
    torch.save(net.state_dict(),
               args.out_dir + f'/{args.model_name}/' + output_name)
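
# moving_average() and bn_update() are not shown; this snippet averages several
# checkpoints (stochastic-weight-averaging style) and then recomputes BatchNorm
# statistics on the training data. A minimal sketch of the parameter averaging,
# assuming equal weighting of the checkpoints seen so far:
def moving_average(net, net2, alpha):
    # blend net2 into net: net <- (1 - alpha) * net + alpha * net2
    for p, p2 in zip(net.parameters(), net2.parameters()):
        p.data.mul_(1.0 - alpha).add_(p2.data, alpha=alpha)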
Example #11
    # print('id_ ' + id_ + '\t' + f_content[id_]) # debug
    fn.write(f_content[id_])    
fn.close()



#   _   _   ____    ____       _      _____   _____            ____   ____   __     __
#  | | | | |  _ \  |  _ \     / \    |_   _| | ____|          / ___| / ___|  \ \   / /
#  | | | | | |_) | | | | |   / _ \     | |   |  _|    _____  | |     \___ \   \ \ / / 
#  | |_| | |  __/  | |_| |  / ___ \    | |   | |___  |_____| | |___   ___) |   \ V /  
#   \___/  |_|     |____/  /_/   \_\   |_|   |_____|          \____| |____/     \_/   
#     


CSV_FILE         = re.sub(r'\.csv', '', CSV_OUT_PATH)
dataset_test     = KaggleDataset(CSV_FILE, transform=Compose(), img_folder=TEST_IMG_DIR, resize_scale=[128, 128])
confidence_alert = 0

valid_idx = range(len(dataset_test))
valid_loader = DataLoader(dataset_test, sampler=SubsetRandomSampler(valid_idx), batch_size=4, num_workers=2)

# network
net = VGG('VGG16')
print(net)
net.cuda()
net.eval()
net.load_state_dict(torch.load(MODEL_IN_PATH))

invert_majorlabel = {v:k for k,v in dataset_test.majorlabels.items()}

for i, data in enumerate(valid_loader, 0):
Example #12
def main(resume=True, n_epoch=None, learn_rate=None):
    model_name = config['param']['model']
    cv_ratio = config['param'].getfloat('cv_ratio')
    if learn_rate is None:
        learn_rate = config['param'].getfloat('learn_rate')
    width = config[model_name].getint('width')
    weight_map = config['param'].getboolean('weight_map')
    c = config['train']
    log_name = c.get('log_name')
    n_batch = c.getint('n_batch')
    n_worker = c.getint('n_worker')
    n_ckpt_epoch = c.getint('n_ckpt_epoch')
    if n_epoch is None:
        n_epoch = c.getint('n_epoch')

    # initialize model
    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()

    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # define optimizer
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learn_rate,
        weight_decay=1e-6
        )

    # dataloader workers are forked process thus we need a IPC manager to keep cache in same memory space
    manager = Manager()
    cache = manager.dict()
    # prepare dataset and loader
    dataset = KaggleDataset('data/stage1_train', transform=Compose(), cache=cache)
    train_idx, valid_idx = dataset.split()
    train_loader = DataLoader(
        dataset, sampler=SubsetRandomSampler(train_idx),
        batch_size=n_batch,
        num_workers=n_worker,
        pin_memory=torch.cuda.is_available())
    valid_loader = DataLoader(
        dataset, sampler=SubsetRandomSampler(valid_idx),
        batch_size=n_batch,
        num_workers=n_worker)

    # resume checkpoint
    start_epoch = 0
    if resume:
        start_epoch = load_ckpt(model, optimizer)
    if start_epoch == 0:
        print('Grand new training ...')

    # decide log directory name
    log_dir = os.path.join(
        'logs', log_name, '{}-{}'.format(model_name, width),
        'ep_{},{}-lr_{}'.format(
            start_epoch,
            n_epoch + start_epoch,
            learn_rate,
        )
    )

    with SummaryWriter(log_dir) as writer:
        if start_epoch == 0 and False:
            # dump graph only for very first training, disable by default
            dump_graph(model, writer, n_batch, width)
        print('Training started...')
        for epoch in range(start_epoch, n_epoch + start_epoch):
            train(train_loader, model, optimizer, epoch, writer)
            if cv_ratio > 0 and epoch % 3 == 2:
                valid(valid_loader, model, epoch, writer, len(train_loader))
            # save checkpoint per n epoch
            if epoch % n_ckpt_epoch == n_ckpt_epoch - 1:
                save_ckpt(model, optimizer, epoch+1)
        print('Training finished...')
Example #13
def run_train(args):
    
    out_dir = args.out_dir + '/' + args.model_name
    use_gridmask = args.use_gridmask
    initial_checkpoint = args.initial_checkpoint
    
    if args.scheduler_name == 'null':
        scheduler = NullScheduler(lr=0.001)
    else:
        scheduler = CyclicScheduler0(min_lr=0.00001, max_lr=0.00005, period=750, ratio=1)
    
    iter_accum = 1
    batch_size = args.batch_size

    # set-up directories
    for f in ['checkpoint'] : os.makedirs(out_dir +'/'+f, exist_ok=True)

    log = Logger()
    log.open(out_dir+'/log.train.txt',mode='a')
    log.write('\n--- [START %s] %s\n\n' % (IDENTIFIER, '-' * 64))
    log.write('\t%s\n' % COMMON_STRING)
    log.write('\n')

    log.write('\tSEED         = %u\n' % SEED)
    log.write('\t__file__     = %s\n' % __file__)
    log.write('\tout_dir      = %s\n' % out_dir)
    log.write('\n')


    ## dataset ----------------------------------------
    log.write('** dataset setting **\n')
    files_train = [f'train_image_data_{fid}.feather' for fid in range(4)]
    data = read_data(args.data_dir, files_train)
    
    df = pd.read_csv(args.df_path)
    train_split = np.load(args.data_dir + '/train_b_fold1_184855.npy').tolist()
    valid_split = np.load(args.data_dir + '/valid_b_fold1_15985.npy').tolist()

    train_df = df[df['image_id'].isin(train_split)]
    valid_df = df[df['image_id'].isin(valid_split)]

    train_dataset = KaggleDataset(
        df       = df,
        data     = data,
        idx      = train_df.index.values, 
        augment  = train_augment if use_gridmask else valid_augment,
    )

    train_loader  = DataLoader(
        train_dataset,
        sampler     = RandomSampler(train_dataset),
        batch_size  = batch_size,
        drop_last   = True,
        num_workers = 4,
        pin_memory  = True,
        collate_fn  = null_collate
    )

    valid_dataset = KaggleDataset(
        df       = df,
        data     = data,
        idx      = valid_df.index.values, 
        augment  = valid_augment,
    )

    valid_loader = DataLoader(
        valid_dataset,
        sampler     = SequentialSampler(valid_dataset),
        batch_size  = batch_size,
        drop_last   = False,
        num_workers = 4,
        pin_memory  = True,
        collate_fn  = null_collate
    )

    assert(len(train_dataset)>=batch_size)
    log.write('batch_size = %d\n'%(batch_size))
    log.write('\n')

    ## net ----------------------------------------
    log.write('** net setting **\n')
    
    if args.model_name == 'serex50':
        net = Serex50_Net().cuda()
    elif args.model_name == 'effnetb3':
        net = EfficientNet_3().cuda()
    else:
        raise NotImplementedError
    
    log.write('\tinitial_checkpoint = %s\n' % initial_checkpoint)

    if initial_checkpoint is not None:
        state_dict = torch.load(initial_checkpoint, map_location=lambda storage, loc: storage)
        net.load_state_dict(state_dict,strict=True) 
    else:
        if args.model_name == 'serex50':
            net.load_pretrain(is_print=False)
        else:
            pass

    log.write('net=%s\n'%(type(net)))
    log.write('\n')

    if args.optimizer_name == 'AdamW':
        optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, net.parameters()), lr=scheduler(0), weight_decay=1e-4)
    else:
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad, net.parameters()), lr=scheduler(0), momentum=0.0, weight_decay=1e-4)
    
    num_iters   = 3000*1000
    iter_smooth = 50
    iter_log    = 250
    iter_valid  = 500
    iter_save   = [0, num_iters-1]\
                   + list(range(0, num_iters, 1000))#1*1000

    start_iter = 0
    start_epoch= 0
    rate       = 0

    if initial_checkpoint is not None:
        initial_optimizer = initial_checkpoint.replace('_model.pth','_optimizer.pth')
        if os.path.exists(initial_optimizer):
            checkpoint  = torch.load(initial_optimizer)
            start_iter  = checkpoint['iter' ]
            start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
        pass

    log.write('optimizer\n  %s\n' % (optimizer))
    log.write('scheduler\n  %s\n' % (scheduler))
    log.write('\n')

    ## start training here! ##############################################
    log.write('** start training here! **\n')
    log.write('   batch_size=%d,  iter_accum=%d\n'%(batch_size,iter_accum))
    log.write('   experiment  = %s\n' % str(__file__.split('/')[-2:]))
    log.write('                    |----------------------- VALID------------------------------------|------- TRAIN/BATCH -----------\n')
    log.write('rate    iter  epoch | kaggle                    | loss               acc              | loss             | time       \n')
    log.write('----------------------------------------------------------------------------------------------------------------------\n')

    def message(rate, iter, epoch, kaggle, valid_loss, train_loss, batch_loss, mode='print'):
        if mode==('print'):
            asterisk = ' '
            loss = batch_loss
        if mode==('log'):
            asterisk = '*' if iter in iter_save else ' '
            loss = train_loss

        text = \
            '%0.5f %5.1f%s %4.1f | '%(rate, iter/1000, asterisk, epoch,) +\
            '%0.4f : %0.4f %0.4f %0.4f | '%(kaggle[1],*kaggle[0]) +\
            '%4.4f, %4.4f, %4.4f : %4.4f, %4.4f, %4.4f | '%(*valid_loss,) +\
            '%4.4f, %4.4f, %4.4f |'%(*loss,) +\
            '%s' % (time_to_str((timer() - start_timer),'min'))

        return text

    kaggle = ((0, 0, 0), 0)   # (per-task recalls, average recall), as unpacked in message()
    valid_loss = np.zeros(6,np.float32)
    train_loss = np.zeros(3,np.float32)
    batch_loss = np.zeros_like(train_loss)
    iter = 0
    i    = 0

    start_timer = timer()
    while  iter<num_iters:
        sum_train_loss = np.zeros_like(train_loss)
        sum_train = np.zeros_like(train_loss)

        optimizer.zero_grad()
        for t, (input, truth, infor) in enumerate(train_loader):

            input, truth, shuffled_truth, lam = cutmix(input, truth,alpha=0.3)

            batch_size = len(infor)
            iter  = i + start_iter
            epoch = (iter-start_iter)*batch_size/len(train_dataset) + start_epoch

            if (iter % iter_valid==0):
                valid_loss, kaggle = do_valid(net, valid_loader, out_dir) #
                pass

            if (iter % iter_log==0):
                print('\r',end='',flush=True)
                log.write(message(rate, iter, epoch, kaggle, valid_loss, train_loss, batch_loss, mode='log'))
                log.write('\n')

            if iter in iter_save:
                torch.save({
                    'optimizer': optimizer.state_dict(),
                    'iter'     : iter,
                    'epoch'    : epoch,
                }, out_dir +'/checkpoint/%08d_optimizer.pth'%(iter))
                if iter!=start_iter:
                    torch.save(net.state_dict(),out_dir +'/checkpoint/%08d_model.pth'%(iter))
                    pass

            # learning rate scheduler -------------
            lr = scheduler(iter)
            if lr<0 : break
            adjust_learning_rate(optimizer, lr)
            rate = get_learning_rate(optimizer)

            net.train()
            
            input = input.cuda()
            truth = [t.cuda() for t in truth]
            shuffled_truth = [t.cuda() for t in shuffled_truth]

            logit = net(input) 
            probability = logit_to_probability(logit)

            loss = cutmix_criterion(logit, truth, shuffled_truth, lam)
        
            ((loss[0]+loss[1]+loss[2] )/iter_accum).backward()
        
            if (iter % iter_accum)==0:
                optimizer.step()
                optimizer.zero_grad()

            loss = [l.item() for l in loss]
            l = np.array([ *loss, ])*batch_size
            n = np.array([ 1, 1, 1 ])*batch_size
            batch_loss      = l/(n+1e-8)
            sum_train_loss += l
            sum_train      += n
            if iter%iter_smooth == 0:
                train_loss = sum_train_loss/(sum_train+1e-12)
                sum_train_loss[...] = 0
                sum_train[...]      = 0

            print('\r',end='',flush=True)
            print(message(rate, iter, epoch, kaggle, valid_loss, train_loss, batch_loss, mode='print'), end='',flush=True)
            i=i+1

        pass  #-- end of one data loader --
    pass #-- end of all iterations --

    log.write('\n')
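
# cutmix() and cutmix_criterion() are not defined in this snippet. A minimal sketch
# of the usual CutMix objective, assuming truth / shuffled_truth are the labels of
# the original and pasted images and lam is the area fraction kept from the
# original (written for a single classification head; the loop above applies the
# same idea to each of its three heads):
import torch.nn.functional as F

def cutmix_criterion(logit, truth, shuffled_truth, lam):
    # mix the two cross-entropy terms by the kept / pasted area fractions
    return (lam * F.cross_entropy(logit, truth)
            + (1.0 - lam) * F.cross_entropy(logit, shuffled_truth))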
Example #14
def main():

    # wandb.init(project="kaggle_cassava_leaf_disease_classification")
    # wandb.run.name = args.config
    # wandb.save(f"configs/{args.config}.py")
    os.makedirs(f"./checkpoint/{config.dir}", exist_ok=True)
    config.fold_num = args.fold_num
    print(config.fold_num)

    invalid_ids =  ['274726002.jpg',
                    '9224019.jpg',
                    '159654644.jpg',
                    '199112616.jpg',
                    '226533928.jpg',
                    '262902341.jpg',
                    '269713568.jpg',
                    '384390206.jpg',
                    '390601409.jpg',
                    '421035788.jpg',
                    '457405364.jpg',
                    '600736721.jpg',
                    '580111608.jpg',
                    '616718743.jpg',
                    '695438825.jpg',
                    '723564013.jpg',
                    '826231979.jpg',
                    '847847826.jpg',
                    '927165736.jpg',
                    '1004389140.jpg',
                    '1008244905.jpg',
                    '1338159402.jpg',
                    '1339403533.jpg',
                    '1366430957.jpg',
                    '9224019.jpg',
                    '4269208386.jpg',
                    '4239074071.jpg',
                    '3810809174.jpg',
                    '3652033201.jpg',
                    '3609350672.jpg',
                    '3609986814.jpg',
                    '3477169212.jpg',
                    '3435954655.jpg',
                    '3425850136.jpg',
                    '3251960666.jpg',
                    '3252232501.jpg',
                    '3199643560.jpg',
                    '3126296051.jpg',
                    '3040241097.jpg',
                    '2981404650.jpg',
                    '2925605732.jpg',
                    '2839068946.jpg',
                    '2698282165.jpg',
                    '2604713994.jpg',
                    '2415837573.jpg',
                    '2382642453.jpg',
                    '2321669192.jpg',
                    '2320471703.jpg',
                    '2278166989.jpg',
                    '2276509518.jpg',
                    '2262263316.jpg',
                    '2182500020.jpg',
                    '2139839273.jpg',
                    '2084868828.jpg',
                    '1848686439.jpg',
                    '1689510013.jpg',
                    '1359893940.jpg']

    if config.use_prev_data:
        df = pd.read_csv("./data/merged.csv")
        df = df[~df.image_id.isin(invalid_ids)]    

        # df_20 = df.loc[(df["source"] == 2020)].copy().reset_index(drop=True)
        # df_20["data_dir"] = "train_images"

        df_20 = df.loc[(df["source"] == 2020) & (df["label"] != 3)].copy().reset_index(drop=True)
        df_20["data_dir"] = "train_images"
        df_20_3 = df.loc[(df["source"] == 2020) & (df["label"] == 3)].copy().reset_index(drop=True)

        df_20_3 = df_20_3.sample(frac=0.7)
        df_20_3["data_dir"] = "train_images"



        df_19_0 = df.loc[(df["source"] == 2019) & (df["label"] == 0)].copy().reset_index(drop=True)
        df_19_0["data_dir"] = "train/cbb/"

        df_19_2 = df.loc[(df["source"] == 2019) & (df["label"] == 2)].copy().reset_index(drop=True)
        df_19_2["data_dir"] = "train/cgm/"

        df_19_4 = df.loc[(df["source"] == 2019) & (df["label"] == 4)].copy().reset_index(drop=True)
        df_19_4["data_dir"] = "train/healthy/"

        df = pd.concat([df_20, df_20_3, df_19_0, df_19_2, df_19_4], axis=0).reset_index(drop=True).sample(frac=0.2)
        # df = pd.concat([df_20, df_19_0, df_19_2, df_19_4], axis=0).reset_index(drop=True)
    else:
        df = pd.read_csv("./data/train.csv")

    df["kfold"] = -1
    df = df.sample(frac=1).reset_index(drop=True)
    y = df["label"].values
    skf = StratifiedKFold(n_splits=5)
    
    for fold_num, (train_index, val_index) in enumerate(skf.split(X=df, y=y)):
        df.loc[df.iloc[val_index].index, "kfold"] = fold_num

    train_df = df.loc[df["kfold"] != args.fold_num].reset_index(drop=True).copy()
    valid_df = df.loc[df["kfold"] == args.fold_num].reset_index(drop=True).copy()


    print("finish data setting")
    print(train_df.head())
    print(valid_df.head())

    train_dataset = KaggleDataset(
        df=train_df,
        transforms=config.train_transforms,
        preprocessing=config.preprocessing,
        mode="train",
        ind=False,
    )

    validation_dataset = KaggleDataset(
        df=valid_df,
        transforms=config.valid_transforms,
        preprocessing=config.preprocessing,
        mode="val",
        ind=False,

    )

    train_loader = DataLoader(
        train_dataset,
        # sampler=BalanceClassSampler(labels=train_dataset.get_labels(), mode="upsampling"),
        batch_size=45,
        pin_memory=True,
        num_workers=4,
    )
    

    valid_loader = DataLoader(
        validation_dataset,
        batch_size=32,
        pin_memory=True,
        num_workers=4,
    )

    print("model setting")


    


    s_net = CassavaNet(net_type=config.net_type, pretrained=True, bn=config.bn)

    t_net = MODEL_LIST["effcientnet"](net_type="tf_efficientnet_b4_ns", pretrained=True, bn=False)
    ch = torch.load(chs[args.fold_num])
    t_net.load_state_dict(ch["model_state_dict"], strict=True)
    t_net = t_net.cuda()
    t_net.eval()

    optimizer, scheduler = get_optimizer(s_net, config.optimizer_name, config.optimizer_params, 
                                                config.scheduler_name, config.scheduler_params, config.n_epochs)

    criterion = SoftTarget(T=4.0).cuda()


    # wandb.watch(net, log="all")

    logname = f"checkpoint/{config.dir}/" + s_net.__class__.__name__ + \
            '_' + "stage2_" + f'{args.fold_num}.csv'
    if not os.path.exists(logname):
        with open(logname, 'w') as logfile:
            logwriter = csv.writer(logfile, delimiter=',')
            logwriter.writerow(
                ['epoch', 'train loss', 'train acc', 'test loss', 'test acc'])

    start_epoch=0


    s_net = s_net.to(device)
    for epoch in range(start_epoch, config.n_epochs):
        print("lr: ", optimizer.param_groups[0]['lr'])
        if epoch < config.freeze_bn_epoch:
            print("freeze_batch_norm")
            s_net.freeze_batchnorm_stats()
        train_loss, train_acc = train(epoch, train_loader, t_net, s_net, criterion, optimizer)
        test_loss, test_acc = test(epoch, valid_loader, t_net, s_net, criterion)

        with open(logname, 'a') as logfile:
            logwriter = csv.writer(logfile, delimiter=',')
            logwriter.writerow([epoch, train_loss, train_acc, test_loss, test_acc])
        scheduler.step()
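
# SoftTarget(T=4.0) above is the knowledge-distillation criterion between the
# student s_net and the frozen teacher t_net. A minimal sketch, assuming it is the
# standard temperature-scaled KL divergence between student and teacher logits
# (the T*T factor keeps gradient magnitudes comparable across temperatures):
import torch.nn as nn
import torch.nn.functional as F

class SoftTarget(nn.Module):
    def __init__(self, T):
        super().__init__()
        self.T = T

    def forward(self, student_logit, teacher_logit):
        return F.kl_div(F.log_softmax(student_logit / self.T, dim=1),
                        F.softmax(teacher_logit / self.T, dim=1),
                        reduction='batchmean') * self.T * self.T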