Exemple #1
0
    # General training hyperparameters. The enclosing class header and the
    # code that consumes most of these values are outside this excerpt, so the
    # notes below are limited to what the names and values themselves show.
    n_epochs = 10000
    input_size = 256
    batch_size = 30
    lr = 3e-4
    weight_decay = 5e-5
    num_workers = 8
    # presumably the label value that the loss should skip -- TODO confirm
    ignore_index = 250
    """ Scribble2Label Params """
    # NOTE(review): the four values below are grouped as Scribble2Label
    # settings by the string above; their exact semantics (epoch threshold,
    # update period, confidence threshold, mixing factor?) are inferred from
    # the names only -- confirm against the trainer that reads them.
    thr_epoch = 100
    period_epoch = 5
    thr_conf = 0.8
    alpha = 0.2


# Script entry point: seeds, builds the model, splits the dataframe by fold and
# prepares the datasets / training loader. The excerpt ends after the training
# loader; whatever consumes these objects is not visible here.
if __name__ == '__main__':
    # Seed before anything else is constructed (seed_everything is defined
    # elsewhere; presumably it seeds every RNG in use -- TODO confirm).
    seed_everything(config.seed)

    # U-Net with a resnet50 encoder using 'imagenet' weights, batch-norm and
    # 'scse' attention in the decoder, two output classes and no output
    # activation configured.
    model = Unet(encoder_name='resnet50', encoder_weights='imagenet', decoder_use_batchnorm=True,
                 decoder_attention_type='scse', classes=2, activation=None)

    # Rows whose `fold` column equals config.fold become the validation split;
    # every other row is used for training. Indices are reset on both splits.
    df = pd.read_csv(config.df_path)
    train_df = df[df.fold != config.fold].reset_index(drop=True)
    valid_df = df[df.fold == config.fold].reset_index(drop=True)
    transforms = get_transforms(config.input_size, need=('train', 'val'))

    # Both datasets read from the same directories; they differ in the
    # dataframe, the transform set, and whether sample ids are returned
    # (only the validation dataset asks for ids).
    train_dataset = dsbDataset(config.data_dir, config.scr_dir, config.mask_dir, train_df,
                               tfms=transforms['train'], return_id=False)
    valid_dataset = dsbDataset(config.data_dir, config.scr_dir, config.mask_dir, valid_df,
                               tfms=transforms['val'], return_id=True)
    # Training batches are shuffled.
    train_loader = DataLoader(dataset=train_dataset, batch_size=config.batch_size, num_workers=config.num_workers,
                              shuffle=True)
Exemple #2
0
    # Per-process settings; only the local rank taken from the parsed
    # arguments is recorded in the visible part of this function.
    distrib_config = {}
    distrib_config['LOCAL_RANK'] = args.local_rank

    # Resolve the working directories from the training config and make sure
    # the logging directory (a sub-path of the root directory) exists.
    root_dir = Path(train_config['DIRECTORY']['ROOT_DIRECTORY'])
    data_dir = Path(train_config['DIRECTORY']['DATA_DIRECTORY'])
    log_dir = root_dir / train_config['DIRECTORY']['LOGGER_DIRECTORY']
    log_dir.mkdir(exist_ok=True, parents=True)

    # Only the process with local rank 0 creates the main logger, so every
    # later use of main_logger must be guarded by the same rank check.
    if distrib_config['LOCAL_RANK'] == 0:
        main_logger = init_logger(log_dir, 'train_main.log')

    # NOTE(review): benchmark=True lets cuDNN pick convolution algorithms by
    # timing them, which can differ between runs, while deterministic=True and
    # the seeding below suggest reproducibility is wanted. Confirm whether
    # benchmark should be False here.
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True
    SEED = train_config['SEED']
    seed_everything(SEED)
    # Record the full training configuration once, from rank 0 only.
    if distrib_config['LOCAL_RANK'] == 0:
        main_logger.info(train_config)

    # if "DEVICE_LIST" in train_config:
    #     os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, train_config["DEVICE_LIST"]))

    # if len(train_config['DEVICE_LIST']) > 1:
    #     distrib_config['DISTRIBUTED'] = True
    #     torch.cuda.set_device(distrib_config['LOCAL_RANK'])
    #     torch.distributed.init_process_group(backend='nccl',
    #                                          init_method='env://')
    #     distrib_config['WORLD_SIZE'] = torch.distributed.get_world_size()
    #     train_config['OPTIMIZER']['ARGS']['lr'] = train_config['OPTIMIZER']['ARGS']['lr'] * float(
    #         train_config['BATCH_SIZE'] * distrib_config['WORLD_SIZE']) / 256
    # else:
    # Assemble the list of model input columns from the enabled feature groups.
    if config.cat_emb_dim > 0:  # when cat_emb_dim > 0 the model uses the categorical columns
        # Categorical columns are placed in front of the existing inputs.
        input_cols = cate_cols + input_cols
    if config.use_autoencoder:
        # Autoencoder columns are appended after the existing inputs.
        input_cols = input_cols + ae_cols

    if config.normalize:
        pass  # TODO; add normalization

    is_cuda = config.device == 'cuda'

    # Accumulators filled by later code (presumably out-of-fold predictions,
    # their targets and per-run scores -- TODO confirm against the loop body).
    oof_preds = []
    oof_targets = []
    scores = []

    # config.seed is indexed, so it is a sequence; only its first entry is used.
    seed = config.seed[0]  # use one seed
    seed_everything(seed)

    for search_iter in range(NUM_SEARCH):
        # change
        rp(config)

        for fold in [0]:
            # for fold in range(config.n_folds):
            print(f'[[ ITER {search_iter} Fold {fold} ]]')
            fold_train = train_features[
                train_features.fold != fold].reset_index(drop=True)
            fold_valid = train_features[train_features.fold ==
                                        fold].reset_index(drop=True)

            input_features = []
            if config.cat_emb_dim > 0:  # cat_emb_dim이 0보다 크면 모델에서 cate col 사용; 항상 제일 앞에 와야함