n_epochs = 10000
input_size = 256
batch_size = 30
lr = 3e-4
weight_decay = 5e-5
num_workers = 8
ignore_index = 250

""" Scribble2Label Params """
thr_epoch = 100
period_epoch = 5
thr_conf = 0.8
alpha = 0.2


if __name__ == '__main__':
    seed_everything(config.seed)

    model = Unet(encoder_name='resnet50',
                 encoder_weights='imagenet',
                 decoder_use_batchnorm=True,
                 decoder_attention_type='scse',
                 classes=2,
                 activation=None)

    # fold split: the held-out fold becomes the validation set
    df = pd.read_csv(config.df_path)
    train_df = df[df.fold != config.fold].reset_index(drop=True)
    valid_df = df[df.fold == config.fold].reset_index(drop=True)

    transforms = get_transforms(config.input_size, need=('train', 'val'))
    train_dataset = dsbDataset(config.data_dir, config.scr_dir, config.mask_dir,
                               train_df, tfms=transforms['train'], return_id=False)
    valid_dataset = dsbDataset(config.data_dir, config.scr_dir, config.mask_dir,
                               valid_df, tfms=transforms['val'], return_id=True)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=config.batch_size,
                              num_workers=config.num_workers,
                              shuffle=True)
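# For context, the Scribble2Label params above map onto the paper's
# pseudo-labeling scheme: after a warm-up of `thr_epoch` epochs, every
# `period_epoch` epochs the running average of predictions is refreshed with
# EMA factor `alpha`, and unscribbled pixels whose averaged confidence clears
# `thr_conf` receive pseudo-labels; everything else stays at `ignore_index`.
# A minimal sketch of that update, with illustrative tensor names
# (`ema_prob`, `scribble`) that are assumptions, not taken from this repo:
import torch

def update_pseudo_labels(ema_prob, new_prob, scribble,
                         alpha=0.2, thr_conf=0.8, ignore_index=250):
    """ema_prob / new_prob: (N, H, W) foreground probabilities in [0, 1];
    scribble: (N, H, W) long tensor with 0/1 scribbles, ignore_index elsewhere."""
    # exponential moving average over the prediction history
    ema_prob = alpha * new_prob + (1 - alpha) * ema_prob
    target = torch.full_like(scribble, ignore_index)
    target[ema_prob >= thr_conf] = 1        # confident foreground -> pseudo-label 1
    target[ema_prob <= 1 - thr_conf] = 0    # confident background -> pseudo-label 0
    scribbled = scribble != ignore_index
    target[scribbled] = scribble[scribbled]  # annotated scribbles always take priority
    return ema_prob, target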
distrib_config = {}
distrib_config['LOCAL_RANK'] = args.local_rank

root_dir = Path(train_config['DIRECTORY']['ROOT_DIRECTORY'])
data_dir = Path(train_config['DIRECTORY']['DATA_DIRECTORY'])
log_dir = root_dir / train_config['DIRECTORY']['LOGGER_DIRECTORY']
log_dir.mkdir(exist_ok=True, parents=True)
if distrib_config['LOCAL_RANK'] == 0:
    # only the main process writes the log file
    main_logger = init_logger(log_dir, 'train_main.log')

# note: benchmark=True lets cuDNN autotune conv kernels, which can undermine
# the determinism requested on the line below
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.enabled = True
torch.backends.cudnn.deterministic = True

SEED = train_config['SEED']
seed_everything(SEED)
if distrib_config['LOCAL_RANK'] == 0:
    main_logger.info(train_config)

# if "DEVICE_LIST" in train_config:
#     os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, train_config["DEVICE_LIST"]))
#     if len(train_config['DEVICE_LIST']) > 1:
#         distrib_config['DISTRIBUTED'] = True
#         torch.cuda.set_device(distrib_config['LOCAL_RANK'])
#         torch.distributed.init_process_group(backend='nccl',
#                                              init_method='env://')
#         distrib_config['WORLD_SIZE'] = torch.distributed.get_world_size()
#         train_config['OPTIMIZER']['ARGS']['lr'] = train_config['OPTIMIZER']['ARGS']['lr'] * float(
#             train_config['BATCH_SIZE'] * distrib_config['WORLD_SIZE']) / 256
#     else:
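# For reference, a minimal sketch of a typical `seed_everything` helper like
# the one called above; the actual implementation in this repo may differ.
import os
import random

import numpy as np
import torch

def seed_everything(seed: int) -> None:
    """Fix all RNG seeds so runs are reproducible."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)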
if config.cat_emb_dim > 0:  # if cat_emb_dim > 0, the model uses the categorical columns
    input_cols = cate_cols + input_cols
if config.use_autoencoder:
    input_cols = input_cols + ae_cols
if config.normalize:
    pass  # TODO; add normalization

is_cuda = config.device == 'cuda'
oof_preds = []
oof_targets = []
scores = []

seed = config.seed[0]  # use one seed
seed_everything(seed)

for search_iter in range(NUM_SEARCH):
    rp(config)  # change the search hyperparameters for this iteration
    for fold in [0]:  # for fold in range(config.n_folds):
        print(f'[[ ITER {search_iter} Fold {fold} ]]')
        fold_train = train_features[train_features.fold != fold].reset_index(drop=True)
        fold_valid = train_features[train_features.fold == fold].reset_index(drop=True)

        input_features = []
        # if cat_emb_dim > 0, the model uses the categorical columns;
        # they must always come first in the feature list
        if config.cat_emb_dim > 0:
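# The "must come first" ordering matters because a tabular model can then
# slice the categorical columns off the front of the input tensor and route
# them through embedding layers. A minimal sketch of that pattern, assuming
# hypothetical names (`TabularNet`, `cat_dims`) that are not from this repo:
import torch
import torch.nn as nn

class TabularNet(nn.Module):
    def __init__(self, cat_dims: list, cat_emb_dim: int, n_cont: int):
        super().__init__()
        # one embedding table per categorical column
        self.embeddings = nn.ModuleList(
            [nn.Embedding(n_cat, cat_emb_dim) for n_cat in cat_dims])
        self.n_cat = len(cat_dims)
        self.head = nn.Linear(self.n_cat * cat_emb_dim + n_cont, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # categorical columns occupy the first self.n_cat positions
        cat = x[:, :self.n_cat].long()
        cont = x[:, self.n_cat:]
        emb = torch.cat([e(cat[:, i]) for i, e in enumerate(self.embeddings)], dim=1)
        return self.head(torch.cat([emb, cont], dim=1))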