def train_random_holdout(config: dict):
    """Train a model on a single random train/valid holdout split.

    Expects ``config['train']`` with 'path_save_model', 'model_name',
    'path_history' and 'epochs', and ``config['dataloader']`` with 'path_df'
    plus the loader sub-configs consumed by the dataloader module.

    Side effects: saves model weights and writes three CSVs
    (history, train scores, valid scores) next to ``path_history``.
    """
    # BUGFIX: configs_train was used without ever being assigned
    # (config['train'] was never extracted), which raised NameError.
    configs_train = config['train']
    psm = configs_train['path_save_model']
    mn = configs_train['model_name']
    ph = configs_train['path_history']
    path_history = ph + mn
    # BUGFIX: path_save_model was passed to train_model() below but never
    # defined; built here the same way train_skfold builds its per-fold path.
    path_save_model = psm + mn + '.pth'
    config_dataloader = config['dataloader']
    path_df = config_dataloader['path_df']
    # Removed unused `df = dl.transform_df(path_df)` — its result was
    # discarded; get_train_valid_df reads path_df itself.
    train_df, valid_df = dl.get_train_valid_df(path_df, valid_size=0.1)
    train_trf = trfs.ImgAugTrainTransform()
    valid_trf = trfs.to_tensor()
    # NOTE(review): renamed call from dl.get_train_valid_dataloaders to
    # dl.get_train_valid_data_loaders, matching the definition visible in
    # this project and the newer call sites — confirm against the dl module.
    train_data_loader, valid_data_loader = dl.get_train_valid_data_loaders(
        config_dataloader, train_df, valid_df, dl.collate_fn,
        train_trf, valid_trf)
    model = get_model()
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = None
    num_epochs = configs_train['epochs']
    df_history, df_scores_train, df_scores_valid = train_model(
        train_data_loader, valid_data_loader, model, optimizer,
        num_epochs, lr_scheduler, path_save_model)
    df_history.to_csv(path_history + '_history.csv', index=False)
    df_scores_train.to_csv(path_history + '_scores_train.csv', index=False)
    df_scores_valid.to_csv(path_history + '_scores_valid.csv', index=False)
def get_train_valid_data_loaders(
    config_data_loader: dict,
    train_df: pd.DataFrame,
    valid_df: pd.DataFrame,
    collate_fn: Callable,
    train_trf: Callable = None,
    valid_trf: Callable = None,
) -> Tuple[DataLoader, DataLoader]:
    """Build the training and validation ``DataLoader`` pair.

    ``config_data_loader`` supplies the kwargs for each loader
    ('train_loader' / 'valid_loader') and the image directory under
    'train_dataset' → 'dir_train' (both splits read from the same dir).

    The train dataset receives ``train_trf`` as its augmentation transform
    and a fresh ``transforms.to_tensor()`` converter; the valid dataset gets
    no augmentation and uses ``valid_trf`` as its tensor converter.
    """
    images_dir = config_data_loader['train_dataset']['dir_train']

    datasets = {
        'train': SixRayDataset(
            train_df,
            images_dir,
            transforms=train_trf,
            to_tensor=transforms.to_tensor(),
        ),
        'valid': SixRayDataset(
            valid_df,
            images_dir,
            transforms=None,
            to_tensor=valid_trf,
        ),
    }

    train_loader = DataLoader(
        datasets['train'],
        collate_fn=collate_fn,
        **config_data_loader['train_loader'],
    )
    valid_loader = DataLoader(
        datasets['valid'],
        collate_fn=collate_fn,
        **config_data_loader['valid_loader'],
    )
    return train_loader, valid_loader
def train_random_holdout(config: dict):
    """Train a model on a single random 80/20 holdout split.

    Reads experiment paths and hyperparameters from ``config``, logs to a
    TensorBoard ``SummaryWriter`` under the experiment directory, saves the
    best weights to ``psm`` and writes history/score CSVs into ``ph``.
    """
    cwd = os.getcwd()
    en = config['experiment_name']
    # Per-experiment output dirs: history/logs dir and weights file path.
    ph = os.path.join(cwd, *config['path_history'], en)
    psm = os.path.join(cwd, *config['path_save_model'], en + '.pth')
    config_dataloader = config['dataloader']
    config_train = config['train']
    path_df = os.path.join(cwd, *config_dataloader['path_df'])
    optimizer_config = config_train['optimizer']
    num_epochs = config_train['epochs']
    batch_size = config_dataloader['train_loader']['batch_size']
    lr_scheduler = None
    # Collected for hparam logging; add_hparams stayed disabled because it
    # also requires a metric_dict argument — left for a follow-up.
    hparams = {
        **optimizer_config,
        'epochs': num_epochs,
        'batch_size': batch_size
    }
    make_dir(ph)
    writer = SummaryWriter(ph, comment=en)
    # writer.add_hparams(hparams)
    train_df, valid_df = dl.get_train_valid_df(path_df, valid_size=0.2)
    train_trf = trfs.ImgAugTrainTransform()
    valid_trf = trfs.to_tensor()
    train_data_loader, valid_data_loader = dl.get_train_valid_data_loaders(
        config_dataloader, train_df, valid_df, dl.collate_fn,
        train_trf, valid_trf)
    model = get_model()
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, **optimizer_config)
    df_history, df_scores_train, df_scores_valid = train_model(
        train_data_loader, valid_data_loader, model, optimizer,
        num_epochs, psm, writer, lr_scheduler)
    # Removed dead statement `df_history.iloc[0].to_dict()` — its result
    # was discarded, so it had no effect.
    df_history.to_csv(os.path.join(ph, en + '_history.csv'), index=False)
    df_scores_train.to_csv(
        os.path.join(ph, en + '_scores_train.csv'), index=False
    )
    df_scores_valid.to_csv(
        os.path.join(ph, en + '_scores_valid.csv'), index=False
    )
def set_up_evaluation(config: dict, weights_path, output_path):
    """Rebuild the validation loader from ``config`` and run prediction.

    Recreates the same 90/10 split used at training time, wraps the
    validation frame in a DataLoader (tensor conversion only, no
    augmentation), then delegates to ``predict_model`` with the weights at
    ``weights_path``, writing results to ``output_path``.
    """
    loader_cfg = config['dataloader']
    df_path = os.path.join(os.getcwd(), *loader_cfg['path_df'])

    train_split, valid_split = dl.get_train_valid_df(df_path, valid_size=0.1)

    # Only the validation loader is needed; the train loader is discarded.
    _, valid_loader = dl.get_train_valid_data_loaders(
        loader_cfg,
        train_split,
        valid_split,
        dl.collate_fn,
        valid_trf=trfs.to_tensor(),
    )

    predict_model(valid_loader, get_model(), weights_path, output_path)
def train_skfold(config: dict):
    """Train one model per stratified K-fold split.

    Folds are assigned by ``dl.split_stratifiedKFolds_bbox_count`` using
    ``config['dataloader']['n_splits']``. For each fold a fresh model and
    optimizer are created, weights are saved to a per-fold ``.pth`` file,
    and per-fold history/score CSVs are written under ``path_history``.
    """
    configs_train = config['train']
    psm = configs_train['path_save_model']
    mn = configs_train['model_name']
    ph = configs_train['path_history']
    path_history = ph + mn
    config_dataloader = config['dataloader']
    path_df = config_dataloader['path_df']
    df = dl.transform_df(path_df)
    # Adds a 'fold' column, stratified by bounding-box count per image.
    df = dl.split_stratifiedKFolds_bbox_count(df,
                                              config_dataloader['n_splits'])
    folds = list(df['fold'].unique())
    train_trf = trfs.ImgAugTrainTransform()
    valid_trf = trfs.to_tensor()
    # Loop-invariant settings hoisted out of the fold loop.
    num_epochs = configs_train['epochs']
    lr_scheduler = None
    # enumerate() index was unused — iterate folds directly.
    for fold in folds:
        print(f"{'_'*30}Training on fold {fold}...{'_'*30}")
        path_save_model = psm + mn + f'_fold{fold}.pth'
        train_df, valid_df = dl.get_train_valid_df_skfold(df, fold)
        # NOTE(review): renamed call from dl.get_train_valid_dataloaders to
        # dl.get_train_valid_data_loaders, matching the definition visible
        # in this project and the other call sites — confirm against dl.
        train_data_loader, valid_data_loader = dl.get_train_valid_data_loaders(
            config_dataloader, train_df, valid_df, dl.collate_fn,
            train_trf, valid_trf)
        # Fresh model/optimizer per fold so folds don't share state.
        model = get_model()
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9,
                                    weight_decay=0.0005)
        df_history, df_scores_train, df_scores_valid = train_model(
            train_data_loader, valid_data_loader, model, optimizer,
            num_epochs, lr_scheduler, path_save_model
        )
        df_history.to_csv(path_history + f'fold{fold}_history.csv',
                          index=False)
        df_scores_train.to_csv(path_history + f'fold{fold}_scores_train.csv',
                               index=False)
        df_scores_valid.to_csv(path_history + f'fold{fold}_scores_valid.csv',
                               index=False)
    # Tail of get_train_valid_df — the definition starts above this chunk.
    # The last `valid_size` image ids become the validation split; the rest
    # train. Presumably image_ids was shuffled earlier — TODO confirm.
    train_ids = image_ids[:-valid_size]
    valid_ids = image_ids[-valid_size:]
    # Keep every annotation row whose image belongs to the split; .copy()
    # detaches each slice from the parent frame (avoids SettingWithCopy).
    train_df = df.loc[(df['img_name'].isin(train_ids))].copy()
    valid_df = df.loc[(df['img_name'].isin(valid_ids))].copy()
    return train_df, valid_df


if __name__ == '__main__':
    # Smoke test: build both loaders from the YAML config and pull one batch.
    args = parse_args()
    config = parse_yaml(args.pyaml)
    config_dataloader = config['dataloader']
    path_df = os.path.join(os.getcwd(), *config_dataloader['path_df'])
    train_trf = transforms.ImgAugTrainTransform()
    valid_trf = transforms.to_tensor()
    train_df, valid_df = get_train_valid_df(path_df)
    train_data_loader, valid_data_loader = get_train_valid_data_loaders(
        config_dataloader, train_df, valid_df, collate_fn, train_trf,
        valid_trf)
    # Number of batches in the train loader.
    print(len(train_data_loader))
    images, targets, image_ids = next(iter(train_data_loader))
    print(f"Length of Train dataset: {len(train_data_loader.dataset)}")