def train_random_holdout(config: dict):
    """Train a model on one random train/validation hold-out split.

    Paths, the model name and the epoch count are read from
    ``config['train']``; dataloader settings and the annotation file path
    are read from ``config['dataloader']``. After training, the history
    and the train/validation score tables returned by ``train_model`` are
    written as CSV files whose names start with
    ``path_history + model_name``.

    Args:
        config: Experiment configuration. Must provide ``config['train']``
            with the keys ``path_save_model``, ``model_name``,
            ``path_history`` and ``epochs``, and ``config['dataloader']``
            with the key ``path_df``.
    """
    configs_train = config['train']
    psm = configs_train['path_save_model']
    mn = configs_train['model_name']
    ph = configs_train['path_history']
    # Paths are joined by plain string concatenation, so `ph` and `psm`
    # presumably already end with a path separator -- TODO confirm.
    path_history = ph + mn
    # Same naming scheme as the per-fold checkpoints in train_skfold,
    # minus the fold suffix.
    path_save_model = psm + mn + '.pth'
    num_epochs = configs_train['epochs']
    config_dataloader = config['dataloader']
    path_df = config_dataloader['path_df']

    # NOTE(review): the returned frame is not used below; the call is kept
    # in case transform_df has side effects -- confirm and drop if not.
    df = dl.transform_df(path_df)
    train_df, valid_df = dl.get_train_valid_df(path_df, valid_size=0.1)
    train_trf = trfs.ImgAugTrainTransform()
    valid_trf = trfs.to_tensor()

    # NOTE(review): the loader factory visible elsewhere in this file is
    # spelled get_train_valid_data_loaders -- confirm which name `dl`
    # actually exposes.
    train_data_loader, valid_data_loader = dl.get_train_valid_dataloaders(
        config_dataloader, train_df, valid_df, dl.collate_fn,
        train_trf, valid_trf)

    model = get_model()
    # Only parameters that require gradients are handed to the optimizer.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9,
                                weight_decay=0.0005)
    lr_scheduler = None

    df_history, df_scores_train, df_scores_valid = train_model(
        train_data_loader, valid_data_loader, model, optimizer, num_epochs,
        lr_scheduler, path_save_model)

    df_history.to_csv(path_history + '_history.csv', index=False)
    df_scores_train.to_csv(path_history + '_scores_train.csv', index=False)
    df_scores_valid.to_csv(path_history + '_scores_valid.csv', index=False)
Esempio n. 2
0
def get_train_valid_data_loaders(
    config_data_loader: dict,
    train_df: pd.DataFrame,
    valid_df: pd.DataFrame,
    collate_fn: Callable,
    train_trf: Callable = None,
    valid_trf: Callable = None,
) -> Tuple[DataLoader, DataLoader]:
    """Build the training and validation ``DataLoader`` pair.

    Each frame is wrapped in a ``SixRayDataset``. Both datasets are
    constructed with the same ``dir_train`` directory taken from
    ``config_data_loader['train_dataset']``.

    Args:
        config_data_loader: Dataloader configuration. The mappings under
            ``'train_loader'`` and ``'valid_loader'`` are forwarded as
            keyword arguments to the respective ``DataLoader``.
        train_df: Frame passed to the training dataset.
        valid_df: Frame passed to the validation dataset.
        collate_fn: Collate function used by both loaders.
        train_trf: Passed as ``transforms`` to the training dataset, which
            always receives ``transforms.to_tensor()`` as ``to_tensor``.
        valid_trf: Passed as ``to_tensor`` to the validation dataset, whose
            ``transforms`` argument is always ``None``.

    Returns:
        ``(train_data_loader, valid_data_loader)``.
    """
    train_loader_kwargs = config_data_loader['train_loader']
    valid_loader_kwargs = config_data_loader['valid_loader']
    image_dir = config_data_loader['train_dataset']['dir_train']

    train_set = SixRayDataset(
        train_df, image_dir,
        transforms=train_trf, to_tensor=transforms.to_tensor())
    valid_set = SixRayDataset(
        valid_df, image_dir,
        transforms=None, to_tensor=valid_trf)

    train_loader = DataLoader(
        train_set, collate_fn=collate_fn, **train_loader_kwargs)
    valid_loader = DataLoader(
        valid_set, collate_fn=collate_fn, **valid_loader_kwargs)
    return train_loader, valid_loader
Esempio n. 3
0
def train_random_holdout(config: dict):
    """Train on one random hold-out split, logging through a SummaryWriter.

    All paths are resolved relative to the current working directory. The
    ``path_history``, ``path_save_model`` and ``dataloader.path_df``
    config entries are unpacked into ``os.path.join``, so each is a
    sequence of path components. The history and the train/validation
    score tables returned by ``train_model`` are written as CSV files into
    ``<path_history>/<experiment_name>/``.

    Args:
        config: Experiment configuration. Must provide the keys
            ``experiment_name``, ``path_history``, ``path_save_model``,
            ``dataloader`` (with ``path_df``) and ``train`` (with
            ``optimizer`` and ``epochs``). ``train.optimizer`` is forwarded
            as keyword arguments to ``torch.optim.SGD``.
    """
    cwd = os.getcwd()
    en = config['experiment_name']
    ph = os.path.join(cwd, *config['path_history'], en)
    psm = os.path.join(cwd, *config['path_save_model'], en + '.pth')
    config_dataloader = config['dataloader']
    config_train = config['train']
    path_df = os.path.join(cwd, *config_dataloader['path_df'])

    optimizer_config = config_train['optimizer']
    num_epochs = config_train['epochs']
    lr_scheduler = None

    make_dir(ph)
    train_df, valid_df = dl.get_train_valid_df(path_df, valid_size=0.2)
    train_trf = trfs.ImgAugTrainTransform()
    valid_trf = trfs.to_tensor()

    train_data_loader, valid_data_loader = dl.get_train_valid_data_loaders(
        config_dataloader, train_df, valid_df, dl.collate_fn,
        train_trf, valid_trf)

    model = get_model()
    # Only parameters that require gradients are handed to the optimizer.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, **optimizer_config)

    # TODO: hyperparameter logging (writer.add_hparams) is not wired up;
    # it presumably needs a metric dict alongside the hparams -- confirm
    # against the SummaryWriter implementation in use.
    writer = SummaryWriter(ph, comment=en)
    try:
        df_history, df_scores_train, df_scores_valid = train_model(
            train_data_loader, valid_data_loader, model, optimizer,
            num_epochs, psm, writer, lr_scheduler)
    finally:
        # Close the writer even when training fails so it is not leaked
        # and pending events get written out.
        writer.close()

    df_history.to_csv(os.path.join(ph, en + '_history.csv'), index=False)
    df_scores_train.to_csv(
        os.path.join(ph, en + '_scores_train.csv'),
        index=False,
    )
    df_scores_valid.to_csv(
        os.path.join(ph, en + '_scores_valid.csv'),
        index=False,
    )
Esempio n. 4
0
def set_up_evaluation(config: dict, weights_path, output_path):
    """Build the validation loader and hand it to ``predict_model``.

    The annotation file path is resolved relative to the current working
    directory from the path components in
    ``config['dataloader']['path_df']``. The data is split with
    ``valid_size=0.1`` and only the validation loader is kept.

    Args:
        config: Experiment configuration; only ``config['dataloader']`` is
            read here.
        weights_path: Forwarded unchanged to ``predict_model``
            (presumably the saved model weights -- confirm there).
        output_path: Forwarded unchanged to ``predict_model``.
    """
    working_dir = os.getcwd()
    loader_cfg = config['dataloader']
    annotations_path = os.path.join(working_dir, *loader_cfg['path_df'])

    train_df, valid_df = dl.get_train_valid_df(annotations_path,
                                               valid_size=0.1)
    to_tensor_trf = trfs.to_tensor()

    # The training loader is built as a by-product and discarded.
    _train_loader, valid_loader = dl.get_train_valid_data_loaders(
        loader_cfg,
        train_df,
        valid_df,
        dl.collate_fn,
        valid_trf=to_tensor_trf,
    )

    predict_model(valid_loader, get_model(), weights_path, output_path)
def train_skfold(config: dict):
    """Train one model per fold and save each fold's history and scores.

    The annotation frame is loaded with ``dl.transform_df`` and given a
    ``fold`` column by ``dl.split_stratifiedKFolds_bbox_count``. For every
    distinct fold value, a fresh model and SGD optimizer are created, the
    model is trained with that fold as the validation split, and three CSV
    files (history, train scores, validation scores) are written.

    Args:
        config: Experiment configuration. Must provide ``config['train']``
            with the keys ``path_save_model``, ``model_name``,
            ``path_history`` and ``epochs``, and ``config['dataloader']``
            with the keys ``path_df`` and ``n_splits``.
    """
    configs_train = config['train']
    psm = configs_train['path_save_model']
    mn = configs_train['model_name']
    ph = configs_train['path_history']
    # Paths are joined by plain string concatenation, so `ph` and `psm`
    # presumably already end with a path separator -- TODO confirm.
    path_history = ph + mn
    # Fold-independent settings are resolved once, outside the loop.
    num_epochs = configs_train['epochs']
    lr_scheduler = None
    config_dataloader = config['dataloader']
    path_df = config_dataloader['path_df']

    df = dl.transform_df(path_df)
    df = dl.split_stratifiedKFolds_bbox_count(
        df, config_dataloader['n_splits'])
    train_trf = trfs.ImgAugTrainTransform()
    valid_trf = trfs.to_tensor()

    for fold in df['fold'].unique():
        print(f"{'_'*30}Training on fold {fold}...{'_'*30}")
        path_save_model = psm + mn + f'_fold{fold}.pth'
        train_df, valid_df = dl.get_train_valid_df_skfold(df, fold)
        train_data_loader, valid_data_loader = dl.get_train_valid_dataloaders(
            config_dataloader, train_df, valid_df, dl.collate_fn,
            train_trf, valid_trf)

        # A new model and optimizer per fold, so no state carries over.
        model = get_model()
        params = [p for p in model.parameters() if p.requires_grad]
        optimizer = torch.optim.SGD(params, lr=0.0005, momentum=0.9,
                                    weight_decay=0.0005)

        df_history, df_scores_train, df_scores_valid = train_model(
            train_data_loader, valid_data_loader, model, optimizer,
            num_epochs, lr_scheduler, path_save_model)

        # NOTE(review): unlike the checkpoint name above, these names have
        # no separator between the model name and "fold". Left unchanged
        # because other code may rely on the existing file names.
        df_history.to_csv(
            path_history + f'fold{fold}_history.csv', index=False)
        df_scores_train.to_csv(
            path_history + f'fold{fold}_scores_train.csv', index=False)
        df_scores_valid.to_csv(
            path_history + f'fold{fold}_scores_valid.csv', index=False)
Esempio n. 6
0
    train_ids = image_ids[:-valid_size]
    valid_ids = image_ids[-valid_size:]

    train_df = df.loc[(df['img_name'].isin(train_ids))].copy()
    valid_df = df.loc[(df['img_name'].isin(valid_ids))].copy()

    return train_df, valid_df


# Manual smoke test: build both dataloaders from a YAML config, pull one
# training batch, and print a few sizes.
if __name__ == '__main__':

    args = parse_args()
    config = parse_yaml(args.pyaml)
    config_dataloader = config['dataloader']

    # 'path_df' holds path components, joined relative to the working dir.
    path_df = os.path.join(os.getcwd(), *config_dataloader['path_df'])

    train_trf = transforms.ImgAugTrainTransform()
    valid_trf = transforms.to_tensor()

    # Uses the default validation size of get_train_valid_df.
    train_df, valid_df = get_train_valid_df(path_df)

    train_data_loader, valid_data_loader = get_train_valid_data_loaders(
        config_dataloader, train_df, valid_df, collate_fn, train_trf,
        valid_trf)
    # len() of a DataLoader is the number of batches, not of samples.
    print(len(train_data_loader))

    # Fetch a single batch to check that loading and collation work.
    images, targets, image_ids = next(iter(train_data_loader))

    print(f"Length of Train dataset: {len(train_data_loader.dataset)}")