def k_fold_predict(n_splits):
    """Train one model per fold and average the TTA predictions."""
    overall_pred, overall_pred_add = [], []
    kf = KFold(n_splits=n_splits)
    for i, (train_index, val_index) in enumerate(kf.split(train_IDs)):
        logger.info("Fold %d" % (i + 1))
        # Re-instantiate the model so each fold starts from scratch
        model = Resnet34(num_classes=17).cuda()
        inner_train_IDs = list(np.array(train_IDs)[train_index])
        val_IDs = list(np.array(train_IDs)[val_index])
        partition = {'inner_train': inner_train_IDs, 'validation': val_IDs}
        train_ds = PlanetDataset(os.path.join(DATA_DIR, 'train-jpg'),
                                 partition['inner_train'],
                                 os.path.join(DATA_DIR, 'train_v2.csv'),
                                 True)
        val_ds = PlanetDataset(os.path.join(DATA_DIR, 'train-jpg'),
                               partition['validation'],
                               os.path.join(DATA_DIR, 'train_v2.csv'))
        train_dl = DataLoader(train_ds, batch_size=batch_size,
                              num_workers=4, pin_memory=True)
        val_dl = DataLoader(val_ds, batch_size=batch_size,
                            num_workers=4, pin_memory=True)
        best_model_path = train(model, 0.01, 30, train_dl, val_dl)
        logger.info("Training complete")
        logger.info("Loading best model")
        best_model = Resnet34(num_classes=17).cuda()
        load_model(best_model, best_model_path)
        logger.info("Making TTA predictions")
        tta_pred = make_tta_prediction(best_model, test_dl, test_dl_aug, 4)
        # Use the reloaded best model for the additional test set as well
        tta_add_pred = make_tta_prediction(best_model, test_add_dl,
                                           test_add_dl_aug, 4)
        logger.info("TTA predictions complete")
        overall_pred.append(tta_pred)
        overall_pred_add.append(tta_add_pred)
    # Average the per-fold predictions
    overall_pred = np.mean(overall_pred, axis=0)
    overall_pred_add = np.mean(overall_pred_add, axis=0)
    return overall_pred, overall_pred_add
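# NOTE: make_tta_prediction is called above but not defined in this snippet.
# A minimal sketch, assuming it averages sigmoid outputs over one pass of the
# plain test loader and n_aug passes of the randomly augmented loader, and
# that both loaders yield (image, ...) batches in a fixed order; the body
# below is an assumption, not the original implementation:
def make_tta_prediction(model, dl, dl_aug, n_aug):
    model.eval()
    all_preds = []
    with torch.no_grad():
        for loader, n_passes in ((dl, 1), (dl_aug, n_aug)):
            for _ in range(n_passes):
                preds = [torch.sigmoid(model(batch[0].cuda())).cpu().numpy()
                         for batch in loader]
                all_preds.append(np.concatenate(preds))
    # Mean over the original pass and the n_aug augmented passes
    return np.mean(all_preds, axis=0)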
def load_model_for_inference(model_checkpoint_path):
    """
    Load model from a checkpoint saved during training, following the
    recommendations by the PyTorch team.

    References
    ----------
    https://pytorch.org/tutorials/beginner/saving_loading_models.html#save-load-state-dict-recommended
    """
    model = Resnet34()
    model.load_state_dict(torch.load(model_checkpoint_path))
    # Put the model in eval mode so that batch norm, dropout and similar
    # layers behave correctly at inference time
    model.eval()
    return model
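# Usage sketch (the checkpoint path and input batch are hypothetical);
# wrapping inference in torch.no_grad() avoids building the autograd graph:
#
#   model = load_model_for_inference('checkpoints/best_model.pth')
#   with torch.no_grad():
#       probs = torch.sigmoid(model(batch))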
import copy
from pathlib import Path

import diagnostics
from datasets import ChestXRayPneumoniaDataset, COVIDChestXRayDataset
from models import Resnet34
from trainer import Trainer
from sklearn.model_selection import train_test_split, StratifiedKFold

batch_size = 64
size = 256
n_splits = 5

# Pretrain with Chest XRay Pneumonia dataset (>5k images)
pneumonia_classifier = Resnet34()
dataset = ChestXRayPneumoniaDataset(Path('input/chest-xray-pneumonia'), size)
train_idx, validation_idx = train_test_split(
    list(range(len(dataset))), test_size=0.2, stratify=dataset.labels
)
trainer = Trainer(pneumonia_classifier, dataset, batch_size, train_idx,
                  validation_idx)
trainer.run(max_epochs=2)

# Fine tune with COVID-19 Chest XRay dataset (~120 images)
dataset = COVIDChestXRayDataset(Path('input/covid_chestxray'), size)
print('Executing a {}-fold cross validation'.format(n_splits))
split = 1
skf = StratifiedKFold(n_splits=n_splits)
for train_idx, validation_idx in skf.split(dataset.df, dataset.labels):
    print('===Split #{}==='.format(split))
    # Start from the pneumonia classifier
def main(experiment_dir, baseline_epochs=20, finetune_epochs=15, seed=None,
         batch_size=64, image_size=256, n_splits=None):
    """
    Main training loop.

    Parameters
    ----------
    experiment_dir : str or Path
        Directory where logs and checkpoints are written.
    baseline_epochs : int, optional
        Epochs for pretraining on the pneumonia dataset. Default is 20.
    finetune_epochs : int, optional
        Epochs for fine tuning on the COVID-19 dataset. Default is 15.
    seed : int or None, optional
    batch_size : int, optional
    image_size : int, optional
    n_splits : int or None, optional
        Number of cross-validation splits. If None, the model is trained on
        a single fixed train/validation split. Default is None.
    """
    # Create experiment dir and checkpoints dir
    experiment_dir = Path(experiment_dir)
    experiment_dir.mkdir(exist_ok=True, parents=True)
    checkpoints_dir = experiment_dir / "checkpoints"
    checkpoints_dir.mkdir(exist_ok=True)

    # Set up root logger
    logger_path = experiment_dir / "train.log"
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "[%(asctime)s] %(name)s:%(lineno)d %(levelname)s :: %(message)s")
    file_handler = logging.FileHandler(logger_path)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

    # Get train logger
    logger = logging.getLogger("defeatcovid19.train")

    if seed is not None:
        # Fix seed to improve reproducibility
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    # Pretrain with Chest XRay Pneumonia dataset (>5k images)
    pneumonia_classifier = Resnet34()
    dataset = ChestXRayPneumoniaDataset(Path("/data/chest_xray_pneumonia"),
                                        image_size)
    dataset.build()
    # dataset = NIHCX38Dataset(Path('/data/nih-cx38'), image_size, balance=True)
    # dataset.build()
    train_idx, validation_idx = train_test_split(list(range(len(dataset))),
                                                 test_size=0.2,
                                                 stratify=dataset.labels)
    trainer = Trainer("baseline_classifier", pneumonia_classifier, dataset,
                      batch_size, train_idx, validation_idx, checkpoints_dir)
    trainer.run(max_epochs=baseline_epochs)

    # Fine tune with COVID-19 Chest XRay dataset (~120 images)
    dataset = COVIDChestXRayDataset(Path("/data/covid-chestxray-dataset"),
                                    image_size)
    dataset.build()

    if n_splits is not None:
        logger.info(f"Executing a {n_splits}-fold cross validation")
        kfold_metrics = {
            "train": {"loss": [], "roc": [], "accuracy": []},
            "val": {"loss": [], "roc": [], "accuracy": []},
        }
        split = 1
        skf = StratifiedKFold(n_splits=n_splits)
        for train_idx, validation_idx in skf.split(dataset.df, dataset.labels):
            logger.info("===Split #{}===".format(split))
            # Start from the pneumonia classifier
            classifier = copy.deepcopy(pneumonia_classifier)
            # Checkpoints per split
            checkpoints_dir_split = checkpoints_dir / f"split_{split}"
            checkpoints_dir_split.mkdir(exist_ok=True)
            trainer = Trainer("covid19_classifier", classifier, dataset,
                              batch_size, train_idx, validation_idx,
                              checkpoints_dir_split)
            trainer_metrics = trainer.run(max_epochs=finetune_epochs)
            # Record metrics for the current split
            for data_split_id, data_split_metrics in trainer_metrics.items():
                for metric_id, metric in data_split_metrics.items():
                    kfold_metrics[data_split_id][metric_id].append(metric)
            split += 1

        # Summarize metrics from all splits and compute mean and std
        table = BeautifulTable()
        table.column_headers = (
            ["Metric name"]
            + [f"Split {split_num + 1}" for split_num in range(n_splits)]
            + ["Mean", "Std"])
        for data_split_id, data_split_metrics in kfold_metrics.items():
            for metric_id, metric in data_split_metrics.items():
                metric_vals = kfold_metrics[data_split_id][metric_id]
                mean_metric = np.mean(metric_vals)
                std_metric = np.std(metric_vals)
                table_row = ([f"{data_split_id} {metric_id}"]
                             + metric_vals + [mean_metric, std_metric])
                table.append_row(table_row)
        logger.info(f"SUMMARY\n{table}")
    else:
        logger.info("Training with a fixed split")
        # Train / test split for covid data
        train_idx, validation_idx = train_test_split(list(range(len(dataset))),
                                                     test_size=0.2,
                                                     stratify=dataset.labels)
        # Start from the pneumonia classifier
        classifier = copy.deepcopy(pneumonia_classifier)
        trainer = Trainer("covid19_classifier", classifier, dataset,
                          batch_size, train_idx, validation_idx,
                          checkpoints_dir)
        trainer_metrics = trainer.run(max_epochs=finetune_epochs)

        # Summarize metrics from training
        table = BeautifulTable()
        table.column_headers = ["Metric name", "Metric value"]
        for data_split_id, data_split_metrics in trainer_metrics.items():
            for metric_id, metric in data_split_metrics.items():
                table_row = [f"{data_split_id} {metric_id}", metric]
                table.append_row(table_row)
        logger.info(f"SUMMARY\n{table}")
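# NOTE: the aggregation above assumes Trainer.run returns per-run metrics as
# a nested dict keyed by data split and metric name, matching the structure
# of kfold_metrics; a hypothetical example (values are illustrative only):
#
#   trainer_metrics = {
#       "train": {"loss": 0.21, "roc": 0.97, "accuracy": 0.93},
#       "val": {"loss": 0.35, "roc": 0.92, "accuracy": 0.88},
#   }
#
# Typical invocation (path and values hypothetical):
#
#   main("experiments/run1", seed=42, n_splits=5)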
import argparse

import mlflow
import torch.optim as optim  # assumed import; optim.Adam is used below

from models import Resnet34  # assumed import; Resnet34 is used below
from optimizer import lr_scheduler, create_optimizer
from logger import setup_logs
from helper_functions import save_model
from validation import validate

# directories
DATA_DIR = './data'
LOG_DIR = './logs'
MODEL_DIR = './models'

# training parameters
BASE_OPTIMIZER = optim.Adam
DIFF_LR_FACTORS = [9, 3, 1]
MODEL = Resnet34(num_classes=17).cuda()

parser = argparse.ArgumentParser(description='Resnet34 Training')
parser.add_argument('--init-lr-0', type=float, default=0.01,
                    help='initial learning rate for group 0 (default: 0.01)')
parser.add_argument('--lr-decay-epoch', type=int, default=5,
                    help='number of epochs before lr decay (default: 5)')
parser.add_argument('--lr-decay-factor', type=float, default=0.1,
                    help='multiplicative factor for lr decay (default: 0.1)')
parser.add_argument('--batch-size', type=int,
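# NOTE: create_optimizer is imported above but not shown. A minimal sketch of
# how DIFF_LR_FACTORS could map to discriminative learning rates, assuming
# the model exposes three layer groups via a hypothetical `layer_groups`
# attribute; the function body is an assumption, not the original code:
def create_optimizer(model, base_optimizer, init_lr,
                     lr_factors=DIFF_LR_FACTORS):
    # Earlier layers get smaller learning rates: init_lr / 9 for the stem,
    # init_lr / 3 for the middle blocks, init_lr for the head.
    param_groups = [
        {'params': group.parameters(), 'lr': init_lr / factor}
        for group, factor in zip(model.layer_groups, lr_factors)
    ]
    return base_optimizer(param_groups)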
        f1_mavg = sum(f1_list) / len(f1_list)
        if f1 > best_f1:
            best_f1 = f1
        if f1_mavg > best_f1_mavg:
            best_f1_mavg = f1_mavg
    return {'f1': best_f1, 'f1_mavg': best_f1_mavg}


import warnings
warnings.filterwarnings("ignore")

for i in range(4):
    train_dir = './results233/resnet34.' + str(i)
    os.makedirs(os.path.join(train_dir, 'checkpoint'), exist_ok=True)

    model = Resnet34().cuda()
    criterion = binary_focal_loss()
    optimizer = adam(model.parameters())

    # Resume from the latest checkpoint for this fold, if one exists
    checkpoint = utils_checkpoint.get_initial_checkpoint(train_dir)
    if checkpoint is not None:
        last_epoch, step = utils_checkpoint.load_checkpoint(
            model, optimizer, checkpoint)
    else:
        last_epoch, step = -1, -1

    dataloaders = {}
    path = './'
    train_dataset = HPADataset(path, i, 'train')
    val_dataset = HPADataset(path, i, 'val')
    # Oversample rare classes using per-sample weights
    train_sampler = torch.utils.data.WeightedRandomSampler(
        train_dataset.weights, len(train_dataset.weights))
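# NOTE: binary_focal_loss is used above but not defined in this snippet.
# A minimal sketch of a binary focal loss factory (Lin et al., 2017),
# FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t); the alpha/gamma defaults
# and the factory signature are assumptions:
import torch
import torch.nn.functional as F

def binary_focal_loss(alpha=0.25, gamma=2.0):
    def loss_fn(logits, targets):
        # BCE per element; note bce = -log(p_t), so p_t = exp(-bce)
        bce = F.binary_cross_entropy_with_logits(logits, targets,
                                                 reduction='none')
        p_t = torch.exp(-bce)
        # alpha for positive targets, (1 - alpha) for negative targets
        alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
        return (alpha_t * (1 - p_t) ** gamma * bce).mean()
    return loss_fn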