def main(_folder, _csv_path_train, _imgs_folder_train, _lr_init, _sched_factor, _sched_min_lr, _sched_patience,
         _batch_size, _epochs, _early_stop, _weights, _model_name, _pretrained, _save_folder, _csv_path_test,
         _best_metric, _neurons_reducer_block, _comb_method, _comb_config, _use_meta_data):
    """Train a CNN on one cross-validation fold and evaluate it.

    The fold identified by ``_folder`` is held out for validation; all other
    folds are used for training. After training, the best checkpoint is
    evaluated on the validation partition and the in-memory model is
    evaluated on the test CSV (``_csv_path_test``).

    All parameters are forwarded to the project helpers ``get_data_loader``,
    ``set_model``, ``fit_model`` and ``test_model``; see those for details.
    """

    # One-hot / numeric metadata columns expected in the training CSV.
    meta_data_columns = ['age_approx', 'female', 'male', 'anterior torso', 'head/neck', "lateral torso",
                         'lower extremity', 'oral/genital', 'palms/soles', 'posterior torso', 'upper extremity']

    _metric_options = {
        'save_all_path': os.path.join(_save_folder, "best_metrics"),
        'pred_name_scores': 'predictions_best_test.csv',
        'normalize_conf_matrix': True}
    _checkpoint_best = os.path.join(_save_folder, 'best-checkpoint/best-checkpoint.pth')

    # Loading the csv file
    csv_all_folders = pd.read_csv(_csv_path_train)

    print("-" * 50)
    print("- Loading validation data...")
    val_csv_folder = csv_all_folders[(csv_all_folders['folder'] == _folder)]
    train_csv_folder = csv_all_folders[csv_all_folders['folder'] != _folder]

    # Loading validation data
    val_imgs_id = val_csv_folder['image'].values
    val_imgs_path = ["{}/{}.jpg".format(_imgs_folder_train, img_id) for img_id in val_imgs_id]
    val_labels = val_csv_folder['diagnostic_number'].values
    if _use_meta_data:
        val_meta_data = val_csv_folder[meta_data_columns].values
        print("-- Using {} meta-data features".format(len(meta_data_columns)))
    else:
        print("-- No metadata")
        val_meta_data = None
    val_data_loader = get_data_loader(val_imgs_path, val_labels, val_meta_data, transform=ImgEvalTransform(),
                                      batch_size=_batch_size, shuf=True, num_workers=16, pin_memory=True)
    # NOTE(review): len(loader) * batch_size over-counts when the last batch is partial.
    print("-- Validation partition loaded with {} images".format(len(val_data_loader) * _batch_size))

    print("- Loading training data...")
    train_imgs_id = train_csv_folder['image'].values
    train_imgs_path = ["{}/{}.jpg".format(_imgs_folder_train, img_id) for img_id in train_imgs_id]
    train_labels = train_csv_folder['diagnostic_number'].values
    if _use_meta_data:
        train_meta_data = train_csv_folder[meta_data_columns].values
        print("-- Using {} meta-data features".format(len(meta_data_columns)))
    else:
        print("-- No metadata")
        train_meta_data = None
    train_data_loader = get_data_loader(train_imgs_path, train_labels, train_meta_data,
                                        transform=ImgTrainTransform(), batch_size=_batch_size, shuf=True,
                                        num_workers=16, pin_memory=True)
    print("-- Training partition loaded with {} images".format(len(train_data_loader) * _batch_size))
    print("-" * 50)

    ####################################################################################################################
    # Class names and per-class frequencies (computed on the training folds only).
    ser_lab_freq = get_labels_frequency(train_csv_folder, "diagnostic", "image")
    _labels_name = ser_lab_freq.index.values
    _freq = ser_lab_freq.values

    ####################################################################################################################
    print("- Loading", _model_name)
    model = set_model(_model_name, len(_labels_name), neurons_reducer_block=_neurons_reducer_block,
                      comb_method=_comb_method, comb_config=_comb_config, pretrained=_pretrained)

    ####################################################################################################################
    # Inverse-frequency class weights to counter class imbalance.
    if _weights == 'frequency':
        _weights = (_freq.sum() / _freq).round(3)
    loss_fn = nn.CrossEntropyLoss(weight=torch.Tensor(_weights).cuda())
    optimizer = optim.SGD(model.parameters(), lr=_lr_init, momentum=0.9, weight_decay=0.001)
    scheduler_lr = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=_sched_factor, min_lr=_sched_min_lr,
                                                        patience=_sched_patience)

    ####################################################################################################################
    print("- Starting the training phase...")
    print("-" * 50)
    fit_model(model, train_data_loader, val_data_loader, optimizer=optimizer, loss_fn=loss_fn, epochs=_epochs,
              epochs_early_stop=_early_stop, save_folder=_save_folder, initial_model=None, device=None,
              schedule_lr=scheduler_lr, config_bot=None, model_name="CNN", resume_train=False, history_plot=True,
              val_metrics=["auc"], best_metric=_best_metric)

    ####################################################################################################################
    # Testing the validation partition with the best checkpoint found during training.
    print("- Evaluating the validation partition...")
    test_model(model, val_data_loader, checkpoint_path=_checkpoint_best, loss_fn=loss_fn, save_pred=True,
               partition_name='eval', metrics_to_comp='all', class_names=_labels_name,
               metrics_options=_metric_options, apply_softmax=True, verbose=False)

    ####################################################################################################################
    print("- Loading test data...")
    csv_test = pd.read_csv(_csv_path_test)
    test_imgs_id = csv_test['image'].values
    test_imgs_path = ["{}/{}.jpg".format(_imgs_folder_train, img_id) for img_id in test_imgs_id]
    test_labels = csv_test['diagnostic_number'].values
    if _use_meta_data:
        test_meta_data = csv_test[meta_data_columns].values
        print("-- Using {} meta-data features".format(len(meta_data_columns)))
    else:
        test_meta_data = None
        print("-- No metadata")

    _metric_options = {
        'save_all_path': os.path.join(_save_folder, "test_pred"),
        'pred_name_scores': 'predictions.csv',
        'normalize_conf_matrix': True}
    test_data_loader = get_data_loader(test_imgs_path, test_labels, test_meta_data, transform=ImgEvalTransform(),
                                       batch_size=_batch_size, shuf=False, num_workers=16, pin_memory=True)
    print("-" * 50)

    # Testing the test partition.
    # NOTE(review): checkpoint_path=None evaluates the in-memory model (last state after
    # fit_model/test_model), not the best checkpoint — confirm this is intentional.
    print("\n- Evaluating the test partition...")  # fixed: previously said "validation"
    test_model(model, test_data_loader, checkpoint_path=None, metrics_to_comp="all", class_names=_labels_name,
               metrics_options=_metric_options, save_pred=True, verbose=False)
def main(_folder, _csv_path_train, _imgs_folder_train, _lr_init, _sched_factor, _sched_min_lr, _sched_patience,
         _batch_size, _epochs, _early_stop, _weights, _model_name, _pretrained, _save_folder, _best_metric):
    """Train a CNN on one cross-validation fold and evaluate the best checkpoint.

    Rows whose ``folder`` column equals ``_folder`` form the validation
    partition; every other row is used for training. Images are stored per
    label: ``<_imgs_folder_train>/<target>/<Image_id>``. Heavy lifting is
    delegated to the project helpers ``get_data_loader``, ``set_model``,
    ``fit_model`` and ``test_model``.
    """

    _metric_options = {
        'save_all_path': os.path.join(_save_folder, "best_metrics"),
        'pred_name_scores': 'predictions_best_test.csv',
        'normalize_conf_matrix': True,
    }
    _checkpoint_best = os.path.join(_save_folder, 'best-checkpoint/best-checkpoint.pth')

    # Split the CSV into the held-out validation fold and the training folds.
    csv_all_folders = pd.read_csv(_csv_path_train)
    print("-" * 50)
    print("- Loading validation data...")
    val_csv_folder = csv_all_folders[(csv_all_folders['folder'] == _folder)]
    train_csv_folder = csv_all_folders[csv_all_folders['folder'] != _folder]

    def _labels_and_paths(frame):
        # Images live under <root>/<label-name>/<image-id>, hence the label in the path.
        numeric_labels = frame['target_number'].values
        label_names = frame['target'].values
        image_ids = frame['Image_id'].values
        paths = ["{}/{}/{}".format(_imgs_folder_train, lab, img_id)
                 for img_id, lab in zip(image_ids, label_names)]
        return numeric_labels, paths

    # Validation partition (no metadata in this pipeline variant).
    val_labels, val_imgs_path = _labels_and_paths(val_csv_folder)
    val_data_loader = get_data_loader(val_imgs_path, val_labels, None, transform=ImgEvalTransform(),
                                      batch_size=_batch_size, shuf=True, num_workers=16, pin_memory=True)
    print("-- Validation partition loaded with {} images".format(len(val_data_loader) * _batch_size))

    # Training partition.
    print("- Loading training data...")
    train_labels, train_imgs_path = _labels_and_paths(train_csv_folder)
    train_data_loader = get_data_loader(train_imgs_path, train_labels, None, transform=ImgTrainTransform(),
                                        batch_size=_batch_size, shuf=True, num_workers=16, pin_memory=True)
    print("-- Training partition loaded with {} images".format(len(train_data_loader) * _batch_size))
    print("-" * 50)

    ####################################################################################################################
    # Class names and per-class frequencies, computed on the training folds only.
    ser_lab_freq = get_labels_frequency(train_csv_folder, "target", "Image_id")
    _labels_name = ser_lab_freq.index.values
    _freq = ser_lab_freq.values

    ####################################################################################################################
    print("- Loading", _model_name)
    model = set_model(_model_name, len(_labels_name), pretrained=_pretrained)

    ####################################################################################################################
    # Inverse-frequency class weights to counter class imbalance.
    if _weights == 'frequency':
        _weights = (_freq.sum() / _freq).round(3)
    loss_fn = nn.CrossEntropyLoss(weight=torch.Tensor(_weights).cuda())
    optimizer = optim.SGD(model.parameters(), lr=_lr_init, momentum=0.9, weight_decay=0.001)
    scheduler_lr = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=_sched_factor,
                                                        min_lr=_sched_min_lr, patience=_sched_patience)

    ####################################################################################################################
    print("- Starting the training phase...")
    print("-" * 50)
    fit_model(model, train_data_loader, val_data_loader, optimizer=optimizer, loss_fn=loss_fn, epochs=_epochs,
              epochs_early_stop=_early_stop, save_folder=_save_folder, initial_model=None, device=None,
              schedule_lr=scheduler_lr, config_bot=None, model_name="CNN", resume_train=False,
              history_plot=True, val_metrics=["auc"], best_metric=_best_metric)

    ####################################################################################################################
    # Evaluate the best checkpoint on the validation partition.
    print("- Evaluating the validation partition...")
    test_model(model, val_data_loader, checkpoint_path=_checkpoint_best, loss_fn=loss_fn, save_pred=True,
               partition_name='eval', metrics_to_comp='all', class_names=_labels_name,
               metrics_options=_metric_options, apply_softmax=True, verbose=False)
def main(_folder, _csv_path_train, _imgs_folder_train, _lr_init, _sched_factor, _sched_min_lr, _sched_patience,
         _batch_size, _epochs, _early_stop, _weights, _model_name, _pretrained, _save_folder, _csv_path_test,
         _best_metric, _neurons_reducer_block, _comb_method, _comb_config, _use_meta_data, _metric_early_stop):
    """Train a CNN (optionally fused with patient metadata) on one CV fold.

    The fold identified by ``_folder`` is held out for validation; the rest
    is used for training. After training, the best checkpoint is evaluated
    on the validation partition and the in-memory model on the test CSV
    (``_csv_path_test``). Parameters are forwarded to the project helpers
    ``get_data_loader``, ``set_model``, ``fit_model`` and ``test_model``.
    """

    # One-hot / numeric metadata columns expected in the CSV. These strings are
    # column keys and must match the dataset exactly (including its own
    # "fitspatrick" spelling) — do not "fix" them.
    meta_data_columns = [
        "smoke_False", "smoke_True", "drink_False", "drink_True", "background_father_POMERANIA",
        "background_father_GERMANY", "background_father_BRAZIL", "background_father_NETHERLANDS",
        "background_father_ITALY", "background_father_POLAND", "background_father_UNK",
        "background_father_PORTUGAL", "background_father_BRASIL", "background_father_CZECH",
        "background_father_AUSTRIA", "background_father_SPAIN", "background_father_ISRAEL",
        "background_mother_POMERANIA", "background_mother_ITALY", "background_mother_GERMANY",
        "background_mother_BRAZIL", "background_mother_UNK", "background_mother_POLAND",
        "background_mother_NORWAY", "background_mother_PORTUGAL", "background_mother_NETHERLANDS",
        "background_mother_FRANCE", "background_mother_SPAIN", "age", "pesticide_False", "pesticide_True",
        "gender_FEMALE", "gender_MALE", "skin_cancer_history_True", "skin_cancer_history_False",
        "cancer_history_True", "cancer_history_False", "has_piped_water_True", "has_piped_water_False",
        "has_sewage_system_True", "has_sewage_system_False", "fitspatrick_3.0", "fitspatrick_1.0",
        "fitspatrick_2.0", "fitspatrick_4.0", "fitspatrick_5.0", "fitspatrick_6.0", "region_ARM",
        "region_NECK", "region_FACE", "region_HAND", "region_FOREARM", "region_CHEST", "region_NOSE",
        "region_THIGH", "region_SCALP", "region_EAR", "region_BACK", "region_FOOT", "region_ABDOMEN",
        "region_LIP", "diameter_1", "diameter_2", "itch_False", "itch_True", "itch_UNK", "grew_False",
        "grew_True", "grew_UNK", "hurt_False", "hurt_True", "hurt_UNK", "changed_False", "changed_True",
        "changed_UNK", "bleed_False", "bleed_True", "bleed_UNK", "elevation_False", "elevation_True",
        "elevation_UNK"
    ]

    _metric_options = {
        'save_all_path': os.path.join(_save_folder, "best_metrics"),
        'pred_name_scores': 'predictions_best_test.csv',
        'normalize_conf_matrix': True
    }
    _checkpoint_best = os.path.join(_save_folder, 'best-checkpoint/best-checkpoint.pth')

    # Loading the csv file
    csv_all_folders = pd.read_csv(_csv_path_train)

    print("-" * 50)
    print("- Loading validation data...")
    val_csv_folder = csv_all_folders[(csv_all_folders['folder'] == _folder)]
    train_csv_folder = csv_all_folders[csv_all_folders['folder'] != _folder]

    # Loading validation data
    val_imgs_id = val_csv_folder['img_id'].values
    val_imgs_path = ["{}/{}".format(_imgs_folder_train, img_id) for img_id in val_imgs_id]
    val_labels = val_csv_folder['diagnostic_number'].values
    if _use_meta_data:
        val_meta_data = val_csv_folder[meta_data_columns].values
        print("-- Using {} meta-data features".format(len(meta_data_columns)))
    else:
        print("-- No metadata")
        val_meta_data = None
    val_data_loader = get_data_loader(val_imgs_path, val_labels, val_meta_data, transform=ImgEvalTransform(),
                                      batch_size=_batch_size, shuf=True, num_workers=16, pin_memory=True)
    # NOTE(review): len(loader) * batch_size over-counts when the last batch is partial.
    print("-- Validation partition loaded with {} images".format(len(val_data_loader) * _batch_size))

    print("- Loading training data...")
    train_imgs_id = train_csv_folder['img_id'].values
    train_imgs_path = ["{}/{}".format(_imgs_folder_train, img_id) for img_id in train_imgs_id]
    train_labels = train_csv_folder['diagnostic_number'].values
    if _use_meta_data:
        train_meta_data = train_csv_folder[meta_data_columns].values
        print("-- Using {} meta-data features".format(len(meta_data_columns)))
    else:
        print("-- No metadata")
        train_meta_data = None
    train_data_loader = get_data_loader(train_imgs_path, train_labels, train_meta_data,
                                        transform=ImgTrainTransform(), batch_size=_batch_size, shuf=True,
                                        num_workers=16, pin_memory=True)
    print("-- Training partition loaded with {} images".format(len(train_data_loader) * _batch_size))
    print("-" * 50)

    ####################################################################################################################
    # Class names and per-class frequencies (computed on the training folds only).
    ser_lab_freq = get_labels_frequency(train_csv_folder, "diagnostic", "img_id")
    _labels_name = ser_lab_freq.index.values
    _freq = ser_lab_freq.values
    print(ser_lab_freq)

    ####################################################################################################################
    print("- Loading", _model_name)
    model = set_model(_model_name, len(_labels_name), neurons_reducer_block=_neurons_reducer_block,
                      comb_method=_comb_method, comb_config=_comb_config, pretrained=_pretrained)

    ####################################################################################################################
    # Inverse-frequency class weights to counter class imbalance.
    if _weights == 'frequency':
        _weights = (_freq.sum() / _freq).round(3)
    loss_fn = nn.CrossEntropyLoss(weight=torch.Tensor(_weights).cuda())
    optimizer = optim.SGD(model.parameters(), lr=_lr_init, momentum=0.9, weight_decay=0.001)
    scheduler_lr = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=_sched_factor, min_lr=_sched_min_lr,
                                                        patience=_sched_patience)

    ####################################################################################################################
    print("- Starting the training phase...")
    print("-" * 50)
    fit_model(model, train_data_loader, val_data_loader, optimizer=optimizer, loss_fn=loss_fn, epochs=_epochs,
              epochs_early_stop=_early_stop, save_folder=_save_folder, initial_model=None,
              metric_early_stop=_metric_early_stop, device=None, schedule_lr=scheduler_lr, config_bot=None,
              model_name="CNN", resume_train=False, history_plot=True, val_metrics=["balanced_accuracy"],
              best_metric=_best_metric)

    ####################################################################################################################
    # Testing the validation partition with the best checkpoint found during training.
    print("- Evaluating the validation partition...")
    test_model(model, val_data_loader, checkpoint_path=_checkpoint_best, loss_fn=loss_fn, save_pred=True,
               partition_name='eval', metrics_to_comp='all', class_names=_labels_name,
               metrics_options=_metric_options, apply_softmax=True, verbose=False)

    ####################################################################################################################
    print("- Loading test data...")
    csv_test = pd.read_csv(_csv_path_test)
    test_imgs_id = csv_test['img_id'].values
    test_imgs_path = ["{}/{}".format(_imgs_folder_train, img_id) for img_id in test_imgs_id]
    test_labels = csv_test['diagnostic_number'].values
    if _use_meta_data:
        test_meta_data = csv_test[meta_data_columns].values
        print("-- Using {} meta-data features".format(len(meta_data_columns)))
    else:
        test_meta_data = None
        print("-- No metadata")

    _metric_options = {
        'save_all_path': os.path.join(_save_folder, "test_pred"),
        'pred_name_scores': 'predictions.csv',
        'normalize_conf_matrix': True
    }
    test_data_loader = get_data_loader(test_imgs_path, test_labels, test_meta_data, transform=ImgEvalTransform(),
                                       batch_size=_batch_size, shuf=False, num_workers=16, pin_memory=True)
    print("-" * 50)

    # Testing the test partition.
    # NOTE(review): checkpoint_path=None evaluates the in-memory model (last state after
    # fit_model/test_model), not the best checkpoint — confirm this is intentional.
    print("\n- Evaluating the test partition...")  # fixed: previously said "validation"
    test_model(model, test_data_loader, checkpoint_path=None, metrics_to_comp="all", class_names=_labels_name,
               metrics_options=_metric_options, save_pred=True, verbose=False)