def predict(learn: Learner, name: str):
    """Dump predictions and diagnostics for a trained learner, then export it.

    Every output file name embeds ``name``. In order, this writes:

    * ``submission_<name>.csv`` -- the module-level ``test`` table with a
      ``has_cactus`` column set to column 0 of the test-set predictions.
    * ``loss_<name>.csv`` -- the losses stored in ``learn.recorder.losses``.
    * ``train_pred_<name>.csv`` -- per-sample training predictions, argmax
      labels and targets.
    * ``train_index_<name>.csv`` -- ROC points plus derived TP/TN/FP/FN
      columns.
    * ``model_<name>.pth`` -- the exported learner.

    Args:
        learn: Learner to run inference with. It is exported with
            ``destroy=True`` as the final step.
        name: Tag inserted into every output file name.

    Side effects:
        Adds/overwrites the ``has_cactus`` column on the module-level
        ``test`` object (assumed to be a pandas DataFrame -- TODO confirm).
    """
    # ---- submission.csv ----
    test_probs, _ = learn.get_preds(ds_type=DatasetType.Test)
    test['has_cactus'] = test_probs.numpy()[:, 0]
    test.to_csv('submission_{}.csv'.format(name), index=False)
    print('Finish creating submission_{}.csv'.format(name))

    # ---- loss.csv ----
    loss_table = pd.DataFrame({
        'id': range(len(learn.recorder.losses)),
        'loss': np.array(learn.recorder.losses),
    })
    loss_table.to_csv('loss_{}.csv'.format(name), index=False)
    print('Finish creating loss_{}.csv'.format(name))

    # ---- per-sample predictions on the training set ----
    train_probs, train_targets = learn.get_preds(ds_type=DatasetType.Train)
    probs_np = train_probs.numpy()
    targets_np = train_targets.numpy()
    predicted_labels = probs_np.argmax(axis=1)
    sample_count = len(train_probs)

    per_sample_table = pd.DataFrame({
        'id': range(sample_count),
        'preds': probs_np[:, 0],
        'preds_label': predicted_labels,
        'targets': targets_np,
    })
    # No index=False here, so the DataFrame index is written as a column too.
    per_sample_table.to_csv('./train_pred_{}.csv'.format(name))
    print('Finish creating train_pred_{}.csv'.format(name))

    # ---- ROC points and derived confusion counts ----
    n_correct = (predicted_labels == targets_np).sum()
    n_incorrect = sample_count - n_correct

    fpr_tensor, tpr_tensor = metrics.roc_curve(train_probs[:, 0], train_targets)
    fpr = fpr_tensor.numpy()
    tpr = tpr_tensor.numpy()

    # NOTE(review): the rates are scaled by the total sample count rather than
    # by per-class counts, so these look like approximations -- confirm intent.
    true_pos = np.floor(tpr * sample_count)
    false_pos = np.floor(fpr * sample_count)
    true_neg = n_correct - true_pos
    false_neg = n_incorrect - false_pos

    roc_table = pd.DataFrame({
        'id': range(len(fpr)),
        'fpr': fpr,
        'tpr': tpr,
        'TP': true_pos,
        'TN': true_neg,
        'FP': false_pos,
        'FN': false_neg,
    })
    roc_table.to_csv('./train_index_{}.csv'.format(name))
    print('Finish creating train_index_{}.csv'.format(name))

    # ---- export the model; destroy=True is passed so the learner is destroyed ----
    learn.export('./model_{}.pth'.format(name), destroy=True)
def save_preds(input_csv, output_csv):
    """Run the checkpoint ensemble over the images in a CSV and save the result.

    The input CSV must contain a ``Study`` column or, failing that, a ``Path``
    column; only that column is kept. A zero-filled placeholder column is
    added for every label in ``ALL_LBLS`` so the frame can be fed to
    ``ImageDataBunch.from_df`` with ``valid_pct=1``, which is then predicted
    on as the validation set.

    Eight checkpoints from ``model_names`` are evaluated in index order
    (densenet121 x2, resnet152 x2, resnext101_64x4d x1, vgg19_bn x2,
    densenet121 x1) and combined with ``ensemble_method(outputs, mode='avg')``.

    Args:
        input_csv: Path of the CSV listing the images to predict on.
        output_csv: Path the prediction CSV is written to (without index).

    Raises:
        ValueError: If the input CSV has neither a ``Study`` nor a ``Path``
            column.
    """
    df = pd.read_csv(input_csv)

    # Choose the path column by explicit membership test. Nested bare
    # ``except:`` clauses would also swallow unrelated errors, including
    # KeyboardInterrupt.
    if 'Study' in df.columns:
        df = df[['Study']]
    elif 'Path' in df.columns:
        df = df[['Path']]
    else:
        raise ValueError('csv has no attribute for path/study.')

    # Placeholder targets: one zero-filled column per label.
    for lbl in ALL_LBLS:
        df[lbl] = np.zeros(len(df))

    # NOTE: .normalize([IMG_MEAN, IMG_STD]) is not applied to this data bunch.
    test = ImageDataBunch.from_df(
        path=folder_path, df=df, folder=chexpert_folder, seed=0,
        label_col=ALL_LBLS, suffix='', valid_pct=1, ds_tfms=data_tfms,
        bs=BS, size=IMG_SZ)
    IDs, outputs = test.valid_ds.x.items, []

    def _cnn(arch_name):
        # Build a non-pretrained fastai CNN learner for the given architecture.
        return cnn_learner(test, getattr(models, arch_name),
                           model_dir=model_path, pretrained=False)

    def _resnext():
        # Build the ResNeXt-101 (64x4d) learner with its custom 14-way head.
        model = resnext101_64x4d(pretrained=None)
        model.last_linear = nn.Sequential(
            nn.Linear(32768, 2048), nn.ReLU(True), nn.Dropout(),
            nn.Linear(2048, 14))
        return Learner(test, model, model_dir=model_path)

    # (learner factory, indices into model_names). One learner is built per
    # entry and reused for all of that entry's checkpoints.
    ensemble_plan = [
        (lambda: _cnn('densenet121'), (0, 1)),
        (lambda: _cnn('resnet152'), (2, 3)),
        (_resnext, (4,)),
        (lambda: _cnn('vgg19_bn'), (5, 6)),
        (lambda: _cnn('densenet121'), (7,)),
    ]

    for build_learner, checkpoint_indices in ensemble_plan:
        learn = build_learner()
        for idx in checkpoint_indices:
            learn.load(model_names[idx])
            output, _, _ = learn.get_preds(ds_type=DatasetType.Valid,
                                           with_loss=True)
            outputs.append(output)

    output = ensemble_method(outputs, mode='avg')
    if torch.cuda.is_available():
        output = output.cpu()
    output = output.numpy()

    # EVAL_LBLS[i] is read from ensemble column i + 1.
    # NOTE(review): confirm this offset matches the label order in ALL_LBLS.
    df = pd.DataFrame({
        'Path': IDs,
        EVAL_LBLS[0]: output[:, 1],
        EVAL_LBLS[1]: output[:, 2],
        EVAL_LBLS[2]: output[:, 3],
        EVAL_LBLS[3]: output[:, 4],
        EVAL_LBLS[4]: output[:, 5],
    })
    df.to_csv(output_csv, index=False)
    print('submission saved.')