def main():
    with timer('load data'):
        df = pd.read_csv(TEST_PATH)
        # Each row's ID is "ID_<image>_<diagnosis>"; split it apart and
        # pivot to one row per image.
        df[['ID', 'Image', 'Diagnosis']] = df['ID'].str.split('_', expand=True)
        df = df[['Image', 'Diagnosis', 'Label']]
        df.drop_duplicates(inplace=True)
        df = df.pivot(index='Image', columns='Diagnosis', values='Label').reset_index()
        df['Image'] = 'ID_' + df['Image']
        df = df[["Image"]]
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                       transforms=test_augmentation, black_crop=False,
                                       subdural_window=True, conc_type="concat_all",
                                       conc_type2="concat_prepost", n_tta=N_TTA)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                 num_workers=16, pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="imagenet", pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        # Unstack the (n_images, n_targets) matrix back into the long
        # "ID_<image>_<diagnosis>,Label" submission format.
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub = sub.set_index("ID")
        sub = sub.unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"})
        sub = sub.drop("level_0", axis=1)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
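# Every entry point in this section wraps its stages in `timer(...)`, which is
# never defined here. A minimal sketch of such a context manager, assuming it
# reports elapsed time through a module-level logger like the scripts' LOGGER:
import logging
import time
from contextlib import contextmanager

LOGGER = logging.getLogger(__name__)  # stand-in for the scripts' logger

@contextmanager
def timer(name):
    # Log how long the wrapped block took.
    t0 = time.time()
    yield
    LOGGER.info('[{}] done in {:.1f} s'.format(name, time.time() - t0))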
def main():
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)[:10]  # NOTE: only the first 10 rows are read; looks like a leftover debug slice
        # Drop one excluded image and repoint pre/post references to it
        # back at the image itself.
        df = df[df.Image != "ID_6431af929"].reset_index(drop=True)
        df.loc[df.pre_SOPInstanceUID == "ID_6431af929", "pre1_SOPInstanceUID"] = \
            df.loc[df.pre_SOPInstanceUID == "ID_6431af929", "Image"]
        df.loc[df.post_SOPInstanceUID == "ID_6431af929", "post1_SOPInstanceUID"] = \
            df.loc[df.post_SOPInstanceUID == "ID_6431af929", "Image"]
        df = df[["Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID"]]
        ids = df["Image"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                       transforms=test_augmentation, black_crop=False,
                                       three_window=True, rescaling=False, n_tta=N_TTA)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                 num_workers=16, pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="imagenet", pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub = sub.set_index("ID")
        sub = sub.unstack().reset_index()
        sub["ID"] = sub["ID"] + "_" + sub["level_0"]
        sub = sub.rename(columns={0: "Label"})
        sub = sub.drop("level_0", axis=1)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_train.csv".format(EXP_ID), index=False)
def predict_loss(prev, paths, split_df):
    # prev: true values
    # paths: paths to the model weights
    t_preds = []
    for tm in range(3):
        tf.reset_default_graph()
        t_preds.append(
            predict(paths[-1:], build_hparams(hparams.params_s32),
                    back_offset=0, predict_window=288, n_models=3,
                    target_model=tm, seed=2, batch_size=50, asgd=True,
                    split_df=split_df))
    # Average the three target models' predictions.
    preds = sum(t_preds) / 3
    preds.index = [idx.decode('ascii') for idx in preds.index]
    # Mean MAE over the last 288 points of each series.
    res = 0
    for idx in preds.index:
        res += np.abs(preds.loc[idx, :] - prev.loc[idx, -288:]).sum()
    res /= len(preds.index) * 288
    return preds, res
def predict_loss(paths):
    # paths: paths to the model weights
    t_preds = []
    for tm in range(3):
        tf.reset_default_graph()
        t_preds.append(predict(paths[-1:], build_hparams(hparams.params_s32),
                               n_models=3, target_model=tm, seed=5,
                               batch_size=50, asgd=True))
    preds = sum(t_preds) / 3
    return preds
def main():
    with timer('load data'):
        df = pd.read_csv(TEST_PATH)
        # For slices without a pre/post neighbor, fall back to the slice itself.
        df["post_SOPInstanceUID"] = df["post_SOPInstanceUID"].fillna(df["SOPInstanceUID"])
        df["pre_SOPInstanceUID"] = df["pre_SOPInstanceUID"].fillna(df["SOPInstanceUID"])
        df = df[["Image", "pre_SOPInstanceUID", "post_SOPInstanceUID"]]
        ids = df["Image"].values
        pre_ids = df["pre_SOPInstanceUID"].values
        pos_ids = df["post_SOPInstanceUID"].values
        gc.collect()

    with timer('preprocessing'):
        test_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = RSNADatasetTest(df, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                       transforms=test_augmentation, black_crop=False,
                                       subdural_window=True, n_tta=N_TTA)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                 num_workers=16, pin_memory=True)
        del df, test_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="imagenet", pool_type="avg")
        model.load_state_dict(torch.load(model_path))
        model.to(device)
        model = torch.nn.DataParallel(model)

    with timer('predict'):
        pred = predict(model, test_loader, device, n_tta=N_TTA)
        pred = np.clip(pred, 1e-6, 1 - 1e-6)

    with timer('sub'):
        sub = pd.DataFrame(pred, columns=TARGET_COLUMNS)
        sub["ID"] = ids
        sub["PRE_ID"] = pre_ids
        sub["POST_ID"] = pos_ids
        sub = postprocess_multitarget(sub)
        LOGGER.info(sub.head())
        sub.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
def predict(self, text_x):
    if not hasattr(self, 'dataset'):
        raise ValueError(
            'Cannot call predict or evaluate on an untrained model. Train first!'
        )
    self.dataset.clear_test_set()
    for _x in text_x:
        self.dataset.add_data_entry(_x.tolist(), 0, part='test')
    return predict(
        model=self.model,
        iterator_function=self.dataset.get_next_test_batch,
        _batch_count=self.dataset.initialize_test_batches(),
        cuda_device=0 if self.cuda else -1,
    )
def index():
    learning_gif_url = get_gif_url("learning")

    # Get search query
    if request.method == "POST":
        user_input = request.form.get("user_input")
    # If no query, user_input is "You're awesome!"
    else:
        user_input = "You're awesome!"
    print("user_input = ", user_input)

    # Predict and get sentiment text
    sentiment = predict(user_input, cv, log_model)

    return render_template("index.html", user_input=user_input,
                           sentiment=sentiment, learning_gif_url=learning_gif_url)
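# `predict(user_input, cv, log_model)` above is imported from elsewhere. Given
# the `cv` / `log_model` names, it presumably vectorizes the text and maps a
# scikit-learn classifier's output to a sentiment string. A minimal sketch
# under that assumption (the name and label mapping here are hypothetical):
def predict_sentiment(text, vectorizer, model):
    # Vectorize the raw text and map the predicted class to a label.
    features = vectorizer.transform([text])  # e.g. a fitted CountVectorizer
    label = model.predict(features)[0]       # e.g. a fitted LogisticRegression
    return 'positive' if label == 1 else 'negative'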
def predict_loss(prev, paths):
    # prev: true values
    # paths: paths to the model weights
    t_preds = []
    for tm in range(3):
        tf.reset_default_graph()
        t_preds.append(
            predict(paths[-1:], build_hparams(hparams.params_s32),
                    n_models=3, target_model=tm, seed=5, batch_size=50,
                    asgd=True))
    preds = sum(t_preds) / 3
    # Mean relative error (absolute error normalized by the true value).
    res = 0
    for idx in preds.index:
        res += np.abs(preds[idx] - prev[idx]) / prev[idx]
    res /= 72
    return preds, res
def main():
    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df["loc_x"] = df["loc_x"] / 100
        df["loc_y"] = df["loc_y"] / 100
        y = df[TARGET_COLUMNS].values
        df = df[[ID_COLUMNS]]
        gc.collect()

    with timer("split data"):
        folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=0).split(df, y)
        for n_fold, (train_index, val_index) in enumerate(folds):
            train_df = df.loc[train_index]
            val_df = df.loc[val_index]
            y_train = y[train_index]
            y_val = y[val_index]
            if n_fold == fold_id:
                break

    with timer('preprocessing'):
        train_augmentation = Compose([
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05,
                                 alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ], p=0.5),
            RandomBrightnessContrast(p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
            Resize(img_size, img_size, p=1)
        ])
        val_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])

        train_dataset = KDDataset(train_df, y_train, img_size, IMAGE_PATH,
                                  id_colname=ID_COLUMNS, transforms=train_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  num_workers=2, pin_memory=True)
        val_dataset = KDDataset(val_df, y_val, img_size, IMAGE_PATH,
                                id_colname=ID_COLUMNS, transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                                num_workers=2, pin_memory=True)
        del df, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="../input/pytorch-pretrained-models/se_resnext50_32x4d-a260b3a4.pth",
                         pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)
        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)
        # model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    with timer('train'):
        best_score = 0
        for epoch in range(1, epochs + 1):
            seed_torch(SEED + epoch)
            # Decay the learning rate 10x for the final epochs.
            if epoch == epochs - 3:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1
            LOGGER.info("Starting {} epoch...".format(epoch))

            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer,
                                      device, N_CLASSES)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            y_pred, target, val_loss = validate(model, val_loader, criterion,
                                                device, N_CLASSES)
            score = roc_auc_score(target, y_pred)
            LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5)))
            LOGGER.info('val score: {}'.format(round(score, 5)))

            # Checkpoint on the best validation AUC.
            if score > best_score:
                best_score = score
                np.save("y_pred.npy", y_pred)
                torch.save(model.state_dict(), save_path)
                np.save("target.npy", target)

    with timer('predict'):
        test_df = pd.read_csv(TEST_PATH)
        test_ids = test_df["id"].values
        test_augmentation = Compose([
            Resize(img_size, img_size, p=1)
        ])
        test_dataset = KDDatasetTest(test_df, img_size, TEST_IMAGE_PATH,
                                     id_colname=ID_COLUMNS,
                                     transforms=test_augmentation, n_tta=2)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                 num_workers=2, pin_memory=True)

        model.load_state_dict(torch.load(save_path))
        pred = predict(model, test_loader, device, N_CLASSES, n_tta=2)
        print(pred.shape)

        results = pd.DataFrame({"id": test_ids, "is_star": pred.reshape(-1)})
        results.to_csv("results.csv", index=False)
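# `seed_torch(SEED + epoch)` above is not defined in this section. A common
# implementation of such a seeding helper, offered as an assumption about
# what it does:
import os
import random

import numpy as np
import torch

def seed_torch(seed=0):
    # Seed every RNG that can influence training, for reproducibility.
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True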
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS, transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                num_workers=8)
        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                         encoder_se_module=True, decoder_semodule=True,
                         h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION, attention_type="cbam")
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion,
                                                  device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        for i, (th, remove_mask_pixel) in enumerate(zip(ths, remove_pixels)):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            cls_ = cls[:, i]

            best = 0
            for th_cls in np.linspace(0, 1, 101):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                # Zero out masks that are too small or whose classifier
                # confidence is below the candidate threshold.
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                val_preds_[cls_ <= th_cls] = 0
                scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    best_th = th_cls
                else:
                    break  # stop once the dice starts to fall
            LOGGER.info('dice={} on {}'.format(best, best_th))
            scores.append(best)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
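# The threshold searches in this section rely on a `dice(y_true, y_pred)`
# helper that returns NaN when both masks are empty (which the callers then
# score as 1). A minimal sketch consistent with that behavior:
import numpy as np

def dice(y_true, y_pred):
    # Dice coefficient between two binary masks; NaN when both are empty.
    y_true = np.asarray(y_true, dtype=bool)
    y_pred = np.asarray(y_pred, dtype=bool)
    denom = y_true.sum() + y_pred.sum()
    if denom == 0:
        return np.nan
    return 2.0 * np.logical_and(y_true, y_pred).sum() / denom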
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        if N_CLASSES == 3:
            # Drop class 2 and shift classes 3 and 4 down.
            df.drop("EncodedPixels_2", axis=1, inplace=True)
            df = df.rename(columns={"EncodedPixels_3": "EncodedPixels_2"})
            df = df.rename(columns={"EncodedPixels_4": "EncodedPixels_3"})

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS, transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                num_workers=8)
        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                             encoder_se_module=True, decoder_semodule=True,
                             h_columns=False, skip=True, act="swish", freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        model.eval()
        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion,
                                                  device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        min_sizes = [300, 0, 600, 1600]
        for i in range(N_CLASSES):
            if i == 1:
                continue
            best = 0
            count = 0
            min_size = min_sizes[i]
            for th in [0.7 + i * 0.01 for i in range(30)]:
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                scores_ = []
                all_scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    y_pred_ = post_process(y_pred_ > 0.5, y_pred_, min_size, th)
                    score = dice(y_val_, y_pred_)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_), th))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    count = 0
                else:
                    count += 1
                if count == 3:  # early stop after three thresholds without improvement
                    break
            scores.append(best)
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
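# `post_process` above is also undefined here. Its call site,
# post_process(y_pred_ > 0.5, y_pred_, min_size, th), suggests
# connected-component filtering of a probability map. The sketch below is one
# plausible reading of that signature (binary mask, probabilities, minimum
# component size, probability threshold), not the author's implementation:
import cv2
import numpy as np

def post_process(mask, probability, min_size, threshold):
    # Keep only components of `mask` that cover at least `min_size` pixels
    # and contain at least one probability above `threshold`.
    num, labels = cv2.connectedComponents(mask.astype(np.uint8))
    cleaned = np.zeros_like(probability, dtype=np.float32)
    for label in range(1, num):
        component = labels == label
        if component.sum() >= min_size and probability[component].max() > threshold:
            cleaned[component] = 1.0
    return cleaned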
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)
        # Per-class binary labels: does the image contain each defect class?
        y1 = (df.EncodedPixels_1 != "-1").astype("float32").values.reshape(-1, 1)
        y2 = (df.EncodedPixels_2 != "-1").astype("float32").values.reshape(-1, 1)
        y3 = (df.EncodedPixels_3 != "-1").astype("float32").values.reshape(-1, 1)
        y4 = (df.EncodedPixels_4 != "-1").astype("float32").values.reshape(-1, 1)
        y = np.concatenate([y1, y2, y3, y4], axis=1)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        y_val = y[df.fold_id == FOLD_ID]
        val_dataset = SeverCLSDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES, y_val,
                                      id_colname=ID_COLUMNS, transforms=None)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                num_workers=8, pin_memory=True)

        models = []
        model = cls_models.SEResNext(num_classes=N_CLASSES)
        model.load_state_dict(torch.load(base_model_cls))
        model.to(device)
        model.eval()
        models.append(model)
        """model = cls_models.ResNet(num_classes=N_CLASSES, pretrained=None)
        model.load_state_dict(torch.load(base_model_res))
        model.to(device)
        model.eval()
        models.append(model)"""

    with timer('predict cls'):
        criterion = torch.nn.BCEWithLogitsLoss()
        valid_loss, y_val, y_true = predict_cls(models, val_loader, criterion, device)
        # y_val = np.load("../exp_cls/y_pred_ema_ckpt8.npy")
        LOGGER.info("val loss={}".format(valid_loss))

    with timer('preprocessing seg'):
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS, transforms=val_augmentation,
                                   class_y=y_val)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                num_workers=8)
        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                         encoder_se_module=True, decoder_semodule=True,
                         h_columns=False, skip=True, act="swish", freeze_bn=True,
                         classification=CLASSIFICATION, attention_type="cbam",
                         center=True)
        model.load_state_dict(torch.load(base_model))
        model.to(device)
        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(model, val_loader, criterion,
                                                  device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        for i, (th, remove_mask_pixel) in enumerate(zip(ths, remove_pixels)):
            if i <= 1:
                continue
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            cls_ = cls[:, i]

            best = 0
            for th_cls in np.linspace(0, 1, 101):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                val_preds_[cls_ <= th_cls] = 0
                scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                    best_th = th_cls
                # else:
                #     break
            LOGGER.info('dice={} on {}'.format(best, best_th))
            scores.append(best)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
#                  embed_size=len(dct),
#                  device=device)
# model = Bi_RNN_ATTN(class_num=len(CATEGIRY_LIST),
#                     embed_size=len(dct),
#                     embed_dim=64,
#                     device=device)
lr = 0.001
optimizer = optim.Adam(model.parameters(), lr=lr)

# train
logger.info('training...')
history = trainer.train(model, optimizer, train_dl, valid_dl, device=device, epochs=5)

# evaluate
loss, acc = trainer.evaluate(model, valid_dl, device=device)

# predict
logger.info('predicting...')
y_pred = trainer.predict(model, test_dl, device=device)
y_true = test_ds.labels
test_acc = (y_true == y_pred).sum() / y_pred.shape[0]
logger.info('test - acc: {}'.format(test_acc))
def mean_smape(true, pred):  # function name assumed; only the body appears here
    # Mean SMAPE, ignoring NaN entries (where both series are zero).
    raw_smape = smape(true, pred)
    masked_smape = np.ma.array(raw_smape, mask=np.isnan(raw_smape))
    return masked_smape.mean()

from make_features import read_all

df_all = read_all()
df_all.columns

prev = df_all
paths = [p for p in tf.train.get_checkpoint_state('data/cpt/s32').all_model_checkpoint_paths]

t_preds = []
for tm in range(3):
    tf.reset_default_graph()
    t_preds.append(predict(paths, build_hparams(hparams.params_s32), back_offset=0,
                           predict_window=63, n_models=3, target_model=tm, seed=2,
                           batch_size=2048, asgd=True))
preds = sum(t_preds) / 3

missing_pages = prev.index.difference(preds.index)
# Use zeros for missing pages
rmdf = pd.DataFrame(index=missing_pages,
                    data=np.tile(0, (len(preds.columns), len(missing_pages))).T,
                    columns=preds.columns)
f_preds = preds.append(rmdf).sort_index()
# Zero out negative and near-zero predictions (below 0.5)
f_preds[f_preds < 0.5] = 0
# Round predictions to nearest int
f_preds = np.round(f_preds).astype(np.int64)
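# The masked-mean helper above assumes an elementwise `smape(true, pred)` that
# yields NaN where both values are zero, which the masked mean then ignores.
# A sketch of the usual definition:
import numpy as np

def smape(true, pred):
    # Elementwise symmetric mean absolute percentage error;
    # 0/0 yields NaN, which the caller masks out.
    true = np.asarray(true, dtype=np.float64)
    pred = np.asarray(pred, dtype=np.float64)
    with np.errstate(invalid='ignore'):
        return 200.0 * np.abs(pred - true) / (np.abs(true) + np.abs(pred))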
import argparse

from trainer import train, evaluate, predict

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run the train, eval, or predict pipeline.')
    parser.add_argument('mode', help='train, eval or predict')
    args = parser.parse_args()

    if args.mode == "train":
        train()
    elif args.mode == "eval":
        evaluate()
    elif args.mode == "predict":
        predict()
def main(seed):
    with timer('load data'):
        df = pd.read_csv(FOLD_PATH)

    with timer('preprocessing'):
        val_df = df[df.fold_id == FOLD_ID]
        val_augmentation = None
        val_dataset = SeverDataset(val_df, IMG_DIR, IMG_SIZE, N_CLASSES,
                                   id_colname=ID_COLUMNS, transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False,
                                num_workers=8)
        del val_df, df, val_dataset
        gc.collect()

    with timer('create model'):
        # Ensemble: several CBAM U-Nets plus one older U-Net variant.
        models = []
        for p in base_model_res:
            model = smp.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                             encoder_se_module=True, decoder_semodule=True,
                             h_columns=False, skip=True, act="swish", freeze_bn=True,
                             classification=CLASSIFICATION, attention_type="cbam",
                             center=True)
            model.load_state_dict(torch.load(p))
            model.to(device)
            model.eval()
            models.append(model)

        model = smp_old.Unet('resnet34', encoder_weights="imagenet", classes=N_CLASSES,
                             encoder_se_module=True, decoder_semodule=True,
                             h_columns=False, skip=True, act="swish", freeze_bn=True,
                             classification=CLASSIFICATION)
        model.load_state_dict(torch.load(base_model_res_old))
        model.to(device)
        model.eval()
        models.append(model)
        criterion = torch.nn.BCEWithLogitsLoss()

    with timer('predict'):
        valid_loss, y_pred, y_true, cls = predict(models, val_loader, criterion,
                                                  device, classification=CLASSIFICATION)
        LOGGER.info('Mean valid loss: {}'.format(round(valid_loss, 5)))

        scores = []
        all_scores = []
        for i, th in enumerate(ths):
            sum_val_preds = np.sum(
                y_pred[:, i, :, :].reshape(len(y_pred), -1) > th, axis=1)
            best = 0
            all_scores_ = []
            for n_th, remove_mask_pixel in enumerate(
                    [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800]):
                val_preds_ = copy.deepcopy(y_pred[:, i, :, :])
                val_preds_[sum_val_preds < remove_mask_pixel] = 0
                scores_ = []
                for y_val_, y_pred_ in zip(y_true[:, i, :, :], val_preds_):
                    score = dice(y_val_, y_pred_ > 0.5)
                    if np.isnan(score):
                        scores_.append(1)
                    else:
                        scores_.append(score)
                LOGGER.info('dice={} on {}'.format(np.mean(scores_), remove_mask_pixel))
                if np.mean(scores_) >= best:
                    best = np.mean(scores_)
                all_scores_.append(np.mean(scores_))
            scores.append(best)
            all_scores.append(all_scores_)

        LOGGER.info('holdout dice={}'.format(np.mean(scores)))
        np.save("all_scores_fold{}.npy".format(FOLD_ID), np.array(all_scores))