def __init__(self, fold=0, mode="train", image_size=1024, normalized=False):
    """Set up one split of the SIIM pneumothorax dataset.

    For "train"/"valid" the fold table is split on ``fold_id``; for "test"
    the stage-2 sample submission is loaded instead and every row is marked
    empty ("-1"). Image/mask directories come from the project path config.
    """
    assert mode in ("train", "valid", "test"), mode

    self.df = pd.read_csv("tables/folds_v6_st2.csv")
    if mode == "train":
        # everything except the held-out fold
        self.df = self.df[self.df["fold_id"] != fold]
    elif mode == "valid":
        # only the held-out fold
        self.df = self.df[self.df["fold_id"] == fold]
    else:
        # test: sample submission with a placeholder "-1" RLE per row
        # NOTE(review): the column name intentionally keeps its leading
        # space (" EncodedPixels") to match the competition CSV format.
        self.df = pd.read_csv("tables/stage_2_sample_submission.csv")
        self.df[" EncodedPixels"] = ["-1"] * self.df.shape[0]

    print(self.df.head())
    print(f"{mode} {self.df.shape[0]}")

    # group rows by image so multi-mask images are handled together
    self.gb = self.df.groupby("ImageId")
    self.fnames = list(self.gb.groups.keys())

    paths = get_paths()
    self.paths = paths
    self.height = image_size
    self.width = image_size
    self.image_dir = os.path.join(paths["dataset"]["path"],
                                  paths["dataset"]["images_dir"])
    self.mask_dir = os.path.join(paths["dataset"]["path"],
                                 paths["dataset"]["masks_dir"])
    if mode == "test":
        # test images live in a separate directory
        self.image_dir = os.path.join(paths["dataset"]["path"],
                                      paths["dataset"]["test_dir"])

    # augmentations only for training
    self.augs = False
    if mode == "train":
        self.augs = True
        self.transform = strong_aug()

    self.cache = False
    self.norm_transform = Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    self.normalized = normalized
def check_iter():
    """Sanity-check the dataloader by plotting de-normalized image batches."""
    batch_size = 8
    paths = get_paths()['dataset']
    data_params = get_params()['data_params']

    df = pd.read_csv('tables/folds_n01.csv')
    print(df.head())

    dataset = CifarDataset('test', data_params, paths, df, valid_transform())
    loader = data.DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=12,
                             drop_last=False)

    for batch in loader:
        images = batch['image']
        labels = batch['y'].numpy()
        # CHW -> HWC for plotting
        images = np.transpose(images.numpy(), (0, 2, 3, 1))

        plt.figure(figsize=(25, 35))
        for i in range(batch_size):
            plt.subplot(2, batch_size // 2, i + 1)
            plt.title(idx2klass.get(labels[i]))
            # undo ImageNet normalization before display
            restored = IMAGENET_STD * images[i] + IMAGENET_MEAN
            plt.imshow(restored)
        plt.show()
def main():
    """Run test-time prediction for each requested model over the fold list."""
    args = parse_args()
    print(args)

    paths = get_paths()
    weights_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["weights"])
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])

    # "a+b+c" means an ensemble of several model names
    models = args.models.split('+') if "+" in args.models else [args.models]

    scores = []
    for model in models:
        for mode in ["test"]:  # 'test'
            # a negative fold means "all 8 folds"
            folds = [args.fold] if args.fold >= 0 else list(range(8))
            for fold in folds:
                score = predict_fold(model,
                                     fold=fold,
                                     mode=mode,
                                     out_folder=dumps_dir,
                                     weights_dir=weights_dir)
                scores.append(score)

    print(scores[:10])
def get_loaders(batch_size=8):
    """Build train/valid/test dataloaders from the fold table and sample csv."""
    paths = get_paths()['dataset']
    data_params = get_params()['data_params']

    def make_loader(frame, mode, transform, shuffle):
        # one-line factory: all three loaders share workers/drop_last settings
        dataset = CifarDataset(mode, data_params, paths, frame, transform)
        return data.DataLoader(dataset,
                               batch_size=batch_size,
                               shuffle=shuffle,
                               num_workers=32,
                               drop_last=False)

    folds_df = pd.read_csv('tables/folds_n01.csv')
    train_loader = make_loader(folds_df, 'train', train_transform(), True)
    valid_loader = make_loader(folds_df, 'valid', valid_transform(), False)

    test_df = pd.read_csv(os.path.join(paths['path'], paths['sample']))
    test_loader = make_loader(test_df, 'test', valid_transform(), False)

    return train_loader, valid_loader, test_loader
def main():
    """Convert pre-computed SOTA ensemble predictions into a submission csv."""
    global model_name, n_fold, mask_thresh, min_size_thresh, dilation, dumps_dir
    model_name = "se154+sx50+sx101"
    n_fold = 8
    mask_thresh = 0.5
    min_size_thresh = 1000
    dilation = 0

    paths = get_paths()
    dumps_dir = "/mnt/ssd2/dataset/pneumo/predictions/old/sota_predictions"

    df = pd.read_csv("tables/sample_submission.csv")
    # convert_one reads each image's cached prediction; fan out over cores
    with Pool() as pool:
        encoded = pool.map(convert_one, df["ImageId"])
    df["EncodedPixels"] = encoded

    os.makedirs("subm", exist_ok=True)
    out_name = (f"subm/{model_name}_nf{n_fold}_{mask_thresh}"
                f"_{min_size_thresh}_{dilation}.csv")
    df.to_csv(out_name, index=False)

    # " -1" (leading space) is the empty-mask sentinel in this pipeline
    empty = df[df["EncodedPixels"] == " -1"]
    print(f"empty {empty.shape[0] / df.shape[0]}")
def main_bad():
    """Build a submission from a single model's raw predictions (debug path)."""
    global model_name, n_fold, mask_thresh, min_size_thresh, dilation, dumps_dir
    model_name = "se154"
    n_fold = 2
    mask_thresh = 0.4892385788890003
    min_size_thresh = 1598
    dilation = 1

    paths = get_paths()
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])

    y_preds, ids = get_all_data(model_name, n_fold)
    print(ids[:10])
    print(y_preds.shape)

    df = pd.DataFrame()
    df["ImageId"] = ids

    # one array per id, in id order
    y_preds_lst = [y_preds[idx] for idx in range(len(ids))]

    # dump each probability map as a uint8 png for visual debugging
    for idx, sample_id in enumerate(ids):
        cv2.imwrite(
            f"/mnt/ssd2/dataset/pneumo/predictions/uint8/se154/debug{sample_id}.png",
            np.uint8(255 * y_preds_lst[idx]))

    with Pool() as pool:
        rles = list(
            tqdm(pool.imap_unordered(convert_mask, y_preds_lst),
                 total=len(y_preds_lst),
                 desc="converting"))
    df["EncodedPixels"] = rles
    df.drop_duplicates("ImageId", inplace=True)

    os.makedirs("subm", exist_ok=True)
    sample = pd.read_csv("tables/sample_submission.csv")
    # keep only ids the competition actually expects
    df = df[df["ImageId"].isin(sample["ImageId"])]
    df.to_csv(
        f"subm/{model_name}_nf{n_fold}_{mask_thresh}_{min_size_thresh}_{dilation}.csv",
        index=False)

    empty = df[df["EncodedPixels"] == " -1"]
    print(f"empty {empty.shape[0] / df.shape[0]}")
def get_data(model_name="sx101", fold=0):
    """Load ground-truth masks and cached predictions for one validation fold.

    Returns (y_preds, y_trues, scores, ids) where scores are per-image dice
    at threshold 0.5. Also prints the mean dice.
    """
    paths = get_paths()
    dumps_dir = osp.join(paths["dumps"]["path"], paths["dumps"]["predictions"])
    dst = osp.join(dumps_dir, model_name, f"{fold}_{model_name}_valid")

    dataset_valid = SIIMDataset_Unet(mode="valid", fold=fold)
    vloader = torch.utils.data.DataLoader(dataset_valid,
                                          batch_size=1,
                                          shuffle=False,
                                          num_workers=NCORE)

    # collect binarized ground truth and the matching image ids
    y_trues, ids = [], []
    progress_bar = tqdm(enumerate(vloader),
                        total=len(vloader),
                        desc=f"generating masks f{fold}")
    for _, batch in progress_bar:
        images, targets, batch_ids = batch
        y_trues.append(np.array(targets[0, 0] > 0.5))
        ids.append(batch_ids[0])

    # cached predictions live as one png per image id
    filenames = [osp.join(dst, f"{sample_id}.png") for sample_id in ids]
    with Pool(NCORE) as pool:
        y_preds = list(
            tqdm(pool.imap_unordered(read_prediction, filenames),
                 total=len(filenames),
                 desc="reading predictions"))

    scores = [dice_coef_metric(yp > 0.5, yt)
              for yp, yt in zip(y_preds, y_trues)]
    print(np.mean(scores))

    return y_preds, y_trues, scores, ids
import os import cv2 import matplotlib.pyplot as plt import pandas as pd from n01_config import get_paths DATA = get_paths()['dataset'] LABELS = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'] def main(): df = pd.read_csv(os.path.join(DATA['path'], DATA['lables_csv'])) print(df.head()) labels = sorted(set(df['label'].tolist())) print(labels) for label in LABELS: tdf = df[df['label'] == label] print(f'{label}: {tdf.shape[0]}') label = LABELS[0] tdf = df[df['label'] == label] tdf.reset_index(inplace=True, drop=True) plt.figure() for i in range(9): plt.subplot(3,3,1+i) filename = f'{tdf.loc[i, "id"]}.png'
import numpy as np from scipy import ndimage from skimage import measure, morphology from tqdm import tqdm from n01_config import get_paths from n03_loss_metric import dice_coef_metric, dice_coef_metric_batch # DEVICE = torch.device("cuda:0") os.environ["MKL_NUM_THREADS"] = "1" os.environ["NUMEXPR_NUM_THREADS"] = "1" os.environ["OMP_NUM_THREADS"] = "1" PATHS = get_paths() PREDICTS = PATHS["dumps"]["predictions"] def get_data_npz(model_name="sx101", fold=0, mode="valid"): if "+" not in model_name: models = [model_name] else: models = model_name.split("+") name_pattern = f"{fold}_{models[0]}_{mode}" filename = osp.join(PREDICTS, models[0], name_pattern, f"{name_pattern}_fp32_d.npz") tfz = np.load(filename) y_preds, ids, gts, disagreements = tfz["outputs"], tfz["ids"], tfz[ "gts"], tfz['disagreements']
rle.append(str(runStart)) rle.append(str(runLength)) runStart = -1 runLength = 0 currentPixel = 0 elif runStart > -1: runLength += 1 lastColor = currentColor currentPixel += 1 return " " + " ".join(rle) if __name__ == "__main__": fold = 5 model_name = 'UnetSEResNext101' paths = get_paths() test_dir = os.path.join(paths["dataset"]["path"], paths["dataset"]["test_dir"]) IMG_SIZE = 1024 # 448 SMALL_OBJ_THRESHOLD = 2000 device = torch.device("cuda:0") model_ft = torch.load(f"outs/{model_name}_fold{fold}_best.pth") dst = 'outs/tmp' dst_dir = osp.join(dst, f'{model_name}_fold{fold}') os.makedirs(dst_dir, exist_ok=True) model_ft.to(device)