def get_learner(model_path, model_file, test_path, test_file):
    """
    Loads the model learner from the given model and test paths and files.

    :param model_path: Path to the dir where the .pkl file is located.
    :param model_file: If multiple .pkl files are located in the same path,
        provide the exact model file name.
    :param test_path: Path to the dir where the test data is located.
    :param test_file: Preprocessed test_labels.csv file, as produced by
        preprocess.py. It eases the fetching of the ImageList.
    :return: The model learner.
    """
    learn = load_learner(model_path, file=model_file,
                         test=ImageList.from_csv(test_path, test_file,
                                                 folder='test'))
    return learn
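A minimal usage sketch, assuming the usual fastai v1 imports (`from fastai.vision import *`); the paths and file names below are hypothetical placeholders, not part of the project layout:

# Hypothetical example: the paths and file names are placeholders.
learn = get_learner(model_path='models', model_file='stage2.pkl',
                    test_path='data', test_file='test_labels.csv')
# Score the attached test set.
preds, _ = learn.get_preds(ds_type=DatasetType.Test)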
def getdata(self, bs=32, num_workers=16, noise=True, blur=True, basic=True):
    """Returns the dataloader to be used during training.

    The returned data is normalized and the images are resized to 224x224 px.

    Parameters
    ----------
    bs : int, optional
        Batch size, by default 32.
    num_workers : int, optional
        Number of worker processes used for fetching data, by default 16.
    noise : bool, optional
        Whether to add noisy patches as augmentation, by default True.
    blur : bool, optional
        Whether to add blur augmentation, by default True.
    basic : bool, optional
        Whether to do basic augmentations such as rotation, flipping,
        etc., by default True.

    Returns
    -------
    dataloader
        Dataloader with random sampling enabled.
    """
    print("Going through the data..")
    # Concatenate the test/val/train split files into a single list.txt;
    # fileinput (stdlib) iterates over the three files as one stream.
    filenames = ["test", "val", "train"]
    filenames = [self.root / (x + ".txt") for x in filenames]
    with open(self.root / "list.txt", "w") as fout:
        fin = fileinput.input(filenames)
        for line in fin:
            fout.write(line)
        fin.close()
    # Build a normalized databunch; the first 22169 rows of list.txt
    # (written before the train rows) become the validation split.
    self.data = (
        ImageList.from_csv(
            path=self.root, folder="images", csv_name="list.txt", delimiter=" "
        )
        .split_by_idx(list(range(22169)))
        .label_from_df()
        .transform(self.transforms(noise, blur, basic), size=224)
        .databunch(bs=bs, num_workers=num_workers)
    ).normalize()
    return self.data
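A hypothetical call; the class name and constructor below are placeholders, since the snippet only shows the method and assumes `self.root` and `self.transforms` already exist:

# Hypothetical usage; `DatasetWrapper` and its constructor are assumed,
# not shown in the snippet above.
ds = DatasetWrapper(root=Path("data"))
data = ds.getdata(bs=64, num_workers=8, blur=False)
print(data.c, len(data.train_ds), len(data.valid_ds))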
def __init__(self, learn_name, tta, exp_name):
    """Logs test info to 'saved/test_info.csv' after initialization.

    Args:
        learn_name (str): Name of the saved Learner file; loads from
            f'saved/{learn_name}.pkl'.
        tta (bool): Whether to perform test-time augmentation.
        exp_name (str): Experiment name for logging.
    """
    self.exp_name = exp_name

    # Initialize test ImageList
    test_imgs = ImageList.from_csv(path=DATA_DIR,
                                   folder=TEST_FOLDER,
                                   csv_name=TEST_DF_NAME,
                                   cols=IMG_COL)

    # Initialize Learner from test data
    self.learn = load_learner(
        path=SAVED_DIR,
        file=f'{learn_name}.pkl',
        test=test_imgs,
    )

    # Get classes list
    self.classes = self.learn.data.classes

    # Initialize ground truth labels
    self._init_labels()

    # Get probability scores from the model
    if tta:
        self.y_prob, _ = self.learn.TTA(ds_type=DatasetType.Test)
    else:
        self.y_prob, _ = self.learn.get_preds(ds_type=DatasetType.Test)

    # Extract predicted labels from probability scores
    self.y_pred = np.argmax(self.y_prob, axis=1)

    # Compute metrics
    self._init_metrics()

    # Log test info
    self._log_info()
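How this evaluator is presumably instantiated; `Tester` is a hypothetical name for the class, and the learner/experiment names are placeholders:

# Hypothetical usage; the class name and arguments are placeholders.
# Instantiation runs the whole pipeline: load the exported learner,
# predict on the test set (with TTA here), compute metrics, and log.
tester = Tester(learn_name='resnet34_stage2', tta=True, exp_name='baseline_tta')
print(tester.y_pred[:10])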
def __init__(self):
    # Read in the training DataFrame
    df = pd.read_csv(os.path.join(DATA_DIR, TRAIN_DF_NAME))

    # Get stratified split indices (0.2 presumably the validation fraction)
    train_idx, val_idx = get_indices_split(df, CLASS_COL, 0.2)

    # Initialize the augmentation/transformation function.
    self._init_tfms()

    # Initialize the ImageList
    # (source image data and labels before any transformations)
    self.src = (
        ImageList.from_csv(path=DATA_DIR,
                           csv_name=TRAIN_DF_NAME,
                           folder=TRAIN_FOLDER,
                           cols=IMG_COL)
        # Stratified split
        .split_by_idxs(train_idx, val_idx)
        # Get labels
        .label_from_df(CLASS_COL))
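The snippet stops at the labelled `src`; in fastai v1 the next step is typically to apply the transforms and bundle everything into a normalized databunch. A hedged sketch of that continuation (the method name, `self.tfms`, and the size/batch defaults are assumptions, not shown above):

# Hypothetical continuation: turn the labelled src into a databunch.
# `self.tfms` and the defaults here are assumed names/values.
def get_data(self, bs=64, size=224):
    return (self.src
            .transform(self.tfms, size=size)
            .databunch(bs=bs)
            .normalize(imagenet_stats))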
filenames = train['fname'].values
filenames = filenames.reshape(-1, 1)

# Out-of-fold and test prediction buffers (80 classes)
oof_preds = np.zeros((len(train), 80))
test_preds = np.zeros((len(test), 80))

tfms = get_transforms(do_flip=False, max_rotate=0, max_lighting=0.1,
                      max_zoom=0, max_warp=0.)

df = pd.read_csv(CSV_TRN_MERGED)
cols = list(df.columns[1:])
i = 0
# First len(train) rows go to the validation split
val_index = range(len(train))

# Our classifier stuff
src = (ImageList.from_csv(WORK/'image', Path('../../')/DATA/'train_merged.csv',
                          folder='trn_merged2', suffix='.jpg')
       .split_by_idx(val_index)
       .label_from_df(cols=list(df.columns[1:])))
       # .label_from_df(label_delim=',')

data = (src.transform(tfms, size=128).databunch(bs=64).normalize())

f_score = partial(fbeta, thresh=0.2)
learn = cnn_learner(data, models.xresnet50, pretrained=False,
                    metrics=[f_score]).mixup(stack_y=False)
learn.fit_one_cycle(125, 1e-2)

# Test-time augmentation: collect predictions from each augmented pass
all_preds = list(custom_tta(learn))
stacked = torch.stack(all_preds)
new_preds = []
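The snippet is truncated right after `new_preds = []`. A common way to finish a TTA loop like this is to average the stacked passes into a single prediction per image; a sketch under that assumption (not the author's confirmed logic):

# Assumed continuation: average the stacked TTA passes. The original
# code is cut off here, so this is a sketch, not the exact logic.
avg_preds = stacked.mean(dim=0)  # shape: (n_images, 80)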