Esempio n. 1
0
def get_learner(model_path, model_file, test_path, test_file):
    """
    Build a learner from an exported model file and attach a test set to it.

    :param model_path: Directory containing the exported .pkl file.
    :param model_file: Name of the .pkl file to load (needed to pick one when
        the directory holds several).
    :param test_path: Directory containing the test data.
    :param test_file: CSV describing the test items (the preprocessed
        test_labels.csv); it is read relative to ``test_path`` and the images
        are looked up in its ``test`` sub-folder.
    :return: The learner returned by ``load_learner``.
    """
    # Describe the test items first, then hand them to the loader.
    test_items = ImageList.from_csv(test_path, test_file, folder='test')
    return load_learner(model_path, file=model_file, test=test_items)
Esempio n. 2
0
    def getdata(self, bs=32, num_workers=16, noise=True, blur=True, basic=True):
        """Build, store and return the data bunch used during training.

        The ``test.txt``, ``val.txt`` and ``train.txt`` lists found in
        ``self.root`` are concatenated, in that order, into
        ``self.root / "list.txt"`` (overwritten on every call). The combined
        list is then loaded as an ``ImageList`` over the ``images`` folder,
        split, labelled, transformed with images resized to 224x224px, and
        normalized.

        Parameters
        ----------
        bs : int, optional
            Batch size, by default 32
        num_workers : int, optional
            Num of process used for fetching data, by default 16
        noise : bool, optional
            Whether to add noisy patches as augmentation, by default True.
            Forwarded to ``self.transforms``.
        blur : bool, optional
            Whether to add blur augmentation, by default True.
            Forwarded to ``self.transforms``.
        basic : bool, optional
            Whether to do basic augmentation like rotation, flipping, etc.,
            by default True. Forwarded to ``self.transforms``.

        Returns
        -------
        data bunch
            The object produced by ``.databunch(...).normalize()``. It is
            also stored on ``self.data``.
        """
        print("Going through the data..")

        list_files = [
            self.root / (name + ".txt") for name in ("test", "val", "train")
        ]
        # Both handles are managed by ``with`` so they are closed even when a
        # read or write fails. A fileinput stream left open by an exception
        # would make the next ``fileinput.input()`` call raise
        # ``RuntimeError: input() already active``.
        with open(self.root / "list.txt", "w") as fout, \
                fileinput.input(list_files) as fin:
            fout.writelines(fin)

        items = ImageList.from_csv(
            path=self.root, folder="images", csv_name="list.txt", delimiter=" "
        )
        # The first 22169 row indices are handed to ``split_by_idx``.
        # NOTE(review): presumably these are the test + val entries (they are
        # written to list.txt first) and form the held-out split -- confirm
        # the hard-coded count against the dataset files.
        self.data = (
            items.split_by_idx(list(range(22169)))
            .label_from_df()
            .transform(self.transforms(noise, blur, basic), size=224)
            .databunch(bs=bs, num_workers=num_workers)
        ).normalize()
        return self.data
Esempio n. 3
0
    def __init__(self, learn_name, tta, exp_name):
        """Load a saved Learner, predict on the test set and record results.

        According to the original documentation, test info is logged to
        'saved/test_info.csv' once initialization has finished.

        Args:
            learn_name (str): Stem of the saved Learner file; the file
                f'{learn_name}.pkl' is loaded from SAVED_DIR.
            tta (boolean): If truthy, predict with test time augmentation;
                otherwise use plain predictions.
            exp_name (str): Experiment name for logging.
        """
        self.exp_name = exp_name

        # Test images are listed in a CSV under DATA_DIR.
        test_items = ImageList.from_csv(
            path=DATA_DIR,
            folder=TEST_FOLDER,
            csv_name=TEST_DF_NAME,
            cols=IMG_COL,
        )

        # Restore the saved Learner with the test items attached.
        learner_file = f'{learn_name}.pkl'
        self.learn = load_learner(path=SAVED_DIR, file=learner_file, test=test_items)

        # Class names known to the restored Learner.
        self.classes = self.learn.data.classes

        # Ground truth labels are prepared before any prediction is made.
        self._init_labels()

        # Pick the prediction routine once, then score the test set with it.
        predict = self.learn.TTA if tta else self.learn.get_preds
        self.y_prob, _ = predict(ds_type=DatasetType.Test)

        # The predicted label is the index of the highest score in each row.
        self.y_pred = np.argmax(self.y_prob, axis=1)

        # Metrics first, then logging, in that order.
        self._init_metrics()
        self._log_info()
Esempio n. 4
0
    def __init__(self):
        """Prepare the split, labelled image source used for training.

        Reads the training CSV from DATA_DIR, obtains train/validation
        indices from ``get_indices_split``, sets up the transforms via
        ``self._init_tfms()`` and stores the resulting labelled item list on
        ``self.src`` (no transformations applied yet).
        """
        # Load the training table and derive the split from it.
        # NOTE(review): 0.2 is presumably the validation fraction and the
        # split is presumably stratified on CLASS_COL -- confirm against
        # get_indices_split.
        train_df = pd.read_csv(os.path.join(DATA_DIR, TRAIN_DF_NAME))
        train_idx, val_idx = get_indices_split(train_df, CLASS_COL, 0.2)

        # Set up the augmentation/transformation function.
        self._init_tfms()

        # Source items: image paths come from IMG_COL of the training CSV.
        items = ImageList.from_csv(
            path=DATA_DIR,
            csv_name=TRAIN_DF_NAME,
            folder=TRAIN_FOLDER,
            cols=IMG_COL,
        )
        # Apply the precomputed split, then attach labels from CLASS_COL.
        split_items = items.split_by_idxs(train_idx, val_idx)
        self.src = split_items.label_from_df(CLASS_COL)
Esempio n. 5
0
# Training file names as a single-column 2-D array.
filenames = train['fname'].values.reshape(-1, 1)

# Zero-initialised prediction buffers, one row per sample and 80 columns.
oof_preds = np.zeros((len(train), 80))
test_preds = np.zeros((len(test), 80))

# Augmentation: only a small lighting change is enabled.
tfms = get_transforms(
    do_flip=False,
    max_rotate=0,
    max_lighting=0.1,
    max_zoom=0,
    max_warp=0.,
)

# Every column after the first in the merged CSV is used as a label column.
df = pd.read_csv(CSV_TRN_MERGED)
cols = list(df.columns[1:])
i = 0

# Every row index of `train` is passed to split_by_idx below.
val_index = range(len(train))

# Our classifier stuff
# (alternative labelling kept for reference: .label_from_df(label_delim=','))
src = (
    ImageList.from_csv(
        WORK/'image',
        Path('../../')/DATA/'train_merged.csv',
        folder='trn_merged2',
        suffix='.jpg',
    )
    .split_by_idx(val_index)
    .label_from_df(cols=list(df.columns[1:]))
)

data = src.transform(tfms, size=128).databunch(bs=64).normalize()

# F-beta metric with a fixed 0.2 threshold.
f_score = partial(fbeta, thresh=0.2)

# Non-pretrained xresnet50 trained with mixup.
learn = cnn_learner(
    data, models.xresnet50, pretrained=False, metrics=[f_score]
).mixup(stack_y=False)
learn.fit_one_cycle(125, 1e-2)

# Collect the predictions yielded by custom_tta and stack them into one tensor.
all_preds = list(custom_tta(learn))

stacked = torch.stack(all_preds)

new_preds = []