Ejemplo n.º 1
0
    def __init__(self,
                 class_type,
                 image_dir,
                 label_filepath,
                 split,
                 label2id=None,
                 randomize=False):
        """Read the label CSV, split the image folders and build three datasets.

        Args:
            class_type: stored unchanged on ``self.class_type``.
            image_dir: forwarded to ``__read_folders``.
            label_filepath: path handed to ``csv_utils.read``.
            split: forwarded to ``__read_folders``.
            label2id: stored unchanged and passed to every ``CustomDataset``.
            randomize: forwarded to ``__read_folders``.

        Raises:
            AssertionError: if the summed dataset lengths differ from the
                summed metadata lengths.
        """
        self.class_type = class_type
        self.label2id = label2id
        self.__labels_csv = csv_utils.read(label_filepath)

        folders = self.__read_folders(image_dir, split, randomize)
        self.__metadata_train, self.__metadata_dev, self.__metadata_test = folders

        def build(metadata):
            # Every split shares the same label mapping and label table.
            return CustomDataset(metadata, self.label2id, self.__labels_csv)

        self.train = build(self.__metadata_train)
        self.dev = build(self.__metadata_dev)
        self.test = build(self.__metadata_test)

        pairs = (
            (self.train, self.__metadata_train),
            (self.dev, self.__metadata_dev),
            (self.test, self.__metadata_test),
        )

        # Report each dataset size followed by the size of its metadata.
        for dataset, metadata in pairs:
            print(len(dataset))
            print(len(metadata))

        # Sanity check: wrapping the metadata must not add or drop samples.
        dataset_total = sum(len(dataset) for dataset, _ in pairs)
        metadata_total = sum(len(metadata) for _, metadata in pairs)
        assert dataset_total == metadata_total
Ejemplo n.º 2
0
    def gen_label(self, path, pos_label="mitosis", neg_label="no-mitosis"):
        """Return the label of the tile stored at ``path``.

        The tile file name is expected to look like ``<x>-<y>-<r>.png`` and the
        tile's directory, with the ``self.tiles_dir`` prefix removed, is used
        as the image id. Example: ``<tiles_dir>/12/04/0-600-90.png`` gives
        image id ``12/04``, ``x = 0``, ``y = 600`` and the label file
        ``<label_dir>/12/04.csv``.

        Args:
            path: path of the tile image.
            pos_label: value returned when a labelled point lies in the tile.
            neg_label: value returned otherwise.

        Returns:
            ``pos_label`` if any row ``(c0, c1)`` of the label file satisfies
            ``x < c0 <= x + tile_size`` and ``y < c1 <= y + tile_size``;
            ``neg_label`` if no row matches, the label file does not exist, or
            the tile name cannot be parsed (the path is printed in that case).
        """
        # path = <root>/tiles/12/04/0-600-90.png  ->  img_id = 12/04
        img_id = os.path.dirname(path).replace(self.tiles_dir + '/', '')

        tile_name = os.path.basename(os.path.splitext(path)[0])
        try:
            # Third field (r in the naming scheme) is not needed for labelling.
            x_text, y_text, _ = tile_name.split('-')
            x, y = int(x_text), int(y_text)
        except ValueError:
            # Malformed tile name: report it and fall back to the negative
            # label instead of failing later on unbound coordinates.
            print(path)
            return neg_label

        # lbl_path = <root>/labels/12/04.csv
        lbl_path = os.path.join(self.label_dir, img_id + ".csv")
        true_labels = []
        if os.path.isfile(lbl_path):
            true_labels = csv_utils.read(lbl_path, is_headers=False)

        # TODO verify row columns for label file
        labels = [(int(row[0]), int(row[1])) for row in true_labels]
        if not labels:
            return neg_label

        x_max = x + self.tile_size
        y_max = y + self.tile_size
        if any(x < col0 <= x_max and y < col1 <= y_max for col0, col1 in labels):
            return pos_label

        return neg_label
Ejemplo n.º 3
0
    def __init__(self, train_dir, test_dir, train_save_dir, test_save_dir, label_2_id, mu, std, tile_size=299, stride=150,
                 dev_per=20, order=None, randomize=True, transform=None, redo_preprocessing=False):
        """Store the configuration, preprocess both image folders and build the dataset.

        Args:
            train_dir: training image folder; must contain ``labels.csv``.
            test_dir: test image folder; must contain ``labels.csv``.
            train_save_dir: passed to ``__preprocess_images`` for the training
                images and to ``__create_dataset``.
            test_save_dir: passed to ``__preprocess_images`` for the test
                images and to ``__create_dataset``.
            label_2_id: label-name to id mapping, stored on ``self.label_2_id``.
            mu: stored on ``self.mu`` (presumably read by ``__normalize`` —
                confirm there).
            std: stored on ``self.std`` (presumably read by ``__normalize`` —
                confirm there).
            tile_size: stored on ``self.tile_size``.
            stride: stored on ``self.stride``.
            dev_per: forwarded to ``__create_dataset``.
            order: list of preprocessing step names (keys of ``self.dispatch``);
                defaults to ``['normal_stain', 'tile']``.
            randomize: stored on ``self.randomize``.
            transform: ``'normalize'`` selects ``__normalize``; any other value
                leaves ``self.transform`` as ``None``.
            redo_preprocessing: forwarded to ``__preprocess_images``.
        """
        self.dir_status_file = ".dir_status"
        self.tile_size = tile_size
        self.stride = stride

        self.randomize = randomize

        self.train_dir = train_dir
        self.test_dir = test_dir

        # Always define the attribute so later reads never raise
        # AttributeError when no transform was requested.
        self.transform = self.__normalize if transform == 'normalize' else None

        self.mu = mu
        self.std = std

        # dict [benign:0, normal:1 ...]
        self.label_2_id = label_2_id
        # get labels ids from csv file
        self.train_lbls = csv_utils.read(os.path.join(train_dir, "labels.csv"))
        self.test_lbls = csv_utils.read(os.path.join(test_dir, "labels.csv"))

        # Maps a preprocessing step name to the method implementing it.
        self.dispatch = {
            'tile': self.__tile,
            'normal_stain': self.__normal_stain,
        }

        if order is None:
            order = ['normal_stain', 'tile']

        self.__preprocess_images(train_dir, train_save_dir, order, redo_preprocessing)
        self.__preprocess_images(test_dir, test_save_dir, order, redo_preprocessing)

        self.__create_dataset(train_save_dir, test_save_dir, dev_per)
Ejemplo n.º 4
0
    def get_label(self, path, x, y):
        """Return the label of the tile whose origin is ``(x, y)``.

        The image id is ``path`` without its extension and with the
        ``self.srcdir + '/'`` prefix removed; the label file is
        ``<label_dir>/<img_id>.csv``. Example: ``<srcdir>/12/04.png`` gives
        image id ``12/04`` and label file ``<label_dir>/12/04.csv``.

        Args:
            path: path of the source image.
            x: tile origin compared against the first CSV column
                (converted with ``int``).
            y: tile origin compared against the second CSV column
                (converted with ``int``).

        Returns:
            ``self.pos_label`` if any row ``(c0, c1)`` of the label file
            satisfies ``x < c0 <= x + tile_size`` and
            ``y < c1 <= y + tile_size``; otherwise ``self.neg_label``, which is
            also the result when the label file does not exist.
        """
        stem = os.path.splitext(path)[0]
        img_id = stem.replace(self.srcdir + '/', '')
        lbl_path = os.path.join(self.label_dir, img_id + ".csv")

        rows = []
        if os.path.isfile(lbl_path):
            rows = csv_utils.read(lbl_path, is_headers=False)

        # TODO verify row columns for label file
        points = [(int(row[0]), int(row[1])) for row in rows]

        def inside(value, origin):
            # Half-open on the low side: origin < value <= origin + tile_size.
            start = int(origin)
            return start < value <= start + self.tile_size

        hit = any(inside(col0, x) and inside(col1, y) for col0, col1 in points)
        return self.pos_label if hit else self.neg_label
Ejemplo n.º 5
0
    def __init__(self,
                 class_type,
                 image_dir,
                 label_filepath,
                 split,
                 label2id=None,
                 transform=None,
                 filter_model=None,
                 filter_percent=100):
        """Load the label CSV, resolve the label mapping and build the splits.

        Args:
            class_type: stored on ``self.class_type`` and passed to
                ``__all_labels`` when the mapping has to be derived.
            image_dir: forwarded to ``gen_triples``.
            label_filepath: path handed to ``csv_utils.read``.
            split: forwarded to ``gen_triples``.
            label2id: label to id mapping; when falsy, one is built by
                numbering the distinct values of ``__all_labels`` in
                first-seen order.
            transform: forwarded to ``gen_triples``.
            filter_model: forwarded to ``gen_triples``.
            filter_percent: forwarded to ``gen_triples``.
        """
        self.class_type = class_type
        self.__labels_csv = csv_utils.read(label_filepath)

        if not label2id:
            # No usable mapping supplied: derive one from the labels.
            distinct = list(OrderedSet(self.__all_labels(self.class_type)))
            label2id = {label: index for index, label in enumerate(distinct)}
        self.label2id = label2id
        print(self.label2id)

        self.no_labels = len(self.label2id)

        triples = self.gen_triples(image_dir, split, transform, filter_model, filter_percent)
        self.train, self.dev, self.test = triples