Example #1
0
def load_train_image_and_annot(dataset_dir, train_annot_dir):
    """
    Load a randomly chosen training annotation together with its photo.

    File systems are unpredictable, so reading may fail, and an annotation
    may be empty (which happens rarely, when a user deletes all labels in
    an existing annotation). Any failure is handled by pausing briefly and
    trying again with a freshly sampled file, up to a fixed number of
    attempts.

    Args:
        dataset_dir: directory that is searched for the photo belonging
            to the sampled annotation.
        train_annot_dir: directory listing the candidate annotations.

    Returns:
        (image, annot, fname). annot is converted to bool and fname is
        returned for debugging purposes.

    Raises:
        Exception: if every attempt failed. The error from the final
            attempt is attached as the cause.
    """
    max_attempts = 60
    retry_delay_seconds = 0.1
    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            # This might take ages, profile and optimize
            fnames = [a for a in ls(train_annot_dir) if is_photo(a)]
            fname = random.sample(fnames, 1)[0]
            annot_path = os.path.join(train_annot_dir, fname)
            image_path_part = os.path.join(dataset_dir,
                                           os.path.splitext(fname)[0])
            # it's possible the image has a different extension
            # so use glob to get it
            image_path = glob.glob(image_path_part + '.*')[0]
            image = load_image(image_path)
            annot = imread(annot_path).astype(bool)
            # Explicit checks instead of assert, because assert statements
            # are removed when python runs with -O.
            if not np.sum(annot) > 0:
                raise ValueError('Annotation is empty: ' + annot_path)
            if image.shape[2] != 3:
                raise ValueError('Image should be RGB: ' + image_path)
            return image, annot, fname
        except Exception as e:
            # Keep the error so the final failure can report its cause.
            last_error = e
            # No point in waiting once there are no attempts left.
            if attempt < max_attempts:
                time.sleep(retry_delay_seconds)
    raise Exception('Could not load annotation and photo') from last_error
Example #2
0
 def check_for_instructions(self):
     """
     Execute every instruction file found in self.instruction_dir.

     A file is deleted once execute_instruction returns a truthy value
     for it. Any exception raised along the way is printed rather than
     propagated.
     """
     try:
         for instruction_name in ls(self.instruction_dir):
             handled = self.execute_instruction(instruction_name)
             if not handled:
                 continue
             os.remove(os.path.join(self.instruction_dir, instruction_name))
     except Exception as error:
         print('Exception checking for instruction', error)
Example #3
0
    def filter_files(self, files, dirs):
        """
        Collect the files that belong to this bin.

        ``files`` and ``dirs`` are normally lists, but a single file name
        or directory name given as a string is accepted too. Every file,
        plus the listing of every directory, is passed through
        ``self.filter_filelist``. An ``OSError`` raised while handling a
        directory is re-raised unless ``self.skip_err`` is set.

        The surviving files are sorted for consistency, and the file at
        sorted position ``i`` is kept when
        ``i % self.num_folds == self.fold``.
        """
        # Wrap a lone string so the rest of the method only sees lists.
        if isinstance(files, str):
            files = [files]
        if isinstance(dirs, str):
            dirs = [dirs]

        candidates = list(self.filter_filelist(files))
        for directory in dirs:
            try:
                listing = file_utils.ls(directory)
                candidates.extend(self.filter_filelist(listing))
            except OSError:
                if self.skip_err:
                    continue
                raise

        # Sort first so that the position based binning is consistent.
        # Binning takes every nth file; the hash based alternative
        # (t4k's inbin function) is not used here.
        candidates.sort()
        return [
            path for position, path in enumerate(candidates)
            if position % self.num_folds == self.fold
        ]
Example #4
0
def get_val_metrics(cnn, val_annot_dir, dataset_dir, in_w, out_w, bs):
    """
    Compute metrics for the {cnn} on the validation set.

    TP, FP, TN and FN counts, the number of annotated pixels and the
    elapsed time are accumulated over every photo annotation listed in
    {val_annot_dir} and passed to get_metrics, whose result is returned.

    Note that cnn.half() is called on the supplied model.

    TODO - This is too similar to the train loop. Merge both and use flags.
    """
    began = time.time()
    annot_fnames = [f for f in ls(val_annot_dir) if im_utils.is_photo(f)]
    cnn.half()
    # TODO: In order to speed things up, be a bit smarter here
    # by only segmenting the parts of the image where we have
    # some annotation defined.
    # implement a 'partial segment' which excludes tiles with no
    # annotation defined.
    tps = fps = tns = fns = 0
    defined_sum = 0
    for fname in annot_fnames:
        stem = os.path.splitext(fname)[0]
        annot_path = os.path.join(val_annot_dir, stem + '.png')
        # Reading may throw, possibly because the file is only partially
        # written to disk. Wait briefly and retry once if that happens.
        try:
            annot = imread(annot_path)
        except Exception as read_error:
            print('Exception reading annotation inside validation method.'
                  'Will retry in 0.1 seconsds')
            print(fname, read_error)
            time.sleep(0.1)
            annot = imread(annot_path)

        annot = np.array(annot)
        # channel 0 is used as foreground and channel 1 as background.
        foreground = annot[:, :, 0].astype(bool).astype(int)
        background = annot[:, :, 1].astype(bool).astype(int)
        # the photo may have any extension so look it up with glob.
        image_path = glob.glob(os.path.join(dataset_dir, stem) + '.*')[0]
        image = im_utils.load_image(image_path)
        predicted = unet_segment(cnn, image, bs, in_w, out_w, threshold=0.5)
        # mask marks the pixels which are defined in the annotation.
        mask = (foreground + background).astype(bool).astype(int)
        predicted *= mask
        predicted = predicted.astype(bool).astype(int)
        # only pixels with an annotation contribute to the counts.
        is_defined = mask.reshape(-1) > 0
        y_pred = predicted.reshape(-1)[is_defined]
        y_true = foreground.reshape(-1)[is_defined]
        pred_pos = y_pred == 1
        pred_neg = y_pred == 0
        true_pos = y_true == 1
        true_neg = y_true == 0
        tps += np.sum(pred_pos & true_pos)
        tns += np.sum(pred_neg & true_neg)
        fps += np.sum(pred_pos & true_neg)
        fns += np.sum(pred_neg & true_pos)
        defined_sum += np.sum(is_defined)
    duration = round(time.time() - began, 3)
    return get_metrics(tps, fps, tns, fns, defined_sum, duration)
Example #5
0
 def reset_progress_if_annots_changed(self):
     """
     Reset self.epochs_without_progress when the annotations have changed.

     The sorted modification times of all files in the train and val
     annotation directories are compared with the ones stored from the
     previous call (self.annot_mtimes), and then stored for the next call.
     """
     annot_dirs = (self.train_config['train_annot_dir'],
                   self.train_config['val_annot_dir'])
     latest_mtimes = sorted(
         os.path.getmtime(os.path.join(annot_dir, fname))
         for annot_dir in annot_dirs
         for fname in ls(annot_dir))
     annots_changed = latest_mtimes != self.annot_mtimes
     if annots_changed:
         print('reset epochs without progress as annotations have changed')
         self.epochs_without_progress = 0
     self.annot_mtimes = latest_mtimes
    def read(self):

        self.key_order = []
        self.index_lookup = {}
        self.data = {}

        i = 0
        for fname in ls(self.path, dirs=False):

            fname = os.path.abspath(fname)

            if self.verbose:
                print fname

            # ensure that files are in expected order,
            # that none are missing, and that no lines are missing.
            if fname != self.path_from_int(i):
                raise PersistentOrderedDictIntegrityException(
                    'Expected %s but found %s.' %
                    (self.path_from_int(i), fname))

            if i > 0:
                prev_file_path = self.path_from_int(i - 1)
                num_lines_prev_file = len(
                    self.open(prev_file_path, 'r').readlines())
                if num_lines_prev_file != self.lines_per_file:
                    raise PersistentOrderedDictIntegrityException(
                        "PersistentOrderedDict: "
                        "A file on disk appears to be corrupted, because "
                        "it's missing lines: %s " % prev_file_path)

            i += 1

            for entry in self.open(os.path.join(fname)):

                # skip blank lines (there's always one at end of file)
                if entry == '':
                    continue

                key, json_record = entry.split('\t', 1)
                key = self.UNESCAPE_TAB_PATTERN.sub('\g<prefix>\t', key)
                key = self.UNESCAPE_SLASH_PATTERN.sub(r'\\', key)
                key = key.decode('utf8')

                # remove the newline of the end of json_record, and read it
                record = json.loads(json_record[:-1])
                self.data[key] = record
                self.key_order.append(key)
                self.index_lookup[key] = len(self.key_order) - 1
Example #7
0
    def segment(self, segment_config):
        """
        Segment {file_names} from {dataset_dir} using {model_paths}
        and save to {seg_dir}.

        When {file_names} is missing from the config, every file in
        {dataset_dir} is segmented.

        When {model_paths} is missing, the latest model in {model_dir}
        is used. If {model_dir} has no models then a model with random
        weights is created first and used instead.

        TODO: model saving is a counter-intuitive side effect,
        re-think project creation process to avoid this
        """
        in_dir = segment_config['dataset_dir']
        seg_dir = segment_config['seg_dir']

        # fall back to all files in the directory.
        if "file_names" not in segment_config:
            fnames = ls(in_dir)
        else:
            fnames = segment_config['file_names']

        # fall back to the latest model.
        if "model_paths" not in segment_config:
            model_dir = segment_config['model_dir']
            model_paths = model_utils.get_latest_model_paths(model_dir, 1)
            if not model_paths:
                # nothing saved yet, so create a random weights model
                # and look it up again.
                create_first_model_with_random_weights(model_dir)
                model_paths = model_utils.get_latest_model_paths(model_dir, 1)
        else:
            model_paths = segment_config['model_paths']

        began = time.time()
        # saving is only synchronous when a single image is segmented.
        single_image = len(fnames) == 1
        for fname in fnames:
            self.segment_file(in_dir, seg_dir, fname, model_paths,
                              sync_save=single_image)
        duration = time.time() - began
        print(f'Seconds to segment {len(fnames)} images: ', round(duration, 3))
Example #8
0
    def train_one_epoch(self):
        """
        Train for one pass over self.train_set, log the training metrics
        and then run validation.

        Returns early, without training, when either the train or the val
        annotation directory contains no photo files. Also returns in the
        middle of the epoch (skipping metric logging and validation) when
        self.training is falsy after instructions have been checked.
        """
        train_annot_dir = self.train_config['train_annot_dir']
        val_annot_dir = self.train_config['val_annot_dir']
        # Both directories need at least one photo annotation.
        # any() is required here: a list of is_photo results is truthy
        # as soon as the directory holds any file at all, photo or not.
        for annot_dir in (train_annot_dir, val_annot_dir):
            if not any(is_photo(a) for a in ls(annot_dir)):
                return

        if self.first_loop:
            self.first_loop = False
            self.write_message('Training started')
            self.log('Starting Training')

        train_loader = DataLoader(
            self.train_set,
            self.bs,
            shuffle=True,
            # 12 workers is good for performance
            # on 2 RTX2080 Tis
            # 0 workers is good for debugging
            num_workers=12,
            drop_last=False,
            pin_memory=True)
        epoch_start = time.time()
        self.model.train()
        tps = 0
        fps = 0
        tns = 0
        fns = 0
        defined_total = 0
        loss_sum = 0
        for step, (photo_tiles, foreground_tiles,
                   defined_tiles) in enumerate(train_loader):

            self.check_for_instructions()
            photo_tiles = photo_tiles.cuda()
            foreground_tiles = foreground_tiles.cuda()
            defined_tiles = defined_tiles.cuda()
            self.optimizer.zero_grad()
            outputs = self.model(photo_tiles)
            softmaxed = softmax(outputs, 1)
            # just the foreground probability.
            foreground_probs = softmaxed[:, 1, :]
            # remove any of the predictions for which we don't have ground truth
            # Set outputs to 0 where annotation undefined so that
            # The network can predict whatever it wants without any penalty.
            outputs[:, 0] *= defined_tiles
            outputs[:, 1] *= defined_tiles
            loss = criterion(outputs, foreground_tiles)
            loss.backward()
            self.optimizer.step()
            foreground_probs *= defined_tiles
            predicted = foreground_probs > 0.5

            # we only want to calculate metrics on the
            # part of the predictions for which annotations are defined
            # so remove all predictions and foreground labels where
            # we didn't have any annotation.

            defined_list = defined_tiles.view(-1)
            preds_list = predicted.view(-1)[defined_list > 0]
            foregrounds_list = foreground_tiles.view(-1)[defined_list > 0]

            # calculate all the false positives, false negatives etc
            tps += torch.sum(
                (foregrounds_list == 1) * (preds_list == 1)).cpu().numpy()
            tns += torch.sum(
                (foregrounds_list == 0) * (preds_list == 0)).cpu().numpy()
            fps += torch.sum(
                (foregrounds_list == 0) * (preds_list == 1)).cpu().numpy()
            fns += torch.sum(
                (foregrounds_list == 1) * (preds_list == 0)).cpu().numpy()
            defined_total += torch.sum(defined_list > 0).cpu().numpy()
            loss_sum += loss.item()  # float
            # \r keeps the progress report on a single console line.
            sys.stdout.write(f"Training {(step+1) * self.bs}/"
                             f"{len(train_loader.dataset)} "
                             f" loss={round(loss.item(), 3)} \r")
            self.check_for_instructions()  # could update training parameter
            if not self.training:
                return

        duration = round(time.time() - epoch_start, 3)
        print('epoch train duration', duration)
        self.log_metrics(
            'train', get_metrics(tps, fps, tns, fns, defined_total, duration))
        before_val_time = time.time()
        self.validation()
        print('epoch validation duration', time.time() - before_val_time)
Example #9
0
 def check_for_instructions(self):
     """
     Execute every instruction file found in self.instruction_dir and
     delete each one for which execute_instruction returns a truthy value.
     """
     for instruction_name in ls(self.instruction_dir):
         if not self.execute_instruction(instruction_name):
             continue
         instruction_path = os.path.join(self.instruction_dir,
                                         instruction_name)
         os.remove(instruction_path)
Example #10
0
def get_latest_model_paths(model_dir, k):
    """
    Return the paths of the k files in model_dir whose names sort last.

    Args:
        model_dir: directory to list.
        k: maximum number of paths to return. If k is 0 or negative
            an empty list is returned.

    Returns:
        A list of at most k paths, in ascending file name order, each
        joined onto model_dir.
    """
    fnames = sorted(ls(model_dir))
    # fnames[-k:] with k == 0 is fnames[0:], which is every file and
    # not none of them, so non-positive k is handled explicitly.
    latest = fnames[-k:] if k > 0 else []
    return [os.path.join(model_dir, f) for f in latest]
Example #11
0
 def __len__(self):
     """
     Return twice the number of files in self.train_annot_dir, but never
     less than 612.
     """
     # The floor of 612 applies while the dataset is small. The length
     # starts to expand once the dataset gets bigger, to prevent
     # validation from taking all the time (relatively).
     min_len = 612
     annot_count = len(ls(self.train_annot_dir))
     return max(min_len, annot_count * 2)