def load_train_image_and_annot(dataset_dir, train_annot_dir):
    max_attempts = 60
    attempts = 0
    while attempts < max_attempts:
        attempts += 1
        # file systems are unpredictable.
        # We may have problems reading the file.
        # try-catch to avoid this.
        # (just try again)
        try:
            # This might take ages, profile and optimize
            fnames = ls(train_annot_dir)
            fnames = [a for a in fnames if is_photo(a)]
            fname = random.sample(fnames, 1)[0]
            annot_path = os.path.join(train_annot_dir, fname)
            image_path_part = os.path.join(dataset_dir,
                                           os.path.splitext(fname)[0])
            # it's possible the image has a different extension
            # so use glob to get it
            image_path = glob.glob(image_path_part + '.*')[0]
            image = load_image(image_path)
            annot = imread(annot_path).astype(bool)
            assert np.sum(annot) > 0
            assert image.shape[2] == 3  # should be RGB
            # also return fname for debugging purposes.
            return image, annot, fname
        except Exception as e:
            # This could be due to an empty annotation saved by the user.
            # Which happens rarely due to deleting all labels in an
            # existing annotation and is not a problem.
            # give it some time and try again.
            time.sleep(0.1)
    if attempts == max_attempts:
        raise Exception('Could not load annotation and photo')
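A minimal usage sketch for the loader above, assuming it is importable from the surrounding module; the `RandomAnnotatedImageDataset` wrapper class is a hypothetical stand-in, not part of the code in this file.

class RandomAnnotatedImageDataset:
    # hypothetical wrapper: each item is a freshly sampled annotated image
    def __init__(self, dataset_dir, train_annot_dir):
        self.dataset_dir = dataset_dir
        self.train_annot_dir = train_annot_dir

    def __getitem__(self, _):
        # the index is ignored: a random annotated image is drawn each call
        image, annot, fname = load_train_image_and_annot(
            self.dataset_dir, self.train_annot_dir)
        return image, annot, fname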
def check_for_instructions(self):
    try:
        for fname in ls(self.instruction_dir):
            if self.execute_instruction(fname):
                os.remove(os.path.join(self.instruction_dir, fname))
    except Exception as e:
        print('Exception checking for instruction', e)
def filter_files(self, files, dirs):
    # Generally we expect lists, but single file or dirnames are handled.
    if isinstance(files, str):
        files = [files]
    if isinstance(dirs, str):
        dirs = [dirs]

    # Absolutize paths, and filter files against ``self.skip`` and
    # ``self.match``
    filtered_files = []
    filtered_files.extend(self.filter_filelist(files))
    for directory in dirs:
        try:
            filtered_files.extend(
                self.filter_filelist(file_utils.ls(directory)))
        except OSError:
            if not self.skip_err:
                raise

    # Take only the files that belong to this bin.  Sort first for
    # consistency.  We'll either bin files based on the hash of the
    # filename (by relying on t4k's inbin function), or we'll bin by
    # taking every nth file to be in this bin.
    filtered_files.sort()
    # if self.use_hash:
    #     is_in_bin = lambda i, f: t4k.inbin(f, self.num_folds, self.fold)
    # else:
    is_in_bin = lambda i, f: i % self.num_folds == self.fold
    filtered_files = [
        f for i, f in enumerate(filtered_files) if is_in_bin(i, f)
    ]

    return filtered_files
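A small self-contained illustration of the index-based binning used above: after sorting, every `num_folds`-th file (offset by `fold`) lands in this bin. The helper name `bin_by_index` is only for illustration.

def bin_by_index(sorted_files, num_folds, fold):
    # keep file i when i modulo num_folds equals the requested fold
    return [f for i, f in enumerate(sorted_files) if i % num_folds == fold]

# e.g. with 3 folds, fold 1 gets the files at indices 1 and 4
files = sorted(['a.txt', 'b.txt', 'c.txt', 'd.txt', 'e.txt'])
assert bin_by_index(files, 3, 1) == ['b.txt', 'e.txt']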
def get_val_metrics(cnn, val_annot_dir, dataset_dir, in_w, out_w, bs):
    """
    Return the TP, FP, TN, FN, defined_sum, duration
    for the {cnn} on the validation set

    TODO - This is too similar to the train loop. Merge both and use flags.
    """
    start = time.time()
    fnames = ls(val_annot_dir)
    fnames = [a for a in fnames if im_utils.is_photo(a)]
    cnn.half()
    # TODO: In order to speed things up, be a bit smarter here
    # by only segmenting the parts of the image where we have
    # some annotation defined.
    # implement a 'partial segment' which excludes tiles with no
    # annotation defined.
    tps = 0
    fps = 0
    tns = 0
    fns = 0
    defined_sum = 0
    for fname in fnames:
        annot_path = os.path.join(val_annot_dir,
                                  os.path.splitext(fname)[0] + '.png')
        # reading the image may throw an exception.
        # I suspect this is due to it being only partially written to disk
        # simply retry if this happens.
        try:
            annot = imread(annot_path)
        except Exception as ex:
            print('Exception reading annotation inside validation method. '
                  'Will retry in 0.1 seconds')
            print(fname, ex)
            time.sleep(0.1)
            annot = imread(annot_path)

        annot = np.array(annot)
        foreground = annot[:, :, 0].astype(bool).astype(int)
        background = annot[:, :, 1].astype(bool).astype(int)
        image_path_part = os.path.join(dataset_dir,
                                       os.path.splitext(fname)[0])
        image_path = glob.glob(image_path_part + '.*')[0]
        image = im_utils.load_image(image_path)
        predicted = unet_segment(cnn, image, bs, in_w, out_w, threshold=0.5)

        # mask defines which pixels are defined in the annotation.
        mask = foreground + background
        mask = mask.astype(bool).astype(int)
        predicted *= mask
        predicted = predicted.astype(bool).astype(int)

        y_defined = mask.reshape(-1)
        y_pred = predicted.reshape(-1)[y_defined > 0]
        y_true = foreground.reshape(-1)[y_defined > 0]

        tps += np.sum(np.logical_and(y_pred == 1, y_true == 1))
        tns += np.sum(np.logical_and(y_pred == 0, y_true == 0))
        fps += np.sum(np.logical_and(y_pred == 1, y_true == 0))
        fns += np.sum(np.logical_and(y_pred == 0, y_true == 1))
        defined_sum += np.sum(y_defined > 0)

    duration = round(time.time() - start, 3)
    metrics = get_metrics(tps, fps, tns, fns, defined_sum, duration)
    return metrics
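For reference, a sketch of the kind of summary a `get_metrics`-style helper typically derives from these counts. The project's actual `get_metrics` is not shown here, so treat the field names and formulas below as an assumption about its shape rather than its implementation.

def metrics_from_counts(tps, fps, tns, fns, defined_sum, duration):
    # guard against division by zero when a class is absent
    precision = tps / (tps + fps) if (tps + fps) else float('nan')
    recall = tps / (tps + fns) if (tps + fns) else float('nan')
    f1 = (2 * (precision * recall) / (precision + recall)
          if (precision + recall) else float('nan'))
    accuracy = (tps + tns) / defined_sum if defined_sum else float('nan')
    return dict(tp=tps, fp=fps, tn=tns, fn=fns,
                precision=precision, recall=recall, f1=f1,
                accuracy=accuracy, defined_sum=defined_sum,
                duration=duration)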
def reset_progress_if_annots_changed(self):
    train_annot_dir = self.train_config['train_annot_dir']
    val_annot_dir = self.train_config['val_annot_dir']
    new_annot_mtimes = []
    for annot_dir in [train_annot_dir, val_annot_dir]:
        for fname in ls(annot_dir):
            fpath = os.path.join(annot_dir, fname)
            new_annot_mtimes.append(os.path.getmtime(fpath))
    new_annot_mtimes = sorted(new_annot_mtimes)
    if new_annot_mtimes != self.annot_mtimes:
        print('reset epochs without progress as annotations have changed')
        self.epochs_without_progress = 0
    self.annot_mtimes = new_annot_mtimes
def read(self):
    self.key_order = []
    self.index_lookup = {}
    self.data = {}
    i = 0
    for fname in ls(self.path, dirs=False):
        fname = os.path.abspath(fname)
        if self.verbose:
            print(fname)

        # ensure that files are in expected order,
        # that none are missing, and that no lines are missing.
        if fname != self.path_from_int(i):
            raise PersistentOrderedDictIntegrityException(
                'Expected %s but found %s.'
                % (self.path_from_int(i), fname))
        if i > 0:
            prev_file_path = self.path_from_int(i - 1)
            num_lines_prev_file = len(
                self.open(prev_file_path, 'r').readlines())
            if num_lines_prev_file != self.lines_per_file:
                raise PersistentOrderedDictIntegrityException(
                    "PersistentOrderedDict: "
                    "A file on disk appears to be corrupted, because "
                    "it's missing lines: %s " % prev_file_path)
        i += 1

        for entry in self.open(fname):
            # skip blank lines (there's always one at end of file)
            if entry == '':
                continue
            key, json_record = entry.split('\t', 1)
            key = self.UNESCAPE_TAB_PATTERN.sub('\g<prefix>\t', key)
            key = self.UNESCAPE_SLASH_PATTERN.sub(r'\\', key)
            key = key.decode('utf8')
            # remove the newline at the end of json_record, then parse it
            record = json.loads(json_record[:-1])

            self.data[key] = record
            self.key_order.append(key)
            self.index_lookup[key] = len(self.key_order) - 1
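Based on the parsing above, each line of a data file holds a tab-escaped key, a tab, a JSON-encoded record, and a trailing newline. The snippet below is an assumed illustration of that line format (ignoring the escape patterns), not output from the actual class.

import json

# one record per line: key, a tab, then the JSON-encoded value
line = 'some key\t' + json.dumps({'count': 3}) + '\n'
key, json_record = line.split('\t', 1)
record = json.loads(json_record[:-1])  # drop the trailing newline
assert key == 'some key' and record == {'count': 3}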
def segment(self, segment_config):
    """
    Segment {file_names} from {dataset_dir} using {model_paths}
    and save to {seg_dir}.

    If model paths are not specified then use the latest model in {model_dir}.
    If no models are in {model_dir} then create a random weights model
    and use that.

    TODO: model saving is a counter-intuitive side effect,
    re-think project creation process to avoid this
    """
    in_dir = segment_config['dataset_dir']
    seg_dir = segment_config['seg_dir']

    if "file_names" in segment_config:
        fnames = segment_config['file_names']
    else:
        # default to using all files in the directory if file_names is not specified.
        fnames = ls(in_dir)

    # if model paths not specified use latest.
    if "model_paths" in segment_config:
        model_paths = segment_config['model_paths']
    else:
        model_dir = segment_config['model_dir']
        model_paths = model_utils.get_latest_model_paths(model_dir, 1)
        # if latest is not found then create a model with random weights
        # and use that.
        if not model_paths:
            create_first_model_with_random_weights(model_dir)
            model_paths = model_utils.get_latest_model_paths(model_dir, 1)

    start = time.time()
    for fname in fnames:
        self.segment_file(in_dir, seg_dir, fname, model_paths,
                          sync_save=len(fnames) == 1)
    duration = time.time() - start
    print(f'Seconds to segment {len(fnames)} images: ', round(duration, 3))
def train_one_epoch(self):
    train_annot_dir = self.train_config['train_annot_dir']
    val_annot_dir = self.train_config['val_annot_dir']

    # nothing to train on until both annotation directories contain photos.
    if not any(is_photo(a) for a in ls(train_annot_dir)):
        return
    if not any(is_photo(a) for a in ls(val_annot_dir)):
        return

    if self.first_loop:
        self.first_loop = False
        self.write_message('Training started')
        self.log('Starting Training')

    train_loader = DataLoader(self.train_set, self.bs, shuffle=True,
                              # 12 workers is good for performance
                              # on 2 RTX2080 Tis
                              # 0 workers is good for debugging
                              num_workers=12,
                              drop_last=False, pin_memory=True)
    epoch_start = time.time()
    self.model.train()
    tps = 0
    fps = 0
    tns = 0
    fns = 0
    defined_total = 0
    loss_sum = 0
    for step, (photo_tiles,
               foreground_tiles,
               defined_tiles) in enumerate(train_loader):

        self.check_for_instructions()
        photo_tiles = photo_tiles.cuda()
        foreground_tiles = foreground_tiles.cuda()
        defined_tiles = defined_tiles.cuda()
        self.optimizer.zero_grad()

        outputs = self.model(photo_tiles)
        softmaxed = softmax(outputs, 1)

        # just the foreground probability.
        foreground_probs = softmaxed[:, 1, :]

        # remove any of the predictions for which we don't have ground truth.
        # Set outputs to 0 where annotation is undefined so that
        # the network can predict whatever it wants without any penalty.
        outputs[:, 0] *= defined_tiles
        outputs[:, 1] *= defined_tiles

        loss = criterion(outputs, foreground_tiles)
        loss.backward()
        self.optimizer.step()

        foreground_probs *= defined_tiles
        predicted = foreground_probs > 0.5

        # we only want to calculate metrics on the
        # part of the predictions for which annotations are defined
        # so remove all predictions and foreground labels where
        # we didn't have any annotation.
        defined_list = defined_tiles.view(-1)
        preds_list = predicted.view(-1)[defined_list > 0]
        foregrounds_list = foreground_tiles.view(-1)[defined_list > 0]

        # calculate all the false positives, false negatives etc
        tps += torch.sum((foregrounds_list == 1) * (preds_list == 1)).cpu().numpy()
        tns += torch.sum((foregrounds_list == 0) * (preds_list == 0)).cpu().numpy()
        fps += torch.sum((foregrounds_list == 0) * (preds_list == 1)).cpu().numpy()
        fns += torch.sum((foregrounds_list == 1) * (preds_list == 0)).cpu().numpy()
        defined_total += torch.sum(defined_list > 0).cpu().numpy()
        loss_sum += loss.item()  # float

        sys.stdout.write(f"Training {(step+1) * self.bs}/"
                         f"{len(train_loader.dataset)} "
                         f" loss={round(loss.item(), 3)} \r")
        self.check_for_instructions()  # could update training parameters
        if not self.training:
            return

    duration = round(time.time() - epoch_start, 3)
    print('epoch train duration', duration)
    self.log_metrics('train', get_metrics(tps, fps, tns, fns,
                                          defined_total, duration))
    before_val_time = time.time()
    self.validation()
    print('epoch validation duration', time.time() - before_val_time)
def check_for_instructions(self):
    for fname in ls(self.instruction_dir):
        if self.execute_instruction(fname):
            os.remove(os.path.join(self.instruction_dir, fname))
def get_latest_model_paths(model_dir, k):
    fnames = ls(model_dir)
    fnames = sorted(fnames)[-k:]
    fpaths = [os.path.join(model_dir, f) for f in fnames]
    return fpaths
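Note that "latest" here is determined purely by lexicographic filename order, so this only returns the newest checkpoints when model names sort chronologically (for example zero-padded step counters). A usage sketch under the assumption that `ls` behaves like a filtered os.listdir returning bare filenames; the filenames below are made up.

import os
import tempfile

# throwaway model_dir with filenames that sort in chronological order
model_dir = tempfile.mkdtemp()
for name in ['000001_model.pkl', '000002_model.pkl', '000003_model.pkl']:
    open(os.path.join(model_dir, name), 'w').close()

latest = get_latest_model_paths(model_dir, 1)
assert [os.path.basename(p) for p in latest] == ['000003_model.pkl']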
def __len__(self):
    # use at least 612 but when dataset gets bigger start to expand
    # to prevent validation from taking all the time (relatively)
    return max(612, len(ls(self.train_annot_dir)) * 2)
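A quick worked example of the epoch-length heuristic above, with illustrative annotation counts.

# small dataset: stays at the 612-item floor; larger dataset: scales with annotations
assert max(612, 100 * 2) == 612
assert max(612, 500 * 2) == 1000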