@property
def sample(self):
    # Draw `ratio` x (number of positives) random pixels across all images,
    # recording each as [col, row, p] where p marks an accidental positive hit.
    if hasattr(self, '_sample'):
        return self._sample, self._sample_size
    self._sample = {}
    self._sample_size = 0
    _, n = self.positive
    positives = self._positive_expanded
    n = int(n * self.ratio)
    TT.debug("Collecting", n, "random samples.")
    pixels_per_image = int(np.prod(self.image_size))
    indices = xrange(len(self.files) * pixels_per_image)
    ignored = 0
    for index in random.sample(indices, n):
        data_file, label_file = self.files[index // pixels_per_image]
        if data_file not in self._sample:
            self._sample[data_file] = []
        pixel = index % pixels_per_image
        p = 0.0
        if data_file in positives and pixel in positives[data_file]:
            p = 1.0
            ignored += 1
        col, row = pixel_at_index(pixel, self.image_size)
        self._sample[data_file].append([col, row, p])
        self._sample_size += 1
    TT.debug(ignored, "samples out of", self._sample_size, "random samples are positive.")
    self.positive_in_sample = ignored
    return self.sample
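# `pixel_at_index` (used above) and `index_at_pixel` (used in `positive` below)
# are helpers not shown in this section. A minimal sketch of the conversion they
# are assumed to perform, taking `image_size` as (width, height) — the order
# unpacked in `task_test_filter` — and row-major pixel ordering:
def pixel_at_index(index, size):
    """Hypothetical helper: flat pixel index -> (col, row) for size = (width, height)."""
    width, _ = size
    return index % width, index // width


def index_at_pixel(col, row, size):
    """Hypothetical helper: (col, row) -> flat pixel index; inverse of pixel_at_index."""
    width, _ = size
    return row * width + col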
def on_dataset_epoch_end(self, epoch, logs={}):
    numpy.savetxt(open(self.log_file, 'a'), [[self.old_epochs + self.epoch, self.loss]], fmt="%g")
    if self.last_loss > self.loss:
        filename = self.weights_file % (self.old_epochs + epoch)
        TT.debug("Saving weights to", filename)
        self.model.save_weights(filename)
    self.last_loss = min(self.last_loss, self.loss)
def icpr2012():
    """
    Dataset configuration for ICPR 2012.

    :return: a (filename_filter, mapper) pair for the ICPR 2012 layout.
    """
    TT.debug("Loading configurations for ICPR 2012.")

    def filename_filter(name):
        """
        Filter dataset files. Return True if the file is part of the dataset.

        :type name: str
        :rtype: bool
        """
        return re.search(r'.+\.bmp', name) is not None

    def mapper(name):
        """
        Map a dataset file to its label file.

        :type name: str
        :rtype: str
        """
        return re.sub(r'\.[a-z]+$', '.csv', name)

    return filename_filter, mapper
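# A quick illustration of how the returned pair behaves (file names below are
# illustrative only, not actual dataset paths):
#
#     ff, mapper = icpr2012()
#     ff('A00_01.bmp')       # -> True: .bmp files belong to the dataset
#     ff('notes.txt')        # -> False: everything else is filtered out
#     mapper('A00_01.bmp')   # -> 'A00_01.csv': labels live beside the image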
def dump(self):
    _ = self.data  # Force dataset creation if not already created.
    TT.debug("Current dataset has", self._dataset_size, "images.", self._positive_size,
             "positive and", self._sample_size, "negative.")
    # Positives that were hit during random sampling are moved from the
    # negative count into the positive count before persisting.
    json.dump({'data': self._dataset,
               'size': self._dataset_size,
               'positive_size': self._positive_size + self.positive_in_sample,
               'sample_size': self._sample_size - self.positive_in_sample},
              open(self.dataset_store_path, 'w'))
@property
def files(self):
    if not hasattr(self, '_files'):
        self._files = list_all_files(self.root_path, filename_filter=self.filename_filter,
                                     mapper=self.label_mapper)
        TT.debug("Found", len(self._files), "matching files in", self.root_path)
    return self._files
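# `list_all_files` is another helper not shown here. From its call sites it is
# assumed to walk `root_path` and return (data_file, label_file) relative-path
# pairs, selecting images with `filename_filter` and deriving label paths with
# `mapper`. A minimal sketch under that assumption:
def list_all_files(root_path, filename_filter=None, mapper=None):
    """Hypothetical stand-in: collect (image, label) relative path pairs."""
    pairs = []
    for base, _, names in os.walk(root_path):
        for name in sorted(names):
            if filename_filter is not None and not filename_filter(name):
                continue
            rel = os.path.relpath(os.path.join(base, name), root_path)
            pairs.append((rel, mapper(rel) if mapper is not None else rel))
    return pairs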
def __init__(self, root_path, patch_size=(101, 101), verbose=False, ratio=1.0, name='dataset',
             mapper=None, filename_filter=None, rotation=True):
    TT.debug("Dataset root path set to:", root_path)
    self.name = name
    self.patch_size = patch_size
    self.ratio = ratio
    self.root_path = os.path.abspath(root_path)
    self.verbose = verbose
    self.label_mapper = mapper
    self.filename_filter = filename_filter
    self.rotation = rotation
def task_train_filter(args):
    ff, mapper = getattr(__import__('dataset'), args.dataset)()
    dataset = Dataset(root_path=args.path, verbose=args.verbose, name='base-model', mapper=mapper,
                      filename_filter=ff, rotation=False)
    dataset_batches = BatchGenerator(dataset, args.batch)
    from mitosis import model_base

    TT.debug("Compile base model.")
    model = model_base(args.lr)
    model_saved_weights_path = os.path.join(args.path, 'base-model.weights.npy')
    if os.path.exists(model_saved_weights_path):
        TT.info("Loading weights from %s" % model_saved_weights_path)
        model.load_weights(model_saved_weights_path)
    train_start = time.time()
    log = LearnLog("filter", args.path)
    for epoch in xrange(args.epoch):
        TT.debug(epoch + 1, "of", args.epoch, "epochs")
        log.on_dataset_epoch_begin(epoch + 1)
        for x, y in dataset_batches:
            model.fit(x, y, batch_size=args.mini_batch, nb_epoch=1, validation_split=.1,
                      callbacks=[log], show_accuracy=True, shuffle=True)
        log.on_dataset_epoch_end(epoch + 1)
    log.on_dataset_train_end()
    TT.success("Training finished in %.2f hours." % ((time.time() - train_start) / 3600.))
def load(self):
    if os.path.exists(self.dataset_store_path):
        TT.debug("Loading dataset from", self.dataset_store_path)
        data = json.load(open(self.dataset_store_path))
        self._dataset = data['data']
        self._dataset_size = data['size']
        self._positive = {}
        self._positive_size = data['positive_size']
        self._sample = {}
        self._sample_size = data['sample_size']
        self.positive_in_sample = 0
        TT.debug("Current dataset has", self._dataset_size, "images.", self._positive_size,
                 "positive and", self._sample_size, "negative.")
        return True
    return False
def __iter__(self):
    data = Queue(self.MAX_NUM)

    def append(dst, pool, item):
        # Buffer items in `pool`; flush into `dst` once the pool fills up,
        # or when item is None (the end-of-batch signal).
        if item is not None:
            pool.append(item)
            if len(pool) < min(self.pool_size, self.batch_size):
                return dst, pool
        if dst is None:
            return np.asarray(pool, dtype=np.float64), []
        if len(pool):
            return np.concatenate((dst, pool)), []
        return dst, []

    def produce():
        # Producer thread: assemble batches from the dataset and queue them.
        count = 0
        data_x = data_y = None
        pool_x = []
        pool_y = []
        for x, y in self.dataset:
            data_x, pool_x = append(data_x, pool_x, x)
            data_y, pool_y = append(data_y, pool_y, (y, 1 - y))
            count += 1
            if count >= self.batch_size:
                data_x, pool_x = append(data_x, pool_x, None)
                data_y, pool_y = append(data_y, pool_y, None)
                data.put([data_x, data_y])
                count = 0
                data_x = data_y = None
        if count > 0:  # Flush the final, possibly smaller, batch.
            data_x, pool_x = append(data_x, pool_x, None)
            data_y, pool_y = append(data_y, pool_y, None)
            data.put([data_x, data_y])

    start_new_thread(produce, ())
    i = 1
    while i <= self.n:
        start = time.clock()
        X, Y = data.get()
        if self.verbose:
            TT.debug("batch", i, "of", self.n, "completed in", time.clock() - start,
                     "seconds. This batch has", int(np.sum(Y[:, 0])), "positive pixels and",
                     int(np.sum(Y[:, 1])), "negative pixels.")
        yield X, Y
        i += 1
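# `__iter__` relies on several attributes set elsewhere on BatchGenerator
# (`MAX_NUM`, `pool_size`, `batch_size`, `n`, `verbose`). A constructor sketch
# consistent with those uses; the parameter names, defaults, and the way `n`
# is derived are all assumptions:
def __init__(self, dataset, batch_size, pool_size=1000, verbose=False):
    self.dataset = dataset
    self.batch_size = batch_size
    self.pool_size = pool_size  # items buffered before each np.concatenate
    self.MAX_NUM = 10           # bound on queued batches; keeps producer memory in check
    self.verbose = verbose
    # Number of batches per pass; assumes the dataset exposes its sample count.
    self.n = int(np.ceil(len(dataset) / float(batch_size)))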
@property
def positive(self):
    if hasattr(self, '_positive'):
        return self._positive, self._positive_size
    TT.debug("Collecting positive samples.")
    self._positive = {}
    self._positive_size = 0
    self._positive_expanded = {}
    for data_file, label_file in self.files:
        labels = load_csv(os.path.join(self.root_path, label_file))
        self._positive[data_file] = labels
        self._positive_size += len(labels)
        # Also index positives by flat pixel position for fast lookup in `sample`.
        self._positive_expanded[data_file] = {}
        for col, row, p in labels:
            self._positive_expanded[data_file][index_at_pixel(col=col, row=row, size=self.image_size)] = p
    TT.debug("Found", self._positive_size, "positive samples.")
    return self.positive
@property
def data(self):
    if hasattr(self, '_dataset'):
        return self._dataset, self._dataset_size
    if self.load():
        return self.data
    TT.debug("Creating new dataset.")
    pos, pos_c = self.positive
    sam, sam_c = self.sample
    # Merge positive samples into the random sample, keyed by data file.
    for filename in pos:
        if filename not in sam:
            sam[filename] = pos[filename]
        else:
            sam[filename] += pos[filename]
    self._dataset = sam
    self._dataset_size = sam_c + pos_c
    self.dump()
    return self.data
def main():
    parser, args = parse_args()
    TT.verbose = args.verbose
    if args.task == 'train-filter':
        TT.debug("Running: Task Train Filter")
        task_train_filter(args)
    elif args.task == 'train-cnn':
        TT.debug("Running: Task Train CNN")
        task_train_cnn(args)
    elif args.task == 'test-filter':
        TT.debug("Running: Task Test Filter")
        task_test_filter(args)
    elif args.task == 'test-cnn':
        TT.debug("Running: Task Test CNN")
        task_test_cnn(args)
    else:
        parser.print_help()
        exit(0)
def task_test_filter(args):
    dataset = ImageIterator(args.input, args.output)
    dataset_batches = BatchGenerator(dataset, args.batch)
    from mitosis import model_base

    TT.debug("Compile base model.")
    model = model_base(args.lr)
    model_saved_weights_path = os.path.join(args.path, 'base-model.weights.npy')
    TT.info("Loading weights from %s" % model_saved_weights_path)
    model.load_weights(model_saved_weights_path)
    test_start = time.time()
    out = None
    for x, y in dataset_batches:
        tmp = model.predict(x, args.mini_batch, args.verbose)
        out = np_append(out, tmp)
    width, height = dataset.image_size
    out = numpy.reshape(out[:, 0], (height, width))
    numpy.save(change_ext(args.input, 'predicted.npy'), out)
    numpy.save(change_ext(args.input, 'expected.npy'), dataset.output)
    TT.success("Testing finished in %.2f minutes." % ((time.time() - test_start) / 60.))
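# `np_append` and `change_ext` are assumed utilities; minimal sketches of the
# behavior the test tasks rely on:
def np_append(dst, src):
    """Hypothetical helper: concatenate along axis 0, treating None as empty."""
    if dst is None:
        return numpy.copy(src)
    return numpy.concatenate((dst, src))


def change_ext(path, new_ext):
    """Hypothetical helper: swap the file extension, e.g. 'a.png' -> 'a.predicted.npy'."""
    return os.path.splitext(path)[0] + '.' + new_ext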
def on_dataset_train_end(self, logs={}):
    filename = self.weights_file % 0
    TT.debug("Saving weights to", filename)
    self.model.save_weights(filename)
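# The two LearnLog hooks above reference `self.loss`, `self.last_loss`,
# `self.old_epochs`, `self.epoch`, `self.log_file`, and `self.weights_file`,
# none of which are set in this section. A constructor sketch consistent with
# those uses; the file-name templates and defaults are assumptions, and the
# class is presumed to be a Keras callback that updates `self.loss` as it trains:
def __init__(self, name, path):
    self.log_file = os.path.join(path, '%s.loss.log' % name)
    self.weights_file = os.path.join(path, name + '.weights.%d.npy')  # %d = epoch number
    self.last_loss = float('inf')  # best loss seen so far across all epochs
    self.loss = float('inf')
    self.epoch = 0
    self.old_epochs = 0  # epochs completed in earlier runs, if resuming


def on_dataset_epoch_begin(self, epoch, logs={}):
    self.epoch = epoch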
def task_test_cnn(args):
    dataset = ImageIterator(args.input, args.output)
    dataset_batches = BatchGenerator(dataset, args.batch)
    from mitosis import model_base, model_1, model_2

    TT.debug("Compile base model.")
    model = model_base(0)
    TT.debug("Compile model 1.")
    model1 = model_1(0)
    TT.debug("Compile model 2.")
    model2 = model_2(0)
    model_saved_weights_path = os.path.join(args.path, 'base-model.weights.npy')
    model1_saved_weights_path = os.path.join(args.path, 'model1.weights.npy')
    model2_saved_weights_path = os.path.join(args.path, 'model2.weights.npy')
    TT.info("Loading weights from %s" % model_saved_weights_path)
    model.load_weights(model_saved_weights_path)
    TT.info("Loading weights from %s" % model1_saved_weights_path)
    model1.load_weights(model1_saved_weights_path)
    TT.info("Loading weights from %s" % model2_saved_weights_path)
    model2.load_weights(model2_saved_weights_path)
    test_start = time.time()
    out = out1 = out2 = None
    for x, y in dataset_batches:
        tmp = model.predict(x, args.mini_batch, args.verbose)
        local1 = numpy.zeros(tmp.shape)
        local2 = numpy.zeros(tmp.shape)
        out = np_append(out, tmp)
        # Invert the windows and keep only those the base model scored above threshold.
        x = 1. - x
        x_new = []
        indices = []
        for i in range(len(tmp)):
            if tmp[i][0] > .6:
                x_new.append(x[i])
                indices.append(i)
        x_new = numpy.asarray(x_new)
        # Windows filtered out by the base model keep a zero prediction, so the
        # per-batch outputs stay aligned even when x_new is empty.
        if len(x_new):
            tmp1 = model1.predict(x_new, args.mini_batch, args.verbose)
            local1[indices] = tmp1
        out1 = np_append(out1, local1)
        if len(x_new):
            tmp2 = model2.predict(x_new, args.mini_batch, args.verbose)
            local2[indices] = tmp2
        out2 = np_append(out2, local2)
    width, height = dataset.image_size
    out = numpy.reshape(out[:, 0], (height, width))
    out1 = numpy.reshape(out1[:, 0], (height, width))
    out2 = numpy.reshape(out2[:, 0], (height, width))
    numpy.save(change_ext(args.input, 'predicted.npy'), out)
    numpy.save(change_ext(args.input, 'model1.predicted.npy'), out1)
    numpy.save(change_ext(args.input, 'model2.predicted.npy'), out2)
    numpy.save(change_ext(args.input, 'expected.npy'), dataset.output)
    TT.success("Testing finished in %.2f minutes." % ((time.time() - test_start) / 60.))
def task_train_cnn(args):
    ff, mapper = getattr(__import__('dataset'), args.dataset)()
    dataset = Dataset(root_path=args.path, verbose=args.verbose, name='cnn', mapper=mapper,
                      filename_filter=ff, ratio=9)
    dataset_batches = BatchGenerator(dataset, args.batch)
    from mitosis import model_base, model_1, model_2

    TT.debug("Compile base model.")
    model = model_base(lr=0)
    TT.debug("Compile model 1.")
    model1 = model_1(args.lr)
    TT.debug("Compile model 2.")
    model2 = model_2(args.lr)
    model_saved_weights_path = os.path.join(args.path, 'base-model.weights.npy')
    model1_saved_weights_path = os.path.join(args.path, 'model1.weights.npy')
    model2_saved_weights_path = os.path.join(args.path, 'model2.weights.npy')
    if os.path.exists(model_saved_weights_path):
        TT.info("Loading weights from %s" % model_saved_weights_path)
        model.load_weights(model_saved_weights_path)
    if os.path.exists(model1_saved_weights_path):
        TT.info("Loading weights from %s" % model1_saved_weights_path)
        model1.load_weights(model1_saved_weights_path)
    if os.path.exists(model2_saved_weights_path):
        TT.info("Loading weights from %s" % model2_saved_weights_path)
        model2.load_weights(model2_saved_weights_path)
    train_start = time.time()
    log1 = LearnLog("model1", args.path)
    log2 = LearnLog("model2", args.path)
    for epoch in xrange(args.epoch):
        TT.debug(epoch + 1, "of", args.epoch, "epochs")
        log1.on_dataset_epoch_begin(epoch + 1)
        log2.on_dataset_epoch_begin(epoch + 1)
        for x, y in dataset_batches:
            outputs = model.predict(x, batch_size=args.mini_batch, verbose=args.verbose)
            # Multiply each window with its base-model prediction, then pass it on
            # to the next layer of the cascade.
            x = 1. - x
            for i in range(len(outputs)):
                if y[i][0] < 1.:
                    x[i] *= outputs[i][0]
            TT.debug("Model 1 on epoch %d" % (epoch + 1))
            model1.fit(numpy.asarray(x), numpy.asarray(y), batch_size=args.mini_batch, nb_epoch=1,
                       validation_split=.1, callbacks=[log1], show_accuracy=True, shuffle=True)
            TT.debug("Model 2 on epoch %d" % (epoch + 1))
            model2.fit(numpy.asarray(x), numpy.asarray(y), batch_size=args.mini_batch, nb_epoch=1,
                       validation_split=.1, callbacks=[log2], show_accuracy=True, shuffle=True)
        log1.on_dataset_epoch_end(epoch + 1)
        log2.on_dataset_epoch_end(epoch + 1)
    log1.on_dataset_train_end()
    log2.on_dataset_train_end()
    TT.success("Training finished in %.2f hours." % ((time.time() - train_start) / 3600.))