Ejemplo n.º 1
0
    def sample(self):
        """
        Draw random pixels across all dataset files and cache the result.

        The number of pixels drawn is the positive count (second element of
        ``self.positive``) multiplied by ``self.ratio`` and truncated to int.
        Every drawn pixel is stored as ``[col, row, p]`` under its data file,
        where ``p`` is 1.0 when the pixel is also listed in
        ``self._positive_expanded`` for that file and 0.0 otherwise.

        :return: ``(samples_by_file, sample_count)``; served from the cached
                 ``_sample`` / ``_sample_size`` attributes on later calls.
        :raises ValueError: presumably raised by ``random.sample`` when more
                            pixels are requested than exist -- confirm.
        """
        if hasattr(self, '_sample'):
            return self._sample, self._sample_size

        self._sample = {}
        self._sample_size = 0
        _, n = self.positive
        positives = self._positive_expanded
        n = int(n * self.ratio)
        TT.debug("Collecting", n, "random samples.")
        pixels_per_image = int(np.prod(self.image_size))
        # One flat index per pixel of every file: file number is the quotient,
        # pixel offset inside the file is the remainder.
        indices = xrange(len(self.files) * pixels_per_image)
        ignored = 0
        for index in random.sample(indices, n):
            # Explicit floor division keeps the list index an integer even if
            # true division is in effect (``from __future__ import division``).
            data_file, _ = self.files[index // pixels_per_image]
            file_samples = self._sample.setdefault(data_file, [])
            pixel = index % pixels_per_image
            if data_file in positives and pixel in positives[data_file]:
                # The random draw hit a pixel that is labelled positive.
                p = 1.0
                ignored += 1
            else:
                p = 0.0
            col, row = pixel_at_index(pixel, self.image_size)
            file_samples.append([col, row, p])
            self._sample_size += 1
        TT.debug(ignored, "samples out of", self._sample_size,
                 "random samples are positive.")
        self.positive_in_sample = ignored
        # NOTE(review): this returns the cached tuple only if ``sample`` is
        # exposed as a property (decorator not visible here) -- confirm.
        return self.sample
Ejemplo n.º 2
0
 def on_dataset_epoch_end(self, epoch, logs={}):
     """
     Append the epoch loss to the log file and checkpoint on improvement.

     One row ``[self.old_epochs + self.epoch, self.loss]`` is appended to
     ``self.log_file``. When ``self.loss`` is lower than ``self.last_loss``
     the model weights are saved to ``self.weights_file`` formatted with
     ``self.old_epochs + epoch``.

     :param epoch: epoch number used to build the weights file name.
     :param logs: unused.
     """
     # NOTE(review): the log row uses ``self.epoch`` while the file name uses
     # the ``epoch`` argument -- confirm both are meant to be the same value.
     row = [[self.old_epochs + self.epoch, self.loss]]
     # Close the log file deterministically instead of leaking the handle.
     with open(self.log_file, 'a') as log_handle:
         numpy.savetxt(log_handle, row, fmt="%g")
     if self.last_loss > self.loss:
         filename = self.weights_file % (self.old_epochs + epoch)
         TT.debug("Saving weights to", filename)
         self.model.save_weights(filename)
     self.last_loss = min(self.last_loss, self.loss)
Ejemplo n.º 3
0
def icpr2012():
    """
    Build the file filter and label mapper for the ICPR 2012 dataset.

    :return: ``(filename_filter, mapper)`` pair of callables.
    :rtype: tuple
    """
    TT.debug("Loading configurations for ICPR 2012.")

    # Compile once per configuration instead of on every call.
    image_pattern = re.compile(r'.+\.bmp')
    extension_pattern = re.compile(r'\.[a-z]+$')

    def filename_filter(name):
        """
        Tell whether a file belongs to the dataset.

        True when ``name`` contains ``.bmp`` preceded by at least one
        character.
        :type name: str
        :rtype: bool
        """
        # Return a real bool, as documented, rather than a match object.
        return image_pattern.search(name) is not None

    def mapper(name):
        """
        Map a dataset file name to its label file name.

        Replaces a trailing lower-case extension with ``.csv``; names
        without such an extension are returned unchanged.
        :type name: str
        :rtype: str
        """
        return extension_pattern.sub('.csv', name)

    return filename_filter, mapper
Ejemplo n.º 4
0
 def dump(self):
     """
     Write the dataset and its size counters to ``self.dataset_store_path``.

     The stored JSON object has the keys ``data``, ``size``,
     ``positive_size`` and ``sample_size``. ``self.positive_in_sample`` is
     added to the positive count and subtracted from the sample count.
     """
     _ = self.data  # Create data if not already created.
     TT.debug("Current dataset has", self._dataset_size, "images.",
              self._positive_size, "positive and", self._sample_size, "negative.")
     payload = {'data': self._dataset, 'size': self._dataset_size,
                'positive_size': self._positive_size + self.positive_in_sample,
                'sample_size': self._sample_size - self.positive_in_sample}
     # Close the file explicitly so the JSON is flushed to disk on return.
     with open(self.dataset_store_path, 'w') as store:
         json.dump(payload, store)
Ejemplo n.º 5
0
 def files(self):
     """
     Return the dataset file listing, building and caching it on first use.

     The listing comes from ``list_all_files`` called on ``self.root_path``
     with this object's filename filter and label mapper, and is kept in
     ``self._files`` for later calls.
     """
     if hasattr(self, '_files'):
         return self._files

     root = self.root_path
     self._files = list_all_files(
         root,
         filename_filter=self.filename_filter,
         mapper=self.label_mapper,
     )
     TT.debug("Found", len(self._files), "matching files in", root)
     return self._files
Ejemplo n.º 6
0
 def on_dataset_epoch_end(self, epoch, logs={}):
     """
     Log the loss of the finished epoch and save weights when it improved.

     Appends the row ``[self.old_epochs + self.epoch, self.loss]`` to
     ``self.log_file``. If ``self.loss`` is below ``self.last_loss``, the
     weights are written to ``self.weights_file % (self.old_epochs + epoch)``.
     ``self.last_loss`` ends up as the smaller of the two losses.

     :param epoch: epoch number used in the weights file name.
     :param logs: unused.
     """
     # NOTE(review): ``self.epoch`` (log row) and ``epoch`` (file name) are
     # different sources -- confirm they are expected to agree.
     # ``with`` guarantees the appended row is flushed and the handle closed.
     with open(self.log_file, 'a') as log_handle:
         numpy.savetxt(log_handle,
                       [[self.old_epochs + self.epoch, self.loss]],
                       fmt="%g")
     if self.last_loss > self.loss:
         filename = self.weights_file % (self.old_epochs + epoch)
         TT.debug("Saving weights to", filename)
         self.model.save_weights(filename)
     self.last_loss = min(self.last_loss, self.loss)
Ejemplo n.º 7
0
 def __init__(
         self,
         root_path,
         patch_size=(101, 101),
         verbose=False,
         ratio=1.0,
         name='dataset',
         mapper=None,
         filename_filter=None,
         rotation=True
 ):
     """
     Store the dataset configuration on the instance.

     :param root_path: dataset directory; stored as an absolute path.
     :param patch_size: stored as ``self.patch_size``.
     :param verbose: stored as ``self.verbose``.
     :param ratio: stored as ``self.ratio``.
     :param name: stored as ``self.name``.
     :param mapper: stored as ``self.label_mapper``.
     :param filename_filter: stored as ``self.filename_filter``.
     :param rotation: stored as ``self.rotation``.
     """
     TT.debug("Dataset root path set to:", root_path)

     # Location and file selection.
     self.root_path = os.path.abspath(root_path)
     self.filename_filter = filename_filter
     self.label_mapper = mapper

     # Sampling options.
     self.patch_size = patch_size
     self.ratio = ratio
     self.rotation = rotation

     # Bookkeeping.
     self.name = name
     self.verbose = verbose
Ejemplo n.º 8
0
def task_train_filter(args):
    """
    Train the base ("filter") model on the dataset named by ``args.dataset``.

    Existing weights at ``<args.path>/base-model.weights.npy`` are loaded
    first when that file exists. The model is then fitted batch by batch for
    ``args.epoch`` passes, with a ``LearnLog`` notified around every pass and
    once more when training ends.

    :param args: parsed command-line options (``dataset``, ``path``,
                 ``verbose``, ``batch``, ``lr``, ``mini_batch``, ``epoch``).
    """
    dataset_module = __import__('dataset')
    ff, mapper = getattr(dataset_module, args.dataset)()
    dataset = Dataset(root_path=args.path, verbose=args.verbose,
                      name='base-model', mapper=mapper,
                      filename_filter=ff, rotation=False)
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base
    TT.debug("Compile base model.")
    model = model_base(args.lr)

    weights_path = os.path.join(args.path, 'base-model.weights.npy')
    if os.path.exists(weights_path):
        TT.info("Loading weights from %s" % weights_path)
        model.load_weights(weights_path)

    started_at = time.time()
    log = LearnLog("filter", args.path)
    for epoch_index in xrange(args.epoch):
        epoch_number = epoch_index + 1
        TT.debug(epoch_number, "of", args.epoch, "epochs")
        log.on_dataset_epoch_begin(epoch_number)
        for batch_x, batch_y in dataset_batches:
            model.fit(batch_x, batch_y,
                      batch_size=args.mini_batch, nb_epoch=1,
                      validation_split=.1, callbacks=[log],
                      show_accuracy=True, shuffle=True)
        log.on_dataset_epoch_end(epoch_number)
    log.on_dataset_train_end()

    elapsed_hours = (time.time() - started_at) / 3600.
    TT.success("Training finished in %.2f hours." % elapsed_hours)
Ejemplo n.º 9
0
 def load(self):
     """
     Restore a previously dumped dataset from ``self.dataset_store_path``.

     Reads the JSON keys ``data``, ``size``, ``positive_size`` and
     ``sample_size``. ``_positive`` and ``_sample`` are reset to empty dicts
     and ``positive_in_sample`` to 0.

     :return: True when the store file existed and was loaded, else False.
     :rtype: bool
     """
     if not os.path.exists(self.dataset_store_path):
         return False

     TT.debug("Loading dataset from", self.dataset_store_path)
     # Close the store file as soon as it is parsed instead of leaking it.
     with open(self.dataset_store_path) as store:
         data = json.load(store)
     self._dataset = data['data']
     self._dataset_size = data['size']
     self._positive = {}
     self._positive_size = data['positive_size']
     self._sample = {}
     self._sample_size = data['sample_size']
     self.positive_in_sample = 0
     TT.debug("Current dataset has", self._dataset_size, "images.",
              self._positive_size, "positive and", self._sample_size, "negative.")
     return True
Ejemplo n.º 10
0
    def __iter__(self):
        """
        Yield ``(X, Y)`` batches assembled from ``self.dataset``.

        A producer function is started with ``start_new_thread``; it groups
        ``self.batch_size`` dataset items into one batch and puts each batch
        on a queue. This generator takes ``self.n`` batches off that queue and
        yields them, logging per-batch statistics when ``self.verbose`` is set.
        """
        # Hand-over queue between producer and consumer.
        # presumably self.MAX_NUM bounds the number of queued batches -- confirm.
        data = Queue(self.MAX_NUM)

        def append(dst, pool, item):
            """
            Buffer ``item`` in ``pool`` and fold the pool into ``dst`` when due.

            Returns the (possibly new) ``dst`` and the pool to keep using.
            Passing ``item=None`` forces the fold without adding anything.
            """
            if item is not None:
                pool.append(item)
                # Keep buffering until the pool reaches the smaller of the
                # configured pool size and batch size.
                if len(pool) < min(self.pool_size, self.batch_size):
                    return dst, pool
            if dst is None:
                # First fold of this batch: the pool becomes the array.
                return np.asarray(pool, dtype=np.float64), []
            if len(pool):
                return np.concatenate((dst, pool)), []
            # Nothing buffered: keep dst and start with an empty pool.
            return dst, []

        def produce():
            """Fill the queue with ``[data_x, data_y]`` batches."""
            # NOTE(review): this local ``i`` is incremented but never read.
            i = 1
            count = 0
            data_x = data_y = None
            pool_x = []
            pool_y = []
            for x, y in self.dataset:
                data_x, pool_x = append(data_x, pool_x, x)
                # Each label is stored as the pair (y, 1 - y).
                data_y, pool_y = append(data_y, pool_y, (y, 1 - y))
                count += 1
                if count >= self.batch_size:
                    # Batch complete: force the remaining pools into the
                    # arrays, publish the batch and start a new one.
                    data_x, pool_x = append(data_x, pool_x, None)
                    data_y, pool_y = append(data_y, pool_y, None)
                    data.put([data_x, data_y])
                    i += 1
                    count = 0
                    data_x = data_y = None
            if count > 0:
                # Publish the final, partially filled batch.
                data_x, pool_x = append(data_x, pool_x, None)
                data_y, pool_y = append(data_y, pool_y, None)
                data.put([data_x, data_y])

        start_new_thread(produce, ())
        i = 1
        # NOTE(review): exactly self.n batches are taken from the queue; if
        # the producer publishes fewer, data.get() presumably blocks -- confirm.
        while i <= self.n:
            start = time.clock()
            X, Y = data.get()
            if self.verbose:
                # Column 0 of Y holds y and column 1 holds 1 - y (see
                # produce), so the column sums are reported as the positive
                # and negative counts.
                TT.debug("batch", i, "of", self.n, "completed in",
                         time.clock() - start, "seconds. This batch has",
                         int(np.sum(Y[:, 0])), "positive pixels and",
                         int(np.sum(Y[:, 1])), "negative pixels.")
            yield X, Y
            i += 1
0
    def positive(self):
        """
        Return the labelled positive samples, collecting them on first use.

        For every ``(data_file, label_file)`` pair in ``self.files`` the label
        CSV is loaded from ``self.root_path``. The rows are kept per data file
        in ``self._positive`` and, keyed by flat pixel index, in
        ``self._positive_expanded``.

        :return: ``(positives_by_file, positive_count)``.
        """
        if not hasattr(self, '_positive'):
            TT.debug("Collecting positive samples.")
            self._positive = {}
            self._positive_size = 0
            self._positive_expanded = {}
            for data_file, label_file in self.files:
                label_path = os.path.join(self.root_path, label_file)
                rows = load_csv(label_path)
                self._positive[data_file] = rows
                self._positive_size += len(rows)
                # Flat pixel index -> p, for membership tests by index.
                by_index = {}
                self._positive_expanded[data_file] = by_index
                for col, row, p in rows:
                    key = index_at_pixel(col=col, row=row,
                                         size=self.image_size)
                    by_index[key] = p
            TT.debug("Found", self._positive_size, "positive samples.")
            return self.positive

        return self._positive, self._positive_size
Ejemplo n.º 12
0
    def data(self):
        """
        Return the full dataset, loading or building it on first use.

        An already cached dataset is returned as is. Otherwise ``self.load()``
        is tried; when it reports failure the positive rows are merged into
        the random samples per file, the result is cached and ``self.dump()``
        is called.

        :return: ``(dataset_by_file, dataset_size)``.
        """
        if hasattr(self, '_dataset'):
            return self._dataset, self._dataset_size

        if not self.load():
            TT.debug("Creating new dataset.")
            positives, positive_count = self.positive
            merged, sample_count = self.sample
            for filename in positives:
                rows = positives[filename]
                if filename in merged:
                    merged[filename] += rows
                else:
                    merged[filename] = rows
            self._dataset = merged
            self._dataset_size = sample_count + positive_count
            self.dump()

        # Both the loaded and the freshly built case finish through the
        # cached attributes.
        return self.data
Ejemplo n.º 13
0
def main():
    """
    Parse the command line and run the task selected by ``args.task``.

    Known tasks are ``train-filter``, ``train-cnn``, ``test-filter`` and
    ``test-cnn``. Any other value prints the parser help and exits with
    status 0.
    """
    parser, args = parse_args()
    TT.verbose = args.verbose
    # One if/elif chain: with a separate ``if`` for 'train-cnn', a finished
    # 'train-filter' run fell through to the final ``else`` and printed help.
    if args.task == 'train-filter':
        TT.debug("Running: Task Train Filter")
        task_train_filter(args)
    elif args.task == 'train-cnn':
        TT.debug("Running: Task Train CNN")
        task_train_cnn(args)
    elif args.task == 'test-filter':
        TT.debug("Running: Task Test Filter")
        task_test_filter(args)
    elif args.task == 'test-cnn':
        TT.debug("Running: Task Test CNN")
        task_test_cnn(args)
    else:
        parser.print_help()
        exit(0)
Ejemplo n.º 14
0
def main():
    """
    Entry point: dispatch to the task named by ``args.task``.

    Runs one of the train/test tasks for the filter or CNN models. When the
    task name is not recognised, the parser help is printed and the process
    exits with status 0.
    """
    parser, args = parse_args()
    TT.verbose = args.verbose
    # All branches belong to a single chain so that exactly one of them runs;
    # previously 'train-filter' was followed by the help-and-exit branch.
    if args.task == 'train-filter':
        TT.debug("Running: Task Train Filter")
        task_train_filter(args)
    elif args.task == 'train-cnn':
        TT.debug("Running: Task Train CNN")
        task_train_cnn(args)
    elif args.task == 'test-filter':
        TT.debug("Running: Task Test Filter")
        task_test_filter(args)
    elif args.task == 'test-cnn':
        TT.debug("Running: Task Test CNN")
        task_test_cnn(args)
    else:
        parser.print_help()
        exit(0)
Ejemplo n.º 15
0
def task_train_filter(args):
    """
    Fit the base model on the dataset selected by ``args.dataset``.

    Weights stored at ``<args.path>/base-model.weights.npy`` are loaded when
    present. Training runs for ``args.epoch`` passes over the batch
    generator; a ``LearnLog`` is told about the start and end of every pass
    and about the end of training.

    :param args: parsed command-line options (``dataset``, ``path``,
                 ``verbose``, ``batch``, ``lr``, ``mini_batch``, ``epoch``).
    """
    config_factory = getattr(__import__('dataset'), args.dataset)
    ff, mapper = config_factory()
    dataset = Dataset(
        root_path=args.path,
        verbose=args.verbose,
        name='base-model',
        mapper=mapper,
        filename_filter=ff,
        rotation=False,
    )
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base
    TT.debug("Compile base model.")
    model = model_base(args.lr)

    saved_weights = os.path.join(args.path, 'base-model.weights.npy')
    if os.path.exists(saved_weights):
        TT.info("Loading weights from %s" % saved_weights)
        model.load_weights(saved_weights)

    train_start = time.time()
    log = LearnLog("filter", args.path)
    for done in xrange(args.epoch):
        current = done + 1
        TT.debug(current, "of", args.epoch, "epochs")
        log.on_dataset_epoch_begin(current)
        for inputs, targets in dataset_batches:
            model.fit(
                inputs,
                targets,
                batch_size=args.mini_batch,
                nb_epoch=1,
                validation_split=.1,
                callbacks=[log],
                show_accuracy=True,
                shuffle=True,
            )
        log.on_dataset_epoch_end(current)
    log.on_dataset_train_end()

    hours = (time.time() - train_start) / 3600.
    TT.success("Training finished in %.2f hours." % hours)
Ejemplo n.º 16
0
def task_test_filter(args):
    """
    Run the base model over one input image and save its prediction map.

    Weights are read from ``<args.path>/base-model.weights.npy``. The first
    output column of every prediction is reshaped to ``(height, width)`` and
    saved next to ``args.input`` as ``predicted.npy``; ``dataset.output`` is
    saved as ``expected.npy``.

    :param args: parsed command-line options (``input``, ``output``,
                 ``batch``, ``lr``, ``path``, ``mini_batch``, ``verbose``).
    """
    dataset = ImageIterator(args.input, args.output)
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base
    TT.debug("Compile base model.")
    model = model_base(args.lr)

    weights_path = os.path.join(args.path, 'base-model.weights.npy')
    TT.info("Loading weights from %s" % weights_path)
    model.load_weights(weights_path)

    started_at = time.time()
    predictions = None
    for batch_x, _ in dataset_batches:
        batch_out = model.predict(batch_x, args.mini_batch, args.verbose)
        predictions = np_append(predictions, batch_out)

    width, height = dataset.image_size
    prediction_map = numpy.reshape(predictions[:, 0], (height, width))
    numpy.save(change_ext(args.input, 'predicted.npy'), prediction_map)
    numpy.save(change_ext(args.input, 'expected.npy'), dataset.output)

    elapsed_minutes = (time.time() - started_at) / 60.
    TT.success("Testing finished in %.2f minutes." % elapsed_minutes)
Ejemplo n.º 17
0
def task_test_filter(args):
    """
    Predict with the base model on ``args.input`` and store the results.

    The model weights come from ``<args.path>/base-model.weights.npy``.
    Column 0 of the accumulated predictions is reshaped to
    ``(height, width)`` and written as ``predicted.npy``; the iterator's
    ``output`` is written as ``expected.npy``.

    :param args: parsed command-line options (``input``, ``output``,
                 ``batch``, ``lr``, ``path``, ``mini_batch``, ``verbose``).
    """
    dataset = ImageIterator(args.input, args.output)
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base
    TT.debug("Compile base model.")
    model = model_base(args.lr)

    saved_weights = os.path.join(args.path, 'base-model.weights.npy')
    TT.info("Loading weights from %s" % saved_weights)
    model.load_weights(saved_weights)

    test_start = time.time()
    collected = None
    for inputs, _ in dataset_batches:
        collected = np_append(
            collected,
            model.predict(inputs, args.mini_batch, args.verbose),
        )

    width, height = dataset.image_size
    first_column = collected[:, 0]
    image_shaped = numpy.reshape(first_column, (height, width))
    numpy.save(change_ext(args.input, 'predicted.npy'), image_shaped)
    numpy.save(change_ext(args.input, 'expected.npy'), dataset.output)

    minutes = (time.time() - test_start) / 60.
    TT.success("Testing finished in %.2f minutes." % minutes)
Ejemplo n.º 18
0
 def on_dataset_train_end(self, logs={}):
     """
     Save the model weights once training has finished.

     The target name is ``self.weights_file`` formatted with 0.

     :param logs: unused.
     """
     final_weights_path = self.weights_file % 0
     TT.debug("Saving weights to", final_weights_path)
     self.model.save_weights(final_weights_path)
Ejemplo n.º 19
0
def task_test_cnn(args):
    """
    Run the base model and both CNN models over one input image.

    Every batch is first scored by the base model. Inverted windows
    (``1 - x``) whose first base output exceeds 0.6 are then scored by
    model 1 and model 2; all other positions keep zeros. Column 0 of each
    result is reshaped to ``(height, width)`` and saved next to
    ``args.input``, together with ``dataset.output`` as ``expected.npy``.

    :param args: parsed command-line options (``input``, ``output``,
                 ``batch``, ``path``, ``mini_batch``, ``verbose``).
    """
    dataset = ImageIterator(args.input, args.output)
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base, model_1, model_2
    TT.debug("Compile base model.")
    model = model_base(0)
    TT.debug("Compile model 1.")
    model1 = model_1(0)
    TT.debug("Compile model 2.")
    model2 = model_2(0)

    base_weights = os.path.join(args.path, 'base-model.weights.npy')
    model1_weights = os.path.join(args.path, 'model1.weights.npy')
    model2_weights = os.path.join(args.path, 'model2.weights.npy')
    for net, weights_path in ((model, base_weights),
                              (model1, model1_weights),
                              (model2, model2_weights)):
        TT.info("Loading weights from %s" % weights_path)
        net.load_weights(weights_path)

    test_start = time.time()
    out = out1 = out2 = None
    for x, _ in dataset_batches:
        base_out = model.predict(x, args.mini_batch, args.verbose)
        local1 = numpy.zeros(base_out.shape)
        local2 = numpy.zeros(base_out.shape)
        out = np_append(out, base_out)

        inverted = 1. - x
        # Positions the base model scores above the 0.6 threshold.
        indices = [i for i in range(len(base_out)) if base_out[i][0] > .6]
        x_new = numpy.asarray([inverted[i] for i in indices])

        if len(x_new):
            local1[indices] = model1.predict(x_new, args.mini_batch,
                                             args.verbose)
        out1 = np_append(out1, local1)

        if len(x_new):
            local2[indices] = model2.predict(x_new, args.mini_batch,
                                             args.verbose)
        out2 = np_append(out2, local2)

    width, height = dataset.image_size
    image_shape = (height, width)
    out = numpy.reshape(out[:, 0], image_shape)
    out1 = numpy.reshape(out1[:, 0], image_shape)
    out2 = numpy.reshape(out2[:, 0], image_shape)
    numpy.save(change_ext(args.input, 'predicted.npy'), out)
    numpy.save(change_ext(args.input, 'model1.predicted.npy'), out1)
    numpy.save(change_ext(args.input, 'model2.predicted.npy'), out2)
    numpy.save(change_ext(args.input, 'expected.npy'), dataset.output)

    elapsed_minutes = (time.time() - test_start) / 60.
    TT.success("Testing finished in %.2f minutes." % elapsed_minutes)
Ejemplo n.º 20
0
def task_train_cnn(args):
    """
    Train model 1 and model 2 on windows weighted by the base model.

    The base model is compiled with ``lr=0`` and only used for prediction.
    For each batch the windows are inverted (``1 - x``); every window whose
    first label component is below 1 is multiplied by the base model's first
    output for it. Both models are then fitted on the same batch. Saved
    weights under ``args.path`` are loaded first when the files exist.

    :param args: parsed command-line options (``dataset``, ``path``,
                 ``verbose``, ``batch``, ``lr``, ``mini_batch``, ``epoch``).
    """
    ff, mapper = getattr(__import__('dataset'), args.dataset)()
    dataset = Dataset(root_path=args.path, verbose=args.verbose, name='cnn',
                      mapper=mapper, filename_filter=ff, ratio=9)
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base, model_1, model_2
    TT.debug("Compile base model.")
    model = model_base(lr=0)
    TT.debug("Compile model 1.")
    model1 = model_1(args.lr)
    TT.debug("Compile model 2.")
    model2 = model_2(args.lr)

    base_weights = os.path.join(args.path, 'base-model.weights.npy')
    model1_weights = os.path.join(args.path, 'model1.weights.npy')
    model2_weights = os.path.join(args.path, 'model2.weights.npy')
    for net, weights_path in ((model, base_weights),
                              (model1, model1_weights),
                              (model2, model2_weights)):
        if os.path.exists(weights_path):
            TT.info("Loading weights from %s" % weights_path)
            net.load_weights(weights_path)

    train_start = time.time()
    log1 = LearnLog("model1", args.path)
    log2 = LearnLog("model2", args.path)
    for epoch_index in xrange(args.epoch):
        epoch_number = epoch_index + 1
        TT.debug(epoch_number, "of", args.epoch, "epochs")
        log1.on_dataset_epoch_begin(epoch_number)
        log2.on_dataset_epoch_begin(epoch_number)
        for x, y in dataset_batches:
            outputs = model.predict(x, batch_size=args.mini_batch,
                                    verbose=args.verbose)
            # Weight each qualifying window by the base model's prediction
            # before handing it to the next models.
            x = 1. - x
            for i, prediction in enumerate(outputs):
                if y[i][0] < 1.:
                    x[i] *= prediction[0]

            TT.debug("Model 1 on epoch %d" % epoch_number)
            model1.fit(numpy.asarray(x), numpy.asarray(y),
                       batch_size=args.mini_batch, nb_epoch=1,
                       validation_split=.1, callbacks=[log1],
                       show_accuracy=True, shuffle=True)
            TT.debug("Model 2 on epoch %d" % epoch_number)
            model2.fit(numpy.asarray(x), numpy.asarray(y),
                       batch_size=args.mini_batch, nb_epoch=1,
                       validation_split=.1, callbacks=[log2],
                       show_accuracy=True, shuffle=True)
        log1.on_dataset_epoch_end(epoch_number)
        log2.on_dataset_epoch_end(epoch_number)
    log1.on_dataset_train_end()
    log2.on_dataset_train_end()

    elapsed_hours = (time.time() - train_start) / 3600.
    TT.success("Training finished in %.2f hours." % elapsed_hours)
Ejemplo n.º 21
0
def task_train_cnn(args):
    """
    Fit model 1 and model 2 using the base model's output as a weighting.

    The base model (compiled with ``lr=0``) predicts on every batch. The
    batch is inverted (``1 - x``) and each window whose first label component
    is below 1 is scaled by the first base output for that window. Model 1
    and model 2 are fitted on the resulting batch. Existing weight files
    under ``args.path`` are loaded before training.

    :param args: parsed command-line options (``dataset``, ``path``,
                 ``verbose``, ``batch``, ``lr``, ``mini_batch``, ``epoch``).
    """
    config_factory = getattr(__import__('dataset'), args.dataset)
    ff, mapper = config_factory()
    dataset = Dataset(
        root_path=args.path,
        verbose=args.verbose,
        name='cnn',
        mapper=mapper,
        filename_filter=ff,
        ratio=9,
    )
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base, model_1, model_2
    TT.debug("Compile base model.")
    model = model_base(lr=0)
    TT.debug("Compile model 1.")
    model1 = model_1(args.lr)
    TT.debug("Compile model 2.")
    model2 = model_2(args.lr)

    saved_base = os.path.join(args.path, 'base-model.weights.npy')
    saved_model1 = os.path.join(args.path, 'model1.weights.npy')
    saved_model2 = os.path.join(args.path, 'model2.weights.npy')
    restorable = [(model, saved_base),
                  (model1, saved_model1),
                  (model2, saved_model2)]
    for network, saved_path in restorable:
        if not os.path.exists(saved_path):
            continue
        TT.info("Loading weights from %s" % saved_path)
        network.load_weights(saved_path)

    train_start = time.time()
    log1 = LearnLog("model1", args.path)
    log2 = LearnLog("model2", args.path)
    for done in xrange(args.epoch):
        current = done + 1
        TT.debug(current, "of", args.epoch, "epochs")
        log1.on_dataset_epoch_begin(current)
        log2.on_dataset_epoch_begin(current)
        for x, y in dataset_batches:
            outputs = model.predict(
                x,
                batch_size=args.mini_batch,
                verbose=args.verbose,
            )
            # Scale the inverted windows in place by the base prediction.
            x = 1. - x
            for i in range(len(outputs)):
                if not y[i][0] < 1.:
                    continue
                x[i] *= outputs[i][0]

            TT.debug("Model 1 on epoch %d" % current)
            model1.fit(
                numpy.asarray(x),
                numpy.asarray(y),
                batch_size=args.mini_batch,
                nb_epoch=1,
                validation_split=.1,
                callbacks=[log1],
                show_accuracy=True,
                shuffle=True,
            )
            TT.debug("Model 2 on epoch %d" % current)
            model2.fit(
                numpy.asarray(x),
                numpy.asarray(y),
                batch_size=args.mini_batch,
                nb_epoch=1,
                validation_split=.1,
                callbacks=[log2],
                show_accuracy=True,
                shuffle=True,
            )
        log1.on_dataset_epoch_end(current)
        log2.on_dataset_epoch_end(current)
    log1.on_dataset_train_end()
    log2.on_dataset_train_end()

    hours = (time.time() - train_start) / 3600.
    TT.success("Training finished in %.2f hours." % hours)
Ejemplo n.º 22
0
def task_test_cnn(args):
    """
    Predict with the base model, model 1 and model 2 on ``args.input``.

    The base model scores every batch. Windows whose first base output is
    above 0.6 are inverted (``1 - x``) and passed to model 1 and model 2;
    the remaining positions stay zero in their outputs. Column 0 of each
    accumulated output is reshaped to ``(height, width)`` and saved, along
    with ``dataset.output`` as ``expected.npy``.

    :param args: parsed command-line options (``input``, ``output``,
                 ``batch``, ``path``, ``mini_batch``, ``verbose``).
    """
    dataset = ImageIterator(args.input, args.output)
    dataset_batches = BatchGenerator(dataset, args.batch)

    from mitosis import model_base, model_1, model_2
    TT.debug("Compile base model.")
    model = model_base(0)
    TT.debug("Compile model 1.")
    model1 = model_1(0)
    TT.debug("Compile model 2.")
    model2 = model_2(0)

    saved_base = os.path.join(args.path, 'base-model.weights.npy')
    saved_model1 = os.path.join(args.path, 'model1.weights.npy')
    saved_model2 = os.path.join(args.path, 'model2.weights.npy')
    restorable = [(model, saved_base),
                  (model1, saved_model1),
                  (model2, saved_model2)]
    for network, saved_path in restorable:
        TT.info("Loading weights from %s" % saved_path)
        network.load_weights(saved_path)

    test_start = time.time()
    out = out1 = out2 = None
    for x, _ in dataset_batches:
        tmp = model.predict(x, args.mini_batch, args.verbose)
        local1 = numpy.zeros(tmp.shape)
        local2 = numpy.zeros(tmp.shape)
        out = np_append(out, tmp)

        x = 1. - x
        # Keep only the windows the base model is confident about.
        indices = []
        selected = []
        for i, scores in enumerate(tmp):
            if scores[0] > .6:
                selected.append(x[i])
                indices.append(i)
        x_new = numpy.asarray(selected)

        if len(x_new):
            tmp1 = model1.predict(x_new, args.mini_batch, args.verbose)
            local1[indices] = tmp1
        out1 = np_append(out1, local1)

        if len(x_new):
            tmp2 = model2.predict(x_new, args.mini_batch, args.verbose)
            local2[indices] = tmp2
        out2 = np_append(out2, local2)

    width, height = dataset.image_size
    target_shape = (height, width)
    out = numpy.reshape(out[:, 0], target_shape)
    out1 = numpy.reshape(out1[:, 0], target_shape)
    out2 = numpy.reshape(out2[:, 0], target_shape)
    numpy.save(change_ext(args.input, 'predicted.npy'), out)
    numpy.save(change_ext(args.input, 'model1.predicted.npy'), out1)
    numpy.save(change_ext(args.input, 'model2.predicted.npy'), out2)
    numpy.save(change_ext(args.input, 'expected.npy'), dataset.output)

    minutes = (time.time() - test_start) / 60.
    TT.success("Testing finished in %.2f minutes." % minutes)
Ejemplo n.º 23
0
 def on_dataset_train_end(self, logs={}):
     """
     Write the final model weights when training is over.

     The file name is built by formatting ``self.weights_file`` with 0.

     :param logs: unused.
     """
     target = self.weights_file % 0
     TT.debug("Saving weights to", target)
     self.model.save_weights(target)