def prediction_processor(self):
     '''Run a thread to process predictions'''
     try:
         self.__bind_cuda()
         self._load_model()
         if not hasattr(self, "function"):
             t0 = time.time()
             self.function = cPickle.loads(self.function_pickle)
             logger.report_metric("Function load time", time.time()-t0)
             del self.function_pickle
         batches = []
         while True:
             block, x0b, x1b, y0b, y1b, z0b, z1b = self.pred_queue.get()
             if block is not None:
                 batches.append((block, x0b, x1b, y0b, y1b, z0b, z1b))
             if len(batches) >= self.batch_count or\
                (block is None and len(batches) > 0):
                 t0 = time.time()
                 pred = self.function(np.array([_[0] for _ in batches]))[0]
                 delta=(time.time() - t0) / len(batches)
                 for i, (_, x0b, x1b, y0b, y1b, z0b, z1b) \
                     in enumerate(batches):
                     self.out_queue.put(
                         (pred[i], delta, x0b, x1b, y0b, y1b, z0b, z1b))
                 batches = []
             if block is None:
                 break
     except Exception as e:
         self.exception = e
         logger.report_exception()
     self.out_queue.put([None] * 8)
 def classify(self, image, x, y, z):
     #
     # The threading here may seem a little odd, but Theano/CUDA want
     # to run a function on the same thread every time. So the main
     # thread runs prediction, even if it's in the middle.
     #
     t0_total = time.time()
     self.image_shape = np.array(image.shape)
     self.out_image = np.zeros((
         len(self.classes),
         image.shape[0] - 2 * self.get_z_pad(),
         image.shape[1] - 2 * self.get_y_pad(),
         image.shape[2] - 2 * self.get_x_pad()),
                               np.uint8)
     self.exception = None
     self.pred_queue = Queue.Queue(10)
     self.out_queue = Queue.Queue(10)
     preprocess_thread = threading.Thread(
         target=self.preprocessor,
         args=(image,))
     preprocess_thread.start()
     out_thread = threading.Thread(target=self.output_processor)
     out_thread.start()
     self.prediction_processor()
     preprocess_thread.join()
     out_thread.join()
     if self.exception is not None:
         raise self.exception
     logger.report_metric("keras_volume_classification_time",
                          time.time() - t0_total)
     return dict([(key, self.out_image[i]) 
                  for i, key in enumerate(self.classes)])
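A minimal usage sketch for the classify() method above, assuming a KerasClassifier instance (here called `classifier`) has already been configured elsewhere with its pickled model, class names and padding; the volume shape and offsets are illustrative only:

import numpy as np

# Hypothetical, already-configured classifier instance (construction not shown).
volume = np.random.randint(0, 255, (70, 1024, 1024)).astype(np.uint8)
# classify() returns a dict of class name -> uint8 probability volume,
# trimmed by the z/y/x pads; x, y, z are the volume's global offsets.
probs = classifier.classify(volume, x=0, y=0, z=0)
for name, channel in probs.items():
    print("%s %s %s" % (name, channel.shape, channel.dtype))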
Example #3
 def test_backend(self):
     #
     # Try all of the logging functions to get enough coverage to
     # run through the code
     #
     logger.start_process("foo", "Hello, world", ["bar", "baz"])
     logger.report_metric("Execution time", "14 Mahayugas")
     logger.report_event("Frobbing the galactopus", "very messy this time")
     try:
         raise Exception("Whoops")
     except:
         logger.report_exception()
     logger.end_process("bye for now", rh_logger.ExitCode.success)
 @classmethod
 def __bind_cuda(cls):
     if cls.has_bound_cuda:
         return
     if "THEANO_FLAGS" in os.environ:
         return
     if "MICRONS_IPC_WORKER_GPU" in os.environ:
         device = int(os.environ["MICRONS_IPC_WORKER_GPU"])
         os.environ["THEANO_FLAGS"]="device=cuda%d" % device
         return
     import keras
     if KerasClassifier.__keras_backend() != 'theano':
         logger.report_event("Using Tensorflow")
         return
     t0 = time.time()
     #
     # OK - pycuda.driver.Device.count() sometimes requires
     #      pycuda.init() which sometimes screws up
     #      theano.sandbox.cuda.use. So I just use nvidia-smi to
     #      tell me about the GPUs.
     # A typical line of output:
     #      GPU 0: GeForce GTX TITAN X ...
     #
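     # For instance, applied to the sample line above, the regex below
     # pulls out the device index:
     #     re.search(r"GPU\s(\d+)", "GPU 0: GeForce GTX TITAN X").group(1)
     # returns "0", so device == 0.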
     if "MICRONS_IPC_WORKER_GPU" in os.environ:
         device = int(os.environ["MICRONS_IPC_WORKER_GPU"])
         os.environ["THEANO_FLAGS"]="device=cuda%d" % device
     else:
         nvidia_smi_output = subprocess.check_output(["nvidia-smi", "-L"])
         for line in nvidia_smi_output.split("\n"):
             match = re.search("GPU\\s(\\d+)", line)
             if match is None:
                 continue
             device = int(match.group(1))
             try:
                 os.environ["THEANO_FLAGS"]="device=cuda%d" % device
                 import keras
                 break
             except:
                 continue
         else:
             raise RuntimeError("Failed to acquire GPU")
     logger.report_metric("gpu_acquisition_time", time.time() - t0)
     logger.report_event("Acquired GPU %d" % device)
     cls.has_bound_cuda=True
    def output_processor(self):
        '''Run a thread to process the prediction output'''
        try:
            while True:
                pred, delta, x0b, x1b, y0b, y1b, z0b, z1b = self.out_queue.get()
                if pred is None:
                    break
                logger.report_event(
                    "Processed block %d:%d, %d:%d, %d:%d in %f sec" %
                    (x0b, x1b, y0b, y1b, z0b, z1b, delta))
                logger.report_metric("keras_block_classification_time",
                                     delta)
                n_classes = 1 if self.split_positive_negative \
                    else len(self.classes)
                pred.shape = (
                    n_classes,
                    z1b - z0b + 2 * self.z_trim_size, 
                    y1b - y0b + 2 * self.xy_trim_size,
                    x1b - x0b + 2 * self.xy_trim_size)
                pred = pred[:,
                            self.z_trim_size:pred.shape[1] - self.z_trim_size,
                            self.xy_trim_size:pred.shape[2] - self.xy_trim_size,
                            self.xy_trim_size:pred.shape[3] - self.xy_trim_size]
                if self.downsample_factor != 1:
                    pred = np.array([[zoom(plane, self.downsample_factor)
                                      for plane in _]
                                     for _ in pred])
                    y0b, y1b, x0b, x1b = \
                        [int(_ * self.downsample_factor)
                         for _ in (y0b, y1b, x0b, x1b)]
                # Fix padding
                if x1b > self.out_image.shape[3]:
                    x1b = self.out_image.shape[3]
                    pred = pred[:, :, :, :x1b - x0b]
                    logger.report_event("Fixing X padding: " + str(pred.shape))
                if y1b > self.out_image.shape[2]:
                    y1b = self.out_image.shape[2]
                    pred = pred[:, :, :y1b - y0b, :]
                    logger.report_event("Fixing Y padding): " + str(pred.shape))
                if self.split_positive_negative:
                    assert pred.shape[0] == 1
                    pred = np.array([pred[0], -pred[0]])
                if self.stretch_output:
                    for z in range(pred.shape[0]):
                        pred_min = pred[z].min()
                        pred_max = pred[z].max()
                        pred[z] = (pred[z] - pred_min) / \
                            (pred_max - pred_min + np.finfo(pred.dtype).eps)
                else:
                    pred = np.clip(pred, 0, 1)
                if self.invert:
                    logger.report_event("Inverting output")
                    pred = 1 - pred

                if self.value_range is not None:
                    low, high = self.value_range
                    tmp = np.zeros_like(pred)
                    tmp[pred >= high] = 255
                    mask = (pred > low) & (pred < high)
                    tmp[mask] = 254 * (pred[mask] - low) / (high - low) + 1
                    pred = tmp
                    del tmp
                else:
                    pred = pred * 255
                self.out_image[:, z0b:z1b, y0b:y1b, x0b:x1b] = \
                    np.clip(pred, 0, 255).astype(np.uint8)
        except Exception as e:
            self.exception = e
            logger.report_exception()
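The value_range branch above rescales raw predictions so that values at or below `low` become 0, values at or above `high` become 255, and everything in between lands in 1..254. A standalone restatement of that mapping (the function name and sample numbers are illustrative only):

import numpy as np

def remap_value_range(pred, low, high):
    # Mirrors the value_range branch of output_processor above.
    out = np.zeros_like(pred)
    out[pred >= high] = 255
    mask = (pred > low) & (pred < high)
    out[mask] = 254 * (pred[mask] - low) / (high - low) + 1
    return out

# With low=0.2 and high=0.8, a prediction of 0.5 maps to
# 254 * 0.3 / 0.6 + 1 = 128; 0.1 maps to 0 and 0.9 maps to 255.
print(remap_value_range(np.array([0.1, 0.5, 0.9]), 0.2, 0.8))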
 def preprocessor(self, image):
     '''The preprocessor thread: run normalization and make blocks'''
     import keras
     #
     # Downsample the image as a first step. All coordinates are then in
     # the downsampled size.
     #
     image = self.downsample_and_pad_image(image)
     logger.report_event(
         "Image after downsampling and padding: %d, %d, %d" % 
         (image.shape[0], image.shape[1], image.shape[2]))
     #
     # Coordinates:
     #
     # Reduce the image by the padding
     # Break it into equal-sized blocks that are less than the block size
     #
     # The output image goes from <x, y, z>0 to <x, y, z>1
     # There are n_<x, y, z> blocks in each direction
     # The block coordinates are <x, y, z>s[i]:<x, y, z>s[i+1]
     #
     # The last block ends at the edge of the image.
     #
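     # A worked example with illustrative numbers (not from a real run):
     # with ypad_ds = 50, image.shape[1] = 1600 and an output block
     # height of 512, y0 = 50 and y1 = 1550, so
     #     n_y = 1 + int((1550 - 50 - 1) / 512) = 3
     #     ys  = linspace(50, 1550, 4) = [50, 550, 1050, 1550]
     # i.e. three blocks of 500 rows each, all within the 512 limit, with
     # the last block ending flush at y1, the edge of the unpadded region.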
     output_block_size = self.block_size - \
         np.array([self.zpad_size*2, 
                   self.get_y_pad_ds()*2, 
                   self.get_x_pad_ds()*2])
     xpad_ds = self.get_x_pad_ds()
     ypad_ds = self.get_y_pad_ds()
     input_block_size = self.block_size
     
     z0 = self.get_z_pad()
     z1 = image.shape[0] - self.zpad_size
     n_z = 1 + int((z1-z0 - 1) / output_block_size[0])
     zs = np.linspace(z0, z1, n_z+1).astype(int)
     y0 = ypad_ds
     y1 = image.shape[1] - ypad_ds
     n_y = 1 + int((y1-y0 - 1) / output_block_size[1])
     ys = np.linspace(y0, y1, n_y+1).astype(int)
     x0 = xpad_ds
     x1 = image.shape[2] - xpad_ds
     n_x = 1 + int((x1-x0 - 1) / output_block_size[2])
     xs = np.linspace(x0, x1, n_x+1).astype(int)
     t0 = time.time()
     kwargs = {}
     if self.normalize_offset is not None:
         kwargs["offset"] = self.normalize_offset
     if self.normalize_saturation_level is not None:
         kwargs["saturation_level"] = self.normalize_saturation_level
     norm_img = normalize_image(image, self.normalize_method, **kwargs)
     logger.report_metric("keras_cpu_block_processing_time",
                          time.time() - t0)
     #
     # Classify each block
     #
     for zi in range(n_z):
         if zi == n_z-1:
             z0a = image.shape[0] - input_block_size[0]
             z1a = image.shape[0]
         else:
             z0a = zs[zi] - self.get_z_pad()
             z1a = z0a + input_block_size[0]
         z0b = z0a
         if self.mirrored:
             z1b = z0b + output_block_size[0]
         else:
             z1b = z1a - self.get_z_pad() * 2
         for yi in range(n_y):
             if yi == n_y - 1:
                 y0a = max(0, image.shape[1] - input_block_size[1])
                 y1a = image.shape[1]
             else:
                 y0a = ys[yi] - ypad_ds
                 y1a = y0a + input_block_size[1]
             y0b = y0a
             y1b = y1a - ypad_ds * 2
             for xi in range(n_x):
                 if xi == n_x-1:
                     x0a = max(0, image.shape[2] - input_block_size[2])
                     x1a = image.shape[2]
                 else:
                     x0a = xs[xi] - xpad_ds
                     x1a = x0a + input_block_size[2]
                 x0b = x0a
                 x1b = x1a - xpad_ds * 2
                 block = np.array([norm_img[z][y0a:y1a, x0a:x1a]
                                   for z in range(z0a, z1a)])
                 if self.transpose is None:
                     # Legacy transpose: guess
                     if block.shape[0] == 1:
                         if KerasClassifier.__keras_backend() == 'theano':
                             block.shape = \
                                 [1, block.shape[-2], block.shape[-1]]
                         else:
                             block.shape = \
                                 [block.shape[-2], block.shape[-1], 1]
                     else:
                         if KerasClassifier.__keras_backend() == 'theano':
                             block.shape = [1] + list(block.shape)
                         else:
                             block.shape = list(block.shape) + [1]
                 else:
                     #
                     # The format of the transpose is "None" for an
                     # unused ("1") slot in the tensor and the given axis
                     # otherwise, e.g. (None, None, 0, 1, 2) means
                     # "don't transpose and reshape as [1, 1] + shape"
                     #
                     reshape = []
                     for slot in self.transpose:
                         if slot is None:
                             reshape.append(1)
                         else:
                             reshape.append(block.shape[slot])
                     transpose = tuple(filter(lambda _:_ is not None,
                                              self.transpose))
                     if len(reshape) == 5:
                         reshape = reshape[1:]
                     if transpose != tuple(sorted(transpose)):
                         block = block.transpose(*transpose)
                     block = block.reshape(*reshape)
                 # Queue the block for prediction
                 self.pred_queue.put((block, x0b, x1b, y0b, y1b, z0b, z1b))
     self.pred_queue.put([None] * 7)
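A standalone restatement of the transpose/reshape specification handled in the preprocessor above; the function name and the example shape are illustrative, but the logic mirrors the non-legacy (explicit self.transpose) branch:

import numpy as np

def apply_transpose_spec(block, spec):
    # None slots become size-1 axes; integer slots name the block axis
    # that fills that position. A 5-entry spec is trimmed to 4 axes,
    # and the block is only transposed if the axes are out of order.
    reshape = [1 if slot is None else block.shape[slot] for slot in spec]
    transpose = tuple(slot for slot in spec if slot is not None)
    if len(reshape) == 5:
        reshape = reshape[1:]
    if transpose != tuple(sorted(transpose)):
        block = block.transpose(*transpose)
    return block.reshape(*reshape)

# e.g. a (16, 64, 64) z/y/x block with spec (None, 0, 1, 2) is only
# reshaped, giving a (1, 16, 64, 64) channels-first tensor.
print(apply_transpose_spec(np.zeros((16, 64, 64)), (None, 0, 1, 2)).shape)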