コード例 #1
0
def _add_jpeg_decoding(module_spec):
    """Builds graph ops that decode one JPEG and resize it for the module.

    Args:
      module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
      A pair `(jpeg_data, resized_image)`: the string placeholder to feed the
      encoded JPEG into, and the output of the preprocessing steps (decode,
      convert to float32, add a leading batch axis, bilinear resize to the
      module's expected height and width).
    """
    target_height, target_width = hub.get_expected_image_size(module_spec)
    num_channels = hub.get_num_image_channels(module_spec)

    # Feed point for the encoded JPEG of a single image (a string tensor).
    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    # Decode with the channel count the module expects, then convert from the
    # full range of uint8 to the range [0, 1] of float32.
    image_float = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(jpeg_data, channels=num_channels), tf.float32)

    # Add a leading axis of size 1 so the single decoded image forms a batch,
    # which is the layout the resize op below is given.
    image_batch = tf.expand_dims(image_float, 0)

    # Pack the two Python scalars into one [height, width] tensor and make
    # sure it is int32; this is the target size handed to the resize op (the
    # cast concerns the size tensor, not the image data).
    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(image_batch, target_size)
    return jpeg_data, resized_image
コード例 #2
0
def create_module_graph(module_spec):
    """Creates a new graph holding JPEG preprocessing ops and the Hub module.

    Args:
      module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
      graph: the tf.Graph that was created.
      bottleneck_tensor: the bottleneck values output by the module.
      jpeg_data: the string placeholder to feed encoded JPEG data into.
    """
    target_height, target_width = hub.get_expected_image_size(module_spec)
    num_channels = hub.get_num_image_channels(module_spec)

    graph = tf.Graph()
    with graph.as_default():
        jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
        # Decode, then convert from full range of uint8 to range [0,1] of
        # float32.
        image_float = tf.image.convert_image_dtype(
            tf.image.decode_jpeg(jpeg_data, channels=num_channels),
            tf.float32)
        # Leading axis of size 1: the single image becomes a batch.
        image_batch = tf.expand_dims(image_float, 0)
        target_size = tf.cast(tf.stack([target_height, target_width]),
                              dtype=tf.int32)
        resized_image = tf.image.resize_bilinear(image_batch, target_size)
        # Instantiate the module inside this graph and apply it to the
        # resized batch.
        bottleneck_tensor = hub.Module(module_spec)(resized_image)
    return graph, bottleneck_tensor, jpeg_data
コード例 #3
0
def get_bottleneck_tensor(input_jpeg_str):
    # type: tf.Tensor -> tf.Tensor
    """Calculates the bottleneck tensor for an input JPEG string tensor.

    The image is decoded and resized to the size expected by the module
    loaded from `_FEATURE_VECTORS_MODULE_URL`, then passed through that
    module to obtain the bottleneck values.

    Args:
      input_jpeg_str: Tensor for input JPEG image.

    Returns:
      bottleneck_tensor: Tensor for output bottleneck Tensor.
    """
    spec = tensorflow_hub.load_module_spec(_FEATURE_VECTORS_MODULE_URL)
    target_height, target_width = tensorflow_hub.get_expected_image_size(spec)
    num_channels = tensorflow_hub.get_num_image_channels(spec)

    image = tf.image.decode_jpeg(input_jpeg_str, channels=num_channels)
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Leading axis of size 1 so the single image forms a batch.
    image_batch = tf.expand_dims(image, 0)

    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)
    resized_batch = tf.image.resize_bilinear(image_batch, target_size)

    feature_module = tensorflow_hub.Module(spec)
    return feature_module(resized_batch)
コード例 #4
0
ファイル: ml_utils.py プロジェクト: isabella232/healthcare-1
 def download_image_model(mdl_url):
     # type: str -> (tensorflow_hub.Module, int, int, int)
     """Returns the Tensorflow Hub model used to process images.

     Args:
       mdl_url: Location passed to `tensorflow_hub.load_module_spec`.

     Returns:
       A tuple `(module, input_height, input_width, input_depth)`: the
       instantiated module followed by the image height, width and channel
       count reported for its spec.
     """
     spec = tensorflow_hub.load_module_spec(mdl_url)
     height, width = tensorflow_hub.get_expected_image_size(spec)
     depth = tensorflow_hub.get_num_image_channels(spec)
     return (tensorflow_hub.Module(spec), height, width, depth)
コード例 #5
0
def add_jpeg_decoding(module_spec):
    """Adds JPEG decoding and resizing ops for a hub image module.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        A tuple of the string placeholder that receives the encoded JPEG and
        the float32 image tensor resized to the module's expected size.
    """
    height, width = hub.get_expected_image_size(module_spec)
    depth = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    image = tf.image.decode_jpeg(jpeg_data, channels=depth)
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Leading axis of size 1 so the single image forms a batch.
    image = tf.expand_dims(image, 0)

    target_size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(image, target_size)
    return jpeg_data, resized_image
コード例 #6
0
def add_image_deocding(module):
    """Adds ops that read a PNG file, decode it and resize it for the module.

    Args:
        module: Object queried through the hub helpers for the expected image
            size and channel count (presumably a hub.ModuleSpec; confirm
            against callers).

    Returns:
        A tuple of the string placeholder that receives the file name and the
        float32 image tensor resized to the module's expected size.
    """
    height, width = hub.get_expected_image_size(module)
    channels = hub.get_num_image_channels(module)

    # The placeholder carries a file name; the file contents are read by the
    # graph itself.
    input_file = tf.placeholder(tf.string, name='InputFile')
    png_bytes = tf.read_file(input_file)

    # Decode, then convert from full range of uint8 to range [0,1] of float32.
    image_float = tf.image.convert_image_dtype(
        tf.image.decode_png(png_bytes, channels=channels), tf.float32)
    image_batch = tf.expand_dims(image_float, 0)

    target_size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(image_batch, target_size)
    return input_file, resized_image
コード例 #7
0
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        A tuple of the string placeholder to feed JPEG data into and the
        decoded float32 image resized to the module's expected size.
    """
    target_height, target_width = hub.get_expected_image_size(module_spec)

    # Never evaluate the placeholder directly, always feed it.
    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    num_channels = hub.get_num_image_channels(module_spec)
    decoded = tf.image.decode_jpeg(jpeg_data, channels=num_channels)
    image_float = tf.image.convert_image_dtype(decoded, tf.float32)

    # Target size as an int32 [height, width] tensor.
    target_size = tf.stack([target_height, target_width])
    target_size = tf.cast(target_size, dtype=tf.int32)

    # Add a leading batch axis of size 1 before resizing.
    image_batch = tf.expand_dims(image_float, 0)
    resized_image = tf.image.resize_bilinear(image_batch, target_size)
    return jpeg_data, resized_image
コード例 #8
0
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        A tuple `(jpeg_data, resized_image)`: the placeholder to feed the
        encoded JPEG into and the preprocessed image batch.
    """
    expected_height, expected_width = hub.get_expected_image_size(module_spec)
    expected_channels = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    # Decode, convert from full range of uint8 to range [0,1] of float32, and
    # add a leading batch axis of size 1.
    image_batch = tf.expand_dims(
        tf.image.convert_image_dtype(
            tf.image.decode_jpeg(jpeg_data, channels=expected_channels),
            tf.float32),
        0)

    size_tensor = tf.cast(tf.stack([expected_height, expected_width]),
                          dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(image_batch, size_tensor)
    return jpeg_data, resized_image
コード例 #9
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Builds graph ops that randomly scale, crop, flip and brighten a JPEG.

    The decoded image is resized to the module's expected size multiplied by
    a scale factor, a window of exactly the expected size is then cut from a
    random position, optionally mirrored at random, and finally multiplied by
    a random brightness factor.

    Args:
        flip_left_right: Whether to add a random horizontal flip.
        random_crop: Percentage; contributes the fixed factor
            `1 + random_crop / 100` to the pre-crop scale.
        random_scale: Percentage; the pre-crop scale is additionally
            multiplied by a value drawn uniformly between 1 and
            `1 + random_scale / 100`.
        random_brightness: Percentage; pixel values are multiplied by a value
            drawn uniformly between `1 - random_brightness / 100` and
            `1 + random_brightness / 100`.
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        A tuple of the JPEG string placeholder and the distorted image tensor
        (named 'DistortResult') with a leading batch axis of size 1.
    """
    tf.logging.info(
        "Adding distortions. Flip: {}, crop {}%, scale {}%, brigthness {}%".
        format(flip_left_right, random_crop, random_scale, random_brightness))

    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    target_height, target_width = hub.get_expected_image_size(module_spec)
    num_channels = hub.get_num_image_channels(module_spec)

    image_float = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(jpeg_data, channels=num_channels), tf.float32)
    image_batch = tf.expand_dims(image_float, 0)

    # Overall pre-crop scale = fixed crop margin * random scale factor.
    crop_margin = 1.0 + (random_crop / 100.0)
    max_random_scale = 1.0 + (random_scale / 100.0)
    random_factor = tf.random_uniform(shape=[], minval=1.0,
                                      maxval=max_random_scale) \
        if False else None  # placeholder, replaced below
    del random_factor
    margin_tensor = tf.constant(crop_margin)
    random_factor = tf.random_uniform(shape=[],
                                      minval=1.0,
                                      maxval=max_random_scale)
    scale = tf.multiply(margin_tensor, random_factor)

    # Enlarge the image so that a window of the expected size can be cut out.
    scaled_width = tf.multiply(scale, target_width)
    scaled_height = tf.multiply(scale, target_height)
    scaled_size = tf.cast(tf.stack([scaled_height, scaled_width]),
                          dtype=tf.int32)
    enlarged_batch = tf.image.resize_bilinear(image_batch, scaled_size)
    enlarged_image = tf.squeeze(enlarged_batch, axis=[0])
    cropped_image = tf.random_crop(
        enlarged_image, [target_height, target_width, num_channels])

    flipped_image = (tf.image.random_flip_left_right(cropped_image)
                     if flip_left_right else cropped_image)

    brightness_delta = random_brightness / 100.0
    brightness_value = tf.random_uniform(shape=[],
                                         minval=1.0 - brightness_delta,
                                         maxval=1.0 + brightness_delta)

    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
コード例 #10
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Performs the random transformations used for data augmentation.

    Args:
        flip_left_right: Whether to add a random horizontal flip.
        random_crop: Percentage; contributes the fixed factor
            `1 + random_crop / 100` to the pre-crop scale.
        random_scale: Percentage; the pre-crop scale is additionally
            multiplied by a value drawn uniformly between 1 and
            `1 + random_scale / 100`.
        random_brightness: Percentage; pixel values are multiplied by a value
            drawn uniformly between `1 - random_brightness / 100` and
            `1 + random_brightness / 100`.
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        A tuple of the JPEG string placeholder and the distorted image tensor
        (named 'DistortResult') with a leading batch axis of size 1.
    """
    # Image geometry expected by the module.
    target_height, target_width = hub.get_expected_image_size(module_spec)
    num_channels = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded = tf.image.decode_jpeg(jpeg_data, channels=num_channels)
    image_float = tf.image.convert_image_dtype(decoded, tf.float32)

    # Pre-crop scale: fixed crop margin times a random scale factor.
    crop_margin = tf.constant(1.0 + (random_crop / 100.0))
    random_factor = tf.random_uniform(shape=[],
                                      minval=1.0,
                                      maxval=1.0 + (random_scale / 100.0))
    scale_value = tf.multiply(crop_margin, random_factor)

    # Size of the enlarged image the crop window is taken from.
    scaled_height = tf.multiply(scale_value, target_height)
    scaled_width = tf.multiply(scale_value, target_width)
    scaled_size = tf.stack([scaled_height, scaled_width])
    scaled_size = tf.cast(scaled_size, dtype=tf.int32)

    # Resize works on a batch, so add a batch axis and drop it afterwards.
    image_batch = tf.expand_dims(image_float, 0)
    enlarged_batch = tf.image.resize_bilinear(image_batch, scaled_size)
    enlarged_image = tf.squeeze(enlarged_batch, axis=[0])
    cropped_image = tf.random_crop(
        enlarged_image, [target_height, target_width, num_channels])

    # Optional random mirror.
    flipped_image = cropped_image
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)

    # Random brightness factor applied multiplicatively.
    brightness_value = tf.random_uniform(
        shape=[],
        minval=1.0 - (random_brightness / 100.0),
        maxval=1.0 + (random_brightness / 100.0))
    brightened_image = tf.multiply(flipped_image, brightness_value)

    # Re-insert the batch axis of size 1.
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
コード例 #11
0
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        Tensors for the node to feed JPEG data into, and the output of the
        preprocessing steps.
    """
    out_height, out_width = hub.get_expected_image_size(module_spec)
    out_channels = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    pixels = tf.image.decode_jpeg(jpeg_data, channels=out_channels)
    # Convert from full range of uint8 to range [0,1] of float32.
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    # Leading batch axis of size 1.
    batch = tf.expand_dims(pixels, 0)

    out_size = tf.cast(tf.stack([out_height, out_width]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(batch, out_size)
    return jpeg_data, resized_image
コード例 #12
0
def decode_and_resize(hub_module):
    """Performs the image processing steps (decoding and resizing).

    Args:
        hub_module: Value passed to `hub.load_module_spec` to obtain the
            module spec.

    Returns:
        A tuple of the string placeholder for the encoded image data and the
        image tensor resized to the size the module expects.
    """
    spec = hub.load_module_spec(hub_module)
    height, width = hub.get_expected_image_size(spec)
    target_size = tf.stack((height, width))
    channels = hub.get_num_image_channels(spec)

    data_placeholder = tf.placeholder(tf.string, name='data_placeholder')
    # Decode, convert to float32 and add a leading batch axis of size 1.
    image_batch = tf.expand_dims(
        tf.image.convert_image_dtype(
            tf.image.decode_jpeg(data_placeholder, channels=channels),
            tf.float32),
        0)
    reshaped_image = tf.image.resize_bilinear(
        image_batch, tf.cast(target_size, dtype=tf.int32))

    return data_placeholder, reshaped_image
コード例 #13
0
    def __init__(self):
        """Loads the hub module spec, records its input geometry and builds
        the pre-trained graph via `self.create_module_graph`.
        """
        # URL of the pre-trained model and the spec loaded from it.
        self.HUB_MODULE = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
        self.Module_Spec = hub.load_module_spec(self.HUB_MODULE)

        # Image size and channel count required by this model.
        # NOTE: `Modelu_Depth` (sic) is kept as spelled; it is an attribute
        # other code may read.
        expected_size = hub.get_expected_image_size(self.Module_Spec)
        self.Module_Height, self.Module_Width = expected_size
        self.Modelu_Depth = hub.get_num_image_channels(self.Module_Spec)

        # A module is understood as instrumented for quantization with TF-Lite
        # if it contains any of these ops.
        self.FAKE_QUANT_OPS = (
            'FakeQuantWithMinMaxVars',
            'FakeQuantWithMinMaxVarsPerChannel',
        )

        # Our input images use exactly the geometry the module expects.
        self.ImageHeight = self.Module_Height
        self.ImageWidth = self.Module_Width
        self.ImageChannels = self.Modelu_Depth

        # Set up the pre-trained graph.
        graph_parts = self.create_module_graph(self.Module_Spec)
        (self.graph, self.bottleneck_tensor, self.resized_input_tensor,
         self.wants_quantization) = graph_parts
def add_jpeg_decoding(module_spec):
  """Adds operations that perform JPEG decoding and resizing.

  Args:
    module_spec: The hub.ModuleSpec for the image module being used.

  Returns:
    Tensors for the node to feed JPEG data into, and the output of the
    preprocessing steps.
  """
  target_height, target_width = hub.get_expected_image_size(module_spec)
  num_channels = hub.get_num_image_channels(module_spec)

  jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
  # Decode, then convert from full range of uint8 to range [0,1] of float32.
  image_float = tf.image.convert_image_dtype(
      tf.image.decode_jpeg(jpeg_data, channels=num_channels), tf.float32)
  # Add a leading axis of size 1 (batch dimension).
  image_batch = tf.expand_dims(image_float, 0)

  # Pack height and width into one tensor and make sure it is int32.
  target_size = tf.cast(tf.stack([target_height, target_width]),
                        dtype=tf.int32)
  # Rescale the image to the target size.
  resized_image = tf.image.resize_bilinear(image_batch, target_size)
  return jpeg_data, resized_image
コード例 #15
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect the
    kind of variations we expect in the real world, and so can help train the
    model to cope with natural data more effectively. Here we take the supplied
    parameters and construct a network of operations to apply them to an image.

    Cropping and scaling
    ~~~~~~~~~~~~~~~~~~~~

    The decoded image is first resized to the module's expected size
    multiplied by a scale factor, and a window of exactly the expected size is
    then cut out at a random position:

    <   scale * width     >
    +---------------------+
    |                     |
    |    <  width  >      |
    |    +---------+      |
    |    |         |      |
    |    |         |      |
    |    +---------+      |
    |                     |
    +---------------------+

    The scale factor is the product of a fixed margin,
    `1 + random_crop / 100`, and a value drawn uniformly between 1 and
    `1 + random_scale / 100`. With both percentages at zero the factor is 1,
    so the resized image already has the expected size and the crop keeps all
    of it.

    Args:
      flip_left_right: Boolean whether to randomly mirror images horizontally.
      random_crop: Integer percentage setting the total margin used around the
        crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer percentage; pixel values are multiplied by a
        value drawn uniformly between `1 - random_brightness / 100` and
        `1 + random_brightness / 100`.
      module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
      The jpeg input layer and the distorted result tensor.
    """
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(
        decoded_image, tf.float32)
    # Leading batch axis of size 1 for the resize op.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    # Drop the batch axis again. `axis` replaces the deprecated
    # `squeeze_dims` keyword of tf.squeeze.
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [input_height, input_width, input_depth])
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image
    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
  """Creates the operations that apply the specified distortions.

  During training the images are run through simple distortions (crop, scale,
  flip, brightness change) so that a limited number of images can stand in
  for a wider range of real-world variation.

  The decoded image is first resized to the module's expected size multiplied
  by a scale factor; a window of exactly the expected size is then cut out at
  a random position. The scale factor is the product of a fixed margin,
  `1 + random_crop / 100`, and a value drawn uniformly between 1 and
  `1 + random_scale / 100`. With both percentages at zero the factor is 1 and
  the crop keeps the whole image.

  Args:
    flip_left_right: Boolean whether to randomly mirror images horizontally.
    random_crop: Integer percentage setting the total margin used around the
      crop box.
    random_scale: Integer percentage of how much to vary the scale by.
    random_brightness: Integer percentage; pixel values are multiplied by a
      value drawn uniformly between `1 - random_brightness / 100` and
      `1 + random_brightness / 100`.
    module_spec: The hub.ModuleSpec for the image module being used.

  Returns:
    The jpeg input layer and the distorted result tensor.
  """
  # Width/height and channel count required by the existing model.
  target_height, target_width = hub.get_expected_image_size(module_spec)
  num_channels = hub.get_num_image_channels(module_spec)

  # Entry point for feed_dict.
  jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
  decoded = tf.image.decode_jpeg(jpeg_data, channels=num_channels)
  # Convert from full range of uint8 to range [0,1] of float32.
  image_float = tf.image.convert_image_dtype(decoded, tf.float32)
  # Add a leading batch axis.
  image_batch = tf.expand_dims(image_float, 0)

  # Crop and scale the image data.
  crop_margin = 1.0 + (random_crop / 100.0)
  max_random_scale = 1.0 + (random_scale / 100.0)
  crop_margin_tensor = tf.constant(crop_margin)
  random_factor = tf.random_uniform(shape=[],
                                    minval=1.0,
                                    maxval=max_random_scale)
  scale = tf.multiply(crop_margin_tensor, random_factor)
  scaled_width = tf.multiply(scale, target_width)
  scaled_height = tf.multiply(scale, target_height)
  scaled_size = tf.cast(tf.stack([scaled_height, scaled_width]),
                        dtype=tf.int32)
  enlarged_batch = tf.image.resize_bilinear(image_batch, scaled_size)
  enlarged_image = tf.squeeze(enlarged_batch, axis=[0])
  cropped_image = tf.random_crop(
      enlarged_image, [target_height, target_width, num_channels])

  # Optionally mirror the image at random.
  flipped_image = (tf.image.random_flip_left_right(cropped_image)
                   if flip_left_right else cropped_image)

  # Adjust the brightness by a random multiplicative factor.
  brightness_delta = random_brightness / 100.0
  brightness_value = tf.random_uniform(shape=[],
                                       minval=1.0 - brightness_delta,
                                       maxval=1.0 + brightness_delta)
  brightened_image = tf.multiply(flipped_image, brightness_value)
  distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
  return jpeg_data, distort_result
コード例 #17
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect the
    kind of variations we expect in the real world, and so can help train the
    model to cope with natural data more effectively. Here we take the supplied
    parameters and construct a network of operations to apply them to an image.

    Cropping and scaling
    ~~~~~~~~~~~~~~~~~~~~
    The decoded image is first resized to the module's expected size
    multiplied by a scale factor, and a window of exactly the expected size is
    then cut out at a random position. The scale factor is the product of a
    fixed margin, `1 + random_crop / 100`, and a value drawn uniformly between
    1 and `1 + random_scale / 100`. With both percentages at zero the factor
    is 1, so the resized image already has the expected size and the crop
    keeps all of it.

    Args:
        flip_left_right: Boolean whether to randomly mirror images
            horizontally.
        random_crop: Integer percentage setting the total margin used around
            the crop box.
        random_scale: Integer percentage of how much to vary the scale by.
        random_brightness: Integer percentage; pixel values are multiplied by
            a value drawn uniformly between `1 - random_brightness / 100` and
            `1 + random_brightness / 100`.
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        The jpeg input layer and the distorted result tensor.
    """
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
                                                          tf.float32)
    # Leading batch axis of size 1 for the resize op.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    # Drop the batch axis again. `axis` replaces the deprecated
    # `squeeze_dims` keyword of tf.squeeze.
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [input_height, input_width, input_depth])
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image
    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
コード例 #18
0

def prepare_file_system():
  """Recreates the TensorBoard summaries directory from scratch.

  Any existing `FLAGS.summaries_dir` is deleted recursively and created
  again. When `FLAGS.intermediate_store_frequency` is positive, the directory
  for intermediate output graphs is ensured to exist as well.
  """
  summaries_dir = FLAGS.summaries_dir
  # Start with an empty summaries directory.
  if tf.gfile.Exists(summaries_dir):
    tf.gfile.DeleteRecursively(summaries_dir)
  tf.gfile.MakeDirs(summaries_dir)

  if FLAGS.intermediate_store_frequency > 0:
    ensure_dir_exists(FLAGS.intermediate_output_graphs_dir)


def add_jpeg_decoding(module_spec):
  """Adds operations that perform JPEG decoding and resizing to the graph.

  Args:
    module_spec: The hub.ModuleSpec for the image module being used.

  Returns:
    A tuple of the string placeholder to feed JPEG data into and the decoded
    float32 image resized to the module's expected size.
  """
  height, width = hub.get_expected_image_size(module_spec)
  depth = hub.get_num_image_channels(module_spec)

  jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
  image = tf.image.decode_jpeg(jpeg_data, channels=depth)
  # Convert from full range of uint8 to range [0,1] of float32.
  image = tf.image.convert_image_dtype(image, tf.float32)
  # Leading batch axis of size 1.
  image = tf.expand_dims(image, 0)

  size = tf.stack([height, width])
  size = tf.cast(size, dtype=tf.int32)
  resized_image = tf.image.resize_bilinear(image, size)
  return jpeg_data, resized_image


def export_model(module_spec, class_count, saved_model_dir):
 
コード例 #19
0
def add_input_distortions(flip_left_right, random_crop, random_scale,
  
  input_height, input_width = hub.get_expected_image_size(module_spec)
  input_depth = hub.get_num_image_channels(module_spec)
コード例 #20
0
    def __init__(self, flags, n_classes):
        """Copies the experiment configuration from `flags` and builds the
        training estimator.

        Args:
            flags: Object exposing the experiment settings as attributes
                (every attribute read below must be present).
            n_classes: Stored as `self.n_classes`.
        """

        def _absolute_dir(path):
            # Absolute form of the directory, always ending with a path
            # separator (joining with "" appends the trailing slash).
            return os.path.join(os.path.abspath(path), "")

        self.n_classes = n_classes

        # Settings describing the layout of the experiment.
        self.experiment_name = flags.experiment_name
        self.model_name = flags.model_name
        self.logs_and_checkpoints_dir = flags.logs_and_checkpoints_dir
        self.export_model_dir = flags.export_model_dir
        self.results_dir = flags.results_dir
        self.remove_prev_ckpts_and_logs = flags.remove_prev_ckpts_and_logs
        self.random_seed = flags.random_seed

        # Bottleneck directories.
        self.train_bottlenecks_dir = flags.train_bottlenecks_dir
        self.validation_bottlenecks_dir = flags.validation_bottlenecks_dir
        self.test_bottlenecks_dir = flags.test_bottlenecks_dir

        # Dataset directories, normalised to absolute paths with a trailing
        # separator.
        self.train_images_dir = _absolute_dir(flags.train_images_dir)
        self.validation_images_dir = _absolute_dir(flags.validation_images_dir)
        self.test_images_dir = _absolute_dir(flags.test_images_dir)

        # Random distortion settings.
        self.flip_left_right = flags.flip_left_right
        self.random_crop = flags.random_crop
        self.random_scale = flags.random_scale
        self.random_brightness = flags.random_brightness

        # Training settings.
        self.train_batch_size = flags.train_batch_size
        self.validation_batch_size = flags.validation_batch_size
        self.test_batch_size = flags.test_batch_size

        self.num_epochs = flags.num_epochs
        self.learning_rate = flags.learning_rate
        self.tensors_to_log_train = flags.tensors_to_log_train
        self.tensors_to_log_val = flags.tensors_to_log_val
        self.save_checkpoints_steps = flags.save_checkpoints_steps
        self.eval_frequency = flags.eval_frequency
        self.fine_tuning = flags.fine_tuning

        # Bottlenecks are cached only when the module is not fine-tuned and
        # no random distortion is requested.
        if self.fine_tuning:
            self.cache_bottlenecks = False
        else:
            self.cache_bottlenecks = not tf_data_utils.should_distort_images(
                self.flip_left_right, self.random_crop,
                self.random_scale, self.random_brightness)

        # Module spec for the chosen model, plus its input geometry.
        self.module_spec = hub.load_module_spec(
            get_module_url(self.model_name))
        self.module_image_shape = hub.get_expected_image_size(self.module_spec)
        self.module_image_depth = hub.get_num_image_channels(self.module_spec)

        self.__init_log_and_random_seeds()
        self.__prepare_filesystem()
        self.__save_config_file(flags)
        self.estimator = self.__build_estimator(mode="train")
コード例 #21
0
    # Hyper-parameters and I/O options taken from the parsed arguments.
    epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    csv_out = args.csv_output
    pred_out = args.prediction_output
    dropout = args.dropout
    save_models = args.save
    import_features = args.import_features
    tfhub = args.tfhub_module


    ##### LOAD IMAGES ######
    # Take the input geometry from the hub module when one is given;
    # otherwise fall back to 224x224 with 3 channels.
    if tfhub is not None:
        module_spec = hub.load_module_spec(tfhub)
        height, width = hub.get_expected_image_size(module_spec)
        channels = hub.get_num_image_channels(module_spec)
    else:
        height, width, channels = 224, 224, 3

    ### training images
    # read paths and labels for each image
    listimgs, listlabels = parse_input(train_paths)
    # load images
    # NOTE(review): only `height` is passed as `size`; presumably load_image
    # produces square images so the reshape to (height, width, channels)
    # succeeds - confirm against load_image.
    loaded_imgs = [load_image(img, size=height).reshape((height, width, channels)) for img in listimgs]
    print('[TRAINING] Loaded', len(loaded_imgs), 'images and', len(listlabels), 'labels')
    # map string labels to unique integers: `u` holds the sorted unique
    # labels, `indices` the position of each label within `u`
    u,indices = np.unique(np.array(listlabels), return_inverse=True)
    print('[TRAINING] Categories: ', u)
    num_categories = len(u)

    ### validation images
コード例 #22
0
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import tensorflow_hub as hub
import numpy as np


#################### Global Variables. ####################
# URL of the pre-trained model on TF Hub.
HUB_MODULE = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
# Module spec loaded from that URL.
Module_Spec = hub.load_module_spec(HUB_MODULE)
# Image size and channel count required by this model.
# NOTE: `Modelu_Depth` (sic) is kept as spelled; other code may reference it.
Module_Height, Module_Width = hub.get_expected_image_size(Module_Spec)
Modelu_Depth = hub.get_num_image_channels(Module_Spec)
# A module is understood as instrumented for quantization with TF-Lite
# if it contains any of these ops.
FAKE_QUANT_OPS = (
    'FakeQuantWithMinMaxVars',
    'FakeQuantWithMinMaxVarsPerChannel',
)

# Our input images use exactly the geometry the module expects.
ImageHeight, ImageWidth, ImageChannels = (
    Module_Height, Module_Width, Modelu_Depth)


#################### Tensorflow Settings. ####################
# Set the logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)