Beispiel #1
0
def col_preprocess_for_gpu(image, soft=True, down_sample=4, col_knn=False):
    """Build colorization inputs and targets from RGB image data.

    The input is converted to float32 with `tf.image.convert_image_dtype`,
    passed through `rgb_to_lab`, and split along its last axis into the
    first channel and the remaining channels.

    Args:
      image: `Tensor` that is indexed with four axes below; presumably a
        (batch, height, width, 3) RGB batch -- TODO confirm with the caller.
      soft: Forwarded unchanged to `ab_to_Q`.
      down_sample: Stride applied to the second and third axes of the
        remaining (ab) channels before they are handed to `ab_to_Q`.
      col_knn: Forwarded unchanged to `ab_to_Q`.

    Returns:
      A tuple `(l_image, ab_image, Q_label)`: the first Lab channel minus
      50, the remaining Lab channels at full resolution, and the output of
      `ab_to_Q` computed on the strided ab channels.
    """
    float_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    lab = rgb_to_lab(float_image)

    lightness = lab[:, :, :, :1]
    chroma = lab[:, :, :, 1:]

    # Shift the first Lab channel by a constant 50 (presumably to centre
    # it around zero -- verify against the range produced by rgb_to_lab).
    centered_lightness = lightness - 50

    # Only the label is computed on the sub-sampled grid; the returned ab
    # channels keep their full resolution.
    strided_chroma = chroma[:, ::down_sample, ::down_sample, :]
    q_label = ab_to_Q(strided_chroma, soft=soft, col_knn=col_knn)

    return centered_lightness, chroma, q_label
Beispiel #2
0
def preprocess_for_eval(image, std=True, depth_imn_tl=0, l_channel=0):
    """Scale, optionally normalize, and center-crop an image for evaluation.

    The image is scaled with `_do_scale` to `IMAGE_SIZE + 32`, center-cropped
    with `_center_crop` to `IMAGE_SIZE`, and reshaped to
    `[IMAGE_SIZE, IMAGE_SIZE, 3]`.

    Args:
      image: `Tensor` representing an image of arbitrary size.
      std: Only consulted when `depth_imn_tl == 0`. If truthy the scaled
        image goes through `_normalize`; otherwise `MEAN_RGB` is subtracted.
      depth_imn_tl: Normalization is skipped entirely unless this equals 0.
      l_channel: When equal to 1, the image is replaced by the first
        channel of `rgb_to_lab(image)` minus 50, tiled to three channels.

    Returns:
      The preprocessed image `Tensor`. When the module-level `IMAGE_RESIZE`
      is not None, the result is additionally resized to
      `[IMAGE_RESIZE, IMAGE_RESIZE]` with `align_corners=True`.
    """
    image = _do_scale(image, IMAGE_SIZE + 32)

    # Normalization happens before the crop and only for depth_imn_tl == 0.
    if depth_imn_tl == 0 and std:
        image = _normalize(image)
    elif depth_imn_tl == 0:
        image = image - tf.constant(MEAN_RGB, shape=[1, 1, 3])

    cropped = _center_crop(image, IMAGE_SIZE)
    result = tf.reshape(cropped, [IMAGE_SIZE, IMAGE_SIZE, 3])

    if l_channel == 1:
        lab = rgb_to_lab(result)
        # Keep the first Lab channel only, shift it by 50, and repeat it
        # three times along the channel axis.
        result = tf.tile(lab[:, :, :1] - 50, [1, 1, 3])

    if IMAGE_RESIZE is None:
        return result
    return tf.image.resize_images(result, [IMAGE_RESIZE, IMAGE_RESIZE],
                                  align_corners=True)
Beispiel #3
0
def rp_col_preprocess_for_eval(image, down_sample=8, col_knn=True):
    """Return the shifted first Lab channel of an image, tiled to 3 channels.

    Args:
      image: `Tensor` passed to `rgb_to_lab`; it is indexed with three axes
        below, presumably (height, width, 3) -- TODO confirm.
      down_sample: Unused by this function; kept in the signature.
      col_knn: Unused by this function; kept in the signature.

    Returns:
      A `Tensor` holding the first channel of `rgb_to_lab(image)` minus 50,
      repeated three times along the last axis.
    """
    lab = rgb_to_lab(image)
    shifted_lightness = lab[:, :, :1] - 50
    return tf.tile(shifted_lightness, [1, 1, 3])
Beispiel #4
0
def col_preprocess_for_train(image,
                             soft=True,
                             down_sample=8,
                             col_knn=False,
                             col_tl=False,
                             combine_rp=False):
    """Build a colorization training pair from a single image.

    The image is cropped with `random_sized_crop`, passed through `_flip`,
    reshaped to a square of side 224 (`col_tl` truthy) or 256 (otherwise),
    and converted with `rgb_to_lab`.

    Args:
      image: `Tensor` representing an image of arbitrary size.
      soft: Forwarded unchanged to `ab_to_Q`.
      down_sample: Stride applied to both spatial axes of the ab channels
        before they are handed to `ab_to_Q`.
      col_knn: Forwarded unchanged to `ab_to_Q`.
      col_tl: Selects the crop size: 224 when truthy, 256 otherwise.
      combine_rp: When truthy, the returned first Lab channel is tiled to
        three channels.

    Returns:
      A tuple `(l_image, Q_label)`: the first Lab channel minus 50
      (optionally tiled), and the output of `ab_to_Q` on the strided ab
      channels.
    """
    crop_size = 224 if col_tl else 256

    augmented = random_sized_crop(image, crop_size, crop_size)
    augmented = _flip(augmented)
    augmented = tf.reshape(augmented, [crop_size, crop_size, 3])

    lab = rgb_to_lab(augmented)
    lightness = lab[:, :, :1]
    chroma = lab[:, :, 1:]

    # Shift the first Lab channel by a constant 50.
    lightness = lightness - 50

    # The label is computed on a spatially strided view of the ab channels.
    strided_chroma = chroma[::down_sample, ::down_sample, :]
    q_label = ab_to_Q(strided_chroma, soft=soft, col_knn=col_knn)

    if combine_rp:
        lightness = tf.tile(lightness, [1, 1, 3])
    return lightness, q_label
Beispiel #5
0
def depth_image_preprocess_for_val(original_image,
                                   depth_image,
                                   image_height,
                                   image_width,
                                   down_sample=1,
                                   color_dp_tl=False,
                                   combine_3_task=False):
    """Center-crop an image / depth-map pair to 224x224 for validation.

    Both tensors are sliced with the same offsets so they stay aligned.

    Args:
      original_image: `Tensor` sliced with three axes and a channel extent
        of 3.
      depth_image: `Tensor` sliced with three axes and a channel extent of 1.
      image_height: Height used to compute the vertical crop offset; it is
        passed through `int()`, so presumably a Python number -- TODO
        confirm.
      image_width: Width used to compute the horizontal crop offset; same
        assumption as `image_height`.
      down_sample: When greater than 1, the depth map is strided by this
        factor along both spatial axes.
      color_dp_tl: When truthy, the image is replaced by the first channel
        of `rgb_to_lab(image)` minus 50.
      combine_3_task: Only consulted when `color_dp_tl` is truthy; tiles
        that single channel to three channels.

    Returns:
      A tuple `(original_image, depth_image)` of the processed tensors.
    """
    crop = 224
    top = int((image_height - crop) / 2)
    left = int((image_width - crop) / 2)

    rgb_crop = tf.slice(original_image, [top, left, 0], [crop, crop, 3])
    depth_crop = tf.slice(depth_image, [top, left, 0], [crop, crop, 1])

    # Reshape to the fixed crop size, as the original implementation does
    # right after slicing.
    rgb_crop = tf.reshape(rgb_crop, [crop, crop, 3])
    depth_crop = tf.reshape(depth_crop, [crop, crop, 1])

    if color_dp_tl:
        lab = rgb_to_lab(rgb_crop)
        rgb_crop = lab[:, :, :1] - 50
        if combine_3_task:
            rgb_crop = tf.tile(rgb_crop, [1, 1, 3])

    if down_sample > 1:
        depth_crop = depth_crop[::down_sample, ::down_sample, :]

    return rgb_crop, depth_crop
Beispiel #6
0
def col_preprocess_for_eval(image,
                            soft=True,
                            down_sample=8,
                            col_knn=False,
                            col_tl=False,
                            combine_rp=False):
    """Build a colorization evaluation pair from a single image.

    The image is scaled with `_do_scale` to the crop size plus 32,
    center-cropped with `_center_crop`, reshaped to a square of side 224
    (`col_tl` truthy) or 256 (otherwise), and converted with `rgb_to_lab`.

    Args:
      image: `Tensor` representing an image of arbitrary size.
      soft: Forwarded unchanged to `ab_to_Q`.
      down_sample: Stride applied to both spatial axes of the ab channels
        before they are handed to `ab_to_Q`.
      col_knn: Forwarded unchanged to `ab_to_Q`.
      col_tl: Selects the crop size: 224 when truthy, 256 otherwise.
      combine_rp: When truthy, the returned first Lab channel is tiled to
        three channels.

    Returns:
      A tuple `(l_image, Q_label)`: the first Lab channel minus 50
      (optionally tiled), and the output of `ab_to_Q` on the strided ab
      channels.
    """
    crop_size = 224 if col_tl else 256

    scaled = _do_scale(image, crop_size + 32)
    cropped = _center_crop(scaled, crop_size)
    cropped = tf.reshape(cropped, [crop_size, crop_size, 3])

    lab = rgb_to_lab(cropped)

    # First Lab channel, shifted by a constant 50.
    lightness = lab[:, :, :1]
    lightness = lightness - 50

    # Remaining channels, strided spatially before label construction.
    chroma = lab[:, :, 1:]
    strided_chroma = chroma[::down_sample, ::down_sample, :]
    q_label = ab_to_Q(strided_chroma, soft=soft, col_knn=col_knn)

    if combine_rp:
        lightness = tf.tile(lightness, [1, 1, 3])

    return lightness, q_label
def get_network_outputs(inputs,
                        prep_type,
                        model_type,
                        setting_name=None,
                        module_name=None,
                        inst_resnet_size=18,
                        train=False,
                        **cfg_kwargs):
    """Preprocess `inputs['images']` and build the requested network.

    Args:
      inputs: Mapping that must contain the key `'images'`.
      prep_type: Preprocessing applied to `inputs['images']`. One of
        `'only_mean'` (cast to float32, divide by 255, subtract `MEAN_RGB`),
        `'mean_std'` (`color_normalize`), `'color_prep'` (first channel of
        `rgb_to_lab` minus 50), or `'no_prep'` (cast to float32, divide by
        255).
      model_type: Selects the model builder. Supported values are
        `'vm_model'`, `'mt_vm_model'`, names starting with `'inst_model'`
        (optionally `'inst_model:<get_all_layers>'`),
        `'vd_inst_model:<type>'`, `'vd_prednet:<type>'`, `'simclr_model'`,
        and `'simclr_model_mid'`.
      setting_name: Passed to `get_network_cfg_from_setting` for the
        `'vm_model'` and `'mt_vm_model'` types.
      module_name: Passed to `build_vm_model_from_args`. Defaults to
        `['encode']`; a fresh list is created on every call so the default
        is never shared between calls.
      inst_resnet_size: `resnet_size` passed to `resnet_embedding` for the
        `inst_model` types.
      train: Passed as `train` to `build_vm_model_from_args`.
      **cfg_kwargs: Forwarded to `get_network_cfg_from_setting`.

    Returns:
      A tuple `(all_outs, {})`. `all_outs` is the dict of model outputs with
      the preprocessed image added under the key `'model_inputs'`.

    Raises:
      NotImplementedError: If `prep_type` or `model_type` is not supported.
    """
    if module_name is None:
        # Avoid a mutable default argument shared across calls.
        module_name = ['encode']

    input_image = inputs['images']
    if prep_type == 'only_mean':
        input_image = tf.cast(input_image, tf.float32) / 255
        offset = tf.constant(MEAN_RGB, shape=[1, 1, 1, 3])
        post_input_image = input_image - offset
    elif prep_type == 'mean_std':
        # Divided by 255 is done inside the function
        post_input_image = color_normalize(input_image)
    elif prep_type == 'color_prep':
        input_image = tf.cast(input_image, tf.float32) / 255
        post_input_image = rgb_to_lab(input_image)
        post_input_image = post_input_image[:, :, :, :1] - 50
    elif prep_type == 'no_prep':
        post_input_image = tf.cast(input_image, tf.float32) / 255
    else:
        raise NotImplementedError('Preprocessing type not supported!')

    # Only the 'vm_model' and 'mt_vm_model' branches feed post_input_image
    # to the network; the remaining branches read `inputs` directly and
    # post_input_image is merely attached to the outputs at the end.
    if model_type == 'vm_model':
        _, args = get_network_cfg_from_setting(
            setting_name, **cfg_kwargs)
        all_outs = build_vm_model_from_args(args,
                                            post_input_image,
                                            module_name,
                                            train=train)
    elif model_type == 'mt_vm_model':
        _, args = get_network_cfg_from_setting(
            setting_name, **cfg_kwargs)
        # The same model is built twice: once in the "primary" scope and
        # once in an "ema" scope derived from the primary variable scope.
        with name_variable_scope("primary", "primary", reuse=tf.AUTO_REUSE) \
                as (_, var_scope):
            all_outs = build_vm_model_from_args(args,
                                                post_input_image,
                                                module_name,
                                                train=train)
        with ema_variable_scope("ema", var_scope, reuse=tf.AUTO_REUSE):
            ema_out_dict = build_vm_model_from_args(args,
                                                    post_input_image,
                                                    module_name,
                                                    train=train)
            # Combine two out dicts, adding ema_ as prefix to keys
            for each_key, each_value in ema_out_dict.items():
                new_key = "ema_%s" % each_key
                assert new_key not in all_outs, \
                        "New key %s already exists" % new_key
                all_outs[new_key] = each_value
    elif model_type.startswith('inst_model'):
        # inst_model:get_all_layers_args
        if ':' not in model_type:
            get_all_layers = 'all_spatial'
        else:
            get_all_layers = model_type.split(':')[1]
        all_outs = resnet_embedding(inputs['images'],
                                    get_all_layers=get_all_layers,
                                    skip_final_dense=True,
                                    resnet_size=inst_resnet_size)
    elif model_type.startswith('vd_inst_model'):
        # model_type should be vd_inst_model:actual_model_type
        vd_repo_path = os.path.expanduser('~/video_unsup/')
        # Guard against growing sys.path with duplicates on repeated calls.
        if vd_repo_path not in sys.path:
            sys.path.append(vd_repo_path)
        import tf_model.model.instance_model as vd_inst_model
        _model_type = model_type.split(':')[1]
        input_images = inputs['images']
        if _model_type == '3dresnet':
            # Halve the spatial size, then repeat the image 16 times along
            # a new axis inserted at position 1.
            curr_shape = input_images.get_shape().as_list()[1]
            input_images = tf.image.resize_images(
                input_images, [curr_shape // 2, curr_shape // 2])
            input_images = tf.tile(tf.expand_dims(input_images, axis=1),
                                   [1, 16, 1, 1, 1])
        elif _model_type == '3dresnet_full':
            # Same as '3dresnet' but without the spatial down-sizing.
            input_images = tf.tile(tf.expand_dims(input_images, axis=1),
                                   [1, 16, 1, 1, 1])
            _model_type = '3dresnet'
        elif _model_type == 'vanilla3d_single':
            input_images = tf.tile(tf.expand_dims(input_images, axis=1),
                                   [1, 16, 1, 1, 1])
        all_outs = vd_inst_model.resnet_embedding(input_images,
                                                  get_all_layers='all_spatial',
                                                  skip_final_dense=True,
                                                  model_type=_model_type)
    elif model_type.startswith('vd_prednet'):
        # model_type should be vd_prednet:actual_model_type
        _model_type = model_type.split(':')[1]
        images = inputs['images']
        if _model_type == 'prednet_l9':
            # Resize down to the nearest multiple of 32 when needed.
            curr_shape = images.get_shape().as_list()[1]
            if not curr_shape % 32 == 0:
                new_shape = curr_shape // 32 * 32
                images = tf.image.resize_images(images, [new_shape, new_shape])
        import unsup_vvs.network_training.models.prednet_builder as prednet_builder
        all_outs = prednet_builder.build_all_outs(images, _model_type)
    elif model_type == 'simclr_model':
        ending_points = get_simclr_ending_points(inputs)
        # Keys are 1-based: encode_1, encode_2, ...
        all_outs = {
                'encode_%i' % (_idx+1): _rep \
                for _idx, _rep in enumerate(ending_points)
                }
    elif model_type == 'simclr_model_mid':
        ending_points = get_simclr_ending_points(inputs)
        output = ending_points[-1]
        # Flatten everything after the first axis before the dense layer.
        output = tf.reshape(output, [output.shape[0], -1])
        m = NoramlNetfromConv(seed=0)
        with tf.variable_scope('category_trans'):
            with tf.variable_scope('mid'):
                output = m.fc(
                    out_shape=1000,
                    init='xavier',
                    weight_decay=1e-4,
                    activation='relu',
                    bias=0.1,
                    dropout=None,
                    in_layer=output,
                )
        all_outs = {'category_trans_1': output}
    else:
        raise NotImplementedError('Model type not supported!')
    all_outs['model_inputs'] = post_input_image
    return all_outs, {}
Beispiel #8
0
def depth_image_preprocess_for_train(original_image,
                                     depth_image,
                                     image_height,
                                     image_width,
                                     down_sample=1,
                                     color_dp_tl=False,
                                     combine_3_task=False):
    """Randomly crop and flip an image / depth-map pair for training.

    The same random 224x224 window and the same random left-right flip are
    applied to both tensors so they stay aligned.

    Args:
      original_image: `Tensor` sliced with three axes and a channel extent
        of 3.
      depth_image: `Tensor` sliced with three axes and a channel extent of 1.
      image_height: Height of the inputs; must be at least 224.
      image_width: Width of the inputs; must be at least 224.
      down_sample: When greater than 1, the depth map is strided by this
        factor along both spatial axes.
      color_dp_tl: When truthy, the image is replaced by the first channel
        of `rgb_to_lab(image)` minus 50.
      combine_3_task: Only consulted when `color_dp_tl` is truthy; tiles
        that single channel to three channels.

    Returns:
      A tuple `(original_image, depth_image)` of the processed tensors.
    """
    crop_size = 224

    # `maxval` is exclusive for tf.random_uniform, so add 1 to make the last
    # valid offset (size - crop_size) reachable. This also keeps the range
    # non-empty when an input dimension is exactly crop_size.
    random_height = tf.random_uniform([],
                                      minval=0,
                                      maxval=image_height - crop_size + 1,
                                      dtype=tf.int32)
    random_width = tf.random_uniform([],
                                     minval=0,
                                     maxval=image_width - crop_size + 1,
                                     dtype=tf.int32)

    # Use identical offsets for both tensors to keep pixels aligned.
    crop_original_image = tf.slice(original_image,
                                   [random_height, random_width, 0],
                                   [crop_size, crop_size, 3])
    crop_depth_image = tf.slice(depth_image, [random_height, random_width, 0],
                                [crop_size, crop_size, 1])

    # Draws 0 or 1; a 0 selects the flipped pair below.
    random_flip = tf.random_uniform([], minval=0, maxval=2, dtype=tf.int32)

    def image_flip(o_image, d_image):
        """Flip both tensors left-right."""
        o_image_f = tf.image.flip_left_right(o_image)
        d_image_f = tf.image.flip_left_right(d_image)
        return o_image_f, d_image_f

    def image_identity(o_image, d_image):
        """Return both tensors unchanged."""
        return o_image, d_image

    # A single tf.cond guarantees that image and depth share the decision.
    flip_original_image, flip_depth_image = tf.cond(
        random_flip < 1,
        lambda: image_flip(crop_original_image, crop_depth_image),
        lambda: image_identity(crop_original_image, crop_depth_image))

    original_image = tf.reshape(flip_original_image,
                                [crop_size, crop_size, 3])
    depth_image = tf.reshape(flip_depth_image, [crop_size, crop_size, 1])

    if color_dp_tl:
        lab_image = rgb_to_lab(original_image)
        original_image = lab_image[:, :, :1] - 50
        if combine_3_task:
            original_image = tf.tile(original_image, [1, 1, 3])

    if down_sample > 1:
        depth_image = depth_image[::down_sample, ::down_sample, :]

    return original_image, depth_image