Example #1
    def __init__(
        self,
        kernel_size: int,
        nplants: int,
        nrows: int,
        ncols: int,
        reset_gate: bool = True,
        update_gate: bool = True,
        renormalize_inputs: bool = True,
        min_update: float = 0.0,
        min_reset: float = 0.0,
        rnn_act: Optional[str] = None,
        additive: bool = False,
    ) -> None:
        super().__init__()
        # self.gru = ConvGRUCell(
        #     input_size=nplants,
        #     hidden_size=nplants,
        #     kernel_size=kernel_size,
        #     groups=nplants,
        # )
        self.gru = ConvGRUCell(
            input_size=1,
            hidden_size=1,
            kernel_size=kernel_size,
            groups=1,
            reset_gate=reset_gate,
            update_gate=update_gate,
            out_bias=(not renormalize_inputs),
            out_act=rnn_act,
            min_update=min_update,
            min_reset=min_reset,
        )
        if not additive:
            # self.conv_final = nn.Conv2d(
            #     nrows * ncols * nplants,
            #     nrows * ncols,
            #     kernel_size=1,
            #     groups=nrows * ncols,
            #     bias=False,
            # )
            self.conv_final = True
            self.log_add_weights = nn.Parameter(
                0.02 * torch.randn(nplants, nrows, ncols)
            )
        else:
            self.conv_final = None
        self.bias = nn.Parameter(torch.tensor(1.0))
        self.nplants = nplants
        self.nrows = nrows
        self.ncols = ncols
        self.rnn_act = rnn_act

        if renormalize_inputs:
            self.renormalize_inputs = True
            self.gam = nn.Parameter(torch.ones(nplants))
            # self.beta = nn.Parameter(torch.zeros(nplants))
        else:
            self.renormalize_inputs = None
Example #2
    def __init__(self, hidden_size, input_size, kernel_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.kernel_size = kernel_size
        self.input_size = input_size
        self.bn1 = nn.BatchNorm2d(8)
        self.print_log = False
        # self.relu1 = nn.Sigmoid()
        self.gruc_0 = ConvGRUCell(input_size, hidden_size[0], kernel_size[0])
        self.conv_pre_0 = deconv2_act(hidden_size[0], out_channels=hidden_size[0], kernel_size=4, stride=2, padding=1)

        self.gruc_1 = ConvGRUCell(hidden_size[0], hidden_size[1], kernel_size[1])
        self.conv_pre_1 = deconv2_act(hidden_size[1], out_channels=hidden_size[1], kernel_size=4, stride=2, padding=1)

        self.gruc_2 = ConvGRUCell(hidden_size[1], hidden_size[2], kernel_size[2])
        self.conv_pre_2_0 = conv2_act(hidden_size[2], out_channels=16, kernel_size=3, stride=1, padding=1)
        self.conv_pre_2_1 = conv2_act(16, out_channels=1, kernel_size=3, stride=1)
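The constructor above indexes hidden_size and kernel_size per level, so both are expected to be three-element sequences (one ConvGRU level per entry). A hedged instantiation sketch follows; the channel counts and kernel sizes are placeholders, not values from the original repository:

# Hypothetical values -- only the list-per-level structure is implied by the constructor above.
decoder = DecoderRNN(
    hidden_size=[64, 32, 16],   # hidden channels for gruc_0, gruc_1, gruc_2
    input_size=8,               # channels of the feature map fed to gruc_0
    kernel_size=[3, 3, 3],      # convolution kernel size per ConvGRU level
)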
Example #3
def inference_winner_take_all(images, cams, depth_num, depth_start, depth_end, 
                              is_master_gpu=True, reg_type='GRU', inverse_depth=False):
    """ infer disparity image from stereo images and cameras """

    if not inverse_depth:
        depth_interval = (depth_end - depth_start) / (tf.cast(depth_num, tf.float32) - 1)

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0], [-1, 1, -1, -1, 3]), axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)

    # image feature extraction    
    if is_master_gpu:
        ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=False)
    else:
        ref_tower = UNetDS2GN({'data': ref_image}, is_training=True, reuse=True)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0], [-1, 1, -1, -1, -1]), axis=1)
        view_tower = UNetDS2GN({'data': view_image}, is_training=True, reuse=True)
        view_towers.append(view_tower)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0], [-1, 1, 2, 4, 4]), axis=1)
        if inverse_depth:
            homographies = get_homographies_inv_depth(ref_cam, view_cam, depth_num=depth_num,
                                depth_start=depth_start, depth_end=depth_end)
        else:
            homographies = get_homographies(ref_cam, view_cam, depth_num=depth_num,
                                            depth_start=depth_start, depth_interval=depth_interval)
        view_homographies.append(homographies)

    # gru unit
    gru1_filters = 16
    gru2_filters = 4
    gru3_filters = 2
    feature_shape = [FLAGS.batch_size, FLAGS.max_h // 4, FLAGS.max_w // 4, 32]
    gru_input_shape = [feature_shape[1], feature_shape[2]]
    state1 = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], gru1_filters])
    state2 = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], gru2_filters])
    state3 = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], gru3_filters])
    conv_gru1 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru1_filters)
    conv_gru2 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru2_filters)
    conv_gru3 = ConvGRUCell(shape=gru_input_shape, kernel=[3, 3], filters=gru3_filters)

    # initialize variables
    exp_sum = tf.Variable(tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
        name='exp_sum', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
    depth_image = tf.Variable(tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
        name='depth_image', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
    max_prob_image = tf.Variable(tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1]),
        name='max_prob_image', trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES])
    init_map = tf.zeros([FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])

    # define winner take all loop
    def body(depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre):
        """Loop body."""

        # calculate cost 
        ave_feature = ref_tower.get_output()
        ave_feature2 = tf.square(ref_tower.get_output())
        for view in range(0, FLAGS.view_num - 1):
            homographies = view_homographies[view]
            homographies = tf.transpose(homographies, perm=[1, 0, 2, 3])
            homography = homographies[depth_index]
            # warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
            warped_view_feature = tf_transform_homography(view_towers[view].get_output(), homography)
            ave_feature = ave_feature + warped_view_feature
            ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
        ave_feature = ave_feature / FLAGS.view_num
        ave_feature2 = ave_feature2 / FLAGS.view_num
        cost = ave_feature2 - tf.square(ave_feature)
        cost.set_shape([FLAGS.batch_size, feature_shape[1], feature_shape[2], 32])

        # gru
        reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1')
        reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2')
        reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3')
        reg_cost = tf.layers.conv2d(
            reg_cost3, 1, 3, padding='same', reuse=tf.AUTO_REUSE, name='prob_conv')
        prob = tf.exp(reg_cost)

        # index
        d_idx = tf.cast(depth_index, tf.float32) 
        if inverse_depth:
            inv_depth_start = tf.div(1.0, depth_start)
            inv_depth_end = tf.div(1.0, depth_end)
            inv_interval = (inv_depth_start - inv_depth_end) / (tf.cast(depth_num, 'float32') - 1)
            inv_depth = inv_depth_start - d_idx * inv_interval
            depth = tf.div(1.0, inv_depth)
        else:
            depth = depth_start + d_idx * depth_interval
        temp_depth_image = tf.reshape(depth, [FLAGS.batch_size, 1, 1, 1])
        temp_depth_image = tf.tile(
            temp_depth_image, [1, feature_shape[1], feature_shape[2], 1])

        # update the best
        update_flag_image = tf.cast(tf.less(max_prob_image, prob), dtype='float32')
        new_max_prob_image = update_flag_image * prob + (1 - update_flag_image) * max_prob_image
        new_depth_image = update_flag_image * temp_depth_image + (1 - update_flag_image) * depth_image
        max_prob_image = tf.assign(max_prob_image, new_max_prob_image)
        depth_image = tf.assign(depth_image, new_depth_image)

        # update counter
        exp_sum = tf.assign_add(exp_sum, prob)
        depth_index = tf.add(depth_index, incre)

        return depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre
    
    # run forward loop
    exp_sum = tf.assign(exp_sum, init_map)
    depth_image = tf.assign(depth_image, init_map)
    max_prob_image = tf.assign(max_prob_image, init_map)
    depth_index = tf.constant(0)
    incre = tf.constant(1)
    cond = lambda depth_index, *_: tf.less(depth_index, depth_num)
    _, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre = tf.while_loop(
        cond, body
        , [depth_index, state1, state2, state3, depth_image, max_prob_image, exp_sum, incre]
        , back_prop=False, parallel_iterations=1)

    # get output
    forward_exp_sum = exp_sum + 1e-7
    forward_depth_map = depth_image
    return forward_depth_map, max_prob_image / forward_exp_sum
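The loop body above is a per-pixel winner-take-all: at each depth hypothesis it keeps the depth whose regularized probability is the highest seen so far, while exp_sum accumulates the probabilities used to normalize the returned confidence. A minimal NumPy sketch of that masked update, detached from the TensorFlow graph (array names are illustrative):

import numpy as np

def winner_take_all_step(prob, depth, max_prob_image, depth_image, exp_sum):
    # 1.0 where the new hypothesis beats the best probability so far, 0.0 elsewhere
    update = (prob > max_prob_image).astype(np.float32)
    max_prob_image = update * prob + (1.0 - update) * max_prob_image
    depth_image = update * depth + (1.0 - update) * depth_image
    exp_sum = exp_sum + prob  # accumulated for the final confidence normalization
    return max_prob_image, depth_image, exp_sum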
Example #4
def inference_prob_recurrent(images,
                             cams,
                             depth_num,
                             depth_start,
                             depth_interval,
                             is_master_gpu=True):
    """ infer disparity image from stereo images and cameras """

    # dynamic gpu params
    depth_end = depth_start + (tf.cast(depth_num, tf.float32) -
                               1) * depth_interval

    # reference image
    ref_image = tf.squeeze(tf.slice(images, [0, 0, 0, 0, 0],
                                    [-1, 1, -1, -1, 3]),
                           axis=1)
    ref_cam = tf.squeeze(tf.slice(cams, [0, 0, 0, 0, 0], [-1, 1, 2, 4, 4]),
                         axis=1)

    # image feature extraction
    if is_master_gpu:
        ref_tower = UNetDS2GN({'data': ref_image},
                              is_training=True,
                              reuse=False)
    else:
        ref_tower = UNetDS2GN({'data': ref_image},
                              is_training=True,
                              reuse=True)
    view_towers = []
    for view in range(1, FLAGS.view_num):
        view_image = tf.squeeze(tf.slice(images, [0, view, 0, 0, 0],
                                         [-1, 1, -1, -1, -1]),
                                axis=1)
        view_tower = UNetDS2GN({'data': view_image},
                               is_training=True,
                               reuse=True)
        view_towers.append(view_tower)

    # get all homographies
    view_homographies = []
    for view in range(1, FLAGS.view_num):
        view_cam = tf.squeeze(tf.slice(cams, [0, view, 0, 0, 0],
                                       [-1, 1, 2, 4, 4]),
                              axis=1)
        homographies = get_homographies(ref_cam,
                                        view_cam,
                                        depth_num=depth_num,
                                        depth_start=depth_start,
                                        depth_interval=depth_interval)
        view_homographies.append(homographies)

    gru1_filters = 16
    gru2_filters = 4
    gru3_filters = 2
    feature_shape = [FLAGS.batch_size, FLAGS.max_h // 4, FLAGS.max_w // 4, 32]
    gru_input_shape = [feature_shape[1], feature_shape[2]]
    state1 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru1_filters])
    state2 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru2_filters])
    state3 = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], gru3_filters])
    conv_gru1 = ConvGRUCell(shape=gru_input_shape,
                            kernel=[3, 3],
                            filters=gru1_filters)
    conv_gru2 = ConvGRUCell(shape=gru_input_shape,
                            kernel=[3, 3],
                            filters=gru2_filters)
    conv_gru3 = ConvGRUCell(shape=gru_input_shape,
                            kernel=[3, 3],
                            filters=gru3_filters)

    exp_div = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])
    soft_depth_map = tf.zeros(
        [FLAGS.batch_size, feature_shape[1], feature_shape[2], 1])

    with tf.name_scope('cost_volume_homography'):

        # forward cost volume
        depth_costs = []
        for d in range(depth_num):

            # compute cost (variation metric)
            ave_feature = ref_tower.get_output()
            ave_feature2 = tf.square(ref_tower.get_output())

            for view in range(0, FLAGS.view_num - 1):
                homography = tf.slice(view_homographies[view],
                                      begin=[0, d, 0, 0],
                                      size=[-1, 1, 3, 3])
                homography = tf.squeeze(homography, axis=1)
                # warped_view_feature = homography_warping(view_towers[view].get_output(), homography)
                warped_view_feature = tf_transform_homography(
                    view_towers[view].get_output(), homography)
                ave_feature = ave_feature + warped_view_feature
                ave_feature2 = ave_feature2 + tf.square(warped_view_feature)
            ave_feature = ave_feature / FLAGS.view_num
            ave_feature2 = ave_feature2 / FLAGS.view_num
            cost = ave_feature2 - tf.square(ave_feature)

            # gru
            reg_cost1, state1 = conv_gru1(-cost, state1, scope='conv_gru1')
            reg_cost2, state2 = conv_gru2(reg_cost1, state2, scope='conv_gru2')
            reg_cost3, state3 = conv_gru3(reg_cost2, state3, scope='conv_gru3')
            reg_cost = tf.layers.conv2d(reg_cost3,
                                        1,
                                        3,
                                        padding='same',
                                        reuse=tf.AUTO_REUSE,
                                        name='prob_conv')
            depth_costs.append(reg_cost)

        prob_volume = tf.stack(depth_costs, axis=1)
        prob_volume = tf.nn.softmax(prob_volume, axis=1, name='prob_volume')

    return prob_volume
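Unlike the winner-take-all variant, this function returns the full softmax probability volume over the depth_num hypotheses, shaped [batch, depth_num, height/4, width/4, 1]. One common way to collapse such a volume into a depth map is a probability-weighted average of the hypothesized depths (soft argmax); the NumPy sketch below illustrates that reduction and is an assumption about downstream use, not part of the function above:

import numpy as np

def soft_argmax_depth(prob_volume, depth_start, depth_interval):
    # prob_volume: [batch, depth_num, height, width, 1], softmax-normalized over axis 1
    depth_num = prob_volume.shape[1]
    depth_values = depth_start + np.arange(depth_num, dtype=np.float32) * depth_interval
    depth_values = depth_values.reshape(1, depth_num, 1, 1, 1)
    # expected depth under the per-pixel probability distribution
    return np.sum(prob_volume * depth_values, axis=1)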