コード例 #1
0
ファイル: learn.py プロジェクト: thinknew/WLClassification
def conv(ix, w):
    # filter shape: [filter_height, filter_width, in_channels, out_channels]
    # flatten filters
    filter_height = int(w.shape[0])
    filter_width = int(w.shape[1])
    in_channels = int(w.shape[2])
    out_channels = int(w.shape[3])
    ix_height = int(ix.shape[1])
    ix_width = int(ix.shape[2])
    ix_channels = int(ix.shape[3])
    filter_shape = [filter_height, filter_width, in_channels, out_channels]
    flat_w = tf.reshape(
        w, [filter_height * filter_width * in_channels, out_channels])
    patches = tf.extract_image_patches(
        ix,
        ksizes=[1, filter_height, filter_width, 1],
        strides=[1, 1, 1, 1],
        rates=[1, 1, 1, 1],
        padding='SAME')
    patches_reshaped = tf.reshape(
        patches,
        [-1, ix_height, ix_width, filter_height * filter_width * ix_channels])
    feature_maps = []
    for i in range(out_channels):
        feature_map = tf.reduce_sum(tf.multiply(flat_w[:, i],
                                                patches_reshaped),
                                    axis=3,
                                    keep_dims=True)
        feature_maps.append(feature_map)
    features = tf.concat(feature_maps, axis=3)
    return features
コード例 #2
0
def compute_cost_volume(x1, x2, H, W, channel, d=9):
    x1 = tf.nn.l2_normalize(x1, axis=3)
    x2 = tf.nn.l2_normalize(x2, axis=3)

    x2_patches = tf.extract_image_patches(x2, [1, d, d, 1],
                                          strides=[1, 1, 1, 1],
                                          rates=[1, 1, 1, 1],
                                          padding='SAME')
    x2_patches = tf.reshape(x2_patches, [-1, H, W, d, d, channel])
    x1_reshape = tf.reshape(x1, [-1, H, W, 1, 1, channel])
    x1_dot_x2 = tf.multiply(x1_reshape, x2_patches)

    cost_volume = tf.reduce_sum(x1_dot_x2, axis=-1)
    #cost_volume = tf.reduce_mean(x1_dot_x2, axis=-1)
    cost_volume = tf.reshape(cost_volume, [-1, H, W, d * d])
    return cost_volume
コード例 #3
0
def extract_pointwise_conv2d_patches(inputs,
                                     filter_shape,
                                     name=None,
                                     data_format=None):
  """Extract patches for a 1x1 conv2d.

  Args:
    inputs: 4-D Tensor of shape [batch_size, height, width, in_channels].
    filter_shape: List of 4 ints. Shape of filter to apply with conv2d()
    name: None or str. Name for Op.
    data_format: None or str. Format for data. See 'data_format' in
      tf.nn.conv2d() for details.

  Returns:
    Tensor of shape [batch_size, ..spatial_input_shape..,
    ..spatial_filter_shape.., in_channels]

  Raises:
    ValueError: if inputs is not 4-D.
    ValueError: if filter_shape is not [1, 1, ?, ?]
    ValueError: if data_format is not channels-last.
  """
  if inputs.shape.ndims != 4:
    raise ValueError("inputs must have 4 dims.")
  if len(filter_shape) != 4:
    raise ValueError("filter_shape must have 4 dims.")
  if filter_shape[0] != 1 or filter_shape[1] != 1:
    raise ValueError("filter_shape must have shape 1 along spatial dimensions.")
  if not is_data_format_channel_last(data_format):
    raise ValueError("data_format must be channels last.")
  with tf.name_scope(name, "extract_pointwise_conv2d_patches",
                     [inputs, filter_shape]):
    ksizes = [1, 1, 1, 1]  # Spatial shape is 1x1.
    strides = [1, 1, 1, 1]  # Operate on all pixels.
    rates = [1, 1, 1, 1]  # Dilation has no meaning with spatial shape = 1.
    padding = "VALID"  # Doesn't matter.
    result = tf.extract_image_patches(inputs, ksizes, strides, rates, padding)

    batch_size, input_height, input_width, in_channels = inputs.shape.as_list()
    filter_height, filter_width, in_channels, _ = filter_shape
    return tf.reshape(result, [
        batch_size, input_height, input_width, filter_height, filter_width,
        in_channels
    ])
コード例 #4
0
 def forward(self):
     inp = self.inp.out
     s = self.lay.stride
     self.out = tf.extract_image_patches(inp, [1, s, s, 1], [1, s, s, 1],
                                         [1, 1, 1, 1], 'VALID')
コード例 #5
0
def conv_capsule_mat(input_tensor,
                     input_activation,
                     input_dim,
                     output_dim,
                     layer_name,
                     num_routing=3,
                     num_in_atoms=3,
                     num_out_atoms=3,
                     stride=2,
                     kernel_size=5,
                     min_var=0.0005,
                     final_beta=1.0):
    """Convolutional Capsule layer with Pose Matrices."""
    print('caps conv stride: {}'.format(stride))
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        input_shape = tf.shape(input_tensor)
        _, _, _, in_height, in_width = input_tensor.get_shape()
        # This Variable will hold the state of the weights for the layer
        kernel = utils.weight_variable(shape=[
            input_dim, kernel_size, kernel_size, num_in_atoms,
            output_dim * num_out_atoms
        ],
                                       stddev=0.3)
        # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.5,
            name='activation_biases')
        sigma_biases = utils.bias_variable([1, 1, output_dim, 1, 1, 1, 1, 1],
                                           init_value=.5,
                                           name='sigma_biases')
        with tf.name_scope('conv'):
            print('convi;')
            # input_tensor: [x,128,8, c1,c2] -> [x*128,8, c1,c2]
            print(input_tensor.get_shape())
            input_tensor_reshaped = tf.reshape(input_tensor, [
                input_shape[0] * input_dim * in_atom_sq, input_shape[3],
                input_shape[4], 1
            ])
            input_tensor_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                             input_tensor.get_shape()[4], 1))
            input_act_reshaped = tf.reshape(input_activation, [
                input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
            ])
            input_act_reshaped.set_shape((None, input_tensor.get_shape()[3],
                                          input_tensor.get_shape()[4], 1))
            print(input_tensor_reshaped.get_shape())
            # conv: [x*128,out*out_at, c3,c4]
            conv_patches = tf.extract_image_patches(
                images=input_tensor_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            act_patches = tf.extract_image_patches(
                images=input_act_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            o_height = (in_height - kernel_size) // stride + 1
            o_width = (in_width - kernel_size) // stride + 1
            patches = tf.reshape(conv_patches,
                                 (input_shape[0], input_dim, in_atom_sq,
                                  o_height, o_width, kernel_size, kernel_size))
            patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width,
                               kernel_size, kernel_size))
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size, input_shape[0] *
                 o_height * o_width * num_in_atoms, num_in_atoms))
            patch_split.set_shape(
                (input_dim, kernel_size, kernel_size, None, num_in_atoms))
            a_patches = tf.reshape(act_patches,
                                   (input_shape[0], input_dim, 1, 1, o_height,
                                    o_width, kernel_size, kernel_size))
            a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width,
                                 kernel_size, kernel_size))
            with tf.name_scope('input_act'):
                utils.activation_summary(
                    tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(a_patches,
                                                              axis=1),
                                                axis=-1),
                                  axis=-1))
            with tf.name_scope('Wx'):
                wx = tf.matmul(patch_split, kernel)
                wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size,
                                     input_shape[0], o_height, o_width,
                                     num_in_atoms * num_out_atoms, output_dim))
                wx.set_shape(
                    (input_dim, kernel_size, kernel_size, None, o_height,
                     o_width, num_in_atoms * num_out_atoms, output_dim))
                wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
                utils.activation_summary(wx)

        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10, c3, c4]
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            activation, center = update_conv_routing(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=final_beta,
            )
            # activations: [x, 10, 8, c3, c4]

        out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
        out_center = tf.squeeze(center, axis=[1, 6, 7])
        with tf.name_scope('center'):
            utils.activation_summary(out_center)
        return tf.sigmoid(out_activation), out_center
コード例 #6
0
    def _body(i, posterior, activation, center, masses):
        """Body of the EM while loop."""
        del activation
        beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
        # beta = final_beta
        # route: [outdim, height?, width?, batch, indim]
        vote_conf = posterior * input_activation
        # masses: [batch, 1, outdim, 1, height, width, 1, 1]
        masses = tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            vote_conf, axis=1, keep_dims=True),
                                             axis=-1,
                                             keep_dims=True),
                               axis=-2,
                               keep_dims=True) + 0.0000001
        preactivate_unrolled = vote_conf * wx
        # center: [batch, 1, outdim, outatom, height, width]
        center = .9 * tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            preactivate_unrolled, axis=1, keep_dims=True),
                                                  axis=-1,
                                                  keep_dims=True),
                                    axis=-2,
                                    keep_dims=True) / masses + .1 * center

        noise = (wx - center) * (wx - center)
        variance = min_var + tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
            vote_conf * noise, axis=1, keep_dims=True),
                                                         axis=-1,
                                                         keep_dims=True),
                                           axis=-2,
                                           keep_dims=True) / masses
        log_variance = tf.log(variance)
        p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
        log_2pi = tf.log(2 * math.pi)
        win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
        logit = beta * (win - activation_biases * 5000)
        activation_update = tf.minimum(
            0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
        # return activation, center
        log_det_sigma = -1 * p_i
        sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
        exp_update = tf.reduce_sum(noise / (2 * variance),
                                   axis=3,
                                   keep_dims=True)
        prior_update = activation_update - sigma_update - exp_update
        max_prior_update = tf.reduce_max(tf.reduce_max(tf.reduce_max(
            tf.reduce_max(prior_update, axis=-1, keep_dims=True),
            axis=-2,
            keep_dims=True),
                                                       axis=-3,
                                                       keep_dims=True),
                                         axis=-4,
                                         keep_dims=True)
        prior_normal = tf.add(prior_update, -1 * max_prior_update)
        prior_exp = tf.exp(prior_normal)
        t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
        c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
        pad_prior = tf.pad(c_prior,
                           [[0, 0], [(k - 1) * (k - 1), (k - 1) * (k - 1)],
                            [(k - 1) * (k - 1),
                             (k - 1) * (k - 1)], [0, 0]], 'CONSTANT')
        patch_prior = tf.extract_image_patches(images=pad_prior,
                                               ksizes=[1, k, k, 1],
                                               strides=[1, k, k, 1],
                                               rates=[1, k - 1, k - 1, 1],
                                               padding='VALID')
        sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
        sum_prior_patch = tf.extract_image_patches(images=sum_prior,
                                                   ksizes=[1, k, k, 1],
                                                   strides=[1, 1, 1, 1],
                                                   rates=[1, 1, 1, 1],
                                                   padding='VALID')
        sum_prior_reshape = tf.reshape(
            sum_prior_patch,
            [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001
        posterior = prior_exp / sum_prior_reshape
        return (posterior, logit, center, masses)
コード例 #7
0
    def forward_pass_with_file_inputs(self, x):
        with self._graph.as_default():
            if self._with_patching:
                # we want the largest multiple of of patch height/width that is smaller than the original
                # image height/width, for the final image dimensions
                patch_height = self._patch_height
                patch_width = self._patch_width
                final_height = (self._image_height //
                                patch_height) * patch_height
                final_width = (self._image_width // patch_width) * patch_width
                # find image differences to determine recentering crop coords, we divide by 2 so that the leftover
                # is equal on all sides of image
                offset_height = (self._image_height - final_height) // 2
                offset_width = (self._image_width - final_width) // 2
                # pre-allocate output dimensions
                total_outputs = np.empty([1, final_height, final_width, 1])
            else:
                total_outputs = np.empty(
                    [1, self._image_height, self._image_width, 1])

            num_batches = len(x) // self._batch_size
            remainder = len(x) % self._batch_size

            if remainder != 0:
                num_batches += 1
                remainder = self._batch_size - remainder

            # self.load_images_from_list(x) no longer calls following 2 lines so we needed to force them here
            images = x
            self._parse_images(images)

            x_test = tf.train.batch([self._all_images],
                                    batch_size=self._batch_size,
                                    num_threads=self._num_threads)
            x_test = tf.reshape(x_test,
                                shape=[
                                    -1, self._image_height, self._image_width,
                                    self._image_depth
                                ])
            if self._with_patching:
                x_test = tf.image.crop_to_bounding_box(x_test, offset_height,
                                                       offset_width,
                                                       final_height,
                                                       final_width)
                # Split the images up into the multiple slices of size patch_height x patch_width
                ksizes = [1, patch_height, patch_width, 1]
                strides = [1, patch_height, patch_width, 1]
                rates = [1, 1, 1, 1]
                x_test = tf.extract_image_patches(x_test, ksizes, strides,
                                                  rates, "VALID")
                x_test = tf.reshape(
                    x_test,
                    shape=[-1, patch_height, patch_width, self._image_depth])

            if self._load_from_saved:
                self.load_state()
            self._initialize_queue_runners()
            # Run model on them
            x_pred = self.forward_pass(x_test, deterministic=True)

            if self._with_patching:
                num_patch_rows = final_height // patch_height
                num_patch_cols = final_width // patch_width
                for i in range(num_batches):
                    xx = self._session.run(x_pred)

                    # generalized image stitching
                    for img in np.array_split(
                            xx,
                            self._batch_size):  # for each img in current batch
                        # we are going to build a list of rows of imgs called img_rows, where each element
                        # of img_rows is a row of img's concatenated together horizontally (axis=1), then we will
                        # iterate through img_rows concatenating the rows vertically (axis=0) to build
                        # the full img

                        img_rows = []
                        # for each row
                        for j in range(num_patch_rows):
                            curr_row = img[
                                j *
                                num_patch_cols]  # start new row with first img
                            # iterate through the rest of the row, concatenating img's together
                            for k in range(1, num_patch_cols):
                                # horizontal cat
                                curr_row = np.concatenate(
                                    (curr_row, img[k + (j * num_patch_cols)]),
                                    axis=1)
                            img_rows.append(
                                curr_row)  # add row of img's to the list

                        # start full img with the first full row of imgs
                        full_img = img_rows[0]
                        # iterate through rest of rows, concatenating rows together
                        for row_num in range(1, num_patch_rows):
                            # vertical cat
                            full_img = np.concatenate(
                                (full_img, img_rows[row_num]), axis=0)

                        # need to match total_outputs dimensions, so we add a dimension to the shape to match
                        full_img = np.array([
                            full_img
                        ])  # shape transformation: (x,y) --> (1,x,y)
                        total_outputs = np.append(
                            total_outputs, full_img,
                            axis=0)  # add the final img to the list
            else:
                for i in range(int(num_batches)):
                    xx = self._session.run(x_pred)
                    for img in np.array_split(xx, self._batch_size):
                        total_outputs = np.append(total_outputs, img, axis=0)

            # delete weird first row
            total_outputs = np.delete(total_outputs, 0, 0)

            # delete any outputs which are overruns from the last batch
            if remainder != 0:
                for i in range(remainder):
                    total_outputs = np.delete(total_outputs, -1, 0)

        return total_outputs