Example #1
  def testSecondOrderGradientCalculation(self):
    param_list = [
        "prune_option=second_order_gradient",
        "gradient_decay_rate=0.5",
    ]
    test_spec = ",".join(param_list)
    pruning_hparams = pruning.get_pruning_hparams().parse(test_spec)
    tf.logging.info(pruning_hparams)

    w = tf.Variable(tf.linspace(1.0, 10.0, 10), name="weights")
    _ = pruning.apply_mask(w, prune_option="second_order_gradient")

    p = pruning.Pruning(pruning_hparams)
    old_weight_update_op = p.old_weight_update_op()
    old_old_weight_update_op = p.old_old_weight_update_op()
    gradient_update_op = p.gradient_update_op()

    with self.cached_session() as session:
      tf.global_variables_initializer().run()
      session.run(old_weight_update_op)
      session.run(old_old_weight_update_op)
      session.run(tf.assign(w, tf.math.scalar_mul(2.0, w)))
      session.run(gradient_update_op)

      old_weights = pruning.get_old_weights()
      old_old_weights = pruning.get_old_old_weights()
      gradients = pruning.get_gradients()

      old_weight = old_weights[0]
      old_old_weight = old_old_weights[0]
      gradient = gradients[0]
      self.assertAllEqual(
          gradient.eval(),
          tf.math.scalar_mul(0.5,
                             tf.nn.l2_normalize(tf.linspace(1.0, 10.0,
                                                            10))).eval())
      self.assertAllEqual(old_weight.eval(), old_old_weight.eval())
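For reference, the asserted value can be reproduced by hand. A minimal NumPy sketch, assuming (as the assertion implies) that the gradient estimate is an exponential moving average, started at zero with gradient_decay_rate=0.5, of the l2-normalized weight delta:

import numpy as np

w = np.linspace(1.0, 10.0, 10)
delta = 2.0 * w - w                   # the weights were doubled, so the delta equals w
normalized = delta / np.linalg.norm(delta)
expected_gradient = 0.5 * normalized  # EMA from zero keeps half of the new term
print(expected_gradient)              # matches 0.5 * tf.nn.l2_normalize(w)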
Example #2
  def inv_depths(self, start_depth, end_depth, num_depths):
    """Returns reversed, sorted inverse interpolated depths.

    Args:
      start_depth: The first depth.
      end_depth: The last depth.
      num_depths: The total number of depths to create. start_depth and
          end_depth are always included, and the remaining depths are
          interpolated between them in inverse depth space.
    Returns:
      The depths sorted in descending order (so furthest first). This order is
      useful for back to front compositing.
    """
    depths = 1.0 / tf.linspace(1.0/end_depth, 1.0/start_depth, num_depths)
    return depths
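A quick usage sketch in plain NumPy (same arithmetic): five depths between 1 and 100 come out evenly spaced in inverse depth and sorted furthest-first.

import numpy as np

depths = 1.0 / np.linspace(1.0 / 100.0, 1.0 / 1.0, 5)
print(depths)  # [100.    3.88   1.98   1.33   1.  ], descending for back-to-front compositing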
Example #3
 def when_nonsingular():
     bucket_width = range_ / tf.cast(bucket_count, tf.float64)
     offsets = data - min_
     bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                              dtype=tf.int32)
     clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
     one_hots = tf.one_hot(clamped_indices, depth=bucket_count)
     bucket_counts = tf.cast(
         tf.reduce_sum(input_tensor=one_hots, axis=0),
         dtype=tf.float64,
     )
     edges = tf.linspace(min_, max_, bucket_count + 1)
     left_edges = edges[:-1]
     right_edges = edges[1:]
     return tf.transpose(
         a=tf.stack([left_edges, right_edges, bucket_counts]))
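The one-hot-and-reduce-sum is just a graph-friendly way to count bucket indices; the same bucketing in NumPy, with hypothetical data, is a direct bincount:

import numpy as np

data = np.array([0.1, 0.4, 0.4, 0.9])
bucket_count = 4
min_, max_ = data.min(), data.max()
range_ = max_ - min_

bucket_width = range_ / bucket_count
indices = np.floor((data - min_) / bucket_width).astype(np.int32)
clamped = np.minimum(indices, bucket_count - 1)  # so max_ lands in the last bucket
counts = np.bincount(clamped, minlength=bucket_count).astype(np.float64)
edges = np.linspace(min_, max_, bucket_count + 1)
print(np.stack([edges[:-1], edges[1:], counts], axis=1))  # (left, right, count) rows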
Example #4
def linear_lookup(phase: tf.Tensor, wavetables: tf.Tensor) -> tf.Tensor:
    """Lookup from wavetables with linear interpolation.

  Args:
    phase: The instantaneous phase of the base oscillator, ranging from 0 to
      1.0. This gives the position to lookup in the wavetable.
      Shape [batch_size, n_samples, 1].
    wavetables: Wavetables to be read from on lookup. Shape [batch_size,
      n_samples, n_wavetable] or [batch_size, n_wavetable].

  Returns:
    The resulting audio from linearly interpolated lookup of the wavetables at
      each point in time. Shape [batch_size, n_samples].
  """
    phase, wavetables = tf_float32(phase), tf_float32(wavetables)

    # Add a time dimension if not present.
    if len(wavetables.shape) == 2:
        wavetables = wavetables[:, tf.newaxis, :]

    # Add a wavetable dimension if not present.
    if len(phase.shape) == 2:
        phase = phase[:, :, tf.newaxis]

    # Add first sample to end of wavetable for smooth linear interpolation
    # between the last point in the wavetable and the first point.
    wavetables = tf.concat([wavetables, wavetables[..., 0:1]], axis=-1)
    n_wavetable = int(wavetables.shape[-1])

    # Get a phase value for each point on the wavetable.
    phase_wavetables = tf.linspace(0.0, 1.0, n_wavetable)

    # Get pair-wise distances from the oscillator phase to each wavetable point.
    # Axes are [batch, time, n_wavetable].
    phase_distance = tf.abs(
        (phase - phase_wavetables[tf.newaxis, tf.newaxis, :]))

    # Put distance in units of wavetable samples.
    phase_distance *= n_wavetable - 1

    # Weighting for interpolation.
    # Distance is > 1.0 (and thus weights are 0.0) for all but nearest neighbors.
    weights = tf.nn.relu(1.0 - phase_distance)
    weighted_wavetables = weights * wavetables

    # Interpolated audio from summing the weighted wavetable at each timestep.
    return tf.reduce_sum(weighted_wavetables, axis=-1)
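The relu(1.0 - phase_distance) term is a triangular (hat) kernel: once distances are in units of wavetable samples, only the two nearest wavetable points get nonzero, complementary weights. A small NumPy sketch with hypothetical numbers:

import numpy as np

n_wavetable = 5  # 4 unique points plus the wrapped-around first sample
wavetable = np.array([0.0, 1.0, 0.0, -1.0, 0.0])  # one crude sine cycle
phase_wavetables = np.linspace(0.0, 1.0, n_wavetable)

phase = 0.125  # halfway between the first two wavetable points
distance = np.abs(phase - phase_wavetables) * (n_wavetable - 1)
weights = np.maximum(1.0 - distance, 0.0)  # relu
print(weights)                      # [0.5 0.5 0.  0.  0. ]
print(np.sum(weights * wavetable))  # 0.5, the linear interpolation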
Example #5
 def testPartitionedVariableMasking(self):
   partitioner = tf.variable_axis_size_partitioner(40)
   with self.cached_session() as session:
     with tf.variable_scope("", partitioner=partitioner):
       sparsity = tf.Variable(0.5, name="Sparsity")
       weights = tf.get_variable(
           "weights", initializer=tf.linspace(1.0, 100.0, 100))
       masked_weights = pruning.apply_mask(
           weights, scope=tf.get_variable_scope())
     p = pruning.Pruning(sparsity=sparsity)
     p._spec.threshold_decay = 0.0
     mask_update_op = p.mask_update_op()
     tf.global_variables_initializer().run()
     masked_weights_val = masked_weights.eval()
     session.run(mask_update_op)
     masked_weights_val = masked_weights.eval()
     self.assertAllEqual(np.count_nonzero(masked_weights_val), 50)
Example #6
def get_harmonic_frequencies(frequencies: tf.Tensor,
                             n_harmonics: int) -> tf.Tensor:
    """Create integer multiples of the fundamental frequency.

  Args:
    frequencies: Fundamental frequencies (Hz). Shape [batch_size, :, 1].
    n_harmonics: Number of harmonics.

  Returns:
    harmonic_frequencies: Oscillator frequencies (Hz).
      Shape [batch_size, :, n_harmonics].
  """
    frequencies = tf_float32(frequencies)

    f_ratios = tf.linspace(1.0, float(n_harmonics), int(n_harmonics))
    f_ratios = f_ratios[tf.newaxis, tf.newaxis, :]
    harmonic_frequencies = frequencies * f_ratios
    return harmonic_frequencies
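Reproducing the core arithmetic: for a 440 Hz fundamental and three harmonics, the ratios are [1, 2, 3], so the oscillators sit at the first three integer multiples.

import tensorflow as tf

f0 = tf.constant([[[440.0]]])  # [batch=1, time=1, 1]
f_ratios = tf.linspace(1.0, 3.0, 3)[tf.newaxis, tf.newaxis, :]
print(f0 * f_ratios)  # [[[ 440.  880. 1320.]]]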
Example #7
def numerical_base_partition_function(alpha):
    """Numerically approximate the partition function Z(alpha)."""
    # Generate `num_samples` values in [-x_max, x_max], with more samples
    # near the origin as `power` is set to larger values.
    num_samples = 2**24 + 1  # We want an odd value so that 0 gets sampled.
    x_max = 10**10
    power = 6
    t = tf.linspace(tf.constant(-1, tf.float64),
                    tf.constant(1, tf.float64), num_samples)
    t = tf.sign(t) * tf.abs(t)**power
    x = t * x_max

    # Compute losses for the values, then exponentiate the negative losses and
    # integrate with the trapezoid rule to get the partition function.
    losses = general.lossfun(x, alpha, np.float64(1))
    y = tf.math.exp(-losses)
    partition = tf.reduce_sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
    return partition
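As a sanity check: for alpha = 2 the general robust loss reduces to 0.5 * x**2, so Z(2) should equal the Gaussian normalizer sqrt(2 * pi) ≈ 2.5066. A NumPy sketch of the same sampling-and-trapezoid scheme, assuming that alpha = 2 special case and fewer samples:

import numpy as np

num_samples = 2**16 + 1  # odd, so 0 is sampled
t = np.linspace(-1.0, 1.0, num_samples)
x = np.sign(t) * np.abs(t)**6 * 10.0**10  # dense near the origin

y = np.exp(-0.5 * x**2)  # exp(-loss) for alpha = 2
partition = np.sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.0
print(partition, np.sqrt(2.0 * np.pi))  # both ~2.5066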
Example #8
def create_centered_identity_transformation_field(shape, spacings):
    """Create 2D or 3D centered identity transformation field.

  Args:
    shape: 2- or 3-element list. The shape of the transformation field.
    spacings: 2- or 3-element list. The spacings of the transformation field.

  Returns:
    2D case: 3-D Tensor (x0, x1, comp) describing a 2D vector field
    3D case: 4-D Tensor (x0, x1, x2, comp)  describing a 3D vector field
  """
    coords = []
    for i, size in enumerate(shape):
        spacing = spacings[i]
        coords.append(
            tf.linspace(-(size - 1) / 2 * spacing, (size - 1) / 2 * spacing,
                        size))
    permutation = np.roll(np.arange(len(coords) + 1), -1)
    return tf.transpose(tf.meshgrid(*coords, indexing="ij"), permutation)
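The 2D case in plain NumPy, assuming shape=[3, 3] and unit spacings: each output location holds its own centered coordinate, which is what makes the field an identity warp.

import numpy as np

shape, spacings = [3, 3], [1.0, 1.0]
coords = [np.linspace(-(s - 1) / 2 * sp, (s - 1) / 2 * sp, s)
          for s, sp in zip(shape, spacings)]
field = np.stack(np.meshgrid(*coords, indexing="ij"), axis=-1)
print(field.shape)  # (3, 3, 2)
print(field[0, 0])  # [-1. -1.], the top-left corner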
Example #9
    def format_network_input(self, ref_image, psv_src_images, ref_pose,
                             psv_src_poses, planes, intrinsics):
        """Format the network input.

    Args:
      ref_image: reference source image [batch, height, width, 3]
      psv_src_images: stack of source images (excluding the ref image)
                      [batch, height, width, 3*(num_source -1)]
      ref_pose: reference world-to-camera pose (where PSV is constructed)
                [batch, 4, 4]
      psv_src_poses: input poses (world to camera) [batch, num_source-1, 4, 4]
      planes: list of scalar depth values for each plane
      intrinsics: camera intrinsics [batch, 3, 3]
    Returns:
      net_input: [batch, height, width, #planes, num_source*3]
    """
        _, num_psv_source, _, _ = psv_src_poses.get_shape().as_list()
        num_planes = tf.shape(planes)[0]

        net_input = []
        for i in range(num_psv_source):
            curr_pose = tf.matmul(psv_src_poses[:, i],
                                  tf.matrix_inverse(ref_pose))
            curr_image = psv_src_images[:, :, :, i * 3:(i + 1) * 3]
            curr_psv = pj.plane_sweep(curr_image, planes, curr_pose,
                                      intrinsics)
            net_input.append(curr_psv)

        net_input = tf.concat(net_input, axis=4)
        ref_img_stack = tf.tile(tf.expand_dims(ref_image, 3),
                                [1, 1, 1, num_planes, 1])
        net_input = tf.concat([net_input, ref_img_stack], axis=4)

        # Append normalized plane indices
        normalized_disp_inds = tf.reshape(tf.linspace(0.0, 1.0, num_planes),
                                          [1, 1, 1, num_planes, 1])
        sh = tf.shape(net_input)
        normalized_disp_inds_stack = tf.tile(normalized_disp_inds,
                                             [1, sh[1], sh[2], 1, 1])
        net_input = tf.concat([net_input, normalized_disp_inds_stack], axis=4)

        return net_input
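The plane-index channel at the end is easy to check in isolation: a [1, 1, 1, #planes, 1] ramp tiled over the spatial dimensions yields one extra channel per plane. A shape sketch with hypothetical sizes:

import tensorflow as tf

num_planes, height, width = 4, 2, 3
ramp = tf.reshape(tf.linspace(0.0, 1.0, num_planes), [1, 1, 1, num_planes, 1])
stack = tf.tile(ramp, [1, height, width, 1, 1])
print(stack.shape)  # (1, 2, 3, 4, 1), ready to concat on axis=4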
Example #10
 def when_nonsingular():
     bucket_width = range_ / tf.cast(bucket_count, tf.float64)
     offsets = data - min_
     bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                              dtype=tf.int32)
     clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
     # Use float64 instead of float32 to avoid accumulating floating point error
     # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
     # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
     one_hots = tf.one_hot(clamped_indices,
                           depth=bucket_count,
                           dtype=tf.float64)
     bucket_counts = tf.cast(
         tf.reduce_sum(input_tensor=one_hots, axis=0),
         dtype=tf.float64,
     )
     edges = tf.linspace(min_, max_, bucket_count + 1)
     left_edges = edges[:-1]
     right_edges = edges[1:]
     return tf.transpose(
         a=tf.stack([left_edges, right_edges, bucket_counts]))
Example #11
def affine_grid_generator(height, width, theta):
    """
    This function returns a sampling grid, which when
    used with the bilinear sampler on the input feature
    map, will create an output feature map that is an
    affine transformation [1] of the input feature map.

    Input
    -----
    - height: desired height of grid/output. Used
      to downsample or upsample.

    - width: desired width of grid/output. Used
      to downsample or upsample.

    - theta: affine transform matrices of shape (num_batch, 2, 3).
      For each image in the batch, we have 6 theta parameters of
      the form (2x3) that define the affine transformation T.

    Returns
    -------
    - sampling grid of shape (num_batch, 2, H, W).
      The 2nd dimension has 2 components: (x, y), which are the
      sampling points of the original image for each point in the
      target image. Note that with the normalized (-1, 1) linspaces
      commented out below, this variant builds the grid in
      [0, width] x [0, height] pixel coordinates.

    Note
    ----
    [1]: the affine transformation allows cropping, translation,
         and isotropic scaling.
    """
    num_batch = tf.shape(theta)[0]

    # create normalized 2D grid
    # x = tf.linspace(-1.0, 1.0, width)
    x = tf.linspace(0.0, 1.0, width)
    # y = tf.linspace(-1.0, 1.0, height)
    y = tf.linspace(0.0, 1.0, height)
    x = x * tf.cast(width, tf.float32)
    y = y * tf.cast(height, tf.float32)
    x_t, y_t = tf.meshgrid(x, y)

    # flatten
    x_t_flat = tf.reshape(x_t, [-1])
    y_t_flat = tf.reshape(y_t, [-1])

    # reshape to [x_t, y_t , 1] - (homogeneous form)
    ones = tf.ones_like(x_t_flat)
    sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])

    # repeat grid num_batch times
    sampling_grid = tf.expand_dims(sampling_grid, axis=0)
    sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1]))

    # cast to float32 (required for matmul)
    theta = tf.cast(theta, 'float32')
    sampling_grid = tf.cast(sampling_grid, 'float32')

    # transform the sampling grid - batch multiply
    batch_grids = tf.matmul(theta, sampling_grid)
    # batch grid has shape (num_batch, 2, H*W)

    # reshape to (num_batch, H, W, 2)
    batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])

    return batch_grids
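A usage sketch (runs eagerly in TF2; wrap in a session for TF1): an identity theta returns the grid unchanged, with the shape the docstring promises.

import tensorflow as tf

theta = tf.constant([[[1.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0]]])  # identity affine transform, batch of 1
grid = affine_grid_generator(height=2, width=3, theta=theta)
print(grid.shape)  # (1, 2, 2, 3): batch, (x, y) components, H, W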
Example #12
  def infer_mpi(self, raw_src_images, raw_ref_image, ref_pose, src_poses,
                intrinsics, num_mpi_planes, mpi_planes,
                run_patched=False,
                patch_ind=np.array([0, 0]),
                patchsize=np.array([256, 256]),
                outsize=np.array([128, 128])):
    """Construct the MPI inference graph.

    Args:
      raw_src_images: stack of source images [batch, height, width, 3*#source]
      raw_ref_image: reference image [batch, height, width, 3]
      ref_pose: reference frame pose (world to camera) [batch, 4, 4]
      src_poses: source frame poses (world to camera) [batch, #source, 4, 4]
      intrinsics: camera intrinsics [batch, 3, 3]
      num_mpi_planes: number of mpi planes to predict
      mpi_planes: list of plane depths
      run_patched: whether to only infer MPI for patches of PSV (inference only)
      patch_ind: patch index for patched MPI inference
      patchsize: spatial patch size for MPI inference
      outsize: size of central portion to keep for patched inference
    Returns:
      outputs: a collection of output tensors.
    """

    with tf.name_scope("preprocessing"):
      src_images = self.preprocess_image(raw_src_images)
      ref_image = self.preprocess_image(raw_ref_image)

    with tf.name_scope("format_network_input"):
      # WARNING: we assume the first src image/pose is the reference
      net_input = self.format_network_input(ref_image, src_images[:, :, :, 3:],
                                            ref_pose, src_poses[:, 1:],
                                            mpi_planes, intrinsics)

    with tf.name_scope("layer_prediction"):
      # The network directly outputs the color image at each MPI plane.

      chout = 4  # Number of output channels, RGBA

      if run_patched:
        # Patch the PSV spatially, with buffer, and generate MPI patch
        # Only for inference (not implemented for training)
        buffersize = (patchsize - outsize) // 2
        padding = [[0, 0], [buffersize[0], buffersize[0]],
                   [buffersize[1], buffersize[1]], [0, 0], [0, 0]]
        net_input_pad = tf.pad(net_input, padding)
        patch_start = patch_ind * outsize
        patch_end = patch_start + patchsize
        net_input_patch = net_input_pad[:, patch_start[0]:patch_end[0],
                                        patch_start[1]:patch_end[1], :, :]
        rgba_layers, _ = ed_3d_net(net_input_patch, chout)
      else:
        # Generate entire MPI (training and inference, but takes more memory)
        print("first step MPI prediction")
        rgba_layers, _ = ed_3d_net(net_input, chout)

      color_layers = rgba_layers[:, :, :, :, :-1]
      alpha_layers = rgba_layers[:, :, :, :, -1:]
      # Rescale alphas to (0, 1)
      alpha_layers = (alpha_layers + 1.)/2.
      rgba_layers = tf.concat([color_layers, alpha_layers], axis=4)

      print("refining MPI")
      transmittance = self.compute_transmittance(alpha_layers)
      refine_input_colors = color_layers * transmittance
      refine_input_alpha = alpha_layers * transmittance
      stuff_behind = tf.cumsum(refine_input_colors, axis=3)
      concat_trans = True  # Concatenate transmittance to second input
      if concat_trans:
        refine_input = tf.concat([tf.stop_gradient(refine_input_colors),
                                  tf.stop_gradient(stuff_behind),
                                  tf.stop_gradient(refine_input_alpha),
                                  tf.stop_gradient(transmittance)], axis=4)

      normalized_disp_inds = tf.reshape(tf.linspace(0.0, 1.0, num_mpi_planes),
                                        [1, 1, 1, num_mpi_planes, 1])
      sh = tf.shape(refine_input)
      normalized_disp_inds_stack = tf.tile(normalized_disp_inds,
                                           [1, sh[1], sh[2], 1, 1])
      refine_input = tf.concat([refine_input, normalized_disp_inds_stack],
                               axis=4)
      print("refine input size:", refine_input.shape)
      rgba_layers_refine = refine_net(refine_input)

      print("predicting flow for occlusions")
      flow_source = tf.stop_gradient(stuff_behind)
      flow_vecs = rgba_layers_refine[:, :, :, :, :2]
      color_layers = pj.flow_gather(flow_source, flow_vecs)
      alpha_layers = rgba_layers_refine[:, :, :, :, -1:]
      # Rescale alphas to (0, 1)
      alpha_layers = (alpha_layers + 1.)/2.
      rgba_layers_refine = tf.concat([color_layers, alpha_layers], axis=4)

    # Collect output tensors
    pred = {}
    pred["rgba_layers"] = rgba_layers
    pred["rgba_layers_refine"] = rgba_layers_refine
    pred["refine_input_mpi"] = tf.concat([refine_input_colors,
                                          refine_input_alpha], axis=-1)
    pred["stuff_behind"] = stuff_behind
    pred["flow_vecs"] = flow_vecs
    pred["psv"] = net_input[:, :, :, :, 0:3]

    # Add pred tensors to outputs collection
    print("adding outputs to collection")
    for i in pred:
      tf.add_to_collection("outputs", pred[i])

    return pred
Example #13
    def __init__(self,
                 sess,
                 num_actions,
                 observation_shape=dqn_agent.NATURE_DQN_OBSERVATION_SHAPE,
                 observation_dtype=dqn_agent.NATURE_DQN_DTYPE,
                 stack_size=dqn_agent.NATURE_DQN_STACK_SIZE,
                 number_of_gammas=8,
                 gamma_max=0.99,
                 acting_policy='hyperbolic',
                 hyp_exponent=1.0,
                 integral_estimate='lower',
                 num_atoms=51,
                 vmax=10.,
                 gamma=0.99,
                 update_horizon=1,
                 min_replay_history=20000,
                 update_period=4,
                 target_update_period=8000,
                 epsilon_fn=dqn_agent.linearly_decaying_epsilon,
                 epsilon_train=0.01,
                 epsilon_eval=0.001,
                 epsilon_decay_period=250000,
                 replay_scheme='prioritized',
                 gradient_clipping_norm=None,
                 network_size_expansion=1.0,
                 tf_device='/cpu:*',
                 use_staging=True,
                 optimizer=tf.train.AdamOptimizer(learning_rate=0.00025,
                                                  epsilon=0.0003125),
                 summary_writer=None,
                 summary_writing_frequency=50000):
        """Initializes the agent and constructs the components of its graph.

    Args:
      sess: `tf.Session`, for executing ops.
      num_actions: int, number of actions the agent can take at any state.
      observation_shape: tuple of ints or an int. If single int, the observation
        is assumed to be a 2D square.
      observation_dtype: tf.DType, specifies the type of the observations. Note
        that if your inputs are continuous, you should set this to tf.float32.
      stack_size: int, number of frames to use in state stack.
      number_of_gammas: int, the number of gammas to estimate in parallel.
      gamma_max: float, the maximum gamma we will learn via Bellman updates.
      acting_policy: str, the policy with which the agent will act.  One of
        ['hyperbolic', 'largest_gamma']
      hyp_exponent:  float, the parameter k in the equation 1. / (1. + k * t)
        for hyperbolic discounting.  Smaller parameter will lead to a longer
        horizon.
      integral_estimate:  str, how to estimate the integral of the hyperbolic
        discount.
      num_atoms: int, the number of buckets of the value function distribution.
      vmax: float, the value distribution support is [-vmax, vmax].
      gamma: float, discount factor with the usual RL meaning.
      update_horizon: int, horizon at which updates are performed, the 'n' in
        n-step update.
      min_replay_history: int, number of transitions that should be experienced
        before the agent begins training its value function.
      update_period: int, period between DQN updates.
      target_update_period: int, update period for the target network.
      epsilon_fn: function expecting 4 parameters: (decay_period, step,
        warmup_steps, epsilon). This function should return the epsilon value
        used for exploration during training.
      epsilon_train: float, the value to which the agent's epsilon is eventually
        decayed during training.
      epsilon_eval: float, epsilon used when evaluating the agent.
      epsilon_decay_period: int, length of the epsilon decay schedule.
      replay_scheme: str, 'prioritized' or 'uniform', the sampling scheme of the
        replay memory.
      gradient_clipping_norm: float, if not None, this will set the gradient
        clipping norm.
      network_size_expansion: float, the multiplier on the default layer size.
      tf_device: str, Tensorflow device on which the agent's graph is executed.
      use_staging: bool, when True use a staging area to prefetch the next
        training batch, speeding training up by about 30%.
      optimizer: `tf.train.Optimizer`, for training the value function.
      summary_writer: SummaryWriter object for outputting training statistics.
        Summary writing disabled if set to None.
      summary_writing_frequency: int, frequency with which summaries will be
        written. Lower values will result in slower training.
    """
        # We need this because some tools convert round floats into ints.
        vmax = float(vmax)
        self._num_atoms = num_atoms
        self._support = tf.linspace(-vmax, vmax, num_atoms)
        self._replay_scheme = replay_scheme
        self.optimizer = optimizer
        self.number_of_gammas = number_of_gammas
        self.gamma_max = gamma_max
        self.acting_policy = acting_policy
        self.hyp_exponent = hyp_exponent
        self.integral_estimate = integral_estimate
        self.gradient_clipping_norm = gradient_clipping_norm
        self.network_size_expansion = network_size_expansion

        # These are the discount factors (gammas) used to estimate the integral.
        self.eval_gammas = agent_utils.compute_eval_gamma_interval(
            self.gamma_max, self.hyp_exponent, self.number_of_gammas)
        # However, if we wish to estimate hyperbolic discounting of the form
        #
        #      \Gamma_t =  1. / (1. + k * t)
        #
        # where we now have a coefficient k <= 1.0,
        # we need to consider the value functions for \gamma ^ k.  We refer to
        # these below as self.gammas, since these are the gammas actually being
        # learned via Bellman updates.
        self.gammas = [
            math.pow(gamma, self.hyp_exponent) for gamma in self.eval_gammas
        ]

        assert max(self.gammas) <= self.gamma_max

        super(HyperRainbowAgent, self).__init__(
            sess=sess,
            num_actions=num_actions,
            observation_shape=observation_shape,
            observation_dtype=observation_dtype,
            stack_size=stack_size,
            gamma=0,  # TODO(liamfedus): better way to deal with self.gamma
            update_horizon=update_horizon,
            min_replay_history=min_replay_history,
            update_period=update_period,
            target_update_period=target_update_period,
            epsilon_fn=epsilon_fn,
            epsilon_train=epsilon_train,
            epsilon_eval=epsilon_eval,
            epsilon_decay_period=epsilon_decay_period,
            tf_device=tf_device,
            use_staging=use_staging,
            optimizer=self.optimizer,
            summary_writer=summary_writer,
            summary_writing_frequency=summary_writing_frequency)
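The comment's change of variables rests on a standard identity: 1 / (1 + k*t) equals the integral over gamma in [0, 1] of gamma**(k*t), so the hyperbolic discount is an average of exponential discounts with factors gamma**k. A quick NumPy check, with the trapezoid rule standing in for the agent's integral estimate:

import numpy as np

k, t = 1.0, 5.0
gammas = np.linspace(0.0, 1.0, 10001)
discounts = gammas**(k * t)         # gamma**(k*t) for each exponential head
print(np.trapz(discounts, gammas))  # ~0.16667
print(1.0 / (1.0 + k * t))          # 1/6, the hyperbolic discount at t=5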
Example #14
def mpi_resample_cube(mpi, tgt, intrinsics, depth_planes, side_length,
                      cube_res):
  """Resample MPI onto cube centered at target point.

  Args:
    mpi: [B,H,W,D,C], input MPI
    tgt: [B,3], [x,y,z] coordinates for cube center (in reference/mpi frame)
    intrinsics: [B,3,3], MPI reference camera intrinsics
    depth_planes: [D] depth values for MPI planes
    side_length: metric side length of cube
    cube_res: resolution of each cube dimension

  Returns:
    resampled: [B, cube_res, cube_res, cube_res, C], resampled cube volume
    coords_proj: [B, cube_res, cube_res, cube_res, 4], MPI sampling coordinates
  """

  batch_size = tf.shape(mpi)[0]
  num_depths = tf.shape(mpi)[3]

  # compute MPI world coordinates
  intrinsics_tile = tf.tile(intrinsics, [num_depths, 1, 1])

  # create cube coordinates
  b_vals = tf.to_float(tf.range(batch_size))
  x_vals = tf.linspace(-side_length / 2.0, side_length / 2.0, cube_res)
  y_vals = tf.linspace(-side_length / 2.0, side_length / 2.0, cube_res)
  z_vals = tf.linspace(side_length / 2.0, -side_length / 2.0, cube_res)
  b, y, x, z = tf.meshgrid(b_vals, y_vals, x_vals, z_vals, indexing='ij')

  x = x + tgt[:, 0, tf.newaxis, tf.newaxis, tf.newaxis]
  y = y + tgt[:, 1, tf.newaxis, tf.newaxis, tf.newaxis]
  z = z + tgt[:, 2, tf.newaxis, tf.newaxis, tf.newaxis]

  ones = tf.ones_like(x)
  coords = tf.stack([x, y, z, ones], axis=1)
  coords_r = tf.reshape(
      tf.transpose(coords, [0, 4, 1, 2, 3]),
      [batch_size * cube_res, 4, cube_res, cube_res])

  # store elements with negative z vals for projection
  bad_inds = tf.less(z, 0.0)

  # project into reference camera to transform coordinates into MPI indices
  filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
  filler = tf.tile(filler, [batch_size * cube_res, 1, 1])
  intrinsics_tile = tf.tile(intrinsics, [cube_res, 1, 1])
  intrinsics_tile_4 = tf.concat(
      [intrinsics_tile,
       tf.zeros([batch_size * cube_res, 3, 1])], axis=2)
  intrinsics_tile_4 = tf.concat([intrinsics_tile_4, filler], axis=1)
  coords_proj = cam2pixel(coords_r, intrinsics_tile_4)
  coords_depths = tf.transpose(coords_r[:, 2:3, :, :], [0, 2, 3, 1])
  coords_depth_inds = (tf.to_float(num_depths) - 1) * (
      (1.0 / coords_depths) -
      (1.0 / depth_planes[0])) / ((1.0 / depth_planes[-1]) -
                                  (1.0 / depth_planes[0]))
  coords_proj = tf.concat([coords_proj, coords_depth_inds], axis=3)
  coords_proj = tf.transpose(
      tf.reshape(coords_proj, [batch_size, cube_res, cube_res, cube_res, 3]),
      [0, 2, 3, 1, 4])
  coords_proj = tf.concat([b[:, :, :, :, tf.newaxis], coords_proj], axis=4)

  # trilinear interpolation gather from MPI
  # interpolate pre-multiplied RGBAs, then un-pre-multiply
  mpi_alpha = mpi[Ellipsis, -1:]
  mpi_channels_p = mpi[Ellipsis, :-1] * mpi_alpha
  mpi_p = tf.concat([mpi_channels_p, mpi_alpha], axis=-1)

  resampled_p = sampling.trilerp_gather(mpi_p, coords_proj, bad_inds)

  resampled_alpha = tf.clip_by_value(resampled_p[Ellipsis, -1:], 0.0, 1.0)
  resampled_channels = resampled_p[Ellipsis, :-1] / (resampled_alpha + 1e-8)
  resampled = tf.concat([resampled_channels, resampled_alpha], axis=-1)

  return resampled, coords_proj
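The coords_depth_inds expression maps a metric depth to a fractional MPI plane index by interpolating linearly in inverse depth. The same mapping in isolation, with hypothetical plane depths:

import numpy as np

depth_planes = np.array([1.0, 2.0, 4.0, 8.0])  # hypothetical MPI plane depths
num_depths = len(depth_planes)

def depth_to_plane_index(d):
    # Linear in disparity (1/d) between the first and last planes.
    return (num_depths - 1) * ((1.0 / d - 1.0 / depth_planes[0]) /
                               (1.0 / depth_planes[-1] - 1.0 / depth_planes[0]))

print(depth_to_plane_index(1.0))  # 0.0, the first plane
print(depth_to_plane_index(8.0))  # 3.0, the last plane
print(depth_to_plane_index(2.0))  # ~1.71, between planes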
Example #15
    def build(self, Px, dx, X, N_neurons=180, eta=0.01, exp_decay=[50, 0.9]):
        '''
            Set up the Feature Extractor. The following parameters are required:
            - Px: distribution of x, with shape [N, N]. N will be used as the number of points on each side of the lattice
            - dx: discretization step of the lattice, such that sum Px dx**2 = 1 in this 2d case
            - X : points of the lattice (usually a meshgrid). They must be the same points on which Px has been calculated
            - N_neurons: number of neurons of the neural network (the same in each layer)
            - eta : learning rate
            - exp_decay = [decay_steps, decay_rate] for an exponentially decaying learning rate.
              In particular, decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
        '''
        self.xdelta = dx
        self.x_points = X

        self.P_x = Px
        self.N = np.shape(Px)[0]

        self.P_x_short = self.P_x.reshape([1, self.N*self.N])

        self.Ny = self.N
        self.eta = eta
        self.neurons_feature = N_neurons

        self.graph = tf.Graph()

        with self.graph.as_default():
            #############################
            # Define the input placeholders and the feature neural network
            self.tf_x = tf.placeholder(tf.float32, shape=[2, None])
            self.tf_i = tf.placeholder(tf.float32)
            self.tf_a = tf.placeholder(tf.float32)

            self.tf_theta_input = K.layers.Input(shape=(2,))
            self.tf_theta_layers = self.tf_theta_input
            self.tf_theta_layers = K.layers.Dense(self.neurons_feature,
                                                  input_shape=(2,),
                                                  activation=K.layers.ReLU(),
                                                  kernel_initializer=tf.random_normal_initializer(),
                                                  bias_initializer=tf.random_normal_initializer())(self.tf_theta_layers)
            self.tf_theta_layers = K.layers.Dense(self.neurons_feature,
                                                  activation=K.layers.ReLU(),
                                                  kernel_initializer=tf.initializers.glorot_normal(),
                                                  bias_initializer=tf.random_normal_initializer())(self.tf_theta_layers)
            self.tf_theta_layers = K.layers.Dense(self.neurons_feature,
                                                  activation=K.activations.tanh,
                                                  kernel_initializer=tf.initializers.glorot_normal(),
                                                  bias_initializer=tf.random_normal_initializer())(self.tf_theta_layers)
            self.tf_theta_layers = K.layers.Dense(1,
                                                  kernel_initializer=tf.initializers.glorot_normal(),
                                                  bias_initializer=tf.random_normal_initializer())(self.tf_theta_layers)
            self.tf_theta_net = K.Model(self.tf_theta_input,
                                        self.tf_theta_layers)

            self.tf_f = self.tf_a * tf.reshape(self.tf_theta_net(tf.transpose(self.tf_x)),
                                               [1, -1])

            #############################
            # Regularizing Term
            self.tf_grads_f = tf.gradients(self.tf_f, self.tf_x)[0]
            self.tf_norm2_grad_f = tf.reduce_sum(self.tf_grads_f**2, 0)
            self.tf_term1_local = -0.5 * (
                tf.log(self.tf_norm2_grad_f)*self.P_x_short*self.xdelta**2)
            self.tf_term1 = -0.5 * tf.reduce_sum(
                tf.log(self.tf_norm2_grad_f) * self.P_x_short*self.xdelta**2)

            #############################
            # Entropy Term

            # Define current range of the feature (in which to approximate Py)
            self.tf_y_min = tf.reduce_min(self.tf_f)
            self.tf_y_max = tf.reduce_max(self.tf_f)
            self.tf_ydelta = tf.stop_gradient(
                (self.tf_y_max-self.tf_y_min)/(self.Ny-1))
            self.tf_y_linspace = tf.reshape(tf.stop_gradient(tf.linspace(self.tf_y_min, self.tf_y_max, self.Ny)),
                                            [self.Ny, 1])

            # Define a triangular histogram (so that it is differentiable)
            self.tf_y_mask_left = tf.logical_and((self.tf_y_linspace - self.tf_ydelta < self.tf_f),
                                                 (self.tf_y_linspace > self.tf_f))
            self.tf_y_mask_right = tf.logical_and((self.tf_y_linspace <= self.tf_f),
                                                  (self.tf_y_linspace + self.tf_ydelta > self.tf_f))

            self.tf_y_line_left = (
                1/self.tf_ydelta + 1/self.tf_ydelta**2*(self.tf_f-self.tf_y_linspace))
            self.tf_y_line_right = (
                1/self.tf_ydelta - 1/self.tf_ydelta**2*(self.tf_f-self.tf_y_linspace))

            self.tf_ydelta_left = self.tf_y_line_left * tf.stop_gradient(tf.cast(self.tf_y_mask_left,
                                                                                 tf.float32))
            self.tf_ydelta_right = self.tf_y_line_right * tf.stop_gradient(tf.cast(self.tf_y_mask_right,
                                                                                   tf.float32))

            # Approximate the distribution of the feature through a differentiable histogram
            self.tf_P_y = tf.reduce_sum((self.tf_ydelta_left+self.tf_ydelta_right)*self.P_x_short*self.xdelta**2,
                                        1)
            # Calculate the Entropy of the feature
            self.tf_H_y = - tf.reduce_sum(
                self.tf_P_y*tf.log(self.tf_P_y))*self.tf_ydelta

            #############################
            # Renormalized Mutual Information and training methods
            self.tf_cost = self.tf_term1 + self.tf_H_y

            # Optimizer (with exponential decaying learning rate)
            self.tf_optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=tf.train.exponential_decay(self.eta, self.tf_i, exp_decay[0], exp_decay[1]))

            # Gradients of the cost function
            self.tf_grad_cost = self.tf_optimizer.compute_gradients(
                -self.tf_cost,
                self.tf_theta_net.trainable_variables)

            # Train step
            self.tf_train_step = self.tf_optimizer.apply_gradients(
                self.tf_grad_cost)

            # Initialize the neural network
            self.tf_init_op = tf.global_variables_initializer()

        self.sess = tf.Session(graph=self.graph)
        self.sess.run(self.tf_init_op)
        self.costs = []
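The triangular histogram is the differentiable heart of this setup: each feature value contributes hat-function weights to its two neighboring bins, so gradients flow from the entropy term back into the network. A stripped-down NumPy sketch of the same binning:

import numpy as np

f = np.array([0.1, 0.2, 0.25, 0.8])  # feature values, one per lattice point
p_x = np.full(4, 0.25)               # probability mass of each lattice point
Ny = 5
y = np.linspace(f.min(), f.max(), Ny).reshape(Ny, 1)
dy = (f.max() - f.min()) / (Ny - 1)

left = (1/dy + (f - y)/dy**2) * ((y - dy < f) & (y > f))
right = (1/dy - (f - y)/dy**2) * ((y <= f) & (y + dy > f))
p_y = np.sum((left + right) * p_x, axis=1)
print(p_y, np.sum(p_y) * dy)  # a density over y that integrates to ~1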
Example #16
  def __init__(self,
               sess,
               num_actions,
               observation_shape=dqn_agent.NATURE_DQN_OBSERVATION_SHAPE,
               observation_dtype=dqn_agent.NATURE_DQN_DTYPE,
               stack_size=dqn_agent.NATURE_DQN_STACK_SIZE,
               network=legacy_networks.rainbow_network,
               num_atoms=51,
               vmax=10.,
               gamma=0.99,
               update_horizon=1,
               min_replay_history=20000,
               update_period=4,
               target_update_period=8000,
               epsilon_fn=dqn_agent.linearly_decaying_epsilon,
               epsilon_train=0.01,
               epsilon_eval=0.001,
               epsilon_decay_period=250000,
               replay_scheme='prioritized',
               alpha_exponent=0.5,
               beta_exponent=0.5,
               tf_device='/cpu:*',
               use_staging=True,
               optimizer=tf.train.AdamOptimizer(
                   learning_rate=0.00025, epsilon=0.0003125),
               summary_writer=None,
               summary_writing_frequency=2500,
               replay_forgetting='default',
               sample_newest_immediately=False,
               oldest_policy_in_buffer=250000):
    """Initializes the agent and constructs the components of its graph.

    Args:
      sess: `tf.Session`, for executing ops.
      num_actions: int, number of actions the agent can take at any state.
      observation_shape: tuple of ints or an int. If single int, the observation
        is assumed to be a 2D square.
      observation_dtype: tf.DType, specifies the type of the observations. Note
        that if your inputs are continuous, you should set this to tf.float32.
      stack_size: int, number of frames to use in state stack.
      network: function expecting three parameters:
        (num_actions, network_type, state). This function will return the
        network_type object containing the tensors output by the network.
        See dopamine.discrete_domains.legacy_networks.rainbow_network as
        an example.
      num_atoms: int, the number of buckets of the value function distribution.
      vmax: float, the value distribution support is [-vmax, vmax].
      gamma: float, discount factor with the usual RL meaning.
      update_horizon: int, horizon at which updates are performed, the 'n' in
        n-step update.
      min_replay_history: int, number of transitions that should be experienced
        before the agent begins training its value function.
      update_period: int, period between DQN updates.
      target_update_period: int, update period for the target network.
      epsilon_fn: function expecting 4 parameters:
        (decay_period, step, warmup_steps, epsilon). This function should return
        the epsilon value used for exploration during training.
      epsilon_train: float, the value to which the agent's epsilon is eventually
        decayed during training.
      epsilon_eval: float, epsilon used when evaluating the agent.
      epsilon_decay_period: int, length of the epsilon decay schedule.
      replay_scheme: str, 'prioritized' or 'uniform', the sampling scheme of the
        replay memory.
      alpha_exponent: float, alpha hparam in prioritized experience replay.
      beta_exponent: float, beta hparam in prioritized experience replay.
      tf_device: str, Tensorflow device on which the agent's graph is executed.
      use_staging: bool, when True use a staging area to prefetch the next
        training batch, speeding training up by about 30%.
      optimizer: `tf.train.Optimizer`, for training the value function.
      summary_writer: SummaryWriter object for outputting training statistics.
        Summary writing disabled if set to None.
      summary_writing_frequency: int, frequency with which summaries will be
        written. Lower values will result in slower training.
      replay_forgetting:  str, What strategy to employ for forgetting old
        trajectories.  One of ['default', 'elephant'].
      sample_newest_immediately: bool, when True, immediately trains on the
        newest transition instead of using the max_priority hack.
      oldest_policy_in_buffer: int, the number of gradient updates of the oldest
        policy that has added data to the replay buffer.
    """
    # We need this because some tools convert round floats into ints.
    vmax = float(vmax)
    self._num_atoms = num_atoms
    self._support = tf.linspace(-vmax, vmax, num_atoms)
    self._replay_scheme = replay_scheme
    self._alpha_exponent = alpha_exponent
    self._beta_exponent = beta_exponent
    self._replay_forgetting = replay_forgetting
    self._sample_newest_immediately = sample_newest_immediately
    self._oldest_policy_in_buffer = oldest_policy_in_buffer
    # TODO(b/110897128): Make agent optimizer attribute private.
    self.optimizer = optimizer

    dqn_agent.DQNAgent.__init__(
        self,
        sess=sess,
        num_actions=num_actions,
        observation_shape=observation_shape,
        observation_dtype=observation_dtype,
        stack_size=stack_size,
        network=network,
        gamma=gamma,
        update_horizon=update_horizon,
        min_replay_history=min_replay_history,
        update_period=update_period,
        target_update_period=target_update_period,
        epsilon_fn=epsilon_fn,
        epsilon_train=epsilon_train,
        epsilon_eval=epsilon_eval,
        epsilon_decay_period=epsilon_decay_period,
        tf_device=tf_device,
        use_staging=use_staging,
        optimizer=self.optimizer,
        summary_writer=summary_writer,
        summary_writing_frequency=summary_writing_frequency)
    tf.logging.info('\t replay_scheme: %s', replay_scheme)
    tf.logging.info('\t alpha_exponent: %f', alpha_exponent)
    tf.logging.info('\t beta_exponent: %f', beta_exponent)
    tf.logging.info('\t replay_forgetting: %s', replay_forgetting)
    tf.logging.info('\t oldest_policy_in_buffer: %s', oldest_policy_in_buffer)
    self.episode_return = 0.0

    # We maintain attributes to record online and target network updates.
    self._online_network_updates = 0
    self._target_network_updates = 0

    # pylint: disable=protected-access
    buffer_to_oldest_policy_ratio = (
        float(self._replay.memory._replay_capacity) /
        float(self._oldest_policy_in_buffer))
    # pylint: enable=protected-access

    # This ratio is used to adjust other attributes that are explicitly tied to
    # agent steps.  When designed, the Dopamine agents assumed that the replay
    # ratio remains fixed, and therefore elements such as epsilon_decay_period
    # will not be set appropriately without adjustment.
    self._gin_param_multiplier = (
        buffer_to_oldest_policy_ratio / self.update_period)
    tf.logging.info('\t self._gin_param_multiplier: %f',
                    self._gin_param_multiplier)

    # Adjust agent attributes that are tied to the agent steps.
    self.update_period = self.update_period * self._gin_param_multiplier
    self.target_update_period = (
        self.target_update_period * self._gin_param_multiplier)
    self.epsilon_decay_period = int(self.epsilon_decay_period *
                                    self._gin_param_multiplier)

    if self._replay_scheme == 'prioritized':
      if self._replay_forgetting == 'elephant':
        raise NotImplementedError
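The replay-ratio adjustment above is plain arithmetic; with hypothetical (Dopamine-default-style) numbers it works out as follows:

replay_capacity = 1_000_000          # hypothetical buffer capacity
oldest_policy_in_buffer = 250_000
update_period = 4

buffer_to_oldest_policy_ratio = replay_capacity / oldest_policy_in_buffer  # 4.0
gin_param_multiplier = buffer_to_oldest_policy_ratio / update_period       # 1.0
print(gin_param_multiplier)  # 1.0: these defaults leave the gin parameters unscaled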
Example #17
    def __init__(self,
                 sess,
                 num_actions,
                 observation_shape=dqn_agent.NATURE_DQN_OBSERVATION_SHAPE,
                 observation_dtype=dqn_agent.NATURE_DQN_DTYPE,
                 stack_size=dqn_agent.NATURE_DQN_STACK_SIZE,
                 network=atari_lib.rainbow_network,
                 num_atoms=51,
                 vmax=10.,
                 gamma=0.99,
                 update_horizon=1,
                 min_replay_history=20000,
                 update_period=4,
                 target_update_period=8000,
                 epsilon_fn=dqn_agent.linearly_decaying_epsilon,
                 epsilon_train=0.01,
                 epsilon_eval=0.001,
                 epsilon_decay_period=250000,
                 replay_scheme='prioritized',
                 tf_device='/cpu:*',
                 use_staging=True,
                 optimizer=tf.train.AdamOptimizer(learning_rate=0.00025,
                                                  epsilon=0.0003125),
                 summary_writer=None,
                 summary_writing_frequency=500):
        """Initializes the agent and constructs the components of its graph.

        Args:
          sess: `tf.Session`, for executing ops.
          num_actions: int, number of actions the agent can take at any state.
          observation_shape: tuple of ints or an int. If single int, the observation
            is assumed to be a 2D square.
          observation_dtype: tf.DType, specifies the type of the observations. Note
            that if your inputs are continuous, you should set this to tf.float32.
          stack_size: int, number of frames to use in state stack.
          network: tf.Keras.Model, expects four parameters:
            (num_actions, num_atoms, support, network_type).  This class is used to
            generate network instances that are used by the agent. Each
            instantiation would have different set of variables. See
            dopamine.discrete_domains.atari_lib.RainbowNetwork as an example.
          num_atoms: int, the number of buckets of the value function distribution.
          vmax: float, the value distribution support is [-vmax, vmax].
          gamma: float, discount factor with the usual RL meaning.
          update_horizon: int, horizon at which updates are performed, the 'n' in
            n-step update.
          min_replay_history: int, number of transitions that should be experienced
            before the agent begins training its value function.
          update_period: int, period between DQN updates.
          target_update_period: int, update period for the target network.
          epsilon_fn: function expecting 4 parameters:
            (decay_period, step, warmup_steps, epsilon). This function should return
            the epsilon value used for exploration during training.
          epsilon_train: float, the value to which the agent's epsilon is eventually
            decayed during training.
          epsilon_eval: float, epsilon used when evaluating the agent.
          epsilon_decay_period: int, length of the epsilon decay schedule.
          replay_scheme: str, 'prioritized' or 'uniform', the sampling scheme of the
            replay memory.
          tf_device: str, Tensorflow device on which the agent's graph is executed.
          use_staging: bool, when True use a staging area to prefetch the next
            training batch, speeding training up by about 30%.
          optimizer: `tf.train.Optimizer`, for training the value function.
          summary_writer: SummaryWriter object for outputting training statistics.
            Summary writing disabled if set to None.
          summary_writing_frequency: int, frequency with which summaries will be
            written. Lower values will result in slower training.
        """
        # We need this because some tools convert round floats into ints.
        vmax = float(vmax)
        self._num_atoms = num_atoms
        self._support = tf.linspace(-vmax, vmax, num_atoms)
        self._replay_scheme = replay_scheme
        # TODO(b/110897128): Make agent optimizer attribute private.
        self.optimizer = optimizer

        dqn_agent.DQNAgent.__init__(
            self,
            sess=sess,
            num_actions=num_actions,
            observation_shape=observation_shape,
            observation_dtype=observation_dtype,
            stack_size=stack_size,
            network=network,
            gamma=gamma,
            update_horizon=update_horizon,
            min_replay_history=min_replay_history,
            update_period=update_period,
            target_update_period=target_update_period,
            epsilon_fn=epsilon_fn,
            epsilon_train=epsilon_train,
            epsilon_eval=epsilon_eval,
            epsilon_decay_period=epsilon_decay_period,
            tf_device=tf_device,
            use_staging=use_staging,
            optimizer=self.optimizer,
            summary_writer=summary_writer,
            summary_writing_frequency=summary_writing_frequency)
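The support built here is the fixed atom grid of the C51 value distribution; with the signature's defaults (vmax=10, num_atoms=51) the atoms are spaced 0.4 apart:

import tensorflow as tf

support = tf.linspace(-10.0, 10.0, 51)
print(support[:3])              # [-10.   -9.6  -9.2]
print(support[1] - support[0])  # 0.4, the atom spacing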
Ejemplo n.º 18
0
 def get_cluster_centroids(self):
     weight_min = tf.reduce_min(self.weights)
     weight_max = tf.reduce_max(self.weights)
     cluster_centroids = tf.linspace(weight_min, weight_max,
                                     self.number_of_clusters)
     return cluster_centroids
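Usage sketch, same arithmetic in NumPy: for weights spanning [-3, 3] and five clusters, the linear initializer places centroids at even intervals.

import numpy as np

weights = np.array([-3.0, -1.0, 0.5, 3.0])
centroids = np.linspace(weights.min(), weights.max(), 5)
print(centroids)  # [-3.  -1.5  0.   1.5  3. ]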
Example #19
def spherical_cubevol_resample(vol, env2ref, cube_center, side_length, n_phi,
                               n_theta, n_r):
  """Resample cube volume onto spherical coordinates centered at target point.

  Args:
    vol: [B,H,W,D,C], input volume
    env2ref: [B,4,4], relative pose transformation (transform env to ref)
    cube_center: [B,3], [x,y,z] coordinates for center of cube volume
    side_length: side length of cube
    n_phi: number of samples along vertical spherical coordinate dim
    n_theta: number of samples along horizontal spherical coordinate dim
    n_r: number of samples along radius spherical coordinate dim

  Returns:
    resampled: [B, n_phi, n_theta, n_r, C], resampled spherical volume
    r_vals: [n_r], radii used for the spherical sampling
  """

  batch_size = tf.shape(vol)[0]
  height = tf.shape(vol)[1]

  cube_res = tf.to_float(height)

  # create spherical coordinates
  b_vals = tf.to_float(tf.range(batch_size))
  phi_vals = tf.linspace(0.0, np.pi, n_phi)
  theta_vals = tf.linspace(1.5 * np.pi, -0.5 * np.pi, n_theta)

  # compute radii to use
  x_vals = tf.linspace(-side_length / 2.0, side_length / 2.0,
                       tf.to_int32(cube_res))
  y_vals = tf.linspace(-side_length / 2.0, side_length / 2.0,
                       tf.to_int32(cube_res))
  z_vals = tf.linspace(side_length / 2.0, -side_length / 2.0,
                       tf.to_int32(cube_res))
  y_c, x_c, z_c = tf.meshgrid(y_vals, x_vals, z_vals, indexing='ij')
  x_c = x_c + cube_center[:, 0, tf.newaxis, tf.newaxis, tf.newaxis]
  y_c = y_c + cube_center[:, 1, tf.newaxis, tf.newaxis, tf.newaxis]
  z_c = z_c + cube_center[:, 2, tf.newaxis, tf.newaxis, tf.newaxis]
  cube_coords = tf.stack([x_c, y_c, z_c], axis=4)
  min_r = tf.reduce_min(
      tf.norm(
          cube_coords -
          env2ref[:, :3, 3][:, tf.newaxis, tf.newaxis, tf.newaxis, :],
          axis=4),
      axis=[0, 1, 2, 3])  # side_length / cube_res
  max_r = tf.reduce_max(
      tf.norm(
          cube_coords -
          env2ref[:, :3, 3][:, tf.newaxis, tf.newaxis, tf.newaxis, :],
          axis=4),
      axis=[0, 1, 2, 3])

  r_vals = tf.linspace(max_r, min_r, n_r)
  b, phi, theta, r = tf.meshgrid(
      b_vals, phi_vals, theta_vals, r_vals,
      indexing='ij')  # currently in env frame

  # transform spherical coordinates into cartesian
  # (currently in env frame, z points forwards)
  x = r * tf.cos(theta) * tf.sin(phi)
  z = r * tf.sin(theta) * tf.sin(phi)
  y = r * tf.cos(phi)

  # transform coordinates into ref frame
  sphere_coords = tf.stack([x, y, z, tf.ones_like(x)], axis=-1)[Ellipsis, tf.newaxis]
  sphere_coords_ref = tfmm(env2ref, sphere_coords)
  x = sphere_coords_ref[Ellipsis, 0, 0]
  y = sphere_coords_ref[Ellipsis, 1, 0]
  z = sphere_coords_ref[Ellipsis, 2, 0]

  # transform coordinates into vol indices
  x_inds = (x - cube_center[:, 0, tf.newaxis, tf.newaxis, tf.newaxis] +
            side_length / 2.0) * ((cube_res - 1) / side_length)
  y_inds = -(y - cube_center[:, 1, tf.newaxis, tf.newaxis, tf.newaxis] -
             side_length / 2.0) * ((cube_res - 1) / side_length)
  z_inds = -(z - cube_center[:, 2, tf.newaxis, tf.newaxis, tf.newaxis] -
             side_length / 2.0) * ((cube_res - 1) / side_length)
  sphere_coords_inds = tf.stack([b, x_inds, y_inds, z_inds], axis=-1)

  # trilinear interpolation gather from volume
  # interpolate pre-multiplied RGBAs, then un-pre-multiply
  vol_alpha = tf.clip_by_value(vol[Ellipsis, -1:], 0.0, 1.0)
  vol_channels_p = vol[Ellipsis, :-1] * vol_alpha
  vol_p = tf.concat([vol_channels_p, vol_alpha], axis=-1)

  resampled_p = sampling.trilerp_gather(vol_p, sphere_coords_inds)

  resampled_alpha = resampled_p[Ellipsis, -1:]
  resampled_channels = resampled_p[Ellipsis, :-1] / (resampled_alpha + 1e-8)
  resampled = tf.concat([resampled_channels, resampled_alpha], axis=-1)

  return resampled, r_vals
Example #20
def run_sobel(logdir, verbose=False):
    """Run a Sobel edge detection demonstration.

    See the summary description for more details.

    Arguments:
      logdir: Directory into which to write event logs.
      verbose: Boolean; whether to log any output.
    """
    if verbose:
        logger.info("--- Starting run: sobel")

    tf.reset_default_graph()
    tf.set_random_seed(0)

    image = get_image(verbose=verbose)
    kernel_radius = tf.placeholder(shape=(), dtype=tf.int32)

    with tf.name_scope("horizontal_kernel"):
        kernel_side_length = kernel_radius * 2 + 1
        # Drop off influence for pixels further away from the center.
        weighting_kernel = 1.0 - tf.abs(
            tf.linspace(-1.0, 1.0, num=kernel_side_length))
        differentiation_kernel = tf.linspace(-1.0, 1.0, num=kernel_side_length)
        horizontal_kernel = tf.matmul(
            tf.expand_dims(weighting_kernel, 1),
            tf.expand_dims(differentiation_kernel, 0),
        )

    with tf.name_scope("vertical_kernel"):
        vertical_kernel = tf.transpose(a=horizontal_kernel)

    float_image = tf.cast(image, tf.float32)
    dx = convolve(float_image, horizontal_kernel, name="convolve_dx")
    dy = convolve(float_image, vertical_kernel, name="convolve_dy")
    gradient_magnitude = tf.norm(tensor=[dx, dy],
                                 axis=0,
                                 name="gradient_magnitude")
    with tf.name_scope("normalized_gradient"):
        normalized_gradient = gradient_magnitude / tf.reduce_max(
            input_tensor=gradient_magnitude)
    with tf.name_scope("output_image"):
        output_image = tf.cast(255 * normalized_gradient, tf.uint8)

    summ = image_summary.op(
        "sobel",
        tf.stack([output_image]),
        display_name="Sobel edge detection",
        description=(
            "Demonstration of [Sobel edge detection]. The step "
            "parameter adjusts the radius of the kernel. "
            "The kernel can be of arbitrary size, and considers "
            "nearby pixels with \u2113\u2082-linear falloff.\n\n"
            # (that says ``$\ell_2$-linear falloff'')
            "Edge detection is done on a per-channel basis, so "
            "you can observe which edges are &ldquo;mostly red "
            "edges,&rdquo; for instance.\n\n"
            "For practical edge detection, a small kernel "
            "(usually not more than more than *r*=2) is best.\n\n"
            "[Sobel edge detection]: %s\n\n"
            "%s" %
            ("https://en.wikipedia.org/wiki/Sobel_operator", IMAGE_CREDIT)),
    )

    with tf.Session() as sess:
        sess.run(image.initializer)
        writer = tf.summary.FileWriter(os.path.join(logdir, "sobel"))
        writer.add_graph(sess.graph)
        for step in xrange(8):
            if verbose:
                logger.info("--- sobel: step: %s" % step)
            feed_dict = {kernel_radius: step}
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = config_pb2.RunMetadata()
            s = sess.run(
                summ,
                feed_dict=feed_dict,
                options=run_options,
                run_metadata=run_metadata,
            )
            writer.add_summary(s, global_step=step)
            writer.add_run_metadata(run_metadata, "step_%04d" % step)
        writer.close()
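To see what the kernel construction yields, here is the radius-1 case in NumPy: the weighting vector [0, 1, 0] times the differentiation vector [-1, 0, 1] gives a single-row central-difference kernel; larger radii fill in the linear-falloff rows.

import numpy as np

kernel_radius = 1
side = kernel_radius * 2 + 1
weighting = 1.0 - np.abs(np.linspace(-1.0, 1.0, side))  # [0. 1. 0.]
differentiation = np.linspace(-1.0, 1.0, side)          # [-1.  0.  1.]
print(np.outer(weighting, differentiation))
# [[ 0.  0.  0.]
#  [-1.  0.  1.]
#  [ 0.  0.  0.]]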
Example #21
def _get_pixel_grid(axis, width):
  """Returns an array of length `width` containing pixel coordinates."""
  if axis == Axis.x:
    return tf.linspace(-1.0, 1.0, width)  # Left is negative, right is positive.
  elif axis == Axis.y:
    return tf.linspace(1.0, -1.0, width)  # Top is positive, bottom is negative.
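Usage sketch: for width 3 the two axes read in opposite directions, matching the comments above (x grows rightward, y grows upward even though pixel rows grow downward).

import tensorflow as tf

print(tf.linspace(-1.0, 1.0, 3))  # x axis: [-1.  0.  1.]
print(tf.linspace(1.0, -1.0, 3))  # y axis: [ 1.  0. -1.]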