def testSecondOrderGradientCalculation(self):
    """Checks the gradient and weight snapshots kept for second-order pruning.

    The test snapshots the weights twice, doubles the weights, runs the
    gradient update op and then asserts that

      * the first tracked gradient equals 0.5 times the L2-normalized initial
        weights, and
      * the "old" and "old old" weight snapshots are identical.
    """
    test_spec = ",".join((
        "prune_option=second_order_gradient",
        "gradient_decay_rate=0.5",
    ))
    hparams = pruning.get_pruning_hparams().parse(test_spec)
    tf.logging.info(hparams)

    weights = tf.Variable(tf.linspace(1.0, 10.0, 10), name="weights")
    # The returned masked tensor is not needed by this test.
    pruning.apply_mask(weights, prune_option="second_order_gradient")

    pruner = pruning.Pruning(hparams)
    snapshot_old_op = pruner.old_weight_update_op()
    snapshot_old_old_op = pruner.old_old_weight_update_op()
    update_gradient_op = pruner.gradient_update_op()

    with self.cached_session() as session:
        tf.global_variables_initializer().run()
        # Order matters: both snapshots are taken before the weights change.
        session.run(snapshot_old_op)
        session.run(snapshot_old_old_op)
        doubled_weights = tf.math.scalar_mul(2.0, weights)
        session.run(tf.assign(weights, doubled_weights))
        session.run(update_gradient_op)

        old_weight = pruning.get_old_weights()[0]
        old_old_weight = pruning.get_old_old_weights()[0]
        gradient = pruning.get_gradients()[0]

        expected_gradient = tf.math.scalar_mul(
            0.5, tf.nn.l2_normalize(tf.linspace(1.0, 10.0, 10)))
        self.assertAllEqual(gradient.eval(), expected_gradient.eval())
        self.assertAllEqual(old_weight.eval(), old_old_weight.eval())
def inv_depths(self, start_depth, end_depth, num_depths):
    """Samples depths that are evenly spaced in inverse-depth space.

    Args:
      start_depth: The first depth.
      end_depth: The last depth.
      num_depths: The total number of depths to create. start_depth and
        end_depth are always included; the remaining depths are interpolated
        between them in inverse-depth space.

    Returns:
      The depths, beginning at end_depth and finishing at start_depth. When
      start_depth < end_depth this is descending order (furthest first), which
      is useful for back-to-front compositing.
    """
    end_inverse = 1.0 / end_depth
    start_inverse = 1.0 / start_depth
    inverse_depths = tf.linspace(end_inverse, start_inverse, num_depths)
    return 1.0 / inverse_depths
def when_nonsingular():
    """Builds the histogram rows for data that spans a non-zero range.

    Reads `data`, `min_`, `max_`, `range_` and `bucket_count` from the
    enclosing scope.

    Returns:
      The transpose of the stacked `[left_edges, right_edges, bucket_counts]`
      tensors, i.e. one `[left_edge, right_edge, count]` row per bucket
      (assumes `data` is a flat tensor -- TODO confirm against the caller).
    """
    bucket_width = range_ / tf.cast(bucket_count, tf.float64)
    offsets = data - min_
    bucket_indices = tf.cast(tf.floor(offsets / bucket_width), dtype=tf.int32)
    # An index equal to bucket_count (e.g. for the largest value, if range_ is
    # max_ - min_) is folded into the last bucket.
    clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
    # Use float64 instead of the float32 default so that tf.reduce_sum stays
    # exact when more than 2^24 individual `1.0` values fall into one bucket.
    # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
    one_hots = tf.one_hot(clamped_indices, depth=bucket_count, dtype=tf.float64)
    bucket_counts = tf.cast(
        tf.reduce_sum(input_tensor=one_hots, axis=0),
        dtype=tf.float64,
    )
    # bucket_count + 1 edges delimit bucket_count buckets.
    edges = tf.linspace(min_, max_, bucket_count + 1)
    left_edges = edges[:-1]
    right_edges = edges[1:]
    return tf.transpose(
        a=tf.stack([left_edges, right_edges, bucket_counts]))
def linear_lookup(phase: tf.Tensor, wavetables: tf.Tensor) -> tf.Tensor:
    """Reads wavetables at the given phase using linear interpolation.

    Args:
      phase: The instantaneous phase of the base oscillator, ranging from 0 to
        1.0. This gives the position to look up in the wavetable. Shape
        [batch_size, n_samples, 1]; a 2-D input gets a trailing axis added.
      wavetables: Wavetables to be read from on lookup. Shape [batch_size,
        n_samples, n_wavetable] or [batch_size, n_wavetable].

    Returns:
      The audio obtained by linearly interpolating the wavetables at each point
      in time. Shape [batch_size, n_samples].
    """
    phase = tf_float32(phase)
    wavetables = tf_float32(wavetables)

    # Insert the time axis when the wavetables are [batch, n_wavetable].
    if len(wavetables.shape) == 2:
        wavetables = wavetables[:, tf.newaxis, :]

    # Insert the wavetable axis when the phase is [batch, time].
    if len(phase.shape) == 2:
        phase = phase[:, :, tf.newaxis]

    # Repeat the first sample at the end so that interpolation wraps smoothly
    # from the last wavetable point back to the first.
    first_sample = wavetables[..., 0:1]
    wavetables = tf.concat([wavetables, first_sample], axis=-1)
    n_wavetable = int(wavetables.shape[-1])

    # Phase position of every wavetable point, broadcastable against
    # [batch, time, n_wavetable].
    table_phase = tf.linspace(0.0, 1.0, n_wavetable)
    table_phase = table_phase[tf.newaxis, tf.newaxis, :]

    # Distance from the oscillator phase to each point, in wavetable samples.
    distance = tf.abs(phase - table_phase)
    distance = distance * (n_wavetable - 1)

    # Only the nearest neighbours are closer than one sample, so every other
    # point receives a zero weight.
    weights = tf.nn.relu(1.0 - distance)

    # Sum the weighted wavetable points at each timestep.
    return tf.reduce_sum(weights * wavetables, axis=-1)
def testPartitionedVariableMasking(self):
    """Checks that masking a partitioned variable at 50% sparsity keeps half.

    A 100-element weight variable is created under a variable scope with an
    axis-size partitioner; after one mask update with sparsity 0.5, exactly 50
    masked weights must be non-zero.
    """
    partitioner = tf.variable_axis_size_partitioner(40)
    with self.cached_session() as session:
        with tf.variable_scope("", partitioner=partitioner):
            sparsity = tf.Variable(0.5, name="Sparsity")
            weights = tf.get_variable(
                "weights", initializer=tf.linspace(1.0, 100.0, 100))
            masked_weights = pruning.apply_mask(
                weights, scope=tf.get_variable_scope())

        pruner = pruning.Pruning(sparsity=sparsity)
        pruner._spec.threshold_decay = 0.0
        mask_update_op = pruner.mask_update_op()

        tf.global_variables_initializer().run()
        # Evaluate once before the update; the value is overwritten below.
        masked_weights_val = masked_weights.eval()
        session.run(mask_update_op)
        masked_weights_val = masked_weights.eval()

        nonzero_count = np.count_nonzero(masked_weights_val)
        self.assertAllEqual(nonzero_count, 50)
def get_harmonic_frequencies(frequencies: tf.Tensor,
                             n_harmonics: int) -> tf.Tensor:
    """Multiplies the fundamental frequency by the integers 1..n_harmonics.

    Args:
      frequencies: Fundamental frequencies (Hz). Shape [batch_size, :, 1].
      n_harmonics: Number of harmonics.

    Returns:
      harmonic_frequencies: Oscillator frequencies (Hz).
        Shape [batch_size, :, n_harmonics].
    """
    fundamental = tf_float32(frequencies)
    # Ratios 1, 2, ..., n_harmonics laid out along the last axis.
    harmonic_numbers = tf.linspace(1.0, float(n_harmonics), int(n_harmonics))
    harmonic_numbers = harmonic_numbers[tf.newaxis, tf.newaxis, :]
    return fundamental * harmonic_numbers
def numerical_base_partition_function(alpha):
    """Numerically approximate the partition function Z(alpha).

    Args:
      alpha: Value forwarded as the second argument of `general.lossfun`.

    Returns:
      The trapezoid-rule estimate of the integral of exp(-loss) over
      [-x_max, x_max].
    """
    # Generate `num_samples` values in [-x_max, x_max], with more samples
    # near the origin as `power` is set to larger values.
    num_samples = 2**24 + 1  # We want an odd value so that 0 gets sampled.
    x_max = 10**10
    power = 6
    t = tf.linspace(
        tf.constant(-1, tf.float64), tf.constant(1, tf.float64), num_samples)
    # Sign-preserving power warp: keeps t in [-1, 1] but clusters it near 0.
    t = tf.sign(t) * tf.abs(t)**power
    x = t * x_max

    # Compute losses for the values, then exponentiate the negative losses and
    # integrate with the trapezoid rule to get the partition function.
    losses = general.lossfun(x, alpha, np.float64(1))
    y = tf.math.exp(-losses)
    partition = tf.reduce_sum((y[1:] + y[:-1]) * (x[1:] - x[:-1])) / 2.
    return partition
def create_centered_identity_transformation_field(shape, spacings):
    """Builds a 2D or 3D identity transformation field centered on zero.

    Args:
      shape: 2- or 3-element list. The shape of the transformation field.
      spacings: 2- or 3-element list. The spacings of the transformation field.

    Returns:
      2D case: 3-D Tensor (x0, x1, comp) describing a 2D vector field
      3D case: 4-D Tensor (x0, x1, x2, comp) describing a 3D vector field
    """

    def axis_coordinates(axis, size):
        # Evenly spaced coordinates, symmetric about zero, for one axis.
        spacing = spacings[axis]
        return tf.linspace(-(size - 1) / 2 * spacing,
                           (size - 1) / 2 * spacing, size)

    coords = [axis_coordinates(axis, size) for axis, size in enumerate(shape)]

    # The stacked meshgrid has the component axis first; this permutation
    # (1, 2, ..., n, 0) moves it to the end.
    permutation = np.roll(np.arange(len(coords) + 1), -1)
    grids = tf.meshgrid(*coords, indexing="ij")
    return tf.transpose(grids, permutation)
def format_network_input(self, ref_image, psv_src_images, ref_pose,
                         psv_src_poses, planes, intrinsics):
    """Format the network input.

    Args:
      ref_image: reference source image [batch, height, width, 3]
      psv_src_images: stack of source images (excluding the ref image)
        [batch, height, width, 3*(num_source -1)]
      ref_pose: reference world-to-camera pose (where PSV is constructed)
        [batch, 4, 4]
      psv_src_poses: input poses (world to camera) [batch, num_source-1, 4, 4]
      planes: list of scalar depth values for each plane
      intrinsics: camera intrinsics [batch, 3, 3]

    Returns:
      net_input: [batch, height, width, #planes, channels]. The channel axis
        concatenates one plane sweep volume per source image, the tiled
        reference image and one normalized plane-index channel (presumably
        num_source*3 + 1 channels, assuming `pj.plane_sweep` keeps the three
        image channels -- TODO confirm).
    """
    _, num_psv_source, _, _ = psv_src_poses.get_shape().as_list()
    num_planes = tf.shape(planes)[0]

    # The reference pose does not depend on the source index, so invert it
    # once instead of adding one inverse op per source view.
    ref_pose_inv = tf.matrix_inverse(ref_pose)

    net_input = []
    for i in range(num_psv_source):
        # Pose of source view i relative to the reference camera.
        curr_pose = tf.matmul(psv_src_poses[:, i], ref_pose_inv)
        curr_image = psv_src_images[:, :, :, i * 3:(i + 1) * 3]
        curr_psv = pj.plane_sweep(curr_image, planes, curr_pose, intrinsics)
        net_input.append(curr_psv)
    net_input = tf.concat(net_input, axis=4)

    # Repeat the reference image along the plane axis and append it.
    ref_img_stack = tf.tile(
        tf.expand_dims(ref_image, 3), [1, 1, 1, num_planes, 1])
    net_input = tf.concat([net_input, ref_img_stack], axis=4)

    # Append normalized plane indices
    normalized_disp_inds = tf.reshape(
        tf.linspace(0.0, 1.0, num_planes), [1, 1, 1, num_planes, 1])
    sh = tf.shape(net_input)
    normalized_disp_inds_stack = tf.tile(normalized_disp_inds,
                                         [1, sh[1], sh[2], 1, 1])
    net_input = tf.concat([net_input, normalized_disp_inds_stack], axis=4)

    return net_input
def when_nonsingular():
    """Assembles the histogram rows when the data spans a non-zero range.

    Reads `data`, `min_`, `max_`, `range_` and `bucket_count` from the
    enclosing scope.

    Returns:
      The transpose of the stacked `[left_edges, right_edges, bucket_counts]`
      tensors, i.e. one `[left_edge, right_edge, count]` row per bucket
      (assumes `data` is a flat tensor -- TODO confirm against the caller).
    """
    # bucket_count + 1 edges delimit bucket_count buckets.
    edges = tf.linspace(min_, max_, bucket_count + 1)

    width = range_ / tf.cast(bucket_count, tf.float64)
    raw_indices = tf.cast(tf.floor((data - min_) / width), dtype=tf.int32)
    # Indices past the end are folded into the last bucket.
    last_bucket = bucket_count - 1
    indices = tf.minimum(raw_indices, last_bucket)

    # Use float64 instead of float32 to avoid accumulating floating point error
    # later in tf.reduce_sum when summing more than 2^24 individual `1.0` values.
    # See https://github.com/tensorflow/tensorflow/issues/51419 for details.
    one_hots = tf.one_hot(indices, depth=bucket_count, dtype=tf.float64)
    counts = tf.cast(
        tf.reduce_sum(input_tensor=one_hots, axis=0),
        dtype=tf.float64,
    )

    rows = tf.stack([edges[:-1], edges[1:], counts])
    return tf.transpose(a=rows)
def affine_grid_generator(height, width, theta):
    """Returns a sampling grid produced by an affine transformation.

    When the grid is used with the bilinear sampler on the input feature map,
    the output feature map is an affine transformation [1] of the input
    feature map.

    Input
    -----
    - height: desired height of grid/output. Used
      to downsample or upsample.

    - width: desired width of grid/output. Used
      to downsample or upsample.

    - theta: affine transform matrices of shape (num_batch, 2, 3).
      For each image in the batch, we have 6 theta parameters of
      the form (2x3) that define the affine transformation T.

    Returns
    -------
    - sampling grid of shape (num_batch, 2, H, W). The 2nd dimension has
      2 components: (x, y), the sampling points of the original image for
      each point in the target image. The untransformed x coordinates run
      from 0 to `width` and the y coordinates from 0 to `height` (they are
      not normalized to (-1, 1)).

    Note
    ----
    [1]: the affine transformation allows cropping, translation,
         and isotropic scaling.
    """
    num_batch = tf.shape(theta)[0]

    # Base 2D grid: `width` (resp. `height`) evenly spaced samples in
    # [0, width] (resp. [0, height]).
    x = tf.linspace(0.0, 1.0, width) * tf.cast(width, tf.float32)
    y = tf.linspace(0.0, 1.0, height) * tf.cast(height, tf.float32)
    x_t, y_t = tf.meshgrid(x, y)

    # Flatten and switch to homogeneous coordinates [x, y, 1].
    x_flat = tf.reshape(x_t, [-1])
    y_flat = tf.reshape(y_t, [-1])
    homogeneous = tf.stack([x_flat, y_flat, tf.ones_like(x_flat)])

    # Repeat the grid once per batch element.
    sampling_grid = tf.tile(
        tf.expand_dims(homogeneous, axis=0), tf.stack([num_batch, 1, 1]))

    # matmul needs both operands as float32.
    theta = tf.cast(theta, 'float32')
    sampling_grid = tf.cast(sampling_grid, 'float32')

    # Batched multiply: (num_batch, 2, 3) x (num_batch, 3, H*W).
    batch_grids = tf.matmul(theta, sampling_grid)

    # (num_batch, 2, H*W) -> (num_batch, 2, H, W)
    return tf.reshape(batch_grids, [num_batch, 2, height, width])
def infer_mpi(self,
              raw_src_images,
              raw_ref_image,
              ref_pose,
              src_poses,
              intrinsics,
              num_mpi_planes,
              mpi_planes,
              run_patched=False,
              patch_ind=np.array([0, 0]),
              patchsize=np.array([256, 256]),
              outsize=np.array([128, 128])):
    """Builds the graph that predicts and then refines an MPI.

    Args:
      raw_src_images: stack of source images [batch, height, width, 3*#source]
      raw_ref_image: reference image [batch, height, width, 3]
      ref_pose: reference frame pose (world to camera) [batch, 4, 4]
      src_poses: source frame poses (world to camera) [batch, #source, 4, 4]
      intrinsics: camera intrinsics [batch, 3, 3]
      num_mpi_planes: number of mpi planes to predict
      mpi_planes: list of plane depths
      run_patched: whether to only infer MPI for patches of PSV (inference only)
      patch_ind: patch index for infer MPI inference
      patchsize: spatial patch size for MPI inference
      outsize: size of central portion to keep for patched inference

    Returns:
      A dict of output tensors with keys "rgba_layers", "rgba_layers_refine",
      "refine_input_mpi", "stuff_behind", "flow_vecs" and "psv". Every value is
      also added to the "outputs" graph collection.
    """
    with tf.name_scope("preprocessing"):
        src_images = self.preprocess_image(raw_src_images)
        ref_image = self.preprocess_image(raw_ref_image)

    with tf.name_scope("format_network_input"):
        # WARNING: we assume the first src image/pose is the reference, so it
        # is dropped from the source stack and the source poses.
        net_input = self.format_network_input(
            ref_image, src_images[:, :, :, 3:], ref_pose, src_poses[:, 1:],
            mpi_planes, intrinsics)

    with tf.name_scope("layer_prediction"):
        # The network directly outputs the color image at each MPI plane.
        chout = 4  # Number of output channels, RGBA

        if not run_patched:
            # Generate entire MPI (training and inference, but takes more
            # memory)
            print("first step MPI prediction")
            rgba_layers, _ = ed_3d_net(net_input, chout)
        else:
            # Patch the PSV spatially, with buffer, and generate MPI patch
            # Only for inference (not implemented for training)
            buffersize = (patchsize - outsize) // 2
            padding = [[0, 0],
                       [buffersize[0], buffersize[0]],
                       [buffersize[1], buffersize[1]],
                       [0, 0],
                       [0, 0]]
            net_input_pad = tf.pad(net_input, padding)
            patch_start = patch_ind * outsize
            patch_end = patch_start + patchsize
            net_input_patch = net_input_pad[:,
                                            patch_start[0]:patch_end[0],
                                            patch_start[1]:patch_end[1],
                                            :, :]
            rgba_layers, _ = ed_3d_net(net_input_patch, chout)

        color_layers = rgba_layers[:, :, :, :, :-1]
        # Rescale alphas to (0, 1)
        alpha_layers = (rgba_layers[:, :, :, :, -1:] + 1.)/2.
        rgba_layers = tf.concat([color_layers, alpha_layers], axis=4)

        print("refining MPI")
        transmittance = self.compute_transmittance(alpha_layers)
        refine_input_colors = color_layers * transmittance
        refine_input_alpha = alpha_layers * transmittance
        stuff_behind = tf.cumsum(refine_input_colors, axis=3)

        # The refinement input always includes the transmittance; gradients
        # are stopped on every component.
        refine_parts = [refine_input_colors, stuff_behind,
                        refine_input_alpha, transmittance]
        refine_input = tf.concat(
            [tf.stop_gradient(part) for part in refine_parts], axis=4)

        # Append normalized plane indices as an extra channel.
        normalized_disp_inds = tf.reshape(
            tf.linspace(0.0, 1.0, num_mpi_planes),
            [1, 1, 1, num_mpi_planes, 1])
        sh = tf.shape(refine_input)
        normalized_disp_inds_stack = tf.tile(normalized_disp_inds,
                                             [1, sh[1], sh[2], 1, 1])
        refine_input = tf.concat([refine_input, normalized_disp_inds_stack],
                                 axis=4)
        print("refine input size:", refine_input.shape)
        rgba_layers_refine = refine_net(refine_input)

        print("predicting flow for occlusions")
        flow_source = tf.stop_gradient(stuff_behind)
        flow_vecs = rgba_layers_refine[:, :, :, :, :2]
        color_layers = pj.flow_gather(flow_source, flow_vecs)
        # Rescale alphas to (0, 1)
        alpha_layers = (rgba_layers_refine[:, :, :, :, -1:] + 1.)/2.
        rgba_layers_refine = tf.concat([color_layers, alpha_layers], axis=4)

    # Collect output tensors
    pred = {
        "rgba_layers": rgba_layers,
        "rgba_layers_refine": rgba_layers_refine,
        "refine_input_mpi": tf.concat(
            [refine_input_colors, refine_input_alpha], axis=-1),
        "stuff_behind": stuff_behind,
        "flow_vecs": flow_vecs,
        "psv": net_input[:, :, :, :, 0:3],
    }

    # Add pred tensors to outputs collection
    print("adding outputs to collection")
    for output_tensor in pred.values():
        tf.add_to_collection("outputs", output_tensor)

    return pred
def __init__(self,
             sess,
             num_actions,
             observation_shape=dqn_agent.NATURE_DQN_OBSERVATION_SHAPE,
             observation_dtype=dqn_agent.NATURE_DQN_DTYPE,
             stack_size=dqn_agent.NATURE_DQN_STACK_SIZE,
             number_of_gammas=8,
             gamma_max=0.99,
             acting_policy='hyperbolic',
             hyp_exponent=1.0,
             integral_estimate='lower',
             num_atoms=51,
             vmax=10.,
             gamma=0.99,
             update_horizon=1,
             min_replay_history=20000,
             update_period=4,
             target_update_period=8000,
             epsilon_fn=dqn_agent.linearly_decaying_epsilon,
             epsilon_train=0.01,
             epsilon_eval=0.001,
             epsilon_decay_period=250000,
             replay_scheme='prioritized',
             gradient_clipping_norm=None,
             network_size_expansion=1.0,
             tf_device='/cpu:*',
             use_staging=True,
             optimizer=tf.train.AdamOptimizer(learning_rate=0.00025,
                                              epsilon=0.0003125),
             summary_writer=None,
             summary_writing_frequency=50000):
    """Sets up the hyperbolic-discounting state and the parent agent.

    Args:
      sess: `tf.Session`, for executing ops.
      num_actions: int, number of actions the agent can take at any state.
      observation_shape: tuple of ints or an int. If single int, the
        observation is assumed to be a 2D square.
      observation_dtype: tf.DType, specifies the type of the observations.
        Note that if your inputs are continuous, you should set this to
        tf.float32.
      stack_size: int, number of frames to use in state stack.
      number_of_gammas: int, the number of gammas to estimate in parallel.
      gamma_max: float, the maximum gamma we will learn via Bellman updates.
      acting_policy: str, the policy with which the agent will act. One of
        ['hyperbolic', 'largest_gamma'].
      hyp_exponent: float, the parameter k in the equation 1. / (1. + k * t)
        for hyperbolic discounting. Smaller parameter will lead to a longer
        horizon.
      integral_estimate: str, how to estimate the integral of the hyperbolic
        discount.
      num_atoms: int, the number of buckets of the value function
        distribution.
      vmax: float, the value distribution support is [-vmax, vmax].
      gamma: float, discount factor with the usual RL meaning. It is not
        forwarded here: the parent constructor receives gamma=0.
      update_horizon: int, horizon at which updates are performed, the 'n' in
        n-step update.
      min_replay_history: int, number of transitions that should be
        experienced before the agent begins training its value function.
      update_period: int, period between DQN updates.
      target_update_period: int, update period for the target network.
      epsilon_fn: function expecting 4 parameters: (decay_period, step,
        warmup_steps, epsilon). This function should return the epsilon value
        used for exploration during training.
      epsilon_train: float, the value to which the agent's epsilon is
        eventually decayed during training.
      epsilon_eval: float, epsilon used when evaluating the agent.
      epsilon_decay_period: int, length of the epsilon decay schedule.
      replay_scheme: str, 'prioritized' or 'uniform', the sampling scheme of
        the replay memory.
      gradient_clipping_norm: if not None, this will set the gradient
        clipping value (presumably a float -- TODO confirm).
      network_size_expansion: float, the multiplier on the default layer size.
      tf_device: str, Tensorflow device on which the agent's graph is
        executed.
      use_staging: bool, when True use a staging area to prefetch the next
        training batch, speeding training up by about 30%.
      optimizer: `tf.train.Optimizer`, for training the value function.
      summary_writer: SummaryWriter object for outputting training
        statistics. Summary writing disabled if set to None.
      summary_writing_frequency: int, frequency with which summaries will be
        written. Lower values will result in slower training.
    """
    # Some tools turn round floats into ints, so force a float here.
    vmax = float(vmax)
    self._num_atoms = num_atoms
    self._support = tf.linspace(-vmax, vmax, num_atoms)
    self._replay_scheme = replay_scheme
    self.optimizer = optimizer

    # Hyperbolic-discounting configuration.
    self.number_of_gammas = number_of_gammas
    self.gamma_max = gamma_max
    self.acting_policy = acting_policy
    self.hyp_exponent = hyp_exponent
    self.integral_estimate = integral_estimate
    self.gradient_clipping_norm = gradient_clipping_norm
    self.network_size_expansion = network_size_expansion

    # These are the discount factors (gammas) used to estimate the integral.
    self.eval_gammas = agent_utils.compute_eval_gamma_interval(
        self.gamma_max, self.hyp_exponent, self.number_of_gammas)

    # To estimate hyperbolic discounting of the form
    #
    #   \Gamma_t = 1. / (1. + k * t)
    #
    # with a coefficient k <= 1.0, the value functions for \gamma ^ k are
    # needed. These are stored as self.gammas, since they are the gammas
    # actually being learned via Bellman updates.
    self.gammas = [
        math.pow(eval_gamma, self.hyp_exponent)
        for eval_gamma in self.eval_gammas
    ]
    assert max(self.gammas) <= self.gamma_max

    super(HyperRainbowAgent, self).__init__(
        sess=sess,
        num_actions=num_actions,
        observation_shape=observation_shape,
        observation_dtype=observation_dtype,
        stack_size=stack_size,
        gamma=0,  # TODO(liamfedus): better way to deal with self.gamma
        update_horizon=update_horizon,
        min_replay_history=min_replay_history,
        update_period=update_period,
        target_update_period=target_update_period,
        epsilon_fn=epsilon_fn,
        epsilon_train=epsilon_train,
        epsilon_eval=epsilon_eval,
        epsilon_decay_period=epsilon_decay_period,
        tf_device=tf_device,
        use_staging=use_staging,
        optimizer=self.optimizer,
        summary_writer=summary_writer,
        summary_writing_frequency=summary_writing_frequency)
def mpi_resample_cube(mpi, tgt, intrinsics, depth_planes, side_length,
                      cube_res):
    """Resample MPI onto cube centered at target point.

    Args:
      mpi: [B,H,W,D,C], input MPI
      tgt: [B,3], [x,y,z] coordinates for cube center (in reference/mpi frame)
      intrinsics: [B,3,3], MPI reference camera intrinsics
      depth_planes: [D] depth values for MPI planes
      side_length: metric side length of cube
      cube_res: resolution of each cube dimension

    Returns:
      resampled: [B, cube_res, cube_res, cube_res, C]
      coords_proj: [B, cube_res, cube_res, cube_res, 4], the coordinates used
        for the gather: batch index, the values returned by `cam2pixel`
        (presumably two pixel coordinates -- TODO confirm) and the fractional
        depth-plane index.
    """
    batch_size = tf.shape(mpi)[0]
    num_depths = tf.shape(mpi)[3]

    # create cube coordinates
    b_vals = tf.to_float(tf.range(batch_size))
    x_vals = tf.linspace(-side_length / 2.0, side_length / 2.0, cube_res)
    y_vals = tf.linspace(-side_length / 2.0, side_length / 2.0, cube_res)
    # z runs from +side_length/2 down to -side_length/2.
    z_vals = tf.linspace(side_length / 2.0, -side_length / 2.0, cube_res)
    b, y, x, z = tf.meshgrid(b_vals, y_vals, x_vals, z_vals, indexing='ij')

    # Shift the cube so that it is centered on the target point.
    x = x + tgt[:, 0, tf.newaxis, tf.newaxis, tf.newaxis]
    y = y + tgt[:, 1, tf.newaxis, tf.newaxis, tf.newaxis]
    z = z + tgt[:, 2, tf.newaxis, tf.newaxis, tf.newaxis]

    # Homogeneous coordinates, regrouped so that each z-slice of each batch
    # element becomes one [4, cube_res, cube_res] entry.
    ones = tf.ones_like(x)
    coords = tf.stack([x, y, z, ones], axis=1)
    coords_r = tf.reshape(
        tf.transpose(coords, [0, 4, 1, 2, 3]),
        [batch_size * cube_res, 4, cube_res, cube_res])

    # Mask of cube points with negative z; it is handed to the gather below.
    bad_inds = tf.less(z, 0.0)

    # project into reference camera to transform coordinates into MPI indices
    # Extend the 3x3 intrinsics to 4x4 with a zero column and a
    # [0, 0, 0, 1] row.
    filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
    filler = tf.tile(filler, [batch_size * cube_res, 1, 1])
    intrinsics_tile = tf.tile(intrinsics, [cube_res, 1, 1])
    intrinsics_tile_4 = tf.concat(
        [intrinsics_tile, tf.zeros([batch_size * cube_res, 3, 1])], axis=2)
    intrinsics_tile_4 = tf.concat([intrinsics_tile_4, filler], axis=1)
    coords_proj = cam2pixel(coords_r, intrinsics_tile_4)

    # Fractional plane index, linear in inverse depth between the first and
    # the last depth plane.
    coords_depths = tf.transpose(coords_r[:, 2:3, :, :], [0, 2, 3, 1])
    coords_depth_inds = (tf.to_float(num_depths) - 1) * (
        (1.0 / coords_depths) - (1.0 / depth_planes[0])) / (
            (1.0 / depth_planes[-1]) - (1.0 / depth_planes[0]))
    coords_proj = tf.concat([coords_proj, coords_depth_inds], axis=3)
    coords_proj = tf.transpose(
        tf.reshape(coords_proj, [batch_size, cube_res, cube_res, cube_res, 3]),
        [0, 2, 3, 1, 4])
    coords_proj = tf.concat([b[:, :, :, :, tf.newaxis], coords_proj], axis=4)

    # trilinear interpolation gather from MPI
    # interpolate pre-multiplied RGBAs, then un-pre-multiply
    mpi_alpha = mpi[Ellipsis, -1:]
    mpi_channels_p = mpi[Ellipsis, :-1] * mpi_alpha
    mpi_p = tf.concat([mpi_channels_p, mpi_alpha], axis=-1)

    resampled_p = sampling.trilerp_gather(mpi_p, coords_proj, bad_inds)

    resampled_alpha = tf.clip_by_value(resampled_p[Ellipsis, -1:], 0.0, 1.0)
    # The small epsilon avoids dividing by zero where alpha is zero.
    resampled_channels = resampled_p[Ellipsis, :-1] / (resampled_alpha + 1e-8)
    resampled = tf.concat([resampled_channels, resampled_alpha], axis=-1)

    return resampled, coords_proj
def build(self, Px, dx, X, N_neurons=180, eta=0.01, exp_decay=(50, 0.9)):
    '''
    Set up the Feature Extractor: the feature network, the cost function and
    the training ops, all inside a private graph and session.

    Following parameters are required:
    - Px: distribution of the x, with shape [N, N].
          N will be used as number of points in each side of the lattice
    - dx: discretization size of the lattice
          sum Px dx**2 = 1 in this 2d case
    - X : points of the lattice (usually meshgrid).
          They must be the same on which Px has been calculated
    - N_neurons of the neural network (same in each layer)
    - eta : learning rate
    - exp_decay = [decay steps, decay rate] for an exponentially
                  decaying learning rate. In particular
                  decayed_learning_rate = learning_rate *
                            decay_rate ^ (global_step / decay_steps)
                  Any indexable pair works; the default is a tuple so that
                  no mutable object is shared between calls.
    '''
    self.xdelta = dx
    self.x_points = X
    self.P_x = Px
    self.N = np.shape(Px)[0]
    # Px flattened into a single row so that it broadcasts against the
    # per-point tensors below.
    self.P_x_short = self.P_x.reshape([1, self.N*self.N])
    self.Ny = self.N

    self.eta = eta
    self.neurons_feature = N_neurons

    self.graph = tf.Graph()
    with self.graph.as_default():
        #############################
        # Define the input placeholders and the feature neural network
        self.tf_x = tf.placeholder(tf.float32, shape=[2, None])
        self.tf_i = tf.placeholder(tf.float32)
        self.tf_a = tf.placeholder(tf.float32)

        self.tf_theta_input = K.layers.Input(shape=(2,))
        self.tf_theta_layers = self.tf_theta_input
        self.tf_theta_layers = K.layers.Dense(
            self.neurons_feature,
            input_shape=(2,),
            activation=K.layers.ReLU(),
            kernel_initializer=tf.random_normal_initializer(),
            bias_initializer=tf.random_normal_initializer())(
                self.tf_theta_layers)
        self.tf_theta_layers = K.layers.Dense(
            self.neurons_feature,
            activation=K.layers.ReLU(),
            kernel_initializer=tf.initializers.glorot_normal(),
            bias_initializer=tf.random_normal_initializer())(
                self.tf_theta_layers)
        self.tf_theta_layers = K.layers.Dense(
            self.neurons_feature,
            activation=K.activations.tanh,
            kernel_initializer=tf.initializers.glorot_normal(),
            bias_initializer=tf.random_normal_initializer())(
                self.tf_theta_layers)
        self.tf_theta_layers = K.layers.Dense(
            1,
            kernel_initializer=tf.initializers.glorot_normal(),
            bias_initializer=tf.random_normal_initializer())(
                self.tf_theta_layers)

        self.tf_theta_net = K.Model(self.tf_theta_input, self.tf_theta_layers)

        # Feature value at every input point, scaled by tf_a; shape [1, n].
        self.tf_f = self.tf_a * tf.reshape(
            self.tf_theta_net(tf.transpose(self.tf_x)), [1, -1])

        #############################
        # Regularizing Term
        self.tf_grads_f = tf.gradients(self.tf_f, self.tf_x)[0]
        self.tf_norm2_grad_f = tf.reduce_sum(self.tf_grads_f**2, 0)

        # Per-point contribution, and its sum over the lattice.
        self.tf_term1_local = -0.5 * (
            tf.log(self.tf_norm2_grad_f)*self.P_x_short*self.xdelta**2)
        self.tf_term1 = -0.5 * tf.reduce_sum(
            tf.log(self.tf_norm2_grad_f) * self.P_x_short*self.xdelta**2)

        #############################
        # Entropy Term
        # Define current range of the feature (in which to approximate Py)
        self.tf_y_min = tf.reduce_min(self.tf_f)
        self.tf_y_max = tf.reduce_max(self.tf_f)
        self.tf_ydelta = tf.stop_gradient(
            (self.tf_y_max-self.tf_y_min)/(self.Ny-1))
        self.tf_y_linspace = tf.reshape(
            tf.stop_gradient(
                tf.linspace(self.tf_y_min, self.tf_y_max, self.Ny)),
            [self.Ny, 1])

        # Define a triangular histogram (so that it is differentiable).
        # The masks select, for every bin center, the feature values less
        # than one bin width below it (left) or at/above it (right).
        self.tf_y_mask_left = tf.logical_and(
            (self.tf_y_linspace - self.tf_ydelta < self.tf_f),
            (self.tf_y_linspace > self.tf_f))
        self.tf_y_mask_right = tf.logical_and(
            (self.tf_y_linspace <= self.tf_f),
            (self.tf_y_linspace + self.tf_ydelta > self.tf_f))

        self.tf_y_line_left = (
            1/self.tf_ydelta
            + 1/self.tf_ydelta**2*(self.tf_f-self.tf_y_linspace))
        self.tf_y_line_right = (
            1/self.tf_ydelta
            - 1/self.tf_ydelta**2*(self.tf_f-self.tf_y_linspace))

        self.tf_ydelta_left = self.tf_y_line_left * tf.stop_gradient(
            tf.cast(self.tf_y_mask_left, tf.float32))
        self.tf_ydelta_right = self.tf_y_line_right * tf.stop_gradient(
            tf.cast(self.tf_y_mask_right, tf.float32))

        # Approximate the distribution of the feature through a
        # differentiable histogram
        self.tf_P_y = tf.reduce_sum(
            (self.tf_ydelta_left+self.tf_ydelta_right)
            * self.P_x_short*self.xdelta**2, 1)

        # Calculate the Entropy of the feature
        self.tf_H_y = - tf.reduce_sum(
            self.tf_P_y*tf.log(self.tf_P_y))*self.tf_ydelta

        #############################
        # Renormalized Mutual Information and training methods
        self.tf_cost = self.tf_term1 + self.tf_H_y

        # Optimizer (with exponential decaying learning rate):
        # exp_decay[0] is passed as decay_steps, exp_decay[1] as decay_rate.
        self.tf_optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=tf.train.exponential_decay(
                self.eta, self.tf_i, exp_decay[0], exp_decay[1]))

        # Gradients of the negated cost, so that gradient descent maximizes
        # tf_cost.
        self.tf_grad_cost = self.tf_optimizer.compute_gradients(
            -self.tf_cost, self.tf_theta_net.trainable_variables)

        # Train step
        self.tf_train_step = self.tf_optimizer.apply_gradients(
            self.tf_grad_cost)

        # Initialize the neural network
        self.tf_init_op = tf.global_variables_initializer()

    self.sess = tf.Session(graph=self.graph)
    self.sess.run(self.tf_init_op)

    self.costs = []
def __init__(self,
             sess,
             num_actions,
             observation_shape=dqn_agent.NATURE_DQN_OBSERVATION_SHAPE,
             observation_dtype=dqn_agent.NATURE_DQN_DTYPE,
             stack_size=dqn_agent.NATURE_DQN_STACK_SIZE,
             network=legacy_networks.rainbow_network,
             num_atoms=51,
             vmax=10.,
             gamma=0.99,
             update_horizon=1,
             min_replay_history=20000,
             update_period=4,
             target_update_period=8000,
             epsilon_fn=dqn_agent.linearly_decaying_epsilon,
             epsilon_train=0.01,
             epsilon_eval=0.001,
             epsilon_decay_period=250000,
             replay_scheme='prioritized',
             alpha_exponent=0.5,
             beta_exponent=0.5,
             tf_device='/cpu:*',
             use_staging=True,
             optimizer=tf.train.AdamOptimizer(
                 learning_rate=0.00025, epsilon=0.0003125),
             summary_writer=None,
             summary_writing_frequency=2500,
             replay_forgetting='default',
             sample_newest_immediately=False,
             oldest_policy_in_buffer=250000):
    """Builds the agent and rescales its step-based hyperparameters.

    Args:
      sess: `tf.Session`, for executing ops.
      num_actions: int, number of actions the agent can take at any state.
      observation_shape: tuple of ints or an int. If single int, the
        observation is assumed to be a 2D square.
      observation_dtype: tf.DType, specifies the type of the observations.
        Note that if your inputs are continuous, you should set this to
        tf.float32.
      stack_size: int, number of frames to use in state stack.
      network: function expecting three parameters: (num_actions,
        network_type, state). This function will return the network_type
        object containing the tensors output by the network. See
        dopamine.discrete_domains.legacy_networks.rainbow_network as an
        example.
      num_atoms: int, the number of buckets of the value function
        distribution.
      vmax: float, the value distribution support is [-vmax, vmax].
      gamma: float, discount factor with the usual RL meaning.
      update_horizon: int, horizon at which updates are performed, the 'n' in
        n-step update.
      min_replay_history: int, number of transitions that should be
        experienced before the agent begins training its value function.
      update_period: int, period between DQN updates.
      target_update_period: int, update period for the target network.
      epsilon_fn: function expecting 4 parameters: (decay_period, step,
        warmup_steps, epsilon). This function should return the epsilon value
        used for exploration during training.
      epsilon_train: float, the value to which the agent's epsilon is
        eventually decayed during training.
      epsilon_eval: float, epsilon used when evaluating the agent.
      epsilon_decay_period: int, length of the epsilon decay schedule.
      replay_scheme: str, 'prioritized' or 'uniform', the sampling scheme of
        the replay memory.
      alpha_exponent: float, alpha hparam in prioritized experience replay.
      beta_exponent: float, beta hparam in prioritized experience replay.
      tf_device: str, Tensorflow device on which the agent's graph is
        executed.
      use_staging: bool, when True use a staging area to prefetch the next
        training batch, speeding training up by about 30%.
      optimizer: `tf.train.Optimizer`, for training the value function.
      summary_writer: SummaryWriter object for outputting training
        statistics. Summary writing disabled if set to None.
      summary_writing_frequency: int, frequency with which summaries will be
        written. Lower values will result in slower training.
      replay_forgetting: str, What strategy to employ for forgetting old
        trajectories. One of ['default', 'elephant'].
      sample_newest_immediately: bool, when True, immediately trains on the
        newest transition instead of using the max_priority hack.
      oldest_policy_in_buffer: int, the number of gradient updates of the
        oldest policy that has added data to the replay buffer.

    Raises:
      NotImplementedError: if replay_scheme is 'prioritized' and
        replay_forgetting is 'elephant'.
    """
    # Some tools turn round floats into ints, so force a float here.
    vmax = float(vmax)
    self._num_atoms = num_atoms
    self._support = tf.linspace(-vmax, vmax, num_atoms)
    self._replay_scheme = replay_scheme
    self._alpha_exponent = alpha_exponent
    self._beta_exponent = beta_exponent
    self._replay_forgetting = replay_forgetting
    self._sample_newest_immediately = sample_newest_immediately
    self._oldest_policy_in_buffer = oldest_policy_in_buffer
    # TODO(b/110897128): Make agent optimizer attribute private.
    self.optimizer = optimizer

    dqn_agent.DQNAgent.__init__(
        self,
        sess=sess,
        num_actions=num_actions,
        observation_shape=observation_shape,
        observation_dtype=observation_dtype,
        stack_size=stack_size,
        network=network,
        gamma=gamma,
        update_horizon=update_horizon,
        min_replay_history=min_replay_history,
        update_period=update_period,
        target_update_period=target_update_period,
        epsilon_fn=epsilon_fn,
        epsilon_train=epsilon_train,
        epsilon_eval=epsilon_eval,
        epsilon_decay_period=epsilon_decay_period,
        tf_device=tf_device,
        use_staging=use_staging,
        optimizer=self.optimizer,
        summary_writer=summary_writer,
        summary_writing_frequency=summary_writing_frequency)

    tf.logging.info('\t replay_scheme: %s', replay_scheme)
    tf.logging.info('\t alpha_exponent: %f', alpha_exponent)
    tf.logging.info('\t beta_exponent: %f', beta_exponent)
    tf.logging.info('\t replay_forgetting: %s', replay_forgetting)
    tf.logging.info('\t oldest_policy_in_buffer: %s', oldest_policy_in_buffer)
    self.episode_return = 0.0

    # Counters for the online and target network updates.
    self._online_network_updates = 0
    self._target_network_updates = 0

    # pylint: disable=protected-access
    replay_capacity = float(self._replay.memory._replay_capacity)
    # pylint: enable=protected-access
    buffer_to_oldest_policy_ratio = (
        replay_capacity / float(self._oldest_policy_in_buffer))

    # This ratio is used to adjust other attributes that are explicitly tied
    # to agent steps. When designed, the Dopamine agents assumed that the
    # replay ratio remain fixed and therefore elements such as
    # epsilon_decay_period will not be set appropriately without adjustment.
    self._gin_param_multiplier = (
        buffer_to_oldest_policy_ratio / self.update_period)
    tf.logging.info('\t self._gin_param_multiplier: %f',
                    self._gin_param_multiplier)

    # Adjust agent attributes that are tied to the agent steps. Only the
    # epsilon decay period is truncated back to an int.
    multiplier = self._gin_param_multiplier
    self.update_period = self.update_period * multiplier
    self.target_update_period = self.target_update_period * multiplier
    self.epsilon_decay_period = int(self.epsilon_decay_period * multiplier)

    if (self._replay_scheme == 'prioritized' and
            self._replay_forgetting == 'elephant'):
        raise NotImplementedError
def __init__(self,
             sess,
             num_actions,
             observation_shape=dqn_agent.NATURE_DQN_OBSERVATION_SHAPE,
             observation_dtype=dqn_agent.NATURE_DQN_DTYPE,
             stack_size=dqn_agent.NATURE_DQN_STACK_SIZE,
             network=atari_lib.rainbow_network,
             num_atoms=51,
             vmax=10.,
             gamma=0.99,
             update_horizon=1,
             min_replay_history=20000,
             update_period=4,
             target_update_period=8000,
             epsilon_fn=dqn_agent.linearly_decaying_epsilon,
             epsilon_train=0.01,
             epsilon_eval=0.001,
             epsilon_decay_period=250000,
             replay_scheme='prioritized',
             tf_device='/cpu:*',
             use_staging=True,
             optimizer=tf.train.AdamOptimizer(
                 learning_rate=0.00025, epsilon=0.0003125),
             summary_writer=None,
             summary_writing_frequency=500):
    """Sets up the distributional agent and delegates to `DQNAgent.__init__`.

    Stores the distribution support, replay scheme and optimizer on the
    instance, then forwards every remaining argument to the DQN base class.

    Args:
      sess: `tf.Session`, for executing ops.
      num_actions: int, number of actions the agent can take at any state.
      observation_shape: tuple of ints or an int. If single int, the
        observation is assumed to be a 2D square.
      observation_dtype: tf.DType, specifies the type of the observations.
        Note that if your inputs are continuous, you should set this to
        tf.float32.
      stack_size: int, number of frames to use in state stack.
      network: tf.Keras.Model, expects four parameters: (num_actions,
        num_atoms, support, network_type). This class is used to generate
        network instances that are used by the agent. Each instantiation
        would have different set of variables. See
        dopamine.discrete_domains.atari_lib.RainbowNetwork as an example.
      num_atoms: int, the number of buckets of the value function
        distribution.
      vmax: float, the value distribution support is [-vmax, vmax].
      gamma: float, discount factor with the usual RL meaning.
      update_horizon: int, horizon at which updates are performed, the 'n'
        in n-step update.
      min_replay_history: int, number of transitions that should be
        experienced before the agent begins training its value function.
      update_period: int, period between DQN updates.
      target_update_period: int, update period for the target network.
      epsilon_fn: function expecting 4 parameters: (decay_period, step,
        warmup_steps, epsilon). This function should return the epsilon
        value used for exploration during training.
      epsilon_train: float, the value to which the agent's epsilon is
        eventually decayed during training.
      epsilon_eval: float, epsilon used when evaluating the agent.
      epsilon_decay_period: int, length of the epsilon decay schedule.
      replay_scheme: str, 'prioritized' or 'uniform', the sampling scheme of
        the replay memory.
      tf_device: str, Tensorflow device on which the agent's graph is
        executed.
      use_staging: bool, when True use a staging area to prefetch the next
        training batch, speeding training up by about 30%.
      optimizer: `tf.train.Optimizer`, for training the value function.
        NOTE: the default instance is created once, when this function is
        defined, and is therefore shared by every agent that does not pass
        its own optimizer.
      summary_writer: SummaryWriter object for outputting training
        statistics. Summary writing disabled if set to None.
      summary_writing_frequency: int, frequency with which summaries will be
        written. Lower values will result in slower training.
    """
    # Some tools hand round floats over as ints; force a float for linspace.
    support_bound = float(vmax)

    self._num_atoms = num_atoms
    # `num_atoms` evenly spaced values covering [-vmax, vmax].
    self._support = tf.linspace(-support_bound, support_bound, num_atoms)
    self._replay_scheme = replay_scheme
    # TODO(b/110897128): Make agent optimizer attribute private.
    self.optimizer = optimizer

    # The attributes above are assigned before delegating; presumably the
    # base constructor builds the graph that reads them -- confirm against
    # DQNAgent before reordering.
    dqn_agent.DQNAgent.__init__(
        self,
        sess=sess,
        num_actions=num_actions,
        observation_shape=observation_shape,
        observation_dtype=observation_dtype,
        stack_size=stack_size,
        network=network,
        gamma=gamma,
        update_horizon=update_horizon,
        min_replay_history=min_replay_history,
        update_period=update_period,
        target_update_period=target_update_period,
        epsilon_fn=epsilon_fn,
        epsilon_train=epsilon_train,
        epsilon_eval=epsilon_eval,
        epsilon_decay_period=epsilon_decay_period,
        tf_device=tf_device,
        use_staging=use_staging,
        optimizer=self.optimizer,
        summary_writer=summary_writer,
        summary_writing_frequency=summary_writing_frequency,
    )
def get_cluster_centroids(self):
    """Return centroids spaced evenly across the range of the weights.

    Returns:
      The `tf.linspace` tensor running from the minimum to the maximum of
      `self.weights`, with `self.number_of_clusters` entries.
    """
    lowest, highest = tf.reduce_min(self.weights), tf.reduce_max(self.weights)
    return tf.linspace(lowest, highest, self.number_of_clusters)
def spherical_cubevol_resample(vol, env2ref, cube_center, side_length, n_phi,
                               n_theta, n_r):
    """Resample cube volume onto spherical coordinates centered at target point.

    Args:
      vol: [B,H,W,D,C], input volume. Only H is read to obtain the cube
        resolution, so the volume is presumably cubic (H == W == D) --
        confirm against callers.
      env2ref: [B,4,4], relative pose transformation (transform env to ref)
      cube_center: [B,3], [x,y,z] coordinates for center of cube volume
      side_length: side length of cube
      n_phi: number of samples along vertical spherical coordinate dim
      n_theta: number of samples along horizontal spherical coordinate dim
      n_r: number of samples along radius spherical coordinate dim

    Returns:
      A tuple `(resampled, r_vals)`:
        resampled: [B, n_phi, n_theta, n_r, C], the resampled volume.
        r_vals: [n_r], the sampled radii, running from the largest to the
          smallest distance between any cube sample point and the
          translation column of `env2ref`.
    """
    batch_size = tf.shape(vol)[0]
    height = tf.shape(vol)[1]
    cube_res = tf.to_float(height)
    n_cube_samples = tf.to_int32(cube_res)
    half_side = side_length / 2.0

    # Per-batch cube center components, shaped to broadcast over 3 grid dims.
    center_x = cube_center[:, 0, tf.newaxis, tf.newaxis, tf.newaxis]
    center_y = cube_center[:, 1, tf.newaxis, tf.newaxis, tf.newaxis]
    center_z = cube_center[:, 2, tf.newaxis, tf.newaxis, tf.newaxis]

    # create spherical coordinates
    b_vals = tf.to_float(tf.range(batch_size))
    phi_vals = tf.linspace(0.0, np.pi, n_phi)
    theta_vals = tf.linspace(1.5 * np.pi, -0.5 * np.pi, n_theta)

    # compute radii to use
    x_vals = tf.linspace(-half_side, half_side, n_cube_samples)
    y_vals = tf.linspace(-half_side, half_side, n_cube_samples)
    z_vals = tf.linspace(half_side, -half_side, n_cube_samples)
    y_c, x_c, z_c = tf.meshgrid(y_vals, x_vals, z_vals, indexing='ij')
    cube_coords = tf.stack(
        [x_c + center_x, y_c + center_y, z_c + center_z], axis=4)

    # Distance of every cube sample point from the translation column of
    # env2ref; computed once and reduced twice.
    translation = env2ref[:, :3, 3][:, tf.newaxis, tf.newaxis, tf.newaxis, :]
    distances = tf.norm(cube_coords - translation, axis=4)
    min_r = tf.reduce_min(distances, axis=[0, 1, 2, 3])  # side_length / cube_res
    max_r = tf.reduce_max(distances, axis=[0, 1, 2, 3])

    r_vals = tf.linspace(max_r, min_r, n_r)
    b, phi, theta, r = tf.meshgrid(
        b_vals, phi_vals, theta_vals, r_vals,
        indexing='ij')  # currently in env frame

    # transform spherical coordinates into cartesian
    # (currently in env frame, z points forwards)
    x = r * tf.cos(theta) * tf.sin(phi)
    z = r * tf.sin(theta) * tf.sin(phi)
    y = r * tf.cos(phi)

    # transform coordinates into ref frame
    sphere_coords = tf.stack(
        [x, y, z, tf.ones_like(x)], axis=-1)[Ellipsis, tf.newaxis]
    sphere_coords_ref = tfmm(env2ref, sphere_coords)
    x = sphere_coords_ref[Ellipsis, 0, 0]
    y = sphere_coords_ref[Ellipsis, 1, 0]
    z = sphere_coords_ref[Ellipsis, 2, 0]

    # transform coordinates into vol indices
    index_scale = (cube_res - 1) / side_length
    x_inds = (x - center_x + half_side) * index_scale
    y_inds = -(y - center_y - half_side) * index_scale
    z_inds = -(z - center_z - half_side) * index_scale
    sphere_coords_inds = tf.stack([b, x_inds, y_inds, z_inds], axis=-1)

    # trilinear interpolation gather from volume
    # interpolate pre-multiplied RGBAs, then un-pre-multiply
    vol_alpha = tf.clip_by_value(vol[Ellipsis, -1:], 0.0, 1.0)
    vol_channels_p = vol[Ellipsis, :-1] * vol_alpha
    vol_p = tf.concat([vol_channels_p, vol_alpha], axis=-1)

    resampled_p = sampling.trilerp_gather(vol_p, sphere_coords_inds)

    resampled_alpha = resampled_p[Ellipsis, -1:]
    # The small epsilon guards the division where alpha is zero.
    resampled_channels = resampled_p[Ellipsis, :-1] / (resampled_alpha + 1e-8)
    resampled = tf.concat([resampled_channels, resampled_alpha], axis=-1)

    return resampled, r_vals
def run_sobel(logdir, verbose=False):
    """Run a Sobel edge detection demonstration.

    Builds a fresh graph that convolves the demo image with a horizontal and
    a vertical kernel whose radius is fed per step, then writes one image
    summary (plus graph and run metadata) for each of 8 steps under
    `<logdir>/sobel`.

    See the summary description for more details.

    Arguments:
      logdir: Directory into which to write event logs.
      verbose: Boolean; whether to log any output.
    """
    if verbose:
        logger.info("--- Starting run: sobel")

    tf.reset_default_graph()
    tf.set_random_seed(0)

    image = get_image(verbose=verbose)
    kernel_radius = tf.placeholder(shape=(), dtype=tf.int32)

    with tf.name_scope("horizontal_kernel"):
        kernel_side_length = kernel_radius * 2 + 1
        # Drop off influence for pixels further away from the center.
        weighting_kernel = 1.0 - tf.abs(
            tf.linspace(-1.0, 1.0, num=kernel_side_length))
        differentiation_kernel = tf.linspace(-1.0, 1.0, num=kernel_side_length)
        horizontal_kernel = tf.matmul(
            tf.expand_dims(weighting_kernel, 1),
            tf.expand_dims(differentiation_kernel, 0),
        )

    with tf.name_scope("vertical_kernel"):
        vertical_kernel = tf.transpose(a=horizontal_kernel)

    float_image = tf.cast(image, tf.float32)
    dx = convolve(float_image, horizontal_kernel, name="convolve_dx")
    dy = convolve(float_image, vertical_kernel, name="convolve_dy")
    gradient_magnitude = tf.norm(
        tensor=[dx, dy], axis=0, name="gradient_magnitude")
    with tf.name_scope("normalized_gradient"):
        # NOTE(review): at step 0 the fed radius is 0, so the kernel has a
        # single element whose weight looks like it evaluates to zero; the
        # maximum below would then be 0 -- confirm the 0/0 case is intended.
        normalized_gradient = gradient_magnitude / tf.reduce_max(
            input_tensor=gradient_magnitude)
    with tf.name_scope("output_image"):
        output_image = tf.cast(255 * normalized_gradient, tf.uint8)

    summ = image_summary.op(
        "sobel",
        tf.stack([output_image]),
        display_name="Sobel edge detection",
        description=(
            "Demonstration of [Sobel edge detection]. The step "
            "parameter adjusts the radius of the kernel. "
            "The kernel can be of arbitrary size, and considers "
            "nearby pixels with \u2113\u2082-linear falloff.\n\n"
            # (that says ``$\ell_2$-linear falloff'')
            "Edge detection is done on a per-channel basis, so "
            "you can observe which edges are “mostly red "
            "edges,” for instance.\n\n"
            "For practical edge detection, a small kernel "
            "(usually not more than *r*=2) is best.\n\n"
            "[Sobel edge detection]: %s\n\n"
            "%s"
            % ("https://en.wikipedia.org/wiki/Sobel_operator", IMAGE_CREDIT)
        ),
    )

    # The trace options do not change between steps; build them once.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    with tf.Session() as sess:
        sess.run(image.initializer)
        writer = tf.summary.FileWriter(os.path.join(logdir, "sobel"))
        try:
            writer.add_graph(sess.graph)
            # The step number doubles as the kernel radius fed to the graph.
            for step in range(8):
                if verbose:
                    logger.info("--- sobel: step: %s", step)
                # A fresh metadata proto per step receives that step's trace.
                run_metadata = config_pb2.RunMetadata()
                s = sess.run(
                    summ,
                    feed_dict={kernel_radius: step},
                    options=run_options,
                    run_metadata=run_metadata,
                )
                writer.add_summary(s, global_step=step)
                writer.add_run_metadata(run_metadata, "step_%04d" % step)
        finally:
            # Close the event file even when a step fails.
            writer.close()
def _get_pixel_grid(axis, width):
    """Returns an array of length `width` containing pixel coordinates.

    Args:
      axis: `Axis.x` or `Axis.y`, selecting the direction of the grid.
      width: number of evenly spaced coordinates to generate.

    Returns:
      The `tf.linspace` tensor with `width` entries: running from -1.0 to 1.0
      for `Axis.x`, and from 1.0 to -1.0 for `Axis.y`.

    Raises:
      ValueError: if `axis` is neither `Axis.x` nor `Axis.y`.
    """
    if axis == Axis.x:
        # Left is negative, right is positive.
        return tf.linspace(-1.0, 1.0, width)
    if axis == Axis.y:
        # Top is positive, bottom is negative.
        return tf.linspace(1.0, -1.0, width)
    # Fail loudly rather than silently returning None for an unknown axis.
    raise ValueError("Unsupported axis: %r" % (axis,))