def get_reward(self):
    """
    Get reward based on current state

    :return: Reward array
    """
    # Goal proximity.
    rew = ivy.exp(-0.5 * ivy.reduce_sum((self.xy - self.goal_xy) ** 2, -1))
    # Urchins proximity.
    rew = rew * ivy.reduce_prod(
        1 - ivy.exp(-30 * ivy.reduce_sum((self.xy - self.urchin_xys) ** 2, -1)), -1)
    return ivy.reshape(rew, (1,))
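# A minimal numpy sketch of the reward above (goal proximity damped by urchin
# proximity), under assumed sample positions; the values are hypothetical and
# only illustrate the shape of the formula, not the environment's actual state.
import numpy as np

xy = np.array([0.5, 0.5])                    # agent position
goal_xy = np.array([1.0, 1.0])               # goal position
urchin_xys = np.array([[0.0, 0.0],           # urchin positions
                       [2.0, 2.0]])

goal_rew = np.exp(-0.5 * np.sum((xy - goal_xy) ** 2, -1))
urchin_factor = np.prod(1 - np.exp(-30 * np.sum((xy - urchin_xys) ** 2, -1)), -1)
rew = goal_rew * urchin_factor               # near 1 close to the goal, near 0 close to an urchin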
def get_reward(self):
    """
    Get reward based on current state

    :return: Reward array
    """
    # Goal proximity.
    return ivy.reshape(ivy.exp(-5 * ((self.x - self.goal_x) ** 2)), (1,))
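# A small hedged example of the same shaping: the reward is a Gaussian-style
# bump that peaks at 1 when x reaches goal_x; the positions below are made up.
import numpy as np

goal_x = 1.0
for x in (1.0, 0.5, 0.0):
    print(x, np.exp(-5 * (x - goal_x) ** 2))   # 1.0, ~0.29, ~0.0067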
def get_reward(self):
    """
    Get reward based on current state

    :return: Reward array
    """
    # Center proximity.
    rew = ivy.exp(-1 * (self.x ** 2))
    # Pole verticality.
    rew = rew * (ivy.cos(self.angle) + 1) / 2
    return ivy.reshape(rew[0], (1,))
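# A hedged numpy sketch of this reward: center proximity times pole verticality,
# each mapped into [0, 1]; the cart position and pole angle below are hypothetical.
import numpy as np

x, angle = 0.2, np.pi / 6                      # cart offset, pole angle from vertical
rew = np.exp(-1 * x ** 2) * (np.cos(angle) + 1) / 2
# rew is ~0.90: it approaches 1 only when the cart is centred (x ~ 0) and the pole upright (angle ~ 0)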
def test_exp(x, dtype_str, tensor_fn, dev_str, call):
    # smoke test
    x = tensor_fn(x, dtype_str, dev_str)
    ret = ivy.exp(x)
    # type test
    assert ivy.is_array(ret)
    # cardinality test
    assert ret.shape == x.shape
    # value test
    assert np.allclose(call(ivy.exp, x), ivy.numpy.exp(ivy.to_numpy(x)))
    # compilation test
    helpers.assert_compilable(ivy.exp)
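# A standalone sketch of the properties checked above (shape preservation and
# elementwise agreement on values), written against numpy only; the sample input
# is hypothetical and none of the test fixtures (tensor_fn, dev_str, call) are
# assumed here.
import numpy as np

x = np.array([[0.0, 1.0, 2.0]], dtype='float32')
ret = np.exp(x)
assert ret.shape == x.shape                    # cardinality
assert np.allclose(ret, np.e ** x)             # value: exp(x) == e**x elementwise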
def sampled_volume_density_to_occupancy_probability(density, inter_sample_distance):
    """
    Compute probability of occupancy, given sampled volume densities and their
    associated inter-sample distances

    :param density: The sampled density values *[batch_shape]*
    :type density: array
    :param inter_sample_distance: The inter-sample distances *[batch_shape]*
    :type inter_sample_distance: array
    :return: The occupancy probabilities *[batch_shape]*
    """
    return 1 - ivy.exp(-density * inter_sample_distance)
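# A hedged numpy sketch of the same conversion: the occupancy probability tends
# to 1 as density * distance grows; the density and spacing values are made up.
import numpy as np

density = np.array([0.0, 0.5, 5.0, 50.0])      # sampled volume densities
inter_sample_distance = np.full(4, 0.1)        # uniform spacing between samples
occ_prob = 1 - np.exp(-density * inter_sample_distance)
# -> [0.0, ~0.049, ~0.39, ~0.99]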
def _addressing(self, k, beta, g, s, gamma, prev_M, prev_w):

    # Sec 3.3.1 Focusing by Content

    # Cosine Similarity
    k = ivy.expand_dims(k, axis=2)
    inner_product = ivy.matmul(prev_M, k)
    k_norm = ivy.reduce_sum(k ** 2, axis=1, keepdims=True) ** 0.5
    M_norm = ivy.reduce_sum(prev_M ** 2, axis=2, keepdims=True) ** 0.5
    norm_product = M_norm * k_norm
    K = ivy.squeeze(inner_product / (norm_product + 1e-8))  # eq (6)

    # Calculating w^c
    K_amplified = ivy.exp(ivy.expand_dims(beta, axis=1) * K)
    w_c = K_amplified / ivy.reduce_sum(K_amplified, axis=1, keepdims=True)  # eq (5)

    if self._addressing_mode == 'content':  # Only focus on content
        return w_c

    # Sec 3.3.2 Focusing by Location

    g = ivy.expand_dims(g, axis=1)
    w_g = g * w_c + (1 - g) * prev_w  # eq (7)

    s = ivy.concatenate([
        s[:, :self._shift_range + 1],
        ivy.zeros([s.shape[0], self._memory_size - (self._shift_range * 2 + 1)]),
        s[:, -self._shift_range:]
    ], axis=1)
    t = ivy.concatenate([ivy.flip(s, axis=[1]), ivy.flip(s, axis=[1])], axis=1)
    s_matrix = ivy.stack([
        t[:, self._memory_size - i - 1:self._memory_size * 2 - i - 1]
        for i in range(self._memory_size)
    ], axis=1)
    w_ = ivy.reduce_sum(ivy.expand_dims(w_g, axis=1) * s_matrix, axis=2)  # eq (8)
    w_sharpen = w_ ** ivy.expand_dims(gamma, axis=1)
    w = w_sharpen / ivy.reduce_sum(w_sharpen, axis=1, keepdims=True)  # eq (9)

    return w
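# A minimal numpy sketch of the content-addressing step alone (eqs (5)-(6)):
# cosine similarity between the key and each memory row, sharpened by beta and
# normalised with a softmax. The batch size, memory size and values below are
# hypothetical, and the location-addressing steps (eqs (7)-(9)) are omitted.
import numpy as np

prev_M = np.random.rand(1, 4, 3)               # [batch, memory_size, word_size]
k = np.random.rand(1, 3)                       # [batch, word_size]
beta = np.array([[5.0]])                       # key strength

k_exp = k[:, :, None]                          # [batch, word_size, 1]
inner_product = prev_M @ k_exp                 # [batch, memory_size, 1]
k_norm = np.sum(k_exp ** 2, axis=1, keepdims=True) ** 0.5
M_norm = np.sum(prev_M ** 2, axis=2, keepdims=True) ** 0.5
K = np.squeeze(inner_product / (M_norm * k_norm + 1e-8), -1)    # eq (6), [batch, memory_size]

K_amplified = np.exp(beta * K)
w_c = K_amplified / np.sum(K_amplified, axis=1, keepdims=True)  # eq (5), rows sum to 1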
def get_reward(self):
    """
    Get reward based on current state

    :return: Reward array
    """
    # Goal proximity.
    x = ivy.reduce_sum(ivy.cos(self.angles), -1)
    y = ivy.reduce_sum(ivy.sin(self.angles), -1)
    xy = ivy.concatenate([ivy.expand_dims(x, 0), ivy.expand_dims(y, 0)], axis=0)
    rew = ivy.reshape(
        ivy.exp(-1 * ivy.reduce_sum((xy - self.goal_xy) ** 2, -1)), (-1,))
    return ivy.reduce_mean(rew, axis=0, keepdims=True)
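# A hedged numpy sketch of the fingertip computation above for a 2-link arm:
# the tip's xy position is the sum of the cosines / sines of the joint angles
# (unit-length links), and the reward decays with squared distance to the goal;
# the angles and goal below are hypothetical.
import numpy as np

angles = np.array([np.pi / 4, -np.pi / 4])     # joint angles
goal_xy = np.array([2.0, 0.0])

x = np.sum(np.cos(angles), -1)
y = np.sum(np.sin(angles), -1)
xy = np.array([x, y])                          # fingertip position, here ~[1.41, 0.0]
rew = np.exp(-1 * np.sum((xy - goal_xy) ** 2, -1))   # ~0.71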