def call(self, x, mask=None):
    """Crop each region of interest from the image and resize it to a fixed square.

    Args:
        x: Two-element sequence ``[img, rois]``. ``rois`` is indexed as
            ``rois[0, roi_idx, k]`` with ``k`` in 0..3 read as x, y, w, h,
            so only the first entry along its leading axis is used.
            (presumably ``img`` is laid out as (batch, height, width,
            channels) - confirm against the caller)
        mask: Unused.

    Returns:
        Tensor reshaped to ``(1, num_rois, pool_size, pool_size, nb_channels)``.
    """
    assert (len(x) == 2)

    feature_map, regions = x[0], x[1]

    pooled = []
    for roi_idx in range(self.num_rois):
        # Read the four box entries first, then cast them, in x, y, w, h order.
        raw_box = [regions[0, roi_idx, field] for field in range(4)]
        left, top, width, height = [tf.cast(value, 'int32') for value in raw_box]

        crop = feature_map[:, top:top + height, left:left + width, :]
        pooled.append(
            tf.image.resize(crop, (self.pool_size, self.pool_size)))

    stacked = tf.concat(pooled, axis=0)
    return tf.reshape(
        stacked,
        (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
def _compute(self, dep_values):
    """Feed the outputs of this node's dependencies into ``self._layer``.

    Args:
        dep_values: Sequence with one entry per dependency. Each entry is
            either a single value or a list/tuple of values.

    Returns:
        Whatever ``self._layer`` returns when called on the single dependency
        value, or on the concatenation of several dependency values.

    Raises:
        AssertionError: If there are no dependencies, or if the dependencies
            do not all produce the same number of outputs.
    """
    logger.log(f"Computing node for layer {self._layer}")

    if len(dep_values) == 0:
        # Case 1, zero dependency, this should not occur.
        # Raised explicitly so the check still fires when asserts are disabled.
        raise AssertionError(
            f"No dependency for computing the layer {self._layer}, consider deleting it")

    if len(dep_values) == 1:
        # Case 2, single dependency, call it directly
        return self._layer(dep_values[0])

    # 0 marks a dependency that produced a single (non list/tuple) value.
    num_outputs = [
        len(dep_value) if isinstance(dep_value, (list, tuple)) else 0
        for dep_value in dep_values
    ]
    num_output = num_outputs[0]
    # Compare every count with the first count. The previous code compared
    # each count with the whole list, which is never equal, so merging
    # several dependencies always failed.
    if any(count != num_output for count in num_outputs):
        raise AssertionError(
            f"Cannot merge the dependencies since they have different number of outputs, num_outputs={num_outputs}")

    if num_output == 0:
        # Case 3, every dependency generates only a single value, just concat them normally
        concat_value = tf.keras.layers.concatenate(dep_values, axis=-1, name="Concat")
    else:
        # Case 4, every dependency generates multiple values, so concat them
        # slot by slot. Per the original author's note the values reach the
        # Lambda already flattened, hence the stride of num_output.
        concat_value = tf.keras.layers.Lambda(
            lambda values: tuple([
                tf.concat(values[i::num_output], axis=-1)
                for i in range(num_output)
            ]),
            name="Concat")(dep_values)

    return self._layer(concat_value)
def loss():
    """Accumulate the loss of the initial step and of every unrolled step.

    Reads ``batch`` and ``network`` (plus ``loss_value``, ``MSE`` and
    ``scale_gradient``) from the enclosing scope.

    Returns:
        The value, reward and policy loss terms summed over all steps, with
        each unrolled step's contribution passed through ``scale_gradient``.
    """
    (image_batch, targets_init_batch, targets_time_batch, actions_time_batch,
     mask_time_batch, dynamic_mask_time_batch) = batch
    total_loss = 0

    # Initial step, computed from the observations.
    hidden_state, value_pred, policy_logits = network.initial_model(np.array(image_batch))

    # Keep only the rows whose policy target is non-empty.
    init_values, _, init_policies = zip(*targets_init_batch)
    has_policy = [bool(policy) for policy in init_policies]
    init_policies = [policy for policy in init_policies if policy]
    policy_logits = tf.boolean_mask(policy_logits, has_policy)

    total_loss += tf.math.reduce_mean(
        loss_value(init_values, value_pred, network.value_support_size))
    total_loss += tf.math.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=policy_logits, labels=init_policies))

    unroll = zip(actions_time_batch, targets_time_batch,
                 mask_time_batch, dynamic_mask_time_batch)
    for step_actions, step_targets, step_mask, step_dynamic_mask in unroll:
        step_values, step_rewards, step_policies = zip(*step_targets)

        # Drop the rows that the masks exclude at this step.
        hidden_state = tf.boolean_mask(hidden_state, step_dynamic_mask)
        step_values = tf.boolean_mask(step_values, step_mask)
        step_rewards = tf.boolean_mask(step_rewards, step_mask)

        # Condition the hidden state on the one-hot encoded actions.
        action_one_hot = tf.one_hot(step_actions, network.action_size)
        recurrent_input = tf.concat((hidden_state, action_one_hot), axis=1)
        hidden_state, reward_pred, value_pred, policy_logits = network.recurrent_model(
            recurrent_input)

        # Apply the step mask first, then keep only non-empty policy targets.
        step_policies = [policy for policy, keep in zip(step_policies, step_mask) if keep]
        has_policy = [bool(policy) for policy in step_policies]
        step_policies = tf.convert_to_tensor(
            [policy for policy in step_policies if policy])
        policy_logits = tf.boolean_mask(policy_logits, has_policy)

        value_term = tf.math.reduce_mean(
            loss_value(step_values, value_pred, network.value_support_size))
        reward_term = MSE(step_rewards, tf.squeeze(reward_pred))
        policy_term = tf.math.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=policy_logits, labels=step_policies))
        step_loss = value_term + reward_term + policy_term

        # Scale this step's loss by one over the number of unrolled steps.
        gradient_scale = 1. / len(actions_time_batch)
        total_loss += scale_gradient(step_loss, gradient_scale)

        # Scale the hidden state by 0.5 before the next step.
        hidden_state = scale_gradient(hidden_state, 0.5)

    return total_loss
def stft_analysis(_input, window, N, H):
    """
    Analysis of a sound using the short-time Fourier transform

    Inputs:
        _input: tensor of shape [batch_size, audio_samples]
        window: analysis window, tensor of shape [N]
        N: FFT size, Integer
        H: hop size, Integer

    Returns:
        magnitudes, phases: 3D tensor with magnitude and phase spectra of
            shape [batch_size, coefficients, frames]

    Raises:
        ValueError: if H is not positive or N is not a power of 2
    """
    if (H <= 0):
        raise ValueError("Hop size (H) smaller or equal to 0")
    if not(is_power2(N)):
        raise ValueError("FFT size is not a power of 2")

    pad_size = int(N / 2)

    with tf.name_scope('STFT_Zero_padding'):
        # Pad N/2 zeros on both sides of the sample axis. tf.pad always adds
        # the full pad_size and keeps the input dtype. Slicing a zeros tensor
        # of the input's shape under-padded signals shorter than N/2 and was
        # always float32.
        _input = tf.pad(_input, [[0, 0], [pad_size, pad_size]])

    with tf.name_scope('overlapping_slicer'):
        sliced_input = overlapping_slicer_3D(_input, N, H)
        # The frame count must be statically known; it is used in the
        # reshapes below.
        _, frames, _ = sliced_input.get_shape()

    with tf.name_scope('DFT_analysis'):
        # Fold the frames into the leading axis so each row is one frame of N samples.
        reshaped_sliced_input = tf.reshape(sliced_input, (-1, N))
        m, p = dft_analysis(reshaped_sliced_input, window, N)

    with tf.name_scope('STFT_output_reshape'):
        # NOTE(review): m and p have one row per frame, and this reshape goes
        # straight to (-1, coefficients, frames) without a transpose. Confirm
        # the resulting element order is what the consumers expect.
        magnitudes = tf.reshape(m, (-1, int(m.get_shape()[-1]), int(frames)))
        phases = tf.reshape(p, (-1, int(p.get_shape()[-1]), int(frames)))

    return magnitudes, phases
def loss():
    """Accumulate the loss of the initial step and of every unrolled step.

    Reads ``batch`` and ``network`` (plus ``loss_value``, ``MSE`` and
    ``scale_gradient``) from the enclosing scope. Actions are turned into
    planes, zero-padded and concatenated to the hidden state on axis 3.

    Returns:
        The value, reward and policy loss terms summed over all steps, with
        each unrolled step's contribution passed through ``scale_gradient``.
    """
    (image_batch, targets_init_batch, targets_time_batch, actions_time_batch,
     mask_time_batch, dynamic_mask_time_batch) = batch
    total_loss = 0

    # Initial step, from the real observation: representation + prediction networks
    hidden_state, value_pred, policy_logits = network.initial_model(np.array(image_batch))

    # Only the elements that carry a policy target take part in the policy loss
    init_values, _, init_policies = zip(*targets_init_batch)
    has_policy = [bool(policy) for policy in init_policies]
    init_policies = [policy for policy in init_policies if policy]
    policy_logits = boolean_mask(policy_logits, has_policy)

    # Loss of the first pass
    total_loss += reduce_mean(
        loss_value(init_values, value_pred, network.value_support_size))
    total_loss += reduce_mean(
        softmax_cross_entropy_with_logits(logits=policy_logits, labels=init_policies))

    # Recurrent steps, from action and previous hidden state.
    unroll = zip(actions_time_batch, targets_time_batch,
                 mask_time_batch, dynamic_mask_time_batch)
    for step_actions, step_targets, step_mask, step_dynamic_mask in unroll:
        step_values, step_rewards, step_policies = zip(*step_targets)

        # Only the elements with an action are unrolled further
        hidden_state = boolean_mask(hidden_state, step_dynamic_mask)
        step_values = boolean_mask(step_values, step_mask)
        step_rewards = boolean_mask(step_rewards, step_mask)

        # Build the action planes that get concatenated to the hidden state
        action_planes = one_hot(step_actions, network.action_size)
        # TODO: make this reshape dynamic
        action_planes = reshape(action_planes, (action_planes.shape[0], 6, 3, 1))
        # Zero-pad axes 1 and 2 at the end up to the hidden state's size
        # (never a negative amount).
        extra_rows = max(0, hidden_state.shape[1] - action_planes.shape[1])
        extra_cols = max(0, hidden_state.shape[2] - action_planes.shape[2])
        paddings = constant([[0, 0], [0, extra_rows], [0, extra_cols], [0, 0]])
        action_planes = pad(action_planes, paddings, "CONSTANT")

        # Recurrent step from conditioned representation: recurrent + prediction networks
        recurrent_input = concat((hidden_state, action_planes), axis=3)
        hidden_state, reward_pred, value_pred, policy_logits = network.recurrent_model(
            recurrent_input)

        # Apply the step mask first, then keep only non-empty policy targets
        step_policies = [policy for policy, keep in zip(step_policies, step_mask) if keep]
        has_policy = [bool(policy) for policy in step_policies]
        step_policies = convert_to_tensor(
            [policy for policy in step_policies if policy])
        policy_logits = boolean_mask(policy_logits, has_policy)

        # Partial loss of this step
        value_term = reduce_mean(
            loss_value(step_values, value_pred, network.value_support_size))
        reward_term = MSE(step_rewards, squeeze(reward_pred))
        policy_term = reduce_mean(
            softmax_cross_entropy_with_logits(logits=policy_logits, labels=step_policies))
        step_loss = value_term + reward_term + policy_term

        # Scale the gradient of the loss by the average number of actions unrolled
        gradient_scale = 1. / len(actions_time_batch)
        total_loss += scale_gradient(step_loss, gradient_scale)

        # Half the gradient of the representation
        hidden_state = scale_gradient(hidden_state, 0.5)

    return total_loss
def net_from_config(model_conf, data_conf):
    """
    Generate a keras network from configuration dict

    :param model_conf: The global model configuration dictionary. Reads
        ``model_conf["net"]`` (with its ``"structure"`` and ``"layers"``
        entries) and ``model_conf["dataset"]["train_transforms"]``.
    :param data_conf: The configuration of the dataset, it might be used to
        initialize some layer like "output-classification". ``feature_size``
        and ``point_count`` are read from it.
    :return: A keras net whose output is the concatenation of the semantic
        and the instance outputs along the last axis (semantic first)
    :raises AssertionError: if the feature size cannot be determined or the
        network structure is not "sequence"
    """
    # Get network conf
    net_conf = model_conf["net"]
    transform_confs = model_conf["dataset"].get("train_transforms", [])

    # Get the shape of the dataset: first check whether we have a clip-feature
    # transform in the dataset, if not, use the feature size from the dataset
    # configuration. The list is walked back to front without stopping, so
    # the earliest matching transform determines the final value.
    feature_size = None
    for transform_conf in transform_confs[::-1]:
        if isinstance(transform_conf, dict) and transform_conf.get("name") == "clip-feature":
            feature_size = transform_conf["c"]
            logger.log("Get feature_size={} from model configuration".format(
                feature_size))
    if feature_size is None:
        feature_size = data_conf.get("feature_size")
        logger.log("Get feature_size={} from dataset configuration".format(
            feature_size))
    if feature_size is None:
        # Raised explicitly so the check still fires when asserts are disabled.
        raise AssertionError("Cannot determine the feature_size")

    # Get the point size, if possible. A "sampling" transform makes the point
    # count unknown.
    point_count = data_conf.get("point_count")
    for transform_conf in transform_confs[::-1]:
        if isinstance(transform_conf, dict) and transform_conf.get("name") == "sampling":
            point_count = None
            logger.log(
                "Ignore point_count since we have transform sampling from dataset"
            )

    # Extend feature layer
    if "extend_feature" in net_conf:
        logger.log(
            "\"extend_feature\" is deprecated, use \"input-feature-extend\" layer instead",
            color="yellow")

    inputs = tf.keras.Input(shape=(point_count, feature_size))

    if net_conf["structure"] != "sequence":
        raise AssertionError("\"{}\" is currently not supported".format(
            net_conf["structure"]))

    def build_layer(idx):
        # Log and instantiate the layer described by the idx-th layer config.
        layer_conf = net_conf["layers"][idx]
        logger.log(f"In constructing: {layer_conf}")
        return layer_from_config(layer_conf, model_conf, data_conf)

    # Every entry is a [xyz, features] pair; entry 0 splits the input after
    # its first three channels.
    xyz_points_list = [[inputs[..., :3], inputs[..., 3:]]]

    # process SA layers
    for idx in range(4):
        layer = build_layer(idx)
        output = layer(xyz_points_list[-1][0], xyz_points_list[-1][1])
        xyz_points_list.append([output[0], output[1]])

    # process FP layers of the semantic branch, walking the levels back from
    # the deepest pair (3, 4) to the shallowest pair (0, 1)
    sem_list = [xyz_points_list[-1][1]]
    for idx in range(4, 8):
        layer = build_layer(idx)
        output = layer(xyz_points_list[7 - idx][0],
                       xyz_points_list[8 - idx][0],
                       xyz_points_list[7 - idx][1], sem_list[-1])
        sem_list.append(output)

    net_sem = build_layer(8)(sem_list[-1])
    net_sem_cache = build_layer(9)(sem_list[-1])

    # process FP layers of the instance branch. The offsets 13/14 make it
    # visit the same level pairs (3, 4) ... (0, 1) as the semantic branch.
    # The previous offsets 7/8 gave negative indices that paired the wrong
    # levels and ran off the 5-element list at idx == 13.
    ins_list = [xyz_points_list[-1][1]]
    for idx in range(10, 14):
        layer = build_layer(idx)
        output = layer(xyz_points_list[13 - idx][0],
                       xyz_points_list[14 - idx][0],
                       xyz_points_list[13 - idx][1], ins_list[-1])
        ins_list.append(output)

    net_ins = build_layer(14)(ins_list[-1])
    # Add the cached semantic output to the instance output
    net_ins = net_ins + net_sem_cache

    for idx in range(15, 17):
        net_ins = build_layer(idx)(net_ins)

    adj_matrix = build_layer(17)(net_ins)
    nn_idx = build_layer(18)(adj_matrix)

    # Layer 19 takes the semantic output together with nn_idx
    net_sem = build_layer(19)(net_sem, nn_idx)

    for idx in range(20, 22):
        net_sem = build_layer(idx)(net_sem)

    # concatenate two output tensors
    # semantics label first
    outputs = tf.concat([net_sem, net_ins], -1)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
def dft_analysis(_input, window, N):
    """
    Analysis of a signal using the discrete Fourier transform

    inputs:
        _input: tensor of shape [batch_size, N] (shorter rows are zero-padded
            to N; the row length must be statically known)
        window: analysis window, tensor of shape [N]
        N: FFT size

    returns:
        Tensors m, p: magnitude and phase spectrum of _input
        m of shape [batch_size, num_coefficients]
        p of shape [batch_size, num_coefficients]

    raises:
        ValueError: if N is not a power of 2, the input is longer than N, or
            the window length differs from N
    """
    if not(is_power2(N)):
        raise ValueError("FFT size is not a power of 2")

    _, input_length = _input.get_shape()
    input_length = int(input_length)

    if (input_length > N):
        raise ValueError("Input length is greater than FFT size")
    if (int(window.get_shape()[0]) != N):
        raise ValueError("Window length is different from FFT size")

    if input_length < N:
        with tf.name_scope('DFT_Zero_padding'):
            # Split the missing samples over both sides; the left side gets
            # the extra one when the difference is odd. tf.pad always adds
            # the requested amount. Slicing a zeros tensor of the input's
            # shape fell short whenever a pad was longer than the input.
            pad_left = int((N - input_length + 1) / 2)
            pad_right = int((N - input_length) / 2)
            _input = tf.pad(_input, [[0, 0], [pad_left, pad_right]])
        assert(int(_input.get_shape()[1]) == N)

    positive_spectrum_size = int(N/2) + 1

    with tf.name_scope('Windowing'):
        # Normalize the window so that it sums to one
        window_norm = tf.math.divide(window, tf.math.reduce_sum(window))
        # window the input
        windowed_input = tf.math.multiply(_input, window_norm)

    with tf.name_scope('Zero_phase_padding'):
        # zero-phase window in fftbuffer: swap the two halves of every row
        fftbuffer_left = tf.slice(windowed_input, [0, int(N/2)], [-1, -1])
        fftbuffer_right = tf.slice(windowed_input, [0, 0], [-1, int(N/2)])
        fftbuffer = tf.concat([fftbuffer_left, fftbuffer_right], axis=1)
        fft = tf.signal.rfft(fftbuffer)

    with tf.name_scope('Slice_positive_side'):
        sliced_fft = tf.slice(fft, [0, 0], [-1, positive_spectrum_size])

    with tf.name_scope('Magnitude'):
        # compute absolute value of positive side
        abs_fft = tf.abs(sliced_fft)
        # magnitude spectrum of positive frequencies in dB; the floor of 1E-06
        # keeps the logarithm finite
        magnitude = 20 * log10(tf.maximum(abs_fft, 1E-06))

    with tf.name_scope('Phase'):
        # phase of positive frequencies
        phase = angle(sliced_fft)

    return magnitude, phase