コード例 #1
0
ファイル: nets.py プロジェクト: OrangeBai/C3DLab
    def call(self, x, mask=None):
        """Crop every region of interest out of the image and resize it to a square.

        Args:
            x: Two-element sequence ``(img, rois)``. ``img`` is sliced as
                ``img[:, rows, cols, :]``; ``rois`` is read as
                ``rois[0, i, 0..3]``, the four values being used as
                x, y, width and height of region ``i``.
            mask: Not used by this method.

        Returns:
            The resized crops, concatenated and reshaped to
            ``(1, num_rois, pool_size, pool_size, nb_channels)``.
        """
        assert (len(x) == 2)

        feature_map = x[0]
        boxes = x[1]
        target_size = (self.pool_size, self.pool_size)

        def pooled_region(index):
            # Box coordinates are cast to int32 so they can be used as slice bounds.
            left = tf.cast(boxes[0, index, 0], 'int32')
            top = tf.cast(boxes[0, index, 1], 'int32')
            width = tf.cast(boxes[0, index, 2], 'int32')
            height = tf.cast(boxes[0, index, 3], 'int32')
            crop = feature_map[:, top:top + height, left:left + width, :]
            return tf.image.resize(crop, target_size)

        pooled = [pooled_region(index) for index in range(self.num_rois)]

        stacked = tf.concat(pooled, axis=0)
        return tf.reshape(
            stacked,
            (1, self.num_rois, self.pool_size, self.pool_size,
             self.nb_channels))
コード例 #2
0
    def _compute(self, dep_values):
        """Feed the outputs of this node's dependencies into ``self._layer``.

        Args:
            dep_values: Sequence with one entry per dependency. An entry is
                either a single value or a list/tuple of values.

        Returns:
            Whatever ``self._layer`` returns. With one dependency the layer is
            called on that value directly; with several, the values are
            concatenated along the last axis first.

        Raises:
            AssertionError: If there are no dependencies, or if the
                dependencies do not all produce the same number of outputs.
        """
        logger.log(f"Computing node for layer {self._layer}")
        if len(dep_values) == 0:
            # Case 1, zero dependency, this should not occur
            raise AssertionError(
                f"No dependency for computing the layer {self._layer}, consider deleting it")
        if len(dep_values) == 1:
            # Case 2, single dependency, call it directly
            return self._layer(dep_values[0])

        # 0 marks "a single value"; otherwise the number of values the dependency produced.
        num_outputs = [
            len(dep_value) if isinstance(dep_value, (list, tuple)) else 0
            for dep_value in dep_values
        ]
        num_output = num_outputs[0]
        # Each count must equal the first count. The previous check compared every
        # count against the whole list, which is never equal, so merging always failed.
        if any(count != num_output for count in num_outputs):
            raise AssertionError(
                f"Cannot merge the dependencies since they have different number of outputs, num_outputs={num_outputs}")

        if num_output == 0:
            # Case 3, every dependency generates only a single value, just concat them normally
            concat_value = tf.keras.layers.concatenate(dep_values,
                                                       axis=-1,
                                                       name="Concat")
        else:
            # Case 4, every dependency generates multiple values. We need to concat them one by one.
            # The values have been flattened before being sent into the layer, so a
            # stride of num_output picks the i-th output of every dependency.
            concat_value = tf.keras.layers.Lambda(
                lambda values: tuple([
                    tf.concat(values[i::num_output], axis=-1)
                    for i in range(num_output)
                ]),
                name="Concat")(dep_values)

        return self._layer(concat_value)
コード例 #3
0
    def loss():
        """Return the initial-step loss plus the gradient-scaled loss of every unrolled step.

        ``batch`` and ``network`` are taken from the enclosing scope.
        """
        (image_batch, targets_init_batch, targets_time_batch, actions_time_batch,
         mask_time_batch, dynamic_mask_time_batch) = batch

        # Initial inference on the observations.
        hidden_state, value_batch, policy_batch = network.initial_model(np.array(image_batch))

        init_values, _, init_policies = zip(*targets_init_batch)
        # Entries with a falsy policy target are dropped from the policy term.
        has_policy = [bool(policy) for policy in init_policies]
        init_policies = [policy for policy in init_policies if bool(policy)]
        policy_batch = tf.boolean_mask(policy_batch, has_policy)

        total = 0
        total += tf.math.reduce_mean(loss_value(init_values, value_batch, network.value_support_size))
        total += tf.math.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=policy_batch, labels=init_policies))

        unrolled_steps = zip(actions_time_batch, targets_time_batch,
                             mask_time_batch, dynamic_mask_time_batch)
        for step_actions, step_targets, step_mask, step_dynamic_mask in unrolled_steps:
            step_values, step_rewards, step_policies = zip(*step_targets)

            # Keep only the rows selected by this step's masks.
            hidden_state = tf.boolean_mask(hidden_state, step_dynamic_mask)
            step_values = tf.boolean_mask(step_values, step_mask)
            step_rewards = tf.boolean_mask(step_rewards, step_mask)
            one_hot_actions = tf.one_hot(step_actions, network.action_size)

            # Recurrent inference on the hidden state concatenated with the action.
            recurrent_input = tf.concat((hidden_state, one_hot_actions), axis=1)
            hidden_state, reward_batch, value_batch, policy_batch = network.recurrent_model(
                recurrent_input)

            kept_policies = [policy for policy, keep in zip(step_policies, step_mask) if keep]
            has_policy = [bool(policy) for policy in kept_policies]
            policy_targets = tf.convert_to_tensor([policy for policy in kept_policies if policy])
            policy_batch = tf.boolean_mask(policy_batch, has_policy)

            value_term = tf.math.reduce_mean(
                loss_value(step_values, value_batch, network.value_support_size))
            reward_term = MSE(step_rewards, tf.squeeze(reward_batch))
            policy_term = tf.math.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=policy_batch, labels=policy_targets))
            step_loss = value_term + reward_term + policy_term

            # Each step's gradient is scaled by one over the number of unrolled steps.
            total += scale_gradient(step_loss, 1. / len(actions_time_batch))

            # The hidden state carried to the next step has its gradient halved.
            hidden_state = scale_gradient(hidden_state, 0.5)

        return total
コード例 #4
0
def stft_analysis(_input, window, N, H):
    """
    Analysis of a sound using the short-time Fourier transform
    Inputs:
    _input: tensor of shape [batch_size, audio_samples]
    window: analysis window, tensor of shape [N]
    N: FFT size, Integer
    H: hop size, Integer
    Returns:
    magnitudes, phases: 3D tensor with magnitude and phase spectra of shape
    [batch_size, coefficients, frames]
    Raises:
    ValueError: if H is not positive or N is not a power of 2
    """
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")
    if not is_power2(N):
        raise ValueError("FFT size is not a power of 2")

    pad_size = int(N / 2)
    with tf.name_scope('STFT_Zero_padding'):
        # Pad N/2 zeros on both sides of the sample axis. tf.pad always adds
        # exactly pad_size samples, whereas slicing a zeros tensor shaped like
        # the input yields fewer when the input is shorter than pad_size.
        _input = tf.pad(_input, [[0, 0], [pad_size, pad_size]])

    with tf.name_scope('overlapping_slicer'):
        sliced_input = overlapping_slicer_3D(_input, N, H)
    _, frames, _ = sliced_input.get_shape()

    with tf.name_scope('DFT_analysis'):
        # Collapse batch and frame axes so every row is one frame of N samples.
        reshaped_sliced_input = tf.reshape(sliced_input, (-1, N))
        m, p = dft_analysis(reshaped_sliced_input, window, N)

    with tf.name_scope('STFT_output_reshape'):
        # NOTE(review): m and p are laid out as [batch * frames, coefficients];
        # reshaping straight to (-1, coefficients, frames) reinterprets the
        # buffer rather than transposing it. Confirm against the matching
        # synthesis code before relying on the axis meaning.
        magnitudes = tf.reshape(m, (-1, int(m.get_shape()[-1]), int(frames)))
        phases = tf.reshape(p, (-1, int(p.get_shape()[-1]), int(frames)))

    return magnitudes, phases
コード例 #5
0
ファイル: training.py プロジェクト: zpear/FastCentipede
    def loss():
        """Return the initial-step loss plus the gradient-scaled loss of every unrolled step.

        ``batch`` and ``network`` are taken from the enclosing scope.
        """
        (image_batch, targets_init_batch, targets_time_batch, actions_time_batch,
         mask_time_batch, dynamic_mask_time_batch) = batch

        # Initial step, from the real observation: representation + prediction networks
        hidden_state, value_batch, policy_batch = network.initial_model(np.array(image_batch))

        # Only the elements that carry a policy target take part in the policy term
        init_values, _, init_policies = zip(*targets_init_batch)
        has_policy = [bool(policy) for policy in init_policies]
        init_policies = [policy for policy in init_policies if bool(policy)]
        policy_batch = boolean_mask(policy_batch, has_policy)

        # Loss of the first pass
        total = 0
        total += reduce_mean(loss_value(init_values, value_batch, network.value_support_size))
        total += reduce_mean(
            softmax_cross_entropy_with_logits(logits=policy_batch, labels=init_policies))

        # Recurrent steps, from action and previous hidden state.
        unrolled_steps = zip(actions_time_batch, targets_time_batch,
                             mask_time_batch, dynamic_mask_time_batch)
        for step_actions, step_targets, step_mask, step_dynamic_mask in unrolled_steps:
            step_values, step_rewards, step_policies = zip(*step_targets)

            # Only execute BPTT for elements with an action
            hidden_state = boolean_mask(hidden_state, step_dynamic_mask)
            step_values = boolean_mask(step_values, step_mask)
            step_rewards = boolean_mask(step_rewards, step_mask)

            # Build the action planes that get concatenated to the representation
            action_planes = one_hot(step_actions, network.action_size)

            # TODO: make this reshape dynamic
            action_planes = reshape(action_planes, (action_planes.shape[0], 6, 3, 1))

            # Zero-pad axes 1 and 2 of the action planes up to the representation's size
            pad_axis1 = max(0, hidden_state.shape[1] - action_planes.shape[1])
            pad_axis2 = max(0, hidden_state.shape[2] - action_planes.shape[2])
            paddings = constant([[0, 0],
                                 [0, pad_axis1],
                                 [0, pad_axis2],
                                 [0, 0]])
            action_planes = pad(action_planes, paddings, "CONSTANT")

            # Recurrent step from conditioned representation: recurrent + prediction networks
            recurrent_input = concat((hidden_state, action_planes), axis=3)
            hidden_state, reward_batch, value_batch, policy_batch = network.recurrent_model(
                recurrent_input)

            # Only execute BPTT for elements with a policy target
            kept_policies = [policy for policy, keep in zip(step_policies, step_mask) if keep]
            has_policy = [bool(policy) for policy in kept_policies]
            policy_targets = convert_to_tensor([policy for policy in kept_policies if policy])
            policy_batch = boolean_mask(policy_batch, has_policy)

            # Partial loss of this step
            value_term = reduce_mean(
                loss_value(step_values, value_batch, network.value_support_size))
            reward_term = MSE(step_rewards, squeeze(reward_batch))
            policy_term = reduce_mean(
                softmax_cross_entropy_with_logits(logits=policy_batch, labels=policy_targets))
            step_loss = value_term + reward_term + policy_term

            # Scale the gradient of the loss by one over the number of unrolled steps
            total += scale_gradient(step_loss, 1. / len(actions_time_batch))

            # Halve the gradient of the representation
            hidden_state = scale_gradient(hidden_state, 0.5)

        return total
コード例 #6
0
def net_from_config(model_conf, data_conf):
    """
    Generate a keras network from configuration dict
    :param model_conf: The global model configuration dictionary; its "net" and "dataset" entries are read
    :param data_conf: The configuration of the dataset, it might be used to initialize some layers like
    "output-classification"; "feature_size" and "point_count" are read from it
    :return: A keras net whose output is the semantic tensor concatenated with the instance tensor
    :raises AssertionError: If the feature size cannot be determined or the net structure is not "sequence"
    """
    # Get network conf
    net_conf = model_conf["net"]

    # Input layer
    transform_confs = model_conf["dataset"].get("train_transforms", [])
    # Get the shape of the dataset, first check whether we have clip-feature layer in the dataset, if not, we
    # use the feature size in the dataset configuration
    feature_size = None
    for transform_conf in transform_confs[::-1]:
        if type(transform_conf) is dict and transform_conf.get(
                "name") == "clip-feature":
            feature_size = transform_conf["c"]
            logger.log("Get feature_size={} from model configuration".format(
                feature_size))
    if feature_size is None:
        feature_size = data_conf.get("feature_size")
        logger.log("Get feature_size={} from dataset configuration".format(
            feature_size))
    assert feature_size is not None, "Cannot determine the feature_size"
    # Get the point size, if possible
    point_count = data_conf.get("point_count")
    for transform_conf in transform_confs[::-1]:
        if type(transform_conf) is dict and transform_conf.get(
                "name") == "sampling":
            point_count = None
            logger.log(
                "Ignore point_count since we have transform sampling from dataset"
            )

    # Extend feature layer
    if "extend_feature" in net_conf:
        logger.log(
            "\"extend_feature\" is deprecated, use \"input-feature-extend\" layer instead",
            color="yellow")

    inputs = tf.keras.Input(shape=(point_count, feature_size))
    if net_conf["structure"] != "sequence":
        # Raised explicitly so the check is not stripped when running with -O.
        raise AssertionError("\"{}\" is currently not supported".format(
            net_conf["structure"]))

    def build_layer(idx):
        # Log and instantiate the layer described by the idx-th entry of the net configuration.
        layer_conf = net_conf["layers"][idx]
        logger.log(f"In constructing: {layer_conf}")
        return layer_from_config(layer_conf, model_conf, data_conf)

    # Level 0 is the raw input split into the first three channels and the rest.
    xyz_points_list = [[inputs[..., :3], inputs[..., 3:]]]

    # process SA layers (layers 0-3), each one appends a new level
    for idx in range(4):
        layer = build_layer(idx)
        output = layer(xyz_points_list[-1][0], xyz_points_list[-1][1])
        xyz_points_list.append([output[0], output[1]])

    def run_fp_branch(first_idx):
        # Apply four consecutive FP layers starting at first_idx, walking the levels
        # from the deepest pair (3, 4) back to (0, 1).
        features = xyz_points_list[-1][1]
        for step, level in enumerate(range(3, -1, -1)):
            layer = build_layer(first_idx + step)
            features = layer(xyz_points_list[level][0],
                             xyz_points_list[level + 1][0],
                             xyz_points_list[level][1], features)
        return features

    # semantic branch: FP layers 4-7, then layers 8 and 9 on the same features
    sem_features = run_fp_branch(4)
    net_sem = build_layer(8)(sem_features)
    net_sem_cache = build_layer(9)(sem_features)

    # instance branch: FP layers 10-13. The levels used here mirror the semantic
    # branch; the previous `7 - idx` / `8 - idx` arithmetic gave negative indices
    # for idx >= 10 and ran off the five-element level list at idx == 13.
    ins_features = run_fp_branch(10)
    net_ins = build_layer(14)(ins_features)

    net_ins = net_ins + net_sem_cache

    for idx in range(15, 17):
        net_ins = build_layer(idx)(net_ins)

    adj_matrix = build_layer(17)(net_ins)
    nn_idx = build_layer(18)(adj_matrix)
    net_sem = build_layer(19)(net_sem, nn_idx)

    for idx in range(20, 22):
        net_sem = build_layer(idx)(net_sem)

    # concatenate two output tensors
    # semantics label first
    outputs = tf.concat([net_sem, net_ins], -1)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
コード例 #7
0
def dft_analysis(_input, window, N):
    """
    Analysis of a signal using the discrete Fourier transform
    inputs:
    _input: tensor of shape [batch_size, input_length] with input_length <= N;
            shorter inputs are zero-padded on both sides up to N
    window: analysis window, tensor of shape [N]
    N: FFT size
    returns:
    Tensors m, p: magnitude and phase spectrum of _input
    m of shape [batch_size, num_coefficients]
    p of shape [batch_size, num_coefficients]
    raises:
    ValueError: if N is not a power of 2, the input is longer than N, or the
                window length differs from N
    """

    if not is_power2(N):
        raise ValueError("FFT size is not a power of 2")

    _, input_length = _input.get_shape()
    input_length = int(input_length)

    if input_length > N:
        raise ValueError("Input length is greater than FFT size")

    if int(window.get_shape()[0]) != N:
        raise ValueError("Window length is different from FFT size")

    if input_length < N:
        with tf.name_scope('DFT_Zero_padding'):
            # Centre the signal in a length-N buffer; the left side gets the
            # extra sample when the gap is odd. tf.pad adds exactly the
            # requested amount, whereas slicing a zeros tensor shaped like
            # the input cannot supply more columns than the input has.
            missing = N - input_length
            pad_left = int((missing + 1) // 2)
            pad_right = int(missing // 2)
            _input = tf.pad(_input, [[0, 0], [pad_left, pad_right]])
            assert(int(_input.get_shape()[1]) == N)

    half = int(N / 2)
    positive_spectrum_size = half + 1
    with tf.name_scope('Windowing'):
        # normalize the window so that its samples sum to one
        window_norm = tf.math.divide(window, tf.math.reduce_sum(window))
        # window the input
        windowed_input = tf.math.multiply(_input, window_norm)

    with tf.name_scope('Zero_phase_padding'):
        # zero-phase window in fftbuffer: swap the two halves of every row
        fftbuffer_left = tf.slice(windowed_input, [0, half], [-1, -1])
        fftbuffer_right = tf.slice(windowed_input, [0, 0], [-1, half])
        fftbuffer = tf.concat([fftbuffer_left, fftbuffer_right], axis=1)
        fft = tf.signal.rfft(fftbuffer)

    with tf.name_scope('Slice_positive_side'):
        sliced_fft = tf.slice(fft, [0, 0], [-1, positive_spectrum_size])

    with tf.name_scope('Magnitude'):
        # compute absolute value of positive side
        abs_fft = tf.abs(sliced_fft)

        # magnitude spectrum of positive frequencies in dB, floored at 1E-06
        # before the logarithm
        magnitude = 20 * log10(tf.maximum(abs_fft, 1E-06))

    with tf.name_scope('Phase'):
        # phase of positive frequencies
        phase = angle(sliced_fft)

    return magnitude, phase