Ejemplo n.º 1
0
def epoch_predict(env, args, model, loader):
    """Run ``model`` over every batch of ``loader`` and collect predictions.

    Args:
        env: object exposing ``REL.vocab``, used to turn relation ids into
            labels.
        args: options object; ``pad_index`` and ``prob`` are read here and
            the whole object is forwarded to ``decode``.
        model: network called as ``model(words, feats)``; it is switched to
            eval mode first.
        loader: callable returning an iterable of ``(words, feats)`` batches.

    Returns:
        ``(arcs, rels, probs)``: one entry per sentence with the predicted
        arcs, the relation labels looked up in ``env.REL.vocab`` and, only
        when ``args.prob`` is truthy, the arc probabilities rounded to three
        decimals (otherwise ``probs`` stays empty).
    """
    model.eval()

    arc_chunks, rel_chunks, prob_chunks = [], [], []
    for words, feats in loader():
        # ignore the first token of each sentence
        shifted = layers.pad(words[:, 1:],
                             paddings=[0, 0, 1, 0],
                             pad_value=args.pad_index)
        mask = shifted != args.pad_index
        lens = nn.reduce_sum(mask, -1)
        s_arc, s_rel = model(words, feats)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)

        # Per-sentence token counts, used to cut the flattened predictions
        # back into one tensor per sentence.
        sizes = lens.numpy().tolist()
        flat_arcs = nn.masked_select(arc_preds, mask)
        arc_chunks.extend(layers.split(flat_arcs, sizes))
        flat_rels = nn.masked_select(rel_preds, mask)
        rel_chunks.extend(layers.split(flat_rels, sizes))

        if args.prob:
            # Probability assigned to the predicted arc of every token.
            picked = nn.index_sample(layers.softmax(s_arc, -1),
                                     layers.unsqueeze(arc_preds, -1))
            flat_probs = nn.masked_select(
                layers.squeeze(picked, axes=[-1]), mask)
            prob_chunks.extend(layers.split(flat_probs, sizes))

    arcs = [chunk.numpy().tolist() for chunk in arc_chunks]
    rels = [env.REL.vocab[chunk.numpy().tolist()] for chunk in rel_chunks]
    probs = [[round(value, 3) for value in chunk.numpy().tolist()]
             for chunk in prob_chunks]

    return arcs, rels, probs
Ejemplo n.º 2
0
    def forward(self, input, pre_encode_hidden):
        """Compute one GRU-style step conditioned on an extra encoder state.

        Args:
            input: step input, concatenated with the hidden parts on axis 1.
            pre_encode_hidden: previous hidden state and encoder hidden state
                concatenated on axis 1; it is split into widths
                ``self._hiden_size`` and ``self._encode_hiden_size``.

        Returns:
            The new hidden state ``u * pre_hidden + (1 - u) * c``.
        """
        prev_h, enc_h = layers.split(
            pre_encode_hidden,
            num_or_sections=[self._hiden_size, self._encode_hiden_size],
            dim=1)

        # Reset and update gates come from a single affine transform.
        gates = layers.matmul(x=layers.concat([input, prev_h, enc_h], 1),
                              y=self._gate_weight)
        gates = self._gate_activation(
            layers.elementwise_add(gates, self._gate_bias))
        reset, update = layers.split(gates, num_or_sections=2, dim=1)

        # Candidate state sees the reset-scaled previous hidden state.
        cand_in = layers.concat([input, reset * prev_h, enc_h], 1)
        cand = layers.matmul(cand_in, self._candidate_weight)
        cand = self._activation(
            layers.elementwise_add(cand, self._candidate_bias))

        return update * prev_h + (1 - update) * cand
Ejemplo n.º 3
0
    def forward(self, x, y, **kargs):
        """
        Adaptive Normalization forward.

        Args:
            x (N x C1 x *): input,
            y (N x C2): Conditional information.
        Returns:
            out (N x c1 x *): output, ``norm(x) * (1 + gamma) + beta`` where
                gamma and beta are derived from ``y`` (``self.norm`` is
                skipped when it is None).
        """
        # Number of trailing singleton axes needed so that the affine
        # parameters broadcast against x.
        residual_dim = len(x.shape) - len(y.shape)
        if self.projection:
            if self.separate_projection:
                gamma = self.fc_gamma(y)
                beta = self.fc_beta(y)
                for _ in range(residual_dim):
                    gamma = L.unsqueeze(gamma, -1)
                    beta = L.unsqueeze(beta, -1)
            else:
                # The shared projection must consume the conditional input y
                # (like fc_gamma / fc_beta above), not the feature map x.
                y = self.fc(y)
                for _ in range(residual_dim):
                    y = L.unsqueeze(y, -1)
                gamma, beta = L.split(y, num_or_sections=2, dim=1)
        else:
            # No projection: y itself carries gamma and beta stacked on axis 1.
            for _ in range(residual_dim):
                y = L.unsqueeze(y, -1)
            gamma, beta = L.split(y, 2, 1)

        x = self.norm(x) if self.norm is not None else x
        out = x * (1 + gamma) + beta
        return out
Ejemplo n.º 4
0
    def seq2seq_api_rnn(input_embedding,
                        len=3,
                        init_hiddens=None,
                        init_cells=None):
        """Encode ``input_embedding`` with a stacked LSTM run by ``layers.rnn``.

        ``num_layers``, ``hidden_size``, ``dropout`` and ``init_scale`` are
        taken from the enclosing scope.

        Args:
            input_embedding: batch-major input sequence (``time_major=False``).
            len: not used by this function.
            init_hiddens: initial hidden states, split into ``num_layers``
                parts along axis 0.
            init_cells: initial cell states, split the same way.

        Returns:
            ``(output, last_hidden, last_cell)`` where the last two stack the
            per-layer final states along axis 0.
        """

        class EncoderCell(layers.RNNCell):
            """Stack of LSTM cells with optional dropout after each layer."""

            def __init__(self,
                         num_layers,
                         hidden_size,
                         dropout_prob=0.,
                         forget_bias=0.):
                self.num_layers = num_layers
                self.hidden_size = hidden_size
                self.dropout_prob = dropout_prob
                self.lstm_cells = [
                    layers.LSTMCell(
                        hidden_size,
                        forget_bias=forget_bias,
                        param_attr=fluid.ParamAttr(
                            initializer=fluid.initializer.UniformInitializer(
                                low=-init_scale, high=init_scale)))
                    for _ in range(num_layers)
                ]

            def call(self, step_input, states):
                layer_input = step_input
                next_states = []
                for idx in range(self.num_layers):
                    layer_out, layer_state = self.lstm_cells[idx](
                        layer_input, states[idx])
                    if self.dropout_prob > 0:
                        layer_input = layers.dropout(
                            layer_out,
                            self.dropout_prob,
                            dropout_implementation='upscale_in_train')
                    else:
                        layer_input = layer_out
                    next_states.append(layer_state)
                return layer_input, next_states

        encoder_cell = EncoderCell(num_layers, hidden_size, dropout)

        # One [hidden, cell] pair per layer, each reshaped to
        # [-1, hidden_size].
        hidden_parts = [
            layers.reshape(part, shape=[-1, hidden_size])
            for part in layers.split(
                init_hiddens, num_or_sections=num_layers, dim=0)
        ]
        cell_parts = [
            layers.reshape(part, shape=[-1, hidden_size])
            for part in layers.split(
                init_cells, num_or_sections=num_layers, dim=0)
        ]
        initial_states = [[h, c] for h, c in zip(hidden_parts, cell_parts)]

        output, new_states = layers.rnn(
            encoder_cell,
            inputs=input_embedding,
            initial_states=initial_states,
            time_major=False)
        last_hidden = layers.stack([h for h, _ in new_states], 0)
        last_cell = layers.stack([c for _, c in new_states], 0)
        return output, last_hidden, last_cell
Ejemplo n.º 5
0
    def forward(self, z, condition=None):
        """Push noise through every flow and track the composed output distribution.

        Args:
            z (Variable): shape(B, T), random noise sampled from a standard gaussian distribution.
            condition (Variable, optional): shape(B, F, T), dtype float, the upsampled condition. Defaults to None.

        Returns:
            (z, out_mu, out_log_std)
            z (Variable): shape(B, T), dtype float, transformed noise, it is the synthesized waveform.
            out_mu (Variable): shape(B, T), dtype float, means of the output distributions.
            out_log_std (Variable): shape(B, T), dtype float, log standard deviations of the output distributions.
        """
        for flow_idx, flow in enumerate(self.flows):
            theta = flow(z, condition)  # w, mu, log_std [0: T]
            # The first third of theta is not needed here.
            _, mu, log_std = F.split(theta, 3, dim=-1)  # (B, T, 1) for each
            mu = F.squeeze(mu, [-1])  # [0: T]
            log_std = F.squeeze(log_std, [-1])  # [0: T]
            z = z * F.exp(log_std) + mu  # [0: T]

            if flow_idx > 0:
                # Compose this affine transform with the accumulated one.
                out_mu = out_mu * F.exp(log_std) + mu
                out_log_std += log_std
            else:
                out_mu = mu
                out_log_std = log_std

        return z, out_mu, out_log_std
Ejemplo n.º 6
0
    def add_input(self, x_t, speaker_embed=None):
        """
        Process a single time step; the step-in-step-out counterpart of `forward`.

        Args:
            x_t (Variable): shape(B, C_in, T=1), dtype float32, the input of Conv1DGLU layer, where B means batch_size, C_in means the input channels.
            speaker_embed (Variable): Shape(B, C_sp), dtype float32, speaker embed, where C_sp means speaker embedding size.

        Returns:
            x (Variable): shape(B, C_out), the output of Conv1DGLU, where C_out means the `num_filter`.
        """
        skip = x_t
        dropped = F.dropout(x_t,
                            self.dropout,
                            dropout_implementation="upscale_in_train")
        step = self.conv.add_input(dropped)
        content_t, gate_t = F.split(step, num_or_sections=2, dim=1)

        if speaker_embed is not None:
            # The speaker bias is added to the content half only.
            speaker_bias = F.softsign(self.fc(speaker_embed))
            content_t = F.elementwise_add(content_t, speaker_bias, axis=0)

        # glu
        gated = F.sigmoid(gate_t) * content_t

        if not self.residual:
            return gated
        return F.scale(gated + skip, np.sqrt(0.5))
Ejemplo n.º 7
0
    def forward(self, x, condition=None):
        """Apply the dilated convolution followed by a gated-tanh unit.

        Args:
            x (Variable): shape(B, C_res, T), the input. (B stands for batch_size, C_res stands for residual channels, T stands for time steps.) dtype float32.
            condition (Variable, optional): shape(B, C_cond, T), the condition, it has been upsampled in time steps, so it has the same time steps as the input does.(C_cond stands for the condition's channels). Defaults to None.

        Returns:
            (residual, skip_connection)
            residual (Variable): shape(B, C_res, T), the residual, which is used as the input to the next layer of ResidualBlock.
            skip_connection (Variable): shape(B, C_res, T), the skip connection. This output is accumulated with that of other ResidualBlocks.
        """
        time_steps = x.shape[-1]

        # dilated conv; trim the output if it is longer than the input
        h = self.conv(x)
        if h.shape[-1] != time_steps:
            h = h[:, :, :time_steps]

        # condition
        if condition is not None:
            h += self.condition_proj(condition)

        # gated tanh
        content, gate = F.split(h, 2, dim=1)
        gated = F.sigmoid(gate) * F.tanh(content)

        # The residual path is rescaled by sqrt(0.5); the skip path is the
        # raw gated output.
        return F.scale(gated + x, math.sqrt(.5)), gated
Ejemplo n.º 8
0
    def forward(self, x, speaker_embed=None):
        """
        Dropout, convolution and gated linear unit, with an optional residual.

        Args:
            x (Variable): shape(B, C_in, T), dtype float32, the input of Conv1DGLU layer, where B means batch_size, C_in means the input channels T means input time steps.
            speaker_embed (Variable): shape(B, C_sp), dtype float32, speaker embed, where C_sp means speaker embedding size.

        Returns:
            x (Variable): shape(B, C_out, T), the output of Conv1DGLU, where
                C_out means the `num_filters`.
        """
        skip = x
        dropped = F.dropout(x,
                            self.dropout,
                            dropout_implementation="upscale_in_train")
        conv_out = self.conv(dropped)
        content, gate = F.split(conv_out, num_or_sections=2, dim=1)

        if speaker_embed is not None:
            # The speaker bias is added to the content half only.
            speaker_bias = F.softsign(self.fc(speaker_embed))
            content = F.elementwise_add(content, speaker_bias, axis=0)

        # glu
        gated = F.sigmoid(gate) * content

        if not self.residual:
            return gated
        return F.scale(gated + skip, np.sqrt(0.5))
Ejemplo n.º 9
0
    def forward(self, input, bias=None, padding=None):
        """
        Dropout, convolution, gated linear unit and a scaled residual.

        input: input feature (B, T, C)
        bias: required when ``self.has_bias`` is set; it goes through
            ``self.bias_affine`` and softsign and is added to the conv output.
        padding: only used when using causal conv, we pad manually
        """
        dropped = F.dropout(input,
                            1. - self.keep_prob,
                            dropout_implementation="upscale_in_train")
        if self.causal:
            assert padding is not None
            dropped = F.concat([padding, dropped], axis=1)
        hidden = self.conv(dropped)

        if self.has_bias:
            assert bias is not None
            bias_term = F.softsign(self.bias_affine(bias))
            biased = hidden + F.unsqueeze(bias_term, [1])
        else:
            biased = hidden

        # glu: the gate comes from the un-biased conv output, the content
        # from the first ``in_channel`` channels of the biased one.
        _, gate = F.split(hidden, num_or_sections=2, dim=-1)
        content = biased[:, :, :self.in_channel]
        gated = F.sigmoid(gate) * content

        # residual
        return F.scale(input + gated, math.sqrt(0.5))
Ejemplo n.º 10
0
    def add_input(self, x, condition=None):
        """Process one time step; the step-in-step-out counterpart of `forward`.

        Args:
            x (Variable): shape(B, C_res, T=1), input for a step, dtype float32.
            condition (Variable, optional): shape(B, C_cond, T=1). condition for a step, dtype float32. Defaults to None.

        Returns:
            (residual, skip_connection)
            residual (Variable): shape(B, C_res, T=1), the residual for a step, which is used as the input to the next layer of ResidualBlock.
            skip_connection (Variable): shape(B, C_res, T=1), the skip connection for a step. This output is accumulated with that of other ResidualBlocks.
        """
        # dilated conv
        h = self.conv.add_input(x)

        # condition
        if condition is not None:
            h += self.condition_proj(condition)

        # gated tanh
        content, gate = F.split(h, 2, dim=1)
        gated = F.sigmoid(gate) * F.tanh(content)

        # The residual path is rescaled by sqrt(0.5); the skip path is the
        # raw gated output.
        return F.scale(gated + x, np.sqrt(0.5)), gated
Ejemplo n.º 11
0
    def sample_from_mog(self, y):
        """Sample from the output distribution where the output distribution is a mixture of Gaussians.
        Args:
            y (Variable): shape(B, T, C_output), dtype float32, the parameters of the output distribution. It is the concatenation of 3 parts, the logits of every distribution, the mean of each distribution and the log standard deviation of each distribution. Each part's shape is (B, T, n_mixture), where `n_mixture` means the number of Gaussians in the mixture.

        Returns:
            Variable: shape(B, T), waveform sampled from the output distribution, clipped to [-1, 1].
        """
        batch_size, time_steps, output_dim = y.shape
        n_mixture = output_dim // 3

        w, mu, log_std = F.split(y, 3, dim=-1)

        # Draw one mixture component per (batch, time) position.
        reshaped_w = F.reshape(w, (batch_size * time_steps, n_mixture))
        prob_ids = F.sampling_id(F.softmax(reshaped_w))
        prob_ids = F.reshape(prob_ids, (batch_size, time_steps)).numpy()

        # Build the (b, t, component) gather indices in one vectorized step
        # instead of a Python double loop over every position.
        batch_idx, time_idx = np.indices((batch_size, time_steps))
        index = np.stack([batch_idx, time_idx, prob_ids],
                         axis=-1).astype("int32")
        index_var = dg.to_variable(index)

        mu_ = F.gather_nd(mu, index_var)
        log_std_ = F.gather_nd(log_std, index_var)

        dist = D.Normal(mu_, F.exp(log_std_))
        samples = dist.sample(shape=[])
        samples = F.clip(samples, min=-1., max=1.)
        return samples
Ejemplo n.º 12
0
    def forward(self, x, *cond_inputs, norm_weights=(None, None), **kwargs):
        """
        Spatially Adaptive Normalization (SPADE) forward.

        Args:
            x: input feature map; ``x.shape[2:]`` is the size every
                conditional input is resized to.
            *cond_inputs: conditional inputs. Each entry is None (skipped),
                a tensor, or a list ``[cond_input, mask]``.
            norm_weights: when its first element is not None it is passed as
                ``conv_weights`` to the first MLP only.

        Returns:
            ``self.norm(x)`` modulated in turn by each conditional input as
            ``output * (1 + gamma) + beta``.
        """
        output = self.norm(x)
        spatial_size = x.shape[2:]
        for i, cond in enumerate(cond_inputs):
            if cond is None:
                continue

            if isinstance(cond, list):
                cond_input, mask = cond
                # image_resize takes the target size as ``out_shape``.
                mask = L.image_resize(mask,
                                      out_shape=spatial_size,
                                      resample='BILINEAR',
                                      align_corners=False)
            else:
                cond_input = cond
                mask = None

            label_map = L.image_resize(cond_input, spatial_size)
            if norm_weights is None or norm_weights[0] is None or i != 0:
                affine_params = self.mlps[i](label_map)
            else:
                affine_params = self.mlps[i](label_map,
                                             conv_weights=norm_weights)

            gamma, beta = L.split(affine_params, 2, 1)
            if mask is not None:
                # Suppress the modulation where the mask is set.
                gamma = gamma * (1 - mask)
                beta = beta * (1 - mask)
            output = output * (1 + gamma) + beta

        return output
Ejemplo n.º 13
0
    def forward(self, input, class_id, input_class_emb=False):
        """Generate an image from latent codes and a class condition.

        Args:
            input: either a list of codes (first entry feeds ``noise_fc``, the
                rest are grouped in pairs, one pair per block) or a single
                tensor that is split into ``self.num_split`` parts on axis 1.
            class_id: class ids, or the class embedding itself when
                ``input_class_emb`` is true.
            input_class_emb: skip ``self.embed_y`` and use ``class_id`` as is.

        Returns:
            ``(tanh(out) + 1) / 2`` of the final convolution output.
        """
        from_list = isinstance(input, list)
        if from_list:
            codes = [input[0]]
            codes += [
                input[2 * i + 1:2 * i + 3] for i in range(len(input) // 2)
            ]
        else:
            codes = layers.split(input, self.num_split, 1)

        class_emb = class_id if input_class_emb else self.embed_y(class_id)

        out = self.noise_fc(codes[0])
        out = layers.reshape(out, (out.shape[0], 4, 4, -1))
        out = layers.transpose(out, (0, 3, 1, 2))

        for code, gblock in zip(codes[1:], self.blocks):
            if from_list:
                # Each block receives a list of conditions, one per code.
                condition = [layers.concat([c, class_emb], 1) for c in code]
            else:
                condition = layers.concat([code, class_emb], 1)
            out = gblock(out, condition)

        out = self.output_layer_bn(out)
        out = layers.relu(out)
        out = self.output_layer_conv(out)

        return (layers.tanh(out) + 1) / 2
Ejemplo n.º 14
0
    def forward(self, input, pre_hidden):
        """Compute one GRU step from an already projected input.

        Args:
            input: projected step input; split on the last axis into the
                update, reset and candidate contributions.
            pre_hidden: previous hidden state.

        Returns:
            The new hidden state ``(1 - u) * pre_hidden + u * c``.
        """
        in_update, in_reset, in_cand = layers.split(
            input, num_or_sections=3, dim=-1)

        # Hidden-state contribution to both gates in a single matmul.
        hidden_gates = layers.matmul(x=pre_hidden, y=self._gate_weight)
        hidden_gates = layers.elementwise_add(hidden_gates, self._gate_bias)
        h_update, h_reset = layers.split(
            hidden_gates, num_or_sections=2, dim=-1)

        update_pre = layers.elementwise_add(in_update, h_update)
        reset_pre = layers.elementwise_add(in_reset, h_reset)
        update = self._gate_activation(update_pre)
        reset = self._gate_activation(reset_pre)

        cand = layers.matmul(reset * pre_hidden, self._candidate_weight)
        cand = layers.elementwise_add(in_cand, cand)
        cand = layers.elementwise_add(cand, self._candidate_bias)
        cand = self._activation(cand)

        return (1 - update) * pre_hidden + update * cand
    def gru_step(self, input, hidden, mask=None):
        """Run one time step through every layer of the stacked GRU.

        Args:
            input: step input for the first layer.
            hidden: previous hidden states; sliced along axis 0 into one state
                per layer and reshaped to ``[-1, self.hidden_size]``.
            mask: optional mask; where it is 0 the previous hidden state is
                kept instead of the newly computed one.

        Returns:
            ``(output, last_hidden)``: the last layer's output (after dropout
            when enabled) and the new hidden states reshaped to
            ``[self.num_layers, -1, self.hidden_size]``.
        """
        hidden_array = [
            layers.reshape(
                layers.slice(hidden, axes=[0], starts=[i], ends=[i + 1]),
                shape=[-1, self.hidden_size])
            for i in range(self.num_layers)
        ]

        last_hidden_array = []
        for k in range(self.num_layers):
            trans_input = layers.matmul(input, self.weight_input_array[k])
            trans_input += self.bias_input_array[k]
            trans_hidden = layers.matmul(hidden_array[k],
                                         self.weight_hidden_array[k])
            trans_hidden += self.bias_hidden_array[k]

            input_array = layers.split(trans_input, num_or_sections=3, dim=-1)
            trans_array = layers.split(trans_hidden, num_or_sections=3, dim=-1)

            reset_gate = layers.sigmoid(input_array[0] + trans_array[0])
            input_gate = layers.sigmoid(input_array[1] + trans_array[1])
            new_gate = layers.tanh(input_array[2] +
                                   reset_gate * trans_array[2])

            new_hidden = new_gate + input_gate * (hidden_array[k] - new_gate)

            # Test for presence explicitly: the truth value of a tensor is
            # not a reliable "mask was given" signal.
            if mask is not None:
                neg_mask = layers.fill_constant_batch_size_like(
                    input=mask, shape=[1], value=1.0, dtype='float32') - mask
                new_hidden = new_hidden * mask + hidden_array[k] * neg_mask

            last_hidden_array.append(new_hidden)
            # The stored state is the pre-dropout one; only the input of the
            # next layer is dropped out.
            input = new_hidden

            if self.dropout and self.dropout > 0.0:
                input = layers.dropout(input, dropout_prob=self.dropout)

        last_hidden = layers.concat(last_hidden_array, 0)
        last_hidden = layers.reshape(
            last_hidden, shape=[self.num_layers, -1, self.hidden_size])

        return input, last_hidden
Ejemplo n.º 16
0
    def forward(self, audio, mel, audio_start, clip_kl=True):
        """Compute loss of Clarinet model.

        Args:
            audio (Variable): shape(B, T_audio), dtype float32, ground truth waveform.
            mel (Variable): shape(B, F, T_mel), dtype float32, condition (mel spectrogram here).
            audio_start (Variable): shape(B, ), dtype int64, audio start positions.
            clip_kl (bool, optional): whether to clip the element-wise KL divergence to [-100, 10] before averaging. Defaults to True.

        Returns:
            Dict(str, Variable) with the keys
            loss: total loss, kl_divergence + self.lmd * regularization + stft_loss.
            kl_divergence: mean KL divergence between the student's and the teacher's output distributions, with the first teacher.context_size steps dropped.
            regularization: MSE between the teacher's and the student's log scales over the same steps.
            stft_loss: MSE between the STFT magnitudes of the ground truth waveform and the synthesized waveform.
        """
        _, audio_length = audio.shape  # audio clip's length

        noise = F.gaussian_random(audio.shape)
        condition = self.encoder(mel)  # (B, C, T)
        condition_slice = crop(condition, audio_start, audio_length)

        # student outputs, all for time steps [0: T]
        x, s_means, s_scales = self.student(noise, condition_slice)
        s_means = s_means[:, 1:]  # (B, T-1), time steps [1: T]
        s_scales = s_scales[:, 1:]  # (B, T-1), time steps [1: T]
        s_clipped_scales = F.clip(s_scales, self.min_log_scale, 100.)

        # teacher outputs single gaussian
        teacher_out = self.teacher(x[:, :-1], condition_slice[:, :, 1:])
        _, t_means, t_scales = F.split(teacher_out, 3, -1)  # time steps [1: T]
        t_means = F.squeeze(t_means, [-1])  # (B, T-1), time steps [1: T]
        t_scales = F.squeeze(t_scales, [-1])  # (B, T-1), time steps [1: T]
        t_clipped_scales = F.clip(t_scales, self.min_log_scale, 100.)

        s_distribution = D.Normal(s_means, F.exp(s_clipped_scales))
        t_distribution = D.Normal(t_means, F.exp(t_clipped_scales))

        # Closed-form KL between the two Gaussians; no sampling involved.
        kl = s_distribution.kl_divergence(t_distribution)
        if clip_kl:
            kl = F.clip(kl, -100., 10.)

        # The first context_size steps are excluded from both terms.
        context = self.teacher.context_size
        kl = F.reduce_mean(kl[:, context:])
        # The regularizer compares the unclipped log scales.
        regularization = F.mse_loss(t_scales[:, context:],
                                    s_scales[:, context:])

        # introduce information from real target
        spectrogram_frame_loss = F.mse_loss(self.stft.magnitude(audio),
                                            self.stft.magnitude(x))
        loss = kl + self.lmd * regularization + spectrogram_frame_loss
        return {
            "loss": loss,
            "kl_divergence": kl,
            "regularization": regularization,
            "stft_loss": spectrogram_frame_loss
        }
Ejemplo n.º 17
0
 def forward(self, x):
     """Encode the non-padding rows of ``x`` and re-pad them per sample.

     Rows of ``x`` that contain at least one non-pad value are run through
     ``self.transformer``; the result is split back by the per-sample row
     counts and padded with ``self.pad_index``.
     """
     non_pad = layers.reduce_any(x != self.pad_index, -1)
     lens = nn.reduce_sum(non_pad, -1)
     hidden, _ = self.transformer(nn.masked_select(x, non_pad))
     pieces = layers.split(hidden, lens.numpy().tolist(), dim=0)
     return nn.pad_sequence_paddle(pieces, self.pad_index)
Ejemplo n.º 18
0
 def _build_distribution(self, enc_final_state=None):
     """Project the encoder's final state to a mean and a log-variance.

     Args:
         enc_final_state: iterable of per-layer states; each one is
             concatenated on the last axis, then all layers are
             concatenated together. Despite the default, it must not be
             None because it is iterated.

     Returns:
         ``(z_mean, z_log_var)``: the two halves, along the last axis, of
         an fc output of width ``self.latent_size * 2``.
     """
     per_layer = [
         layers.concat(state, axis=-1) for state in enc_final_state
     ]
     enc_hidden = layers.concat(per_layer, axis=-1)
     stats = layers.fc(input=enc_hidden,
                       size=self.latent_size * 2,
                       name='fc_dist')
     z_mean, z_log_var = layers.split(stats, 2, -1)
     return z_mean, z_log_var
Ejemplo n.º 19
0
def epoch_predict(env, args, model, loader):
    """Run ``model`` over every batch of ``loader`` and collect predictions.

    Args:
        env: object exposing ``REL.vocab``, used to turn relation ids into
            labels.
        args: options object; ``pad_index``, ``bos_index``, ``eos_index``,
            ``encoding_model`` and ``prob`` are read here and the whole
            object is forwarded to ``decode``.
        model: network called with ``words`` only when
            ``args.encoding_model`` starts with "ernie", otherwise with
            ``(words, feats)``; it returns the two score tensors and the
            words used for masking.
        loader: callable returning an iterable of batches.

    Returns:
        ``(connections, deprels, probabilities)``: one entry per sentence
        with the predicted arcs, the relation labels and, only when
        ``args.prob`` is truthy, the arc probabilities rounded to three
        decimals.
    """
    arc_chunks, rel_chunks, prob_chunks = [], [], []
    pad_index = args.pad_index
    bos_index = args.bos_index
    eos_index = args.eos_index

    for inputs in loader():
        if args.encoding_model.startswith("ernie"):
            words = inputs[0]
            connection_prob, deprel_prob, words = model(words)
        else:
            words, feats = inputs
            connection_prob, deprel_prob, words = model(words, feats)

        # Keep only real tokens: neither padding nor BOS/EOS markers.
        mask = layers.logical_and(
            layers.logical_and(words != pad_index, words != bos_index),
            words != eos_index,
        )
        lens = nn.reduce_sum(mask, -1)
        connection_predicts, deprel_predicts = decode(args, connection_prob,
                                                      deprel_prob, mask)

        # Per-sentence token counts, used to cut the flattened predictions
        # back into one tensor per sentence.
        sizes = lens.numpy().tolist()
        flat_arcs = nn.masked_select(connection_predicts, mask)
        arc_chunks.extend(layers.split(flat_arcs, sizes))
        flat_rels = nn.masked_select(deprel_predicts, mask)
        rel_chunks.extend(layers.split(flat_rels, sizes))

        if args.prob:
            # Probability assigned to the predicted arc of every token.
            arc_probs = nn.index_sample(
                layers.softmax(connection_prob, -1),
                layers.unsqueeze(connection_predicts, -1))
            flat_probs = nn.masked_select(
                layers.squeeze(arc_probs, axes=[-1]), mask)
            prob_chunks.extend(layers.split(flat_probs, sizes))

    connections = [chunk.numpy().tolist() for chunk in arc_chunks]
    deprels = [env.REL.vocab[chunk.numpy().tolist()] for chunk in rel_chunks]
    probabilities = [[round(value, 3) for value in chunk.numpy().tolist()]
                     for chunk in prob_chunks]

    return connections, deprels, probabilities
Ejemplo n.º 20
0
    def forward(self, x):
        """Embed the non-padding rows of ``x`` with the LSTM and re-pad them.

        Rows of ``x`` with at least one non-pad value are embedded and run
        through ``self.lstm``; the unstacked final hidden states are
        concatenated on the last axis, split back by the per-sample row
        counts and padded with ``self.pad_index``.
        """
        non_pad = layers.reduce_any(x != self.pad_index, -1)
        lens = nn.reduce_sum(non_pad, -1)
        kept = nn.masked_select(x, non_pad)
        char_mask = kept != self.pad_index
        embedded = self.embed(kept)

        _, (hidden, _) = self.lstm(embedded, char_mask, self.pad_index)
        hidden = layers.concat(layers.unstack(hidden), axis=-1)
        pieces = layers.split(hidden, lens.numpy().tolist(), dim=0)
        return nn.pad_sequence_paddle(pieces, self.pad_index)
Ejemplo n.º 21
0
 def flat_words(self, words):
     """Flatten the non-pad entries of ``words`` into one padded row per sample.

     Args:
         words: id tensor in which ``self.args.pad_index`` marks padding.
             NOTE(review): the ``axis=1`` cumsum suggests a
             (batch, words, pieces) layout - confirm against the caller.

     Returns:
         ``(flat_words, position)``: the non-pad ids of every sample
         re-padded with ``pad_index``, and for each original group the
         index of its last entry in the flattened row, clamped to the
         padded length.
     """
     pad_index = self.args.pad_index
     non_pad = words != pad_index
     lens = nn.reduce_sum(non_pad, dim=-1)
     # Empty groups count as length 1 so positions keep advancing.
     position = layers.cumsum(lens + layers.cast((lens == 0), "int32"),
                              axis=1) - 1
     flat_words = nn.masked_select(words, non_pad)
     sample_sizes = layers.reduce_sum(lens, -1).numpy().tolist()
     # pad_index is the padding value for pad_sequence_paddle; it must not
     # be handed to layers.split, where the third argument is the axis.
     flat_words = nn.pad_sequence_paddle(
         layers.split(flat_words, sample_sizes, dim=0), pad_index)
     max_len = flat_words.shape[1]
     position = nn.mask_fill(position, position >= max_len, max_len - 1)
     return flat_words, position
Ejemplo n.º 22
0
 def pad_packed_sequence(self, x, batch_sizes, unsorted_indices):
     """Turn a packed sequence back into a padded tensor.

     Args:
         x: packed data; it is split along axis 0 into chunks of
             ``batch_sizes`` rows, one chunk per time step.
         batch_sizes: number of rows per time step; the first entry is used
             as the full batch size.
         unsorted_indices: indices passed to ``index_select`` to reorder
             the result.

     Returns:
         The chunks zero-padded to the full batch size, stacked along
         axis 1 and reordered with ``unsorted_indices``.
     """
     hidden_dim = x.shape[1]
     per_step = layers.split(x, batch_sizes, dim=0)
     full_bs = batch_sizes[0]
     padded_steps = [
         layers.concat(input=(chunk,
                              layers.zeros(shape=(full_bs - cur_bs,
                                                  hidden_dim),
                                           dtype=x.dtype)))
         for chunk, cur_bs in zip(per_step, batch_sizes)
     ]
     stacked = layers.stack(padded_steps, axis=1)
     return layers.index_select(stacked, unsorted_indices)
Ejemplo n.º 23
0
    def forward(self, input, pre_hidden, pre_cell):
        """Compute one LSTM step.

        Args:
            input: step input, concatenated with ``pre_hidden`` on axis 1.
            pre_hidden: previous hidden state.
            pre_cell: previous cell state.

        Returns:
            ``(new_hidden, new_cell)``.
        """
        gates = layers.matmul(x=layers.concat([input, pre_hidden], 1),
                              y=self._weight)
        gates = layers.elementwise_add(gates, self._bias)
        in_gate, cand, forget, out_gate = layers.split(
            gates, num_or_sections=4, dim=-1)

        # new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * tanh(j)
        keep = layers.sigmoid(
            layers.elementwise_add(forget, self._forget_bias))
        retained = layers.elementwise_mul(pre_cell, keep)
        written = layers.elementwise_mul(layers.sigmoid(in_gate),
                                         layers.tanh(cand))
        new_cell = layers.elementwise_add(retained, written)

        new_hidden = layers.tanh(new_cell) * layers.sigmoid(out_gate)
        return new_hidden, new_cell
Ejemplo n.º 24
0
 def __call__(self, x):
     """Embed every bond feature column of ``x`` and sum the embeddings.

     ``x`` is split on its last axis into one column per entry of
     ``get_bond_feature_dims()``; column number ``k`` (1-based) is embedded
     with a table named ``self.name + '_bond_feat_k'``. Returns the sum of
     the embeddings, or None when there are no features.
     """
     feature_dims = get_bond_feature_dims()
     columns = L.split(x, num_or_sections=len(feature_dims), dim=-1)
     total = None
     for idx, (column, vocab_size) in enumerate(zip(columns, feature_dims),
                                                start=1):
         emb = L.embedding(column,
                           size=(vocab_size, self.emb_dim),
                           param_attr=F.ParamAttr(name=self.name +
                                                  '_bond_feat_%s' % idx))
         total = emb if total is None else total + emb
     return total
Ejemplo n.º 25
0
    def for_rnn_net(self):
        """Build a toy recurrence as an unrolled Python ``for`` loop.

        The input ``x`` of shape ``[BATCH_SIZE, SEQ_LEN, INPUT_DIM]`` is
        split into ``SEQ_LEN`` steps; starting from a zero state, each step
        computes ``state = scale(state + x_t)``. Returns the mean of the
        final state.
        """
        x = layers.data(shape=[BATCH_SIZE, SEQ_LEN, INPUT_DIM],
                        dtype="float32",
                        name="x",
                        append_batch_size=False)
        steps = layers.split(x, num_or_sections=SEQ_LEN, dim=1)

        state = fluid.layers.zeros(shape=[BATCH_SIZE, 1, INPUT_DIM],
                                   dtype="float32")

        for x_t in steps:
            summed = layers.elementwise_add(x=state, y=x_t)
            state = layers.scale(x=summed, scale=self.scale)
        return layers.mean(state)
Ejemplo n.º 26
0
    def forward(self, x, *cond_inputs, **kwargs):
        """Normalize ``x`` and modulate it with every conditional input.

        Each non-None conditional input is resized (nearest neighbour) to
        ``x.shape[2:]`` and mapped to ``gamma`` / ``beta``, either through
        separate heads or by splitting one MLP output on axis 1. The output
        is updated as ``output * (1 + gamma) + beta`` for each of them.
        """
        output = x if self.norm is None else self.norm(x)
        for i, cond in enumerate(cond_inputs):
            if cond is None:
                continue
            label_map = L.image_resize(cond, out_shape=x.shape[2:], resample='NEAREST')

            if self.separate_projection:
                hidden = self.mlps[i](label_map)
                gamma = self.gammas[i](hidden)
                beta = self.betas[i](hidden)
            else:
                gamma, beta = L.split(self.mlps[i](label_map), 2, 1)
            output = output * (1 + gamma) + beta

        return output
Ejemplo n.º 27
0
    def forward(self, input, state):
        """Compute one LSTM step.

        Args:
            input: step input, concatenated with the previous hidden state
                on axis 1.
            state: pair ``(pre_hidden, pre_cell)``.

        Returns:
            ``(new_hidden, [new_hidden, new_cell])``.
        """
        pre_hidden, pre_cell = state

        # All four gates need W_x·x + W_h·h + b, so x and h are concatenated
        # and multiplied by a single weight matrix:
        #   [x, h]   : [batch_size, input_size + hidden_size]
        #   W        : [input_size + hidden_size, 4 * hidden_size]
        #   b        : [4 * hidden_size]
        #   result   : [batch_size, 4 * hidden_size]
        x_and_h = L.concat([input, pre_hidden], axis=1)
        gates = L.matmul(x=x_and_h, y=self._weight)
        gates = L.elementwise_add(gates, self._bias)

        # Split along the last axis: every gate has width hidden_size.
        i, f, c, o = L.split(gates, num_or_sections=4, dim=-1)

        # new_c = pre_c * sigmoid(f + forget_bias) + sigmoid(i) * tanh(c)
        # shape: [batch_size, hidden_size]
        keep = L.sigmoid(L.elementwise_add(f, self._forget_bias))
        retained = L.elementwise_mul(pre_cell, keep)
        written = L.elementwise_mul(L.sigmoid(i), L.tanh(c))
        new_cell = L.elementwise_add(retained, written)

        # new_h = tanh(new_c) * sigmoid(o)
        # shape: [batch_size, hidden_size]
        new_hidden = L.tanh(new_cell) * L.sigmoid(o)

        return new_hidden, [new_hidden, new_cell]
Ejemplo n.º 28
0
    def pairwise_hinge(self):
        """Build the pairwise ranking loss and its order metric.

        ``self.poi_repr`` is split in two along axis 0 into positive and
        negative representations, each scored against ``self.query_repr``
        with cosine similarity. Sets ``self.loss`` (mean hinge loss),
        ``self.order`` (correctly ordered pairs divided by wrongly ordered
        pairs plus 1e-5) and ``self.metrics``.
        """
        pos_repr, neg_repr = L.split(self.poi_repr, 2, dim=0)
        pos_pred = L.cos_sim(self.query_repr, pos_repr)
        neg_pred = L.cos_sim(self.query_repr, neg_repr)

        # The loss flavour is fixed to the hinge; the logistic variant is
        # kept for reference.
        # hinge: max(0, 1 - z), logistic: log(1 + e^-z)
        mode = 'hinge_loss'
        if mode == 'hinge_loss':
            theta_z = L.relu(1 + neg_pred - pos_pred)
        elif mode == 'logistic_loss':
            theta_z = L.log(1 + L.exp(neg_pred - pos_pred))
        self.loss = L.reduce_mean(theta_z)

        right_order = L.cast(L.greater_than(pos_pred, neg_pred), dtype="float32")
        pos_cnt = L.reduce_sum(right_order)
        wrong_order = L.cast(L.less_than(pos_pred, neg_pred), dtype="float32")
        neg_cnt = L.reduce_sum(wrong_order)
        self.order = pos_cnt / (1e-5 + neg_cnt)
        self.metrics = [self.loss, self.order]
0
def weight_layers(lm_embeddings, name="", l2_coef=0.0):
    '''
    Combine biLM layer representations into one ELMo representation using
    trainable, softmax-normalized scalar weights and a trainable scale.

    Input:
        lm_embeddings(list): representations of the biLM layers.
        name: string prefix used for the trainable variable names.
        l2_coef: the l2 regularization coefficient (lambda) applied to the
            layer weights.

    Output:
        weighted_lm_layers: gamma * sum_k(softmax(W + 1/n)_k * layer_k)
    '''
    n_lm_layers = len(lm_embeddings)

    # One scalar per layer, initialized to zero.
    layer_logits = layers.create_parameter(
        [
            n_lm_layers,
        ],
        dtype="float32",
        name=name + "ELMo_w",
        attr=fluid.ParamAttr(name=name + "ELMo_w",
                             initializer=fluid.initializer.Constant(0.0),
                             regularizer=fluid.regularizer.L2Decay(l2_coef)))
    normed_weights = layers.softmax(layer_logits + 1.0 / n_lm_layers)
    per_layer_weights = layers.split(normed_weights, n_lm_layers, dim=0)

    # compute the weighted, normalized LM activations
    weighted = [
        embedding * weight
        for weight, embedding in zip(per_layer_weights, lm_embeddings)
    ]
    mixed = layers.sums(weighted)

    # scale the weighted sum by gamma
    gamma = layers.create_parameter(
        [1],
        dtype="float32",
        name=name + "ELMo_gamma",
        attr=fluid.ParamAttr(name=name + "ELMo_gamma",
                             initializer=fluid.initializer.Constant(1.0)))
    return mixed * gamma
Ejemplo n.º 30
0
    def forward(self, input, pre_hidden):
        """Compute one GRU step.

        Args:
            input: step input, concatenated with the hidden state on axis 1.
            pre_hidden: previous hidden state.

        Returns:
            The new hidden state ``u * pre_hidden + (1 - u) * c``.
        """
        # Reset and update gates come from a single affine transform.
        gates = layers.matmul(x=layers.concat([input, pre_hidden], 1),
                              y=self._gate_weight)
        gates = self._gate_activation(
            layers.elementwise_add(gates, self._gate_bias))
        reset, update = layers.split(gates, num_or_sections=2, dim=1)

        # Candidate state sees the reset-scaled previous hidden state.
        cand_in = layers.concat([input, reset * pre_hidden], 1)
        cand = layers.matmul(cand_in, self._candidate_weight)
        cand = self._activation(
            layers.elementwise_add(cand, self._candidate_bias))

        return update * pre_hidden + (1 - update) * cand