Beispiel #1
0
 def test_ceil(self):
     """Build a program holding a single ``ceil`` layer and print it."""
     main = Program()
     with program_guard(main):
         data = layers.data(name="input", shape=[16], dtype="float32")
         result = layers.ceil(data, name='ceil')
         # The layer call must hand back an output variable.
         self.assertIsNotNone(result)
     print(str(main))
Beispiel #2
0
    def forward(self, x, alpha=1.0, target=None):
        """
        Expand the encoder output along time according to phoneme durations.

        Args:
            x (Variable): shape(B, T, C), dtype float32, the encoder output.
            alpha (float32, optional): factor the predicted durations are
                multiplied by before rounding up; only used when the tracer
                is not in train mode. Defaults to 1.0.
            target (Variable, optional): shape(B, T_text), dtype int64, the
                reference phoneme durations; passed to ``self.LR`` only when
                the tracer is in train mode. Defaults to None.

        Returns:
            tuple: ``(output, duration_predictor_output)`` in train mode,
                where ``output`` is ``self.LR(x, target)`` and the second item
                is the raw result of ``self.duration_predictor(x)``.
                Otherwise ``(output, mel_pos)``, where ``output`` is
                ``self.LR`` applied with ``ceil(prediction * alpha)`` and
                ``mel_pos`` holds the int64 positions
                ``1 .. output.shape[1]`` with a leading axis inserted.
        """
        predicted = self.duration_predictor(x)

        # Training: regulate with the supplied durations and expose the raw
        # prediction to the caller.
        if fluid.framework._dygraph_tracer()._train_mode:
            return self.LR(x, target), predicted

        # Inference: scale the prediction, round up, and regulate with it.
        durations = layers.ceil(predicted * alpha)
        output = self.LR(x, durations)

        # 1-based position ids over the expanded time axis.
        frame_count = output.shape[1]
        mel_pos = dg.to_variable(np.arange(1, frame_count + 1)).astype(
            np.int64)
        mel_pos = layers.unsqueeze(mel_pos, [0])
        return output, mel_pos
 def communicate_avg_loss():
     """Communicate, refresh ``avg_loss`` and write the next ``k_steps``.

     The new step count is
     ``ceil(sqrt(lr_0 * avg_loss / (global_lr * loss_0) * init_k_steps))``
     cast to int64 and clamped to the range [1, 16] before being assigned
     to ``k_steps``. All operands are taken from the enclosing scope.
     """
     communicate()
     self._generate_avg_loss(main_block, loss, avg_loss)

     # Unclamped proposal for the number of local steps.
     loss_ratio = lr_0 * avg_loss / (global_lr * loss_0)
     raw_steps = layers.sqrt(loss_ratio * float(init_k_steps))
     proposed = layers.cast(layers.ceil(raw_steps), dtype='int64')

     # Bounds of the allowed range.
     upper = layers.fill_constant(shape=[1], dtype='int64', value=16)
     lower = layers.fill_constant(shape=[1], dtype='int64', value=1)

     # Apply the upper bound first, then the lower bound.
     clamped = layers.elementwise_max(
         layers.elementwise_min(proposed, upper), lower)
     layers.assign(clamped, k_steps)
Beispiel #4
0
    def __compute_graph_bias(q, graph_attn_mask, pos_win):
        """
        Build a Gaussian-style bias from the graph mask row at a predicted position.

        For every query a soft position in the open range (0, key_s_len - 1)
        is predicted from ``q`` (fc -> tanh -> fc -> sigmoid, scaled by
        ``key_s_len - 1``). The ``graph_attn_mask`` rows at ``ceil(pos)`` and
        ``floor(pos)`` are gathered, blended by their distance to ``pos``,
        and the blend ``d`` is mapped to ``-0.5 * d * d / (0.5 * pos_win) ** 2``.

        ``d_value``, ``name``, ``param_initializer``, ``key_s_len``,
        ``batch_size``, ``n_head`` and ``query_len`` are not parameters; they
        are resolved from a scope that is not visible in this block.

        :param q: (batch_size, n_heads, query_len, dim_per_head)
        :param graph_attn_mask: (batch_size, n_head, key_s_len, key_s_len)
        :param pos_win: window size; ``0.5 * pos_win`` is squared to form the
            denominator of the returned weights
        :return: gaussian_w, (batch_size, n_heads, query_len, key_s_len)
        """
        # Project q; the last dimension becomes d_value (the fc ``size``).
        pos_v = layers.fc(input=q,
                          size=d_value,
                          num_flatten_dims=3,
                          param_attr=fluid.ParamAttr(
                              name=name + '_pos_fc.w_0',
                              initializer=param_initializer),
                          bias_attr=name + '_pos_fc.b_0')

        # One scalar position score per query: (batch_size, n_heads, query_len, 1)
        pos_s = layers.fc(input=layers.tanh(pos_v),
                          size=1,
                          num_flatten_dims=3,
                          param_attr=fluid.ParamAttr(
                              name=name + '_pos_score_fc.w_0',
                              initializer=param_initializer),
                          bias_attr=False)

        # Squash the score into a fractional key index: (batch_size, n_heads, query_len, 1)
        pos = layers.sigmoid(pos_s) * (key_s_len - 1)

        # Integer neighbours of the fractional index: (batch_size, n_heads, query_len, 1)
        pos_up = layers.cast(layers.ceil(pos), dtype='int64')
        pos_down = layers.cast(layers.floor(pos), dtype='int64')

        # Index grids for the batch, head and query axes. Each is expanded to
        # (batch_size, n_heads, query_len, 1) so they can be concatenated with
        # pos_up / pos_down into gather_nd coordinates.
        batch_ind = layers.range(0, layers.cast(batch_size, dtype='int64'), 1,
                                 'int64')
        batch_ind = layers.unsqueeze(batch_ind,
                                     axes=[1, 2, 3])  # (batch_size, 1, 1, 1)
        batch_ind = layers.expand(
            batch_ind, expand_times=[1, n_head, query_len,
                                     1])  # (batch_size, n_heads, query_len, 1)

        head_ind = layers.range(0, n_head, 1, 'int64')
        head_ind = layers.unsqueeze(head_ind, axes=[0, 2,
                                                    3])  # (1, n_heads, 1, 1)
        head_ind = layers.expand(head_ind,
                                 expand_times=[batch_size, 1, query_len, 1])

        query_ind = layers.range(0, layers.cast(query_len, dtype='int64'), 1,
                                 'int64')
        query_ind = layers.unsqueeze(query_ind,
                                     axes=[0, 1, 3])  # (1, 1, query_len, 1)
        query_ind = layers.expand(query_ind,
                                  expand_times=[batch_size, n_head, 1, 1])

        # Coordinates (batch, head, query, position):
        # (batch_size, n_heads, query_len, 4). They are pure indices, so no
        # gradient is propagated through them.
        pos_up_ind = layers.concat(
            input=[batch_ind, head_ind, query_ind, pos_up], axis=-1)
        pos_up_ind.stop_gradient = True
        pos_down_ind = layers.concat(
            input=[batch_ind, head_ind, query_ind, pos_down], axis=-1)
        pos_down_ind.stop_gradient = True

        # Insert a query axis and tile the mask along it so it can be indexed
        # per query: (batch_size, n_heads, query_len, key_s_len, key_s_len)
        graph_attn_mask = layers.unsqueeze(graph_attn_mask, axes=[2])
        graph_attn_mask = layers.expand(graph_attn_mask,
                                        expand_times=[1, 1, query_len, 1, 1])

        # Pick the mask row at the upper / lower position for every query:
        # (batch_size, n_heads, query_len, key_s_len)
        graph_attn_mask_up = layers.gather_nd(input=graph_attn_mask,
                                              index=pos_up_ind)
        graph_attn_mask_down = layers.gather_nd(input=graph_attn_mask,
                                                index=pos_down_ind)

        # Linearly combine up and down (batch_size, n_heads, query_len, key_s_len);
        # each row is weighted by one minus its distance to pos.
        # NOTE(review): when pos is exactly integral, pos_up == pos_down and
        # both weights evaluate to 1.0, so that row is added twice - confirm
        # whether this is intended.
        graph_attn_mask_select = graph_attn_mask_up * (1.0 - (layers.cast(pos_up, dtype='float32') - pos)) + \
                                 graph_attn_mask_down * (1.0 - (pos - layers.cast(pos_down, dtype='float32')))
        # Re-weight with a Gaussian-style exponent: -0.5 * d^2 / (0.5 * pos_win)^2.
        gaussian_w = (
            -0.5 * graph_attn_mask_select * graph_attn_mask_select) / (
                (0.5 * pos_win)**2)  # [batch, n_heads, query_len, key_s_len]

        return gaussian_w
            def communicate():
                """Append the ops that exchange parameter deltas and refresh snapshots.

                Works on the current block of the default main program. For
                every ``(param, snapshot)`` pair in ``p2s`` it appends, in
                order: a delta computation, an all-reduce, a rescale by
                ``1 / worker_num``, the parameter update and the snapshot
                refresh. When ``auto_steps`` is truthy it also recomputes
                ``k_steps``. Finally ``step`` is assigned to ``last_step``.

                ``p2s``, ``self``, ``auto_steps``, ``lr_0``, ``loss``,
                ``global_lr``, ``loss_0``, ``init_k_steps``, ``k_steps``,
                ``step`` and ``last_step`` come from a scope that is not
                visible in this block.
                """
                sub_block = default_main_program().current_block()
                # Starts at -1 so the first parameter lands on ring 0.
                ring_id = -1
                for param, snapshot in p2s:
                    # param <- snapshot - param (written in place into param).
                    sub_block.append_op(type='elementwise_sub',
                                        inputs={
                                            'X': [snapshot],
                                            'Y': [param]
                                        },
                                        outputs={'Out': [param]},
                                        attrs={OP_ROLE_KEY: OpRole.Optimize})
                    sub_block.append_op(type='c_sync_calc_stream',
                                        inputs={'X': param},
                                        outputs={'Out': param},
                                        attrs={OP_ROLE_KEY: OpRole.Optimize})
                    # Spread the all-reduce ops round-robin over the rings.
                    ring_id = (ring_id + 1) % self.nrings
                    sub_block.append_op(type='c_allreduce_sum',
                                        inputs={'X': [param]},
                                        outputs={'Out': [param]},
                                        attrs={
                                            'ring_id': ring_id,
                                            OP_ROLE_KEY: OpRole.Optimize
                                        })

                # One c_sync_comm_stream op per ring.
                # NOTE(review): ``param`` here is whatever the loop above left
                # behind (its last element); if ``p2s`` is empty and
                # ``self.nrings`` > 0 this raises UnboundLocalError. The loop
                # variable also rebinds ``ring_id``.
                for ring_id in range(self.nrings):
                    sub_block.append_op(type='c_sync_comm_stream',
                                        inputs={'X': param},
                                        outputs={'Out': param},
                                        attrs={
                                            'ring_id': ring_id,
                                            OP_ROLE_KEY: OpRole.Optimize
                                        })

                for param, snapshot in p2s:
                    # Divide the reduced delta by the number of workers
                    # (presumably turning the all-reduced sum into a mean).
                    sub_block.append_op(type='scale',
                                        inputs={'X': [param]},
                                        outputs={'Out': [param]},
                                        attrs={
                                            'scale':
                                            1.0 / self.role_maker.worker_num(),
                                            OP_ROLE_KEY:
                                            OpRole.Optimize
                                        })
                    # param <- snapshot - scaled delta.
                    sub_block.append_op(type='elementwise_sub',
                                        inputs={
                                            'X': [snapshot],
                                            'Y': [param]
                                        },
                                        outputs={'Out': [param]},
                                        attrs={OP_ROLE_KEY: OpRole.Optimize})
                    # snapshot <- param, so the next round starts from here.
                    sub_block.append_op(type='assign',
                                        inputs={'X': [param]},
                                        outputs={'Out': [snapshot]},
                                        attrs={OP_ROLE_KEY: OpRole.Optimize})

                if auto_steps:
                    # k_steps <- min(ceil(sqrt(lr_0 * loss / (global_lr * loss_0)
                    #                          * init_k_steps)), 16)
                    # NOTE(review): only an upper bound is applied; nothing in
                    # this block keeps the result from becoming 0 - confirm
                    # whether a lower bound is needed.
                    next_local_steps = layers.cast(layers.ceil(
                        layers.sqrt(lr_0 * loss / (global_lr * loss_0) *
                                    float(init_k_steps))),
                                                   dtype='int64')
                    max_local_steps = layers.fill_constant(shape=[1],
                                                           dtype='int64',
                                                           value=16)
                    next_local_steps = layers.elementwise_min(
                        next_local_steps, max_local_steps)
                    layers.assign(next_local_steps, k_steps)
                # Record the step at which this communication was appended.
                layers.assign(step, last_step)
Beispiel #6
0
def topk_pool(gw, score, graph_id, ratio):
    """Pick the highest-scoring nodes of every graph in a batch.

    Node scores are scattered into a dense ``[num_graph, max_num_nodes]``
    buffer padded with ``-999999``, each row is sorted in descending order,
    and the first ``ceil(num_nodes_in_graph * ratio)`` entries of every row
    are kept.

    Args:
        gw: Graph wrapper object; ``graph_lod``, ``num_nodes`` and
            ``num_graph`` are read from it.

        score: The attention score of all nodes, used to rank the nodes.

        graph_id: The graph each node belongs to.

        ratio: The fraction of nodes to keep per graph.

    Return:
        perm: The flattened indices of the selected nodes.

        ratio_length: The number of selected nodes per graph, int64 with
                      shape ``[-1, 1]``.
    """

    lod = gw.graph_lod
    total_nodes = gw.num_nodes
    graph_count = gw.num_graph

    # Count the nodes of each graph by sum-pooling a vector of ones that
    # carries the graph LoD; the largest count sets the padded row width.
    ones = L.ones(shape=[total_nodes], dtype="float32")
    ones = L.lod_reset(ones, lod)
    num_nodes_per_graph = L.sequence_pool(ones, pool_type='sum')
    row_width = L.reduce_max(num_nodes_per_graph, dim=0)
    row_width = L.cast(row_width, dtype="int32")

    # Slot of every node in the padded layout:
    # (position inside its graph) + graph_id * row_width.
    node_pos = L.arange(0, gw.num_nodes, dtype="int64")
    graph_start = L.gather(lod, graph_id, overwrite=False)
    dense_pos = (node_pos - graph_start) + (graph_id * row_width)
    dense_pos.stop_gradient = True

    # Padding slots keep a very low score so they sort to the end of a row.
    padded_score = L.fill_constant(shape=[graph_count * row_width],
                                   dtype="float32",
                                   value=-999999)
    dense_pos = L.reshape(dense_pos, shape=[-1])
    padded_score = L.scatter(padded_score, dense_pos, updates=score)
    graph_count_i32 = L.cast(graph_count, dtype="int32")
    padded_score = L.reshape(padded_score,
                             shape=[graph_count_i32, row_width])

    # Per-row ranking; only the sorted indices are needed.
    _, ranked = L.argsort(padded_score, axis=-1, descending=True)

    # Shift the in-row indices by each graph's starting offset.
    starts = lod[:-1]
    starts = L.reshape(starts, shape=[-1, 1])
    starts = L.cast(starts, dtype="int64")
    ranked = L.elementwise_add(ranked, starts, axis=-1)
    ranked = L.reshape(ranked, shape=[-1, 1])

    # Treat every padded row as one sequence, then slice the leading
    # ratio_length entries out of each.
    row_bounds = L.arange(0, (graph_count_i32 + 1) * row_width,
                          step=row_width,
                          dtype="int32")
    ranked = L.lod_reset(ranked, row_bounds)
    ratio_length = L.cast(L.ceil(num_nodes_per_graph * ratio),
                          dtype="int64")
    ratio_length = L.reshape(ratio_length, shape=[-1, 1])
    slice_start = L.zeros(shape=[graph_count_i32, 1], dtype="int64")
    selected = L.sequence_slice(input=ranked,
                                offset=slice_start,
                                length=ratio_length)

    perm = L.reshape(selected, shape=[-1])
    return perm, ratio_length