Example #1
    def __init__(self, config, scale=1.0):
        super(Output, self).__init__()
        # Transformer feed-forward block: widen, apply GELU, project back.
        input_size = config.embedding_size
        output_size = config.embedding_size * config.expand_ratio
        self.mapping = Mapping(input_size, output_size, config.compute_dtype)
        self.projection = Mapping(output_size, input_size, config.compute_dtype, scale)
        self.activation = nn.GELU()
        # MindSpore's nn.Dropout takes the keep probability, hence 1 - dropout_rate.
        self.dropout = nn.Dropout(1 - config.dropout_rate)
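The example only shows the constructor; a minimal sketch of how these layers would typically compose in the cell's forward method (the construct body below is an assumption, not part of the original source):

    def construct(self, x):
        hidden = self.activation(self.mapping(x))  # widen, then GELU
        output = self.projection(hidden)           # project back to embedding_size
        return self.dropout(output)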
Example #2
    def __init__(self, config, scale=1.0):
        super(Output, self).__init__()
        input_size = config.embedding_size
        output_size = config.embedding_size * config.expand_ratio
        self.mapping = Mapping_output(config, input_size, output_size)
        self.projection = Mapping(config, output_size, input_size, scale)
        self.activation = nn.GELU()
        # Shard the GELU and dropout ops across data-parallel (dp) and
        # model-parallel (mp) device groups.
        self.activation.gelu.shard(((config.dp, 1, config.mp),))
        self.dropout = nn.Dropout(1 - config.dropout_rate)
        self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
        self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))
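The .shard(((dp, 1, mp),)) calls in this example set per-operator tensor layout strategies for MindSpore's semi-auto-parallel mode: for a 3-D activation, the batch axis is split across dp devices and the hidden axis across mp devices. They have no effect outside a parallel execution context.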
Example #3
    def __init__(self,
                 dim,
                 mult=4,
                 initializer_range=0.02,
                 hidden_dropout_prob=0.1,
                 compute_type=mstype.float32):
        super(FeedForward, self).__init__()
        self.hidden_size = dim
        # Standard FFN: dim -> dim * mult -> dim, with a GELU in between.
        self.w1 = Mapping(dim, dim * mult, initializer_range, compute_type)
        self.w2 = Mapping(dim * mult, dim, initializer_range, compute_type)
        self.act = nn.GELU()
        # nn.Dropout expects a keep probability, consistent with the other examples.
        self.dropout = nn.Dropout(1 - hidden_dropout_prob)
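As in Example #1, the forward pass is not shown; a plausible construct for this cell (assumed, following the usual Transformer FFN ordering):

    def construct(self, x):
        hidden = self.act(self.w1(x))   # dim -> dim * mult, then GELU
        output = self.w2(hidden)        # dim * mult -> dim
        return self.dropout(output)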
Example #4
    def __init__(self, config, num_labels):
        super(SequenceSummary, self).__init__()
        self.summary = nn.Dense(config.d_model,
                                num_labels,
                                weight_init=weight_variable(
                                    [config.d_model, num_labels]),
                                has_bias=True).to_float(config.compute_type)
        self.gelu = nn.GELU()
        self.first_dropout = nn.Dropout(1 - config.hidden_dropout)
        self.last_dropout = nn.Dropout(1 - config.hidden_dropout)

        self.expand_dims = P.ExpandDims()
        self.shape = P.Shape()
        self.size = P.Size()
        self.slice = P.GatherV2()  # gather op used to slice out one token per sequence
        self.squeeze = P.Squeeze(-2)
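The ops prepared above suggest pooling a single token before classification; a hypothetical construct (the token_index argument, its shape, and the gather axis are all assumptions):

    def construct(self, hidden_states, token_index):
        # Gather(params, indices, axis): with token_index of shape (1,),
        # the result is (batch, 1, d_model); Squeeze(-2) drops the extra axis.
        tokens = self.squeeze(self.slice(hidden_states, token_index, 1))
        output = self.first_dropout(tokens)
        output = self.gelu(self.summary(output))  # Dense: d_model -> num_labels
        return self.last_dropout(output)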
Example #5
    def __init__(self,
                 in_channels=768,
                 out_channels=768,
                 hidden_size=3072,
                 hidden_dropout=0.1):
        super(FeedForward, self).__init__()

        # GPT-2 style FFN built from 1D convolutions acting as linear layers.
        self.c_fc = Conv1D(in_channels, hidden_size)
        self.c_proj = Conv1D(hidden_size, out_channels)

        self.layernorm = LayerNorm(in_channels=in_channels)
        self.residual_connect = ResidualConnection(dropout_prob=hidden_dropout)
        self.gelu = nn.GELU()
        self.dropout = nn.Dropout(1 - hidden_dropout)
        self.use_dropout = hidden_dropout > 0

        self.reshape = P.Reshape()
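A sketch of the corresponding forward pass (assumed; the call signature of ResidualConnection is a guess based on its dropout_prob argument):

    def construct(self, x):
        output = self.layernorm(x)            # pre-LayerNorm
        output = self.c_fc(output)            # in_channels -> hidden_size
        output = self.gelu(output)
        output = self.c_proj(output)          # hidden_size -> out_channels
        return self.residual_connect(output, x)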
Example #6
    def __init__(self):
        super(Net_gelu, self).__init__()
        self.gelu = nn.GELU()
Example #7
def gelu(x):
    """Apply the GELU activation function."""
    return nn.GELU()(x)
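All seven examples wrap the same primitive. A minimal, self-contained check of what nn.GELU computes (values rounded; the input is arbitrary):

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor

    x = Tensor(np.array([-1.0, 0.0, 1.0], dtype=np.float32))
    print(nn.GELU()(x))  # ~[-0.159, 0.0, 0.841]

Note that Example #7 builds a fresh nn.GELU cell on every call; keeping it as an attribute, as in Example #6, avoids reconstructing the cell each time.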