def __init__(self):
    super().__init__()
    self.virtual_dataset = _VirtualDataset()
    self.matmul1 = P.MatMul()
    self.matmul2 = P.MatMul()
    self.gelu = P.Gelu()
    self.bn1 = bn_with_initialize(2048)
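# Hedged sketch (assumption, not from the source): the snippet above defines only
# __init__, so a minimal construct is sketched here chaining the ops in declaration
# order. The dataflow and the input/weight names x, w1, w2 are invented.
def construct(self, x, w1, w2):
    x = self.virtual_dataset(x)
    out = self.gelu(self.matmul1(x, w1))
    out = self.matmul2(out, w2)
    return self.bn1(out)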
def __init__(self, strategy1, strategy2, strategy3, strategy4, strategy5, strategy6):
    super().__init__()
    self.matmul1 = P.MatMul().set_strategy(strategy1)
    self.matmul2 = P.MatMul().set_strategy(strategy2)
    self.gelu = P.Gelu().set_strategy(strategy3)
    self.tanh = P.Tanh().set_strategy(strategy4)
    self.softmax = P.Softmax(axis=(0, 1)).set_strategy(strategy5)
    self.logsoftmax = P.LogSoftmax().set_strategy(strategy6)
def __init__(self, strategy1, strategy2, strategy3):
    super().__init__()
    self.matmul1 = P.MatMul().shard(strategy1)
    self.matmul2 = P.MatMul().shard(strategy2)
    self.gelu = P.Gelu().shard(strategy3)
    self.tanh = P.Tanh().shard(strategy3)
    self.softmax = P.Softmax().shard(strategy3)
    self.logsoftmax = P.LogSoftmax().shard(strategy3)
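# Hedged usage sketch (values and the class name Net are assumptions): cells like
# the one above are typically compiled under semi-auto parallel. Each MatMul
# strategy carries one tuple per operator input; the element-wise ops take a
# single-input strategy. Sized here for an 8-device layout.
from mindspore import context

context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8)
net = Net(strategy1=((2, 2), (2, 2)),   # both MatMul inputs split 2x2
          strategy2=((2, 2), (2, 2)),
          strategy3=((8, 1),))          # element-wise ops: one input strategy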
def __init__(self, in_channels=768, out_channels=768, hidden_size=3072, hidden_dropout=0.1):
    super(FeedForward, self).__init__()
    self.c_fc = Conv1D(in_channels, hidden_size)
    self.c_proj = Conv1D(hidden_size, out_channels)
    self.layernorm = LayerNorm(in_channels=in_channels)
    self.residual_connect = ResidualConnection(dropout_prob=hidden_dropout)
    self.gelu_act = P.Gelu()
    self.dropout = nn.Dropout(1 - hidden_dropout)
    self.use_dropout = hidden_dropout > 0
    self.reshape = P.Reshape()
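# Hedged sketch (assumption): a plausible construct for the FeedForward cell
# above, following the common pre-LayerNorm MLP pattern; the exact op order in
# the source is unknown.
def construct(self, x):
    norm = self.layernorm(x)
    hidden = self.gelu_act(self.c_fc(norm))
    if self.use_dropout:
        hidden = self.dropout(hidden)
    out = self.c_proj(hidden)
    return self.residual_connect(out, x)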
def __init__(self, strategy1, strategy2, strategy3):
    super().__init__()
    self.matmul1 = P.MatMul().set_strategy(strategy1)
    self.matmul2 = P.MatMul().set_strategy(strategy2)
    self.gelu = P.Gelu().set_strategy(strategy3)
def __init__(self, strategy0, strategy1, strategy2, strategy3):
    super().__init__()
    self.virtual_dataset = _VirtualDataset().set_strategy(strategy0)
    self.matmul1 = P.MatMul().set_strategy(strategy1)
    self.matmul2 = P.MatMul().set_strategy(strategy2)
    self.gelu = P.Gelu().set_strategy(strategy3)
def __init__(self):
    super(GeluNet, self).__init__()
    self.gelu = P.Gelu()
def __init__(self):
    super(GELU, self).__init__()
    self.matmul = P.MatMul()
    self.gelu = P.Gelu()
def __init__(self):
    super(MEGeluLargeIn, self).__init__()
    self.matmul = P.MatMul()
    self.gelu = P.Gelu()
def __init__(self, strategy1, strategy2):
    super().__init__()
    self.matmul = P.MatMul().shard(strategy1)
    self.gelu = P.Gelu().shard(strategy2)
     'desc_inputs': [Tensor(np.array([0, 1]).astype(np.float32)),
                     Tensor(np.array([1, 1]).astype(np.float32))],
     'desc_bprop': [[2]]}),
]

test_case_nn_ops = [
    ('BiasAdd', {
        'block': P.BiasAdd(),
        'desc_inputs': [[1, 3, 3, 3], [3]],
        'desc_bprop': [[1, 3, 3, 3]]}),
    ('BiasAddGrad', {
        'block': G.BiasAddGrad(),
        'desc_inputs': [[1, 3, 3, 3]],
        'skip': ['backward']}),
    ('Gelu', {
        'block': P.Gelu(),
        'desc_inputs': [[1, 3, 4, 4]],
        'desc_bprop': [[1, 3, 4, 4]]}),
    ('GeluGrad', {
        'block': G.GeluGrad(),
        'desc_inputs': [[2, 2], [2, 2], [2, 2]],
        'desc_bprop': [[2, 2]],
        'skip': ['backward']}),
    ('Tanh', {
        'block': P.Tanh(),
        'desc_inputs': [[1, 3, 4, 4]],
        'desc_bprop': [[1, 3, 4, 4]]}),
    ('TanhGrad', {
        'block': G.TanhGrad(),
        'desc_inputs': [[1, 3, 4, 4], [1, 3, 4, 4]],
        'desc_bprop': [[1, 3, 4, 4]],
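# Hedged numeric check (illustrative, not from the source): the 'Gelu' entry
# above exercises P.Gelu on a [1, 3, 4, 4] input. The exact definition is
# GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))); the tanh form used by many
# kernels approximates it to within roughly 1e-3.
import numpy as np
from scipy.special import erf

x = np.random.randn(1, 3, 4, 4).astype(np.float32)
exact = 0.5 * x * (1.0 + erf(x / np.sqrt(2.0)))
approx = 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))
assert np.allclose(exact, approx, atol=1e-2)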
def __init__(self):
    super().__init__()
    self.matmul = P.MatMul(transpose_b=True)
    self.gelu = P.Gelu()
def __init__(self):
    super(VirtualDatasetNet, self).__init__()
    self.virtual_dataset = _VirtualDataset()
    self.matmul1 = P.MatMul()
    self.matmul2 = P.MatMul()
    self.gelu = P.Gelu()
def __init__(self, strategy1, strategy2):
    super().__init__()
    self.matmul = P.MatMul(transpose_b=True).set_strategy(strategy1)
    self.gelu = P.Gelu().set_strategy(strategy2)
def __init__(self):
    super(GELU, self).__init__()
    self.gelu = P.Gelu()
def __init__(self, strategy0, strategy1, strategy2):
    super().__init__()
    self.fc_nobias = P.MatMul(transpose_b=True).shard(strategy0)
    self.add = P.Add().shard(strategy1)
    self.gelu = P.Gelu().shard(strategy2)
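# Hedged sketch (assumption): a matching construct for the cell above; x, w,
# and b are invented names for the input, weight, and bias tensors.
def construct(self, x, w, b):
    out = self.fc_nobias(x, w)   # MatMul with transpose_b=True
    out = self.add(out, b)
    return self.gelu(out)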
def __init__(self, 输入_接口, 输出_接口=2048, 丢弃率=0.1):
    # Identifiers are Chinese: 前向传播网络 = feed-forward network, 全连接层 = fully
    # connected (dense) layer, 输入_接口 = input size, 输出_接口 = output size,
    # 丢弃率 = dropout rate.
    super(前向传播网络, self).__init__()
    self.linear_1 = 全连接层(输入_接口, 输出_接口)
    self.gelu = P.Gelu()
    self.linear_2 = 全连接层(输出_接口, 输入_接口)
    self.Dropout = nn.Dropout(1 - 丢弃率)
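# Hedged sketch (assumption): a minimal construct for the feed-forward cell
# above, in the standard linear -> GELU -> linear -> dropout order; the exact
# order in the source is unknown.
def construct(self, x):
    hidden = self.gelu(self.linear_1(x))
    out = self.linear_2(hidden)
    return self.Dropout(out)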