Example #1
    def __init__(self, config):
        super(CRNN, self).__init__()
        self.batch_size = config.batch_size
        self.input_size = config.input_size
        self.hidden_size = config.hidden_size
        self.num_classes = config.class_num
        self.reshape = P.Reshape()
        self.cast = P.Cast()
        k = (1 / self.hidden_size)**0.5
        self.rnn1 = P.DynamicRNN(forget_bias=0.0)
        self.rnn1_bw = P.DynamicRNN(forget_bias=0.0)
        self.rnn2 = P.DynamicRNN(forget_bias=0.0)
        self.rnn2_bw = P.DynamicRNN(forget_bias=0.0)

        w1 = np.random.uniform(
            -k, k, (self.input_size + self.hidden_size, 4 * self.hidden_size))
        self.w1 = Parameter(w1.astype(np.float32), name="w1")
        w2 = np.random.uniform(
            -k, k,
            (2 * self.hidden_size + self.hidden_size, 4 * self.hidden_size))
        self.w2 = Parameter(w2.astype(np.float32), name="w2")
        w1_bw = np.random.uniform(
            -k, k, (self.input_size + self.hidden_size, 4 * self.hidden_size))
        self.w1_bw = Parameter(w1_bw.astype(np.float32), name="w1_bw")
        w2_bw = np.random.uniform(
            -k, k,
            (2 * self.hidden_size + self.hidden_size, 4 * self.hidden_size))
        self.w2_bw = Parameter(w2_bw.astype(np.float32), name="w2_bw")

        self.b1 = Parameter(np.random.uniform(
            -k, k, (4 * self.hidden_size)).astype(np.float32),
                            name="b1")
        self.b2 = Parameter(np.random.uniform(
            -k, k, (4 * self.hidden_size)).astype(np.float32),
                            name="b2")
        self.b1_bw = Parameter(np.random.uniform(
            -k, k, (4 * self.hidden_size)).astype(np.float32),
                               name="b1_bw")
        self.b2_bw = Parameter(np.random.uniform(
            -k, k, (4 * self.hidden_size)).astype(np.float32),
                               name="b2_bw")

        self.h1 = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))
        self.h2 = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))
        self.h1_bw = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))
        self.h2_bw = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))

        self.c1 = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))
        self.c2 = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))
        self.c1_bw = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))
        self.c2_bw = Tensor(
            np.zeros(shape=(1, self.batch_size,
                            self.hidden_size)).astype(np.float32))

        self.fc_weight = np.random.random(
            (self.num_classes, self.hidden_size)).astype(np.float32)
        self.fc_bias = np.random.random((self.num_classes)).astype(np.float32)

        self.fc = nn.Dense(in_channels=self.hidden_size,
                           out_channels=self.num_classes,
                           weight_init=Tensor(self.fc_weight),
                           bias_init=Tensor(self.fc_bias))
        self.fc.to_float(mstype.float32)
        self.expand_dims = P.ExpandDims()
        self.concat = P.Concat()
        self.transpose = P.Transpose()
        self.squeeze = P.Squeeze(axis=0)
        self.vgg = VGG()
        self.reverse_seq1 = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.reverse_seq2 = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.reverse_seq3 = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.reverse_seq4 = P.ReverseSequence(batch_dim=1, seq_dim=0)
        self.seq_length = Tensor(
            np.ones((self.batch_size), np.int32) * config.num_step,
            mstype.int32)
        self.concat1 = P.Concat(axis=2)
        self.dropout = nn.Dropout(0.5)
        self.rnn_dropout = nn.Dropout(0.9)
        self.use_dropout = config.use_dropout
Example #2
def test_check_dropout_3():
    Tensor(np.ones([20, 16, 50]).astype(np.int32))
    with pytest.raises(ValueError):
        nn.Dropout(3, 0, 1)
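The test above hinges on nn.Dropout rejecting an out-of-range keep probability; a minimal passing counterpart (a sketch, assuming the keep_prob-based API used throughout these examples) looks like this:

import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor

# keep_prob must lie in (0, 1]; keep_prob=0.9 keeps 90% of activations during training.
x = Tensor(np.ones([20, 16, 50]), mstype.float32)
dropout = nn.Dropout(keep_prob=0.9)
dropout.set_train()   # dropout only takes effect in training mode
y = dropout(x)        # same shape as x; kept values are scaled by 1/keep_prob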
Example #3
 def __init__(self, attention_probs_dropout_prob: float = 0.1) -> None:
     super().__init__()
     self.dropout = nn.Dropout(1.0 - attention_probs_dropout_prob)
Example #4
 def __init__(self):
     super().__init__()
     self.matmul1 = P.MatMul()
     self.dropout = nn.Dropout()
     self.matmul2 = P.MatMul()
Example #5
    def __init__(self,
                 batch_size,
                 from_tensor_width,
                 to_tensor_width,
                 from_seq_length,
                 to_seq_length,
                 num_attention_heads=1,
                 size_per_head=512,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 has_attention_mask=False,
                 attention_probs_dropout_prob=0.0,
                 use_one_hot_embeddings=False,
                 initializer_range=0.02,
                 do_return_2d_tensor=False,
                 use_relative_positions=False,
                 compute_type=mstype.float32):

        super(BertAttention, self).__init__()
        self.batch_size = batch_size
        self.from_seq_length = from_seq_length
        self.to_seq_length = to_seq_length
        self.num_attention_heads = num_attention_heads
        self.size_per_head = size_per_head
        self.has_attention_mask = has_attention_mask
        self.use_relative_positions = use_relative_positions

        self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))],
                                 dtype=compute_type)
        self.reshape = P.Reshape()
        self.shape_from_2d = (-1, from_tensor_width)
        self.shape_to_2d = (-1, to_tensor_width)
        weight = TruncatedNormal(initializer_range)
        units = num_attention_heads * size_per_head
        self.query_layer = nn.Dense(from_tensor_width,
                                    units,
                                    activation=query_act,
                                    weight_init=weight).to_float(compute_type)
        self.key_layer = nn.Dense(to_tensor_width,
                                  units,
                                  activation=key_act,
                                  weight_init=weight).to_float(compute_type)
        self.value_layer = nn.Dense(to_tensor_width,
                                    units,
                                    activation=value_act,
                                    weight_init=weight).to_float(compute_type)

        self.shape_from = (batch_size, from_seq_length, num_attention_heads,
                           size_per_head)
        self.shape_to = (batch_size, to_seq_length, num_attention_heads,
                         size_per_head)

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.trans_shape = (0, 2, 1, 3)
        self.trans_shape_relative = (2, 0, 1, 3)
        self.trans_shape_position = (1, 2, 0, 3)
        self.multiply_data = Tensor([
            -10000.0,
        ], dtype=compute_type)
        self.batch_num = batch_size * num_attention_heads
        self.matmul = P.BatchMatMul()

        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.TensorAdd()
            self.cast = P.Cast()
            self.get_dtype = P.DType()
        if do_return_2d_tensor:
            self.shape_return = (batch_size * from_seq_length,
                                 num_attention_heads * size_per_head)
        else:
            self.shape_return = (batch_size, from_seq_length,
                                 num_attention_heads * size_per_head)

        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        if self.use_relative_positions:
            self._generate_relative_positions_embeddings = \
                RelaPosEmbeddingsGenerator(length=to_seq_length,
                                           depth=size_per_head,
                                           max_relative_position=16,
                                           initializer_range=initializer_range,
                                           use_one_hot_embeddings=use_one_hot_embeddings)
Example #6
 def __init__(self, num_classes=10, dropout_keep_prob=0.8):
     super(Logits, self).__init__()
     self.avg_pool = nn.AvgPool2d(8, pad_mode='valid')
     self.dropout = nn.Dropout(keep_prob=dropout_keep_prob)
     self.flatten = P.Flatten()
     self.fc = nn.Dense(2048, num_classes)
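The constructor above only wires up the layers; a plausible forward pass (a sketch assuming an InceptionV3-style N x 2048 x 8 x 8 input, not the original construct) would chain them as:

 def construct(self, x):
     x = self.avg_pool(x)   # N x 2048 x 8 x 8 -> N x 2048 x 1 x 1
     x = self.dropout(x)    # identity in eval mode, keep_prob=dropout_keep_prob in training
     x = self.flatten(x)    # N x 2048
     return self.fc(x)      # N x num_classes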
Example #7
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        super(LSTM, self).__init__()
        validator.check_value_type("batch_first", batch_first, [bool],
                                   self.cls_name)
        validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
        validator.check_positive_int(num_layers, "num_layers", self.cls_name)
        self.is_ascend = context.get_context("device_target") == "Ascend"

        self.batch_first = batch_first
        self.transpose = P.Transpose()
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.lstm = P.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           has_bias=has_bias,
                           bidirectional=bidirectional,
                           dropout=float(dropout))

        weight_size = 0
        gate_size = 4 * hidden_size
        stdv = 1 / math.sqrt(hidden_size)
        num_directions = 2 if bidirectional else 1
        if self.is_ascend:
            self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
            self.concat = P.Concat(axis=0)
            self.concat_2dim = P.Concat(axis=2)
            self.cast = P.Cast()
            self.shape = P.Shape()
            if dropout != 0:
                self.dropout_op = nn.Dropout(float(dropout))
            b0 = np.zeros(gate_size, dtype=np.float16)
            self.w_list = []
            self.b_list = []
            self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
            self.rnns_bw = P.DynamicRNN(forget_bias=0.0)

            for layer in range(num_layers):
                w_shape = input_size if layer == 0 else (num_directions *
                                                         hidden_size)
                w_np = np.random.uniform(
                    -stdv, stdv,
                    (w_shape + hidden_size, gate_size)).astype(np.float16)
                self.w_list.append(
                    Parameter(initializer(Tensor(w_np),
                                          [w_shape + hidden_size, gate_size]),
                              name='weight_fw' + str(layer)))
                if has_bias:
                    b_np = np.random.uniform(-stdv, stdv,
                                             gate_size).astype(np.float16)
                    self.b_list.append(
                        Parameter(initializer(Tensor(b_np), [gate_size]),
                                  name='bias_fw' + str(layer)))
                else:
                    self.b_list.append(
                        Parameter(initializer(Tensor(b0), [gate_size]),
                                  name='bias_fw' + str(layer)))
                if bidirectional:
                    w_bw_np = np.random.uniform(
                        -stdv, stdv,
                        (w_shape + hidden_size, gate_size)).astype(np.float16)
                    self.w_list.append(
                        Parameter(
                            initializer(Tensor(w_bw_np),
                                        [w_shape + hidden_size, gate_size]),
                            name='weight_bw' + str(layer)))
                    b_bw_np = np.random.uniform(
                        -stdv, stdv,
                        (4 *
                         hidden_size)).astype(np.float16) if has_bias else b0
                    self.b_list.append(
                        Parameter(initializer(Tensor(b_bw_np), [gate_size]),
                                  name='bias_bw' + str(layer)))
            self.w_list = ParameterTuple(self.w_list)
            self.b_list = ParameterTuple(self.b_list)
        else:
            for layer in range(num_layers):
                input_layer_size = input_size if layer == 0 else hidden_size * num_directions
                increment_size = gate_size * input_layer_size
                increment_size += gate_size * hidden_size
                if has_bias:
                    increment_size += 2 * gate_size
                weight_size += increment_size * num_directions
            w_np = np.random.uniform(-stdv, stdv,
                                     (weight_size, 1, 1)).astype(np.float32)
            self.weight = Parameter(initializer(Tensor(w_np),
                                                [weight_size, 1, 1]),
                                    name='weight')
Example #8
    def __init__(self, num_classes=1000):
        """ Constructor
        Args:
            num_classes: number of classes.
        """
        super(Xception, self).__init__()
        self.num_classes = num_classes
        self.conv1 = nn.Conv2d(3,
                               32,
                               3,
                               2,
                               pad_mode='valid',
                               weight_init='xavier_uniform')
        self.bn1 = nn.BatchNorm2d(32, momentum=0.9)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(32,
                               64,
                               3,
                               pad_mode='valid',
                               weight_init='xavier_uniform')
        self.bn2 = nn.BatchNorm2d(64, momentum=0.9)

        # Entry flow
        self.block1 = Block(64,
                            128,
                            2,
                            2,
                            start_with_relu=False,
                            grow_first=True)
        self.block2 = Block(128,
                            256,
                            2,
                            2,
                            start_with_relu=True,
                            grow_first=True)
        self.block3 = Block(256,
                            728,
                            2,
                            2,
                            start_with_relu=True,
                            grow_first=True)

        # Middle flow
        self.block4 = Block(728,
                            728,
                            3,
                            1,
                            start_with_relu=True,
                            grow_first=True)
        self.block5 = Block(728,
                            728,
                            3,
                            1,
                            start_with_relu=True,
                            grow_first=True)
        self.block6 = Block(728,
                            728,
                            3,
                            1,
                            start_with_relu=True,
                            grow_first=True)
        self.block7 = Block(728,
                            728,
                            3,
                            1,
                            start_with_relu=True,
                            grow_first=True)

        self.block8 = Block(728,
                            728,
                            3,
                            1,
                            start_with_relu=True,
                            grow_first=True)
        self.block9 = Block(728,
                            728,
                            3,
                            1,
                            start_with_relu=True,
                            grow_first=True)
        self.block10 = Block(728,
                             728,
                             3,
                             1,
                             start_with_relu=True,
                             grow_first=True)
        self.block11 = Block(728,
                             728,
                             3,
                             1,
                             start_with_relu=True,
                             grow_first=True)

        # Exit flow
        self.block12 = Block(728,
                             1024,
                             2,
                             2,
                             start_with_relu=True,
                             grow_first=False)
        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(1536, momentum=0.9)

        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(2048, momentum=0.9)

        self.avg_pool = nn.AvgPool2d(10)
        self.dropout = nn.Dropout()
        self.fc = nn.Dense(2048, num_classes)
Example #9
 def __init__(self, dropout_prob=0.1):
     super(ResidualConnection, self).__init__()
     self.add = P.TensorAdd()
     self.dropout = nn.Dropout(1 - dropout_prob)
     self.use_dropout = dropout_prob > 0
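Only the constructor is shown above; a plausible construct for this residual block (a sketch, with the argument names hidden_tensor and input_tensor assumed) is:

 def construct(self, hidden_tensor, input_tensor):
     output = hidden_tensor
     if self.use_dropout:
         output = self.dropout(output)
     return self.add(output, input_tensor)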
Example #10
"""inceptionv4_train_export"""

import sys
import numpy as np
from train_utils import SaveInOut, TrainWrap
from official.cv.xception.src.Xception import Xception
import mindspore.common.dtype as mstype
from mindspore import context, Tensor, nn
from mindspore.train.serialization import export

context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", save_graphs=False)


n = Xception(num_classes=1000)
n.dropout = nn.Dropout(keep_prob=1.0)

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
optimizer = nn.SGD(n.trainable_params(), learning_rate=0.01, momentum=0.9, dampening=0.0, weight_decay=0.0,
                   nesterov=True, loss_scale=1.0)
net = TrainWrap(n, loss_fn, optimizer)

batch = 2
x = Tensor(np.random.randn(batch, 3, 299, 299), mstype.float32)
label = Tensor(np.zeros([batch, 1000]).astype(np.float32))
export(net, x, label, file_name="mindir/xception_train", file_format='MINDIR')

if len(sys.argv) > 1:
    SaveInOut(sys.argv[1] + "xception", x, label, n, net)
Example #11
    def __init__(self, config):
        super(SSD300VGG16, self).__init__()

        # VGG16 backbone: block1~5
        self.backbone = vgg16()

        # SSD blocks: block6~7
        self.b6_1 = nn.Conv2d(in_channels=512,
                              out_channels=1024,
                              kernel_size=3,
                              padding=6,
                              dilation=6,
                              pad_mode='pad')
        self.b6_2 = nn.Dropout(0.5)

        self.b7_1 = nn.Conv2d(in_channels=1024,
                              out_channels=1024,
                              kernel_size=1)
        self.b7_2 = nn.Dropout(0.5)

        # Extra Feature Layers: block8~11
        self.b8_1 = nn.Conv2d(in_channels=1024,
                              out_channels=256,
                              kernel_size=1,
                              padding=1,
                              pad_mode='pad')
        self.b8_2 = nn.Conv2d(in_channels=256,
                              out_channels=512,
                              kernel_size=3,
                              stride=2,
                              pad_mode='valid')

        self.b9_1 = nn.Conv2d(in_channels=512,
                              out_channels=128,
                              kernel_size=1,
                              padding=1,
                              pad_mode='pad')
        self.b9_2 = nn.Conv2d(in_channels=128,
                              out_channels=256,
                              kernel_size=3,
                              stride=2,
                              pad_mode='valid')

        self.b10_1 = nn.Conv2d(in_channels=256,
                               out_channels=128,
                               kernel_size=1)
        self.b10_2 = nn.Conv2d(in_channels=128,
                               out_channels=256,
                               kernel_size=3,
                               pad_mode='valid')

        self.b11_1 = nn.Conv2d(in_channels=256,
                               out_channels=128,
                               kernel_size=1)
        self.b11_2 = nn.Conv2d(in_channels=128,
                               out_channels=256,
                               kernel_size=3,
                               pad_mode='valid')

        # boxes
        self.multi_box = MultiBox(config)
        if not self.training:
            self.activation = P.Sigmoid()
Example #12
    def __init__(self,
                 outer_nc,
                 inner_nc,
                 in_planes=None,
                 dropout=False,
                 submodule=None,
                 outermost=False,
                 innermost=False,
                 alpha=0.2,
                 norm_mode='batch'):
        super(UnetSkipConnectionBlock, self).__init__()
        downnorm = nn.BatchNorm2d(inner_nc)
        upnorm = nn.BatchNorm2d(outer_nc)
        use_bias = False
        if norm_mode == 'instance':
            downnorm = nn.BatchNorm2d(inner_nc, affine=False)
            upnorm = nn.BatchNorm2d(outer_nc, affine=False)
            use_bias = True
        if in_planes is None:
            in_planes = outer_nc
        downconv = nn.Conv2d(in_planes,
                             inner_nc,
                             kernel_size=4,
                             stride=2,
                             padding=1,
                             has_bias=use_bias,
                             pad_mode='pad')
        downrelu = nn.LeakyReLU(alpha)
        uprelu = nn.ReLU()

        if outermost:
            upconv = nn.Conv2dTranspose(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        pad_mode='pad')
            down = [downconv]
            up = [uprelu, upconv, nn.Tanh()]
            model = down + [submodule] + up
        elif innermost:
            upconv = nn.Conv2dTranspose(inner_nc,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        has_bias=use_bias,
                                        pad_mode='pad')
            down = [downrelu, downconv]
            up = [uprelu, upconv, upnorm]
            model = down + up
        else:
            upconv = nn.Conv2dTranspose(inner_nc * 2,
                                        outer_nc,
                                        kernel_size=4,
                                        stride=2,
                                        padding=1,
                                        has_bias=use_bias,
                                        pad_mode='pad')
            down = [downrelu, downconv, downnorm]
            up = [uprelu, upconv, upnorm]

            model = down + [submodule] + up
            if dropout:
                model.append(nn.Dropout(0.5))

        self.model = nn.SequentialCell(model)
        self.skip_connections = not outermost
        self.concat = ops.Concat(axis=1)
Example #13
    def __init__(self,
                 num_classes,
                 is_training=True,
                 stem_filters=32,
                 penultimate_filters=1056,
                 filters_multiplier=2):
        super(NASNetAMobile, self).__init__()
        self.is_training = is_training
        self.stem_filters = stem_filters
        self.penultimate_filters = penultimate_filters
        self.filters_multiplier = filters_multiplier

        filters = self.penultimate_filters // 24
        # 24 is default value for the architecture

        self.conv0 = nn.SequentialCell([
            nn.Conv2d(in_channels=3,
                      out_channels=self.stem_filters,
                      kernel_size=3,
                      stride=2,
                      pad_mode='pad',
                      padding=0,
                      has_bias=False),
            nn.BatchNorm2d(num_features=self.stem_filters,
                           eps=0.001,
                           momentum=0.9,
                           affine=True)
        ])

        self.cell_stem_0 = CellStem0(self.stem_filters,
                                     num_filters=filters //
                                     (filters_multiplier**2))
        self.cell_stem_1 = CellStem1(self.stem_filters,
                                     num_filters=filters // filters_multiplier)

        self.cell_0 = FirstCell(
            in_channels_left=filters,
            out_channels_left=filters // 2,  # 1, 0.5
            in_channels_right=2 * filters,
            out_channels_right=filters)  # 2, 1
        self.cell_1 = NormalCell(
            in_channels_left=2 * filters,
            out_channels_left=filters,  # 2, 1
            in_channels_right=6 * filters,
            out_channels_right=filters)  # 6, 1
        self.cell_2 = NormalCell(
            in_channels_left=6 * filters,
            out_channels_left=filters,  # 6, 1
            in_channels_right=6 * filters,
            out_channels_right=filters)  # 6, 1
        self.cell_3 = NormalCell(
            in_channels_left=6 * filters,
            out_channels_left=filters,  # 6, 1
            in_channels_right=6 * filters,
            out_channels_right=filters)  # 6, 1

        self.reduction_cell_0 = ReductionCell0(
            in_channels_left=6 * filters,
            out_channels_left=2 * filters,  # 6, 2
            in_channels_right=6 * filters,
            out_channels_right=2 * filters)  # 6, 2

        self.cell_6 = FirstCell(
            in_channels_left=6 * filters,
            out_channels_left=filters,  # 6, 1
            in_channels_right=8 * filters,
            out_channels_right=2 * filters)  # 8, 2
        self.cell_7 = NormalCell(
            in_channels_left=8 * filters,
            out_channels_left=2 * filters,  # 8, 2
            in_channels_right=12 * filters,
            out_channels_right=2 * filters)  # 12, 2
        self.cell_8 = NormalCell(
            in_channels_left=12 * filters,
            out_channels_left=2 * filters,  # 12, 2
            in_channels_right=12 * filters,
            out_channels_right=2 * filters)  # 12, 2
        self.cell_9 = NormalCell(
            in_channels_left=12 * filters,
            out_channels_left=2 * filters,  # 12, 2
            in_channels_right=12 * filters,
            out_channels_right=2 * filters)  # 12, 2

        if is_training:
            self.aux_logits = AuxLogits(in_channels=12 * filters,
                                        out_channels=num_classes)

        self.reduction_cell_1 = ReductionCell1(
            in_channels_left=12 * filters,
            out_channels_left=4 * filters,  # 12, 4
            in_channels_right=12 * filters,
            out_channels_right=4 * filters)  # 12, 4

        self.cell_12 = FirstCell(
            in_channels_left=12 * filters,
            out_channels_left=2 * filters,  # 12, 2
            in_channels_right=16 * filters,
            out_channels_right=4 * filters)  # 16, 4
        self.cell_13 = NormalCell(
            in_channels_left=16 * filters,
            out_channels_left=4 * filters,  # 16, 4
            in_channels_right=24 * filters,
            out_channels_right=4 * filters)  # 24, 4
        self.cell_14 = NormalCell(
            in_channels_left=24 * filters,
            out_channels_left=4 * filters,  # 24, 4
            in_channels_right=24 * filters,
            out_channels_right=4 * filters)  # 24, 4
        self.cell_15 = NormalCell(
            in_channels_left=24 * filters,
            out_channels_left=4 * filters,  # 24, 4
            in_channels_right=24 * filters,
            out_channels_right=4 * filters)  # 24, 4

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(keep_prob=0.5)
        self.classifier = nn.Dense(in_channels=24 * filters,
                                   out_channels=num_classes)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self._initialize_weights()
Example #14
    def __init__(self,
                 model_cfgs,
                 num_classes=1000,
                 multiplier=1.,
                 final_drop=0.,
                 round_nearest=8):
        super(MobileNetV3, self).__init__()
        self.cfgs = model_cfgs['cfg']
        self.inplanes = 16
        self.features = []
        first_conv_in_channel = 3
        first_conv_out_channel = _make_divisible(multiplier * self.inplanes)

        self.features.append(
            nn.Conv2d(in_channels=first_conv_in_channel,
                      out_channels=first_conv_out_channel,
                      kernel_size=3,
                      padding=1,
                      stride=2,
                      has_bias=False,
                      pad_mode='pad'))
        self.features.append(nn.BatchNorm2d(first_conv_out_channel))
        self.features.append(Activation('hswish'))
        for layer_cfg in self.cfgs:
            self.features.append(
                self._make_layer(
                    kernel_size=layer_cfg[0],
                    exp_ch=_make_divisible(multiplier * layer_cfg[1]),
                    out_channel=_make_divisible(multiplier * layer_cfg[2]),
                    use_se=layer_cfg[3],
                    act_func=layer_cfg[4],
                    stride=layer_cfg[5]))
        output_channel = _make_divisible(multiplier *
                                         model_cfgs["cls_ch_squeeze"])
        self.features.append(
            nn.Conv2d(in_channels=_make_divisible(multiplier *
                                                  self.cfgs[-1][2]),
                      out_channels=output_channel,
                      kernel_size=1,
                      padding=0,
                      stride=1,
                      has_bias=False,
                      pad_mode='pad'))
        self.features.append(nn.BatchNorm2d(output_channel))
        self.features.append(Activation('hswish'))
        self.features.append(GlobalAvgPooling(keep_dims=True))
        self.features.append(
            nn.Conv2d(in_channels=output_channel,
                      out_channels=model_cfgs['cls_ch_expand'],
                      kernel_size=1,
                      padding=0,
                      stride=1,
                      has_bias=False,
                      pad_mode='pad'))
        self.features.append(Activation('hswish'))
        if final_drop > 0:
            self.features.append((nn.Dropout(final_drop)))

        # make it nn.SequentialCell
        self.features = nn.SequentialCell(self.features)
        self.output = nn.Conv2d(in_channels=model_cfgs['cls_ch_expand'],
                                out_channels=num_classes,
                                kernel_size=1,
                                has_bias=True,
                                pad_mode='pad')
        self.squeeze = P.Squeeze(axis=(2, 3))

        self._initialize_weights()
Example #15
    def __init__(self,
                 src_dim,
                 tgt_dim,
                 attn_embed_dim,
                 num_attn_heads=1,
                 query_act=None,
                 key_act=None,
                 value_act=None,
                 out_act=None,
                 has_attention_mask=True,
                 attention_dropout_prob=0.0,
                 initializer_range=0.02,
                 do_return_2d_tensor=True,
                 compute_type=mstype.float32):
        super(MultiHeadAttention, self).__init__()
        if attn_embed_dim % num_attn_heads != 0:
            raise ValueError(
                f"The hidden size {attn_embed_dim} is not a multiple of the "
                f"number of attention heads {num_attn_heads}")

        self.attn_embed_dim = attn_embed_dim
        self.num_attn_heads = num_attn_heads
        self.size_per_head = attn_embed_dim // num_attn_heads
        self.src_dim = src_dim
        self.tgt_dim = tgt_dim
        self.has_attention_mask = has_attention_mask

        if attn_embed_dim != self.num_attn_heads * self.size_per_head:
            raise ValueError(
                "`attn_embed_dim` must be divided by num_attn_heads.")

        self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))],
                                 dtype=compute_type)
        self.reshape = P.Reshape()

        self.query_layer = nn.Dense(
            src_dim,
            attn_embed_dim,
            activation=query_act,
            has_bias=True,
            weight_init=TruncatedNormal(initializer_range)).to_float(
                compute_type)
        self.key_layer = nn.Dense(
            tgt_dim,
            attn_embed_dim,
            activation=key_act,
            has_bias=True,
            weight_init=TruncatedNormal(initializer_range)).to_float(
                compute_type)
        self.value_layer = nn.Dense(
            tgt_dim,
            attn_embed_dim,
            activation=value_act,
            has_bias=True,
            weight_init=TruncatedNormal(initializer_range)).to_float(
                compute_type)
        self.out_layer = nn.Dense(
            attn_embed_dim,
            attn_embed_dim,
            activation=out_act,
            has_bias=True,
            weight_init=TruncatedNormal(initializer_range)).to_float(
                compute_type)

        self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
        self.multiply = P.Mul()
        self.transpose = P.Transpose()
        self.multiply_data = Tensor([-10000.0], dtype=compute_type)
        self.matmul = P.BatchMatMul()

        self.softmax = nn.Softmax()
        self.dropout = nn.Dropout(1.0 - attention_dropout_prob)

        if self.has_attention_mask:
            self.expand_dims = P.ExpandDims()
            self.sub = P.Sub()
            self.add = P.TensorAdd()
            self.cast = P.Cast()
            self.get_dtype = P.DType()

        self.do_return_2d_tensor = do_return_2d_tensor
        self.cast_compute_type = SaturateCast(dst_type=compute_type)
        self.softmax_cast = P.Cast()
        self.get_shape = P.Shape()
        self.transpose_orders = (0, 2, 1, 3)
Example #16
 def __init__(self, num_classes=1000, create_aux_logits=False):
     super(Inceptionv3, self).__init__()
     self.create_aux_logits = create_aux_logits
     # N x 3 x 299 x 299
     self.Conv2d_1a_3x3 = Conv2dBlock(in_channels=3,
                                      out_channels=32,
                                      kernel_size=3,
                                      stride=2,
                                      pad_mode="valid")
     # N x 32 x 149 x 149
     self.Conv2d_2a_3x3 = Conv2dBlock(in_channels=32,
                                      out_channels=32,
                                      kernel_size=3,
                                      pad_mode="valid")
     # N x 32 x 147 x 147
     self.Conv2d_2b_3x3 = Conv2dBlock(in_channels=32,
                                      out_channels=64,
                                      kernel_size=3,
                                      pad_mode="same")
     # N x 64 x 147 x 147
     self.MaxPool_3a_3x3 = nn.MaxPool2d(kernel_size=3,
                                        stride=2,
                                        pad_mode="valid")
     # N x 64 x 73 x 73
     self.Conv2d_3b_1x1 = Conv2dBlock(in_channels=64,
                                      out_channels=80,
                                      kernel_size=1)
     # N x 80 x 73 x 73
     self.Conv2d_4a_3x3 = Conv2dBlock(in_channels=80,
                                      out_channels=192,
                                      kernel_size=3,
                                      pad_mode="valid")
     # N x 192 x 71 x 71
     self.MaxPool_5a_3x3 = nn.MaxPool2d(kernel_size=3,
                                        stride=2,
                                        pad_mode="valid")
     # N x 192 x 35 x 35
     self.Mixed_5b = InceptionBlockA(in_channels=192, var_channels=32)
     # N x 256 x 35 x 35
     self.Mixed_5c = InceptionBlockA(in_channels=256, var_channels=64)
     # N x 288 x 35 x 35
     self.Mixed_5d = InceptionBlockA(in_channels=288, var_channels=64)
     # N x 288 x 35 x 35
     self.Mixed_6a = InceptionBlockB_1(in_channels=288)
     # N x 768 x 17 x 17
     self.Mixed_6b = InceptionBlockB_2(in_channels=768, var_channels=128)
     # N x 768 x 17 x 17
     self.Mixed_6c = InceptionBlockB_2(in_channels=768, var_channels=160)
     # N x 768 x 17 x 17
     self.Mixed_6d = InceptionBlockB_2(in_channels=768, var_channels=160)
     # N x 768 x 17 x 17
     self.Mixed_6e = InceptionBlockB_2(in_channels=768, var_channels=192)
     # N x 768 x 17 x 17
     if create_aux_logits:
         self.AuxLogits = InceptionBlockAux(in_channels=768,
                                            num_classes=num_classes)
     # N x 768 x 17 x 17
     self.Mixed_7a = InceptionBlockC_1(in_channels=768)
     # N x 1280 x 8 x 8
     self.Mixed_7b = InceptionBlockC_2(in_channels=1280)
     # N x 2048 x 8 x 8
     self.Mixed_7c = InceptionBlockC_2(in_channels=2048)
     # N x 2048 x 8 x 8
     self.mean = P.ReduceMean(keep_dims=True)
     # N x 2048 x 1 x 1
     self.Dropout_last = nn.Dropout(keep_prob=0.8)
     # N x 2048 x 1 x 1
     self.Conv2d_last = Conv2dBlock(in_channels=2048,
                                    out_channels=num_classes,
                                    kernel_size=1,
                                    with_relu=False,
                                    with_bn=False)
     # N x num_classes x 1 x 1
     self.fc = nn.Dense(in_channels=2048, out_channels=num_classes)
     self.flatten = nn.Flatten()
Example #17
    def __init__(self,
                 num_classes,
                 input_nc=1,
                 padding=1,
                 pad_mode='pad',
                 has_bias=False,
                 use_dropout=False):
        super(DFCNN, self).__init__()

        if pad_mode == 'pad':
            assert padding >= 0, "when the pad_mode is 'pad', the padding must be greater than or equal to 0!"

        if pad_mode == 'same' or pad_mode == 'valid':
            assert padding == 0, "when the pad_mode is 'same' or 'valid', the padding must be equal to 0!"

        self.use_dropout = use_dropout

        # structure

        # seq 1
        self.conv11 = nn.Conv2d(in_channels=input_nc,
                                out_channels=64,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn11 = nn.BatchNorm2d(64)
        self.relu11 = nn.ReLU()
        self.conv12 = nn.Conv2d(in_channels=64,
                                out_channels=64,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn12 = nn.BatchNorm2d(64)
        self.relu12 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid')

        # seq 2
        self.conv21 = nn.Conv2d(in_channels=64,
                                out_channels=128,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn21 = nn.BatchNorm2d(128)
        self.relu21 = nn.ReLU()
        self.conv22 = nn.Conv2d(in_channels=128,
                                out_channels=128,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn22 = nn.BatchNorm2d(128)
        self.relu22 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid')

        # seq 3
        self.conv31 = nn.Conv2d(in_channels=128,
                                out_channels=256,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn31 = nn.BatchNorm2d(256)
        self.relu31 = nn.ReLU()
        self.conv32 = nn.Conv2d(in_channels=256,
                                out_channels=256,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn32 = nn.BatchNorm2d(256)
        self.relu32 = nn.ReLU()
        self.conv33 = nn.Conv2d(in_channels=256,
                                out_channels=256,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn33 = nn.BatchNorm2d(256)
        self.relu33 = nn.ReLU()
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid')

        # seq 4
        self.conv41 = nn.Conv2d(in_channels=256,
                                out_channels=512,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn41 = nn.BatchNorm2d(512)
        self.relu41 = nn.ReLU()
        self.conv42 = nn.Conv2d(in_channels=512,
                                out_channels=512,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn42 = nn.BatchNorm2d(512)
        self.relu42 = nn.ReLU()
        self.conv43 = nn.Conv2d(in_channels=512,
                                out_channels=512,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn43 = nn.BatchNorm2d(512)
        self.relu43 = nn.ReLU()
        self.maxpool4 = nn.MaxPool2d(kernel_size=1, stride=1, pad_mode='valid')

        # seq 5
        self.conv51 = nn.Conv2d(in_channels=512,
                                out_channels=512,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn51 = nn.BatchNorm2d(512)
        self.relu51 = nn.ReLU()
        self.conv52 = nn.Conv2d(in_channels=512,
                                out_channels=512,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn52 = nn.BatchNorm2d(512)
        self.relu52 = nn.ReLU()
        self.conv53 = nn.Conv2d(in_channels=512,
                                out_channels=512,
                                kernel_size=3,
                                stride=1,
                                padding=padding,
                                has_bias=has_bias,
                                pad_mode=pad_mode)
        self.bn53 = nn.BatchNorm2d(512)
        self.relu53 = nn.ReLU()
        self.maxpool5 = nn.MaxPool2d(kernel_size=1, stride=1, pad_mode='valid')

        self.bn = nn.BatchNorm2d(512)
        if self.use_dropout:
            self.drop1 = nn.Dropout(0.8)
            self.drop2 = nn.Dropout(0.8)
            self.drop3 = nn.Dropout(0.8)
            self.drop4 = nn.Dropout(0.8)
            self.drop5 = nn.Dropout(0.8)
            self.drop_fc1 = nn.Dropout(0.5)
            self.drop_fc2 = nn.Dropout(0.5)
        self.fc1 = nn.Dense(25 * 512, 4096, activation='relu')
        self.fc2 = nn.Dense(4096, 4096, activation='relu')
        self.fc3 = nn.Dense(4096, num_classes, activation='relu')

        # operation
        self.transpose = ops.Transpose()
        self.reshape = ops.Reshape()
Example #18
    def __init__(self, weight, vocab_size, cell, batch_size):
        super(textrcnn, self).__init__()
        self.num_hiddens = 512
        self.embed_size = 300
        self.num_classes = 2
        self.batch_size = batch_size
        k = (1 / self.num_hiddens)**0.5

        self.embedding = nn.Embedding(vocab_size,
                                      self.embed_size,
                                      embedding_table=weight)
        self.embedding.embedding_table.requires_grad = False
        self.cell = cell

        self.cast = P.Cast()

        self.h1 = Tensor(
            np.zeros(shape=(self.batch_size,
                            self.num_hiddens)).astype(np.float16))
        self.c1 = Tensor(
            np.zeros(shape=(self.batch_size,
                            self.num_hiddens)).astype(np.float16))

        if cell == "lstm":
            self.lstm = P.DynamicRNN(forget_bias=0.0)
            self.w1_fw = Parameter(np.random.uniform(
                -k, k, (self.embed_size + self.num_hiddens,
                        4 * self.num_hiddens)).astype(np.float16),
                                   name="w1_fw")
            self.b1_fw = Parameter(np.random.uniform(
                -k, k, (4 * self.num_hiddens)).astype(np.float16),
                                   name="b1_fw")
            self.w1_bw = Parameter(np.random.uniform(
                -k, k, (self.embed_size + self.num_hiddens,
                        4 * self.num_hiddens)).astype(np.float16),
                                   name="w1_bw")
            self.b1_bw = Parameter(np.random.uniform(
                -k, k, (4 * self.num_hiddens)).astype(np.float16),
                                   name="b1_bw")
            self.h1 = Tensor(
                np.zeros(shape=(1, self.batch_size,
                                self.num_hiddens)).astype(np.float16))
            self.c1 = Tensor(
                np.zeros(shape=(1, self.batch_size,
                                self.num_hiddens)).astype(np.float16))

        if cell == "vanilla":
            self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens)
            self.rnnU_fw = nn.Dense(self.embed_size, self.num_hiddens)
            self.rnnW_bw = nn.Dense(self.num_hiddens, self.num_hiddens)
            self.rnnU_bw = nn.Dense(self.embed_size, self.num_hiddens)

        if cell == "gru":
            self.rnnWr_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWz_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWh_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWr_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.ones = Tensor(
                np.ones(shape=(self.batch_size,
                               self.num_hiddens)).astype(np.float16))
            self.rnnWr_fw.to_float(mstype.float16)
            self.rnnWz_fw.to_float(mstype.float16)
            self.rnnWh_fw.to_float(mstype.float16)
            self.rnnWr_bw.to_float(mstype.float16)
            self.rnnWz_bw.to_float(mstype.float16)
            self.rnnWh_bw.to_float(mstype.float16)

        self.transpose = P.Transpose()
        self.reduce_max = P.ReduceMax()
        self.expand_dims = P.ExpandDims()
        self.concat = P.Concat()

        self.reshape = P.Reshape()
        self.left_pad_tensor = Tensor(
            np.zeros(
                (1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.right_pad_tensor = Tensor(
            np.zeros(
                (1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.output_dense = nn.Dense(self.num_hiddens * 1, 2)
        self.concat0 = P.Concat(0)
        self.concat2 = P.Concat(2)
        self.concat1 = P.Concat(1)
        self.text_rep_dense = nn.Dense(2 * self.num_hiddens + self.embed_size,
                                       self.num_hiddens)
        self.mydense = nn.Dense(self.num_hiddens, 2)
        self.drop_out = nn.Dropout(keep_prob=0.7)
        self.tanh = P.Tanh()
        self.sigmoid = P.Sigmoid()
        self.slice = P.Slice()
        self.text_rep_dense.to_float(mstype.float16)
        self.mydense.to_float(mstype.float16)
        self.output_dense.to_float(mstype.float16)
Example #19
        'desc_inputs': [[1, 512]],
        'desc_bprop': [[1, 512]]}),
    ('LogicalNot', {
        'block': P.LogicalNot(),
        'desc_inputs': [convert([256], np.bool_)],
        'desc_bprop': [[256]]}),  # custom operator: bool input was not converted; gongchen filed a ticket.
    ('Equal', {
        'block': P.Equal(),
        'desc_inputs': [convert([256], np.float16), convert([256], np.float16)],
        'desc_bprop': [[256]]}),
    ('Greater', {
        'block': P.Greater(),
        'desc_inputs': [convert([256], np.float16), convert([256], np.float16)],
        'desc_bprop': [[256]]}),
    ('Dropout', {
        'block': nn.Dropout(),
        'desc_inputs': [[1, 512, 7, 7]],
        'desc_bprop': [[1, 512, 7, 7]]}),  # a scalar input caused a segmentation fault in the plugin.
    ('MatMul', {
        'block': P.MatMul(),
        'desc_inputs': [[64, 512], [512, 64]],  # fp16 does not work; quite problematic.
        'desc_bprop': [[64, 64]]}),
    ('Maximum', {
        'block': P.Maximum(),
        'desc_inputs': [[64, 1], [64, 1]],
        'desc_bprop': [[64, 1]]}),
]

test_case_lists = [test_case_reid_ops]
test_case = functools.reduce(lambda x, y: x + y, test_case_lists)
# use -k to select certain test cases
Example #20
def test_check_dropout_1():
    x = Tensor(np.ones([20, 16, 50]), mstype.float32)
    m = nn.Dropout(0.8)
    m(x)
Example #21
    def __init__(self, config):
        super(PANGUALPHA_Model, self).__init__()
        self.get_attention_mask = AttentionMask(config)
        self.word_embedding = EmbeddingLookup(config).set_comm_fusion(1)
        self.eod_reset = config.eod_reset
        if config.load_ckpt_path:
            # Loading the embedding table from the ckpt path:
            embedding_path = os.path.join(config.load_ckpt_path, 'position_embedding.npy')
            if os.path.exists(embedding_path):
                p_table = np.load(embedding_path)
                position_table_param = Tensor(p_table, mstype.float32)
            else:
                raise ValueError(f"{embedding_path} file not exits, please check whether position_embedding file exit.")
        else:
            position_table_param = TruncatedNormal(0.02)
            
        self.position_embedding = nn.Embedding(
            config.seq_length,
            config.embedding_size,
            embedding_table=position_table_param).set_comm_fusion(1)
        self.word_embedding.embedding_table.parallel_optimizer = False
        self.position_embedding.embedding_table.parallel_optimizer = False
        self.position_embedding.gather.shard(((1, 1), (config.dp,)))
        self.position_embedding.expand.shard(((config.dp, 1),))
        self.blocks = nn.CellList()
        fusion_group_num = 4
        fusion_group_size = config.num_layers // fusion_group_num
        fusion_group_size = max(fusion_group_size, 1)

        num_layers = config.num_layers - 1
        self.num_layers = num_layers

        for i in range(num_layers):
            per_block = Block(config, i + 1).set_comm_fusion(int(i / fusion_group_size) + 2)
            per_block.recompute()
            per_block.attention.dropout.dropout_gen_mask.recompute(False)
            per_block.attention.prob_dropout.dropout_gen_mask.recompute(False)
            per_block.output.dropout.dropout_gen_mask.recompute(False)
            per_block.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
            per_block.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
            per_block.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
            self.blocks.append(per_block)

        if config.self_layernorm:
            self.layernorm = LayerNorm((config.embedding_size,), config.dp).to_float(
                mstype.float32).set_comm_fusion(
                int((num_layers - 1) / fusion_group_size) + 2)
        else:
            self.layernorm = nn.LayerNorm((config.embedding_size,)).to_float(
                mstype.float32).set_comm_fusion(
                int((num_layers - 1) / fusion_group_size) + 2)
            self.layernorm.layer_norm.shard(((config.dp, 1, 1), (1,), (1,)))
        self.layernorm.gamma.parallel_optimizer = False
        self.layernorm.beta.parallel_optimizer = False
        self.use_past = config.use_past
        self.past = tuple([None] * config.num_layers)
        self.add = P.TensorAdd().shard(((config.dp, 1, 1), (config.dp, 1, 1)))
        self.expand_dims = P.ExpandDims().shard(((config.dp, 1, 1),))
        self.dtype = config.compute_dtype
        self.dropout = nn.Dropout(1 - config.dropout_rate)
        self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
        self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))

        if config.load_ckpt_path:
            # Loading the embedding table from the ckpt path:
            embedding_path = os.path.join(config.load_ckpt_path, 'top_query_embedding.npy')
            if os.path.exists(embedding_path):
                top_query_table = np.load(embedding_path)
                top_query_table_param = Tensor(top_query_table, mstype.float32)
            else:
                raise ValueError(f"{embedding_path} file not exits, please check whether top_query_embedding file exist.")
        else:
            top_query_table_param = TruncatedNormal(0.02)
            
        self.top_query_embedding = nn.Embedding(config.seq_length, config.embedding_size, \
                                                embedding_table=top_query_table_param).set_comm_fusion(
            int((config.num_layers - 1) / fusion_group_num) + 2)
        self.top_query_embedding.embedding_table.parallel_optimizer = False
        self.top_query_embedding.gather.shard(((1, 1), (config.dp,)))
        self.top_query_embedding.expand.shard(((config.dp, 1),))
        self.top_query_layer = QueryLayer(config)

        self.top_query_layer.recompute()

        self.top_query_layer.output.dropout.dropout_gen_mask.recompute(False)
        self.top_query_layer.attention.dropout.dropout_gen_mask.recompute(False)
        self.top_query_layer.attention.prob_dropout.dropout_gen_mask.recompute(False)

        self.top_query_layer.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        self.top_query_layer.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        self.top_query_layer.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)

        self.top_query_layer.set_comm_fusion(int((config.num_layers - 1) / fusion_group_num) + 2)
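The comm-fusion group index used above is plain integer arithmetic on the layer index. A minimal sketch of that mapping; num_layers and fusion_group_size below are chosen purely for illustration and are not values taken from this code:

# Illustrative values only (assumed): 32 transformer blocks, fused in groups of 8.
num_layers, fusion_group_size = 32, 8
groups = [int(i / fusion_group_size) + 2 for i in range(num_layers)]
# Layers 0-7 land in fusion group 2, 8-15 in 3, 16-23 in 4, 24-31 in 5;
# the +2 offset matches the expression passed to set_comm_fusion above.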
Example #22
def test_check_dropout_3():
    x = Tensor(np.ones([20, 16, 50]), mstype.float32)
    m = nn.Dropout(0.3, seed0=1, seed1=1)
    m(x)
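Note that the first argument to nn.Dropout in these snippets is a keep probability, not a drop probability (compare nn.Dropout(1 - config.dropout_rate) and nn.Dropout(keep_prob=0.9) elsewhere in this file). A minimal sketch of exercising such a cell, assuming the MindSpore 1.x-style API; the shapes and the 0.7 value are illustrative only:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor
import mindspore.common.dtype as mstype

x = Tensor(np.ones([2, 4]), mstype.float32)
dropout = nn.Dropout(0.7)   # keep_prob=0.7: about 70% of activations are kept
dropout.set_train()         # dropout only takes effect while the cell is in training mode
y = dropout(x)              # kept entries are rescaled by 1/keep_prob; dropped entries become 0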
Example #23
    def __init__(self):
        super(PReLUGradNet, self).__init__()
        self.prelu_grad = G.PReLUGrad()

    def construct(self, dout, x, w):
        return self.prelu_grad(dout, x, w)


test_cases = [
    ('SoftMaxGrad', {
        'block': SoftMaxGrad(VirtualNetWithLoss(P.Softmax())),
        'desc_inputs': [[128, 32, 32, 64]],
        'desc_bprop': [[128, 32, 32, 64]],
    }),
    ('DropoutGrad', {
        'block': DropoutGrad(VirtualNetWithLoss(nn.Dropout())),
        'desc_inputs': [[128, 32, 32, 64]],
        'desc_bprop': [[128, 32, 32, 64]],
    }),
    ('ScalarSummary', {
        'block': ScalarSummaryNet(),
        'desc_inputs': [Tensor(2.2)],
    }),
    ('L2Normalize', {
        'block':
        L2NormalizeNet(),
        'desc_inputs': [
            Tensor(np.array([[1.0, 2, 3], [4.0, 5, 6], [7.0, 8, 9]]),
                   mindspore.float32)
        ],
    }),
Example #24
    def __init__(self,
                 num_classes,
                 feature_shape,
                 backbone,
                 channel,
                 depth,
                 scale_sizes,
                 atrous_rates,
                 decoder_output_stride,
                 output_stride,
                 fine_tune_batch_norm=False):
        super(SingleDeepLabV3, self).__init__()
        self.num_classes = num_classes
        self.channel = channel
        self.depth = depth
        self.scale_sizes = []
        for scale_size in np.sort(scale_sizes):
            self.scale_sizes.append(scale_size)
        self.net = backbone
        self.aspp = ASPP(channel=self.channel,
                         depth=self.depth,
                         feature_shape=[feature_shape[2], feature_shape[3]],
                         scale_sizes=self.scale_sizes,
                         atrous_rates=atrous_rates,
                         output_stride=output_stride,
                         fine_tune_batch_norm=fine_tune_batch_norm)

        atrous_rates_len = 0
        if atrous_rates is not None:
            atrous_rates_len = len(atrous_rates)
        self.fc1 = _conv_bn_relu(depth * (2 + atrous_rates_len),
                                 depth,
                                 ksize=1,
                                 stride=1,
                                 use_batch_statistics=fine_tune_batch_norm)
        self.fc2 = nn.Conv2d(depth,
                             num_classes,
                             kernel_size=1,
                             stride=1,
                             has_bias=True)
        self.upsample = P.ResizeBilinear(
            (int(feature_shape[2]), int(feature_shape[3])), align_corners=True)
        self.samples = []
        for scale_size in self.scale_sizes:
            self.samples.append(SampleBlock(feature_shape, scale_size))
        self.samples = nn.CellList(self.samples)
        self.feature_shape = [
            float(feature_shape[0]),
            float(feature_shape[1]),
            float(feature_shape[2]),
            float(feature_shape[3])
        ]

        self.pad = P.Pad(((0, 0), (0, 0), (1, 1), (1, 1)))
        self.dropout = nn.Dropout(keep_prob=0.9)
        self.shape = P.Shape()
        self.decoder_output_stride = decoder_output_stride
        if decoder_output_stride is not None:
            self.decoder = Decoder(
                low_level_channel=depth,
                channel=depth,
                depth=depth,
                feature_shape=[feature_shape[2], feature_shape[3]],
                scale_sizes=self.scale_sizes,
                decoder_output_stride=decoder_output_stride,
                fine_tune_batch_norm=fine_tune_batch_norm)
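For orientation, the fc1 input width above is determined by how many branches ASPP concatenates. A worked instance with assumed values (depth and atrous_rates below are illustrative and not taken from the source):

# In the usual DeepLabV3 ASPP layout, the concatenation holds a 1x1 branch,
# image-level pooling, and one branch per atrous rate, hence the (2 + len) factor.
depth, atrous_rates = 256, (6, 12, 18)
fc1_in_channels = depth * (2 + len(atrous_rates))   # 256 * 5 = 1280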
Example #25
     'desc_inputs': [[3, 2, 1, 3], Tensor(np.array([[0, 1], [0, 1], [0, 1]]).astype(np.int32))],
     'desc_bprop': [[4, 1, 3]],
     'skip': ['backward']}),
 ('DropoutGenMask', {
     'block': P.DropoutGenMask(),
     'desc_const': [(2, 2), Tensor(0.5, mstype.float32)],
     'desc_inputs': [],
     'desc_bprop': [Tensor(np.ones(1).astype(np.int8))],
     'skip': ['backward']}),
 ('DropoutDoMask', {
     'block': P.DropoutDoMask(),
     'desc_const': [Tensor(0.5)],
     'desc_inputs': [[64, 12, 128, 128], Tensor(np.ones(1572864).astype(np.uint8))],
     'desc_bprop': [[64, 12, 128, 128]]}),
 ('Dropout', {
     'block': nn.Dropout(0.5),
     'desc_inputs': [[64, 12, 128, 128]],
     'desc_bprop': [[64, 12, 128, 128]]}),
 ('ReduceMean0', {
     'block': P.ReduceMean(),
     'desc_const': [(2,)],
     'desc_inputs': [[3, 2, 2]],
     'desc_bprop': [[3, 2]]}),
 ('ReduceMean1', {
     'block': P.ReduceMean(),
     'desc_const': [2],
     'desc_inputs': [[3, 2, 2]],
     'desc_bprop': [[3, 2]]}),
 ('All', {
     'block': P.ReduceAll(),
     'desc_const': [(1,)],
Example #26
 def __init__(self, model_settings, model_size_info):
     super(DSCNN, self).__init__()
     # N C H W
     label_count = model_settings['label_count']
     input_frequency_size = model_settings['dct_coefficient_count']
     input_time_size = model_settings['spectrogram_length']
     t_dim = input_time_size
     f_dim = input_frequency_size
     num_layers = model_size_info[0]
     conv_feat = [None] * num_layers
     conv_kt = [None] * num_layers
     conv_kf = [None] * num_layers
     conv_st = [None] * num_layers
     conv_sf = [None] * num_layers
     i = 1
     for layer_no in range(0, num_layers):
         conv_feat[layer_no] = model_size_info[i]
         i += 1
         conv_kt[layer_no] = model_size_info[i]
         i += 1
         conv_kf[layer_no] = model_size_info[i]
         i += 1
         conv_st[layer_no] = model_size_info[i]
         i += 1
         conv_sf[layer_no] = model_size_info[i]
         i += 1
     seq_cell = []
     in_channel = 1
     for layer_no in range(0, num_layers):
         if layer_no == 0:
             seq_cell.append(
                 nn.Conv2d(in_channels=in_channel,
                           out_channels=conv_feat[layer_no],
                           kernel_size=(conv_kt[layer_no],
                                        conv_kf[layer_no]),
                           stride=(conv_st[layer_no], conv_sf[layer_no]),
                           pad_mode="same",
                           padding=0,
                           has_bias=False))
             seq_cell.append(
                 nn.BatchNorm2d(num_features=conv_feat[layer_no],
                                momentum=0.98))
             in_channel = conv_feat[layer_no]
         else:
             seq_cell.append(
                 DepthWiseConv(in_planes=in_channel,
                               kernel_size=(conv_kt[layer_no],
                                            conv_kf[layer_no]),
                               stride=(conv_st[layer_no],
                                       conv_sf[layer_no]),
                               pad_mode='same',
                               pad=0))
             seq_cell.append(
                 nn.BatchNorm2d(num_features=in_channel, momentum=0.98))
             seq_cell.append(nn.ReLU())
             seq_cell.append(
                 nn.Conv2d(in_channels=in_channel,
                           out_channels=conv_feat[layer_no],
                           kernel_size=(1, 1),
                           pad_mode="same"))
             seq_cell.append(
                 nn.BatchNorm2d(num_features=conv_feat[layer_no],
                                momentum=0.98))
             seq_cell.append(nn.ReLU())
             in_channel = conv_feat[layer_no]
         t_dim = math.ceil(t_dim / float(conv_st[layer_no]))
         f_dim = math.ceil(f_dim / float(conv_sf[layer_no]))
     seq_cell.append(nn.AvgPool2d(kernel_size=(t_dim, f_dim)))  # to fix ?
     seq_cell.append(nn.Flatten())
     seq_cell.append(nn.Dropout(model_settings['dropout1']))
     seq_cell.append(nn.Dense(in_channel, label_count))
     self.model = nn.SequentialCell(seq_cell)
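The loop above reads model_size_info as a flat list: the first entry is the number of layers, followed by five entries per layer (output channels, kernel_t, kernel_f, stride_t, stride_f). A sketch of a matching argument pair; the concrete numbers below are illustrative and not taken from the source:

model_settings = {
    'label_count': 12,            # number of output classes
    'dct_coefficient_count': 40,  # frequency axis (f_dim)
    'spectrogram_length': 98,     # time axis (t_dim)
    'dropout1': 0.9,              # forwarded to nn.Dropout above, i.e. a keep probability
}
model_size_info = [
    5,                 # num_layers
    64, 10, 4, 2, 2,   # layer 1: plain Conv2d + BatchNorm
    64, 3, 3, 1, 1,    # layers 2-5: depthwise-separable blocks
    64, 3, 3, 1, 1,
    64, 3, 3, 1, 1,
    64, 3, 3, 1, 1,
]
net = DSCNN(model_settings, model_size_info)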
Example #27
 def __init__(self):
     super(Net_dropout, self).__init__()
     self.dropout = nn.Dropout(0.5)
Example #28
    def __init__(self,
                 device_target,
                 num_classes=1000,
                 width_mult=1.,
                 has_dropout=False,
                 inverted_residual_setting=None,
                 round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # setting of inverted residual blocks
        self.cfgs = inverted_residual_setting
        if inverted_residual_setting is None:
            self.cfgs = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult,
                                        round_nearest)
        self.out_channels = _make_divisible(
            last_channel * max(1.0, width_mult), round_nearest)
        features = [ConvBNReLU(device_target, 3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(
                    block(device_target,
                          input_channel,
                          output_channel,
                          stride,
                          expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(
            ConvBNReLU(device_target,
                       input_channel,
                       self.out_channels,
                       kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head
        head = ([
            GlobalAvgPooling(),
            nn.Dense(self.out_channels, num_classes, has_bias=True)
        ] if not has_dropout else [
            GlobalAvgPooling(),
            nn.Dropout(0.2),
            nn.Dense(self.out_channels, num_classes, has_bias=True)
        ])
        self.head = nn.SequentialCell(head)

        self._initialize_weights()
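MobileNetV2 above rounds every channel count through _make_divisible, which is referenced but not shown in this snippet. Below is a sketch of the conventional helper used by most MobileNet implementations; treat it as an assumption about the missing definition rather than the author's exact code:

def _make_divisible(v, divisor, min_value=None):
    """Round v to the nearest multiple of divisor without dropping below ~90% of v."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Rounding down must not remove more than 10% of the channels.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

# e.g. _make_divisible(32 * 0.75, 8) == 24 and _make_divisible(16 * 0.35, 8) == 8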
Example #29
 def __init__(self,
              hidden_size: int,
              hidden_dropout_prob: float = 0.1) -> None:
     super().__init__()
     self.layer_norm = nn.LayerNorm((hidden_size, ), epsilon=1e-12)
     self.dropout = nn.Dropout(1.0 - hidden_dropout_prob)
Example #30
    def __init__(self,
                 config: TransformerConfig,
                 is_training: bool,
                 use_one_hot_embeddings: bool = False,
                 use_positional_embedding: bool = True):
        super(Transformer, self).__init__()

        self.use_positional_embedding = use_positional_embedding
        config = copy.deepcopy(config)
        self.is_training = is_training
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_dropout_prob = 0.0

        self.input_mask_from_dataset = config.input_mask_from_dataset
        self.batch_size = config.batch_size
        self.max_positions = config.seq_length
        self.attn_embed_dim = config.hidden_size
        self.num_layers = config.num_hidden_layers
        self.word_embed_dim = config.hidden_size

        self.last_idx = self.num_layers - 1

        self.embedding_lookup = EmbeddingLookup(
            vocab_size=config.vocab_size,
            embed_dim=self.word_embed_dim,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if self.use_positional_embedding:
            self.positional_embedding = PositionalEmbedding(
                embedding_size=self.word_embed_dim,
                max_position_embeddings=config.max_position_embeddings)

        self.encoder = TransformerEncoder(
            attn_embed_dim=self.attn_embed_dim,
            encoder_layers=self.num_layers,
            num_attn_heads=config.num_attention_heads,
            intermediate_size=config.intermediate_size,
            attention_dropout_prob=config.attention_dropout_prob,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type)

        self.decoder = TransformerDecoder(
            attn_embed_dim=self.attn_embed_dim,
            decoder_layers=self.num_layers,
            num_attn_heads=config.num_attention_heads,
            intermediate_size=config.intermediate_size,
            attn_dropout_prob=config.attention_dropout_prob,
            initializer_range=config.initializer_range,
            dropout_prob=config.hidden_dropout_prob,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = P.StridedSlice()
        self.dropout = nn.Dropout(keep_prob=1 - config.hidden_dropout_prob)

        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)

        self.scale = Tensor([math.sqrt(float(self.word_embed_dim))],
                            dtype=mstype.float32)
        self.multiply = P.Mul()