def __init__(self, config): super(CRNN, self).__init__() self.batch_size = config.batch_size self.input_size = config.input_size self.hidden_size = config.hidden_size self.num_classes = config.class_num self.reshape = P.Reshape() self.cast = P.Cast() k = (1 / self.hidden_size)**0.5 self.rnn1 = P.DynamicRNN(forget_bias=0.0) self.rnn1_bw = P.DynamicRNN(forget_bias=0.0) self.rnn2 = P.DynamicRNN(forget_bias=0.0) self.rnn2_bw = P.DynamicRNN(forget_bias=0.0) w1 = np.random.uniform( -k, k, (self.input_size + self.hidden_size, 4 * self.hidden_size)) self.w1 = Parameter(w1.astype(np.float32), name="w1") w2 = np.random.uniform( -k, k, (2 * self.hidden_size + self.hidden_size, 4 * self.hidden_size)) self.w2 = Parameter(w2.astype(np.float32), name="w2") w1_bw = np.random.uniform( -k, k, (self.input_size + self.hidden_size, 4 * self.hidden_size)) self.w1_bw = Parameter(w1_bw.astype(np.float32), name="w1_bw") w2_bw = np.random.uniform( -k, k, (2 * self.hidden_size + self.hidden_size, 4 * self.hidden_size)) self.w2_bw = Parameter(w2_bw.astype(np.float32), name="w2_bw") self.b1 = Parameter(np.random.uniform( -k, k, (4 * self.hidden_size)).astype(np.float32), name="b1") self.b2 = Parameter(np.random.uniform( -k, k, (4 * self.hidden_size)).astype(np.float32), name="b2") self.b1_bw = Parameter(np.random.uniform( -k, k, (4 * self.hidden_size)).astype(np.float32), name="b1_bw") self.b2_bw = Parameter(np.random.uniform( -k, k, (4 * self.hidden_size)).astype(np.float32), name="b2_bw") self.h1 = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.h2 = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.h1_bw = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.h2_bw = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.c1 = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.c2 = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.c1_bw = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.c2_bw = Tensor( np.zeros(shape=(1, self.batch_size, self.hidden_size)).astype(np.float32)) self.fc_weight = np.random.random( (self.num_classes, self.hidden_size)).astype(np.float32) self.fc_bias = np.random.random((self.num_classes)).astype(np.float32) self.fc = nn.Dense(in_channels=self.hidden_size, out_channels=self.num_classes, weight_init=Tensor(self.fc_weight), bias_init=Tensor(self.fc_bias)) self.fc.to_float(mstype.float32) self.expand_dims = P.ExpandDims() self.concat = P.Concat() self.transpose = P.Transpose() self.squeeze = P.Squeeze(axis=0) self.vgg = VGG() self.reverse_seq1 = P.ReverseSequence(batch_dim=1, seq_dim=0) self.reverse_seq2 = P.ReverseSequence(batch_dim=1, seq_dim=0) self.reverse_seq3 = P.ReverseSequence(batch_dim=1, seq_dim=0) self.reverse_seq4 = P.ReverseSequence(batch_dim=1, seq_dim=0) self.seq_length = Tensor( np.ones((self.batch_size), np.int32) * config.num_step, mstype.int32) self.concat1 = P.Concat(axis=2) self.dropout = nn.Dropout(0.5) self.rnn_dropout = nn.Dropout(0.9) self.use_dropout = config.use_dropout
def test_check_dropout_3():
    Tensor(np.ones([20, 16, 50]).astype(np.int32))
    with pytest.raises(ValueError):
        nn.Dropout(3, 0, 1)
def __init__(self, attention_probs_dropout_prob: float = 0.1) -> None:
    super().__init__()
    self.dropout = nn.Dropout(1.0 - attention_probs_dropout_prob)
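A minimal sketch (not from the snippet above) of the keep_prob convention used here: in the MindSpore 1.x-style API, nn.Dropout takes the probability of keeping an element, so a dropout probability p is passed as 1.0 - p. The input shape and the explicit set_train call are assumptions for the demo.

# Hedged demo of the keep_prob convention assumed above.
import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor

attention_probs_dropout_prob = 0.1                         # probability of dropping an element
dropout = nn.Dropout(1.0 - attention_probs_dropout_prob)   # argument is the keep probability
dropout.set_train(True)                                    # dropout only acts in training mode

scores = Tensor(np.ones((2, 4, 8, 8)), mstype.float32)
out = dropout(scores)
# Surviving elements are rescaled by 1 / keep_prob, so non-zero outputs equal 1 / 0.9 here.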
def __init__(self):
    super().__init__()
    self.matmul1 = P.MatMul()
    self.dropout = nn.Dropout()
    self.matmul2 = P.MatMul()
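The snippet above only defines the layers; a plausible construct for such a matmul-dropout-matmul cell might look like the sketch below. The argument names and data flow are assumptions, not taken from the original network.

# Hypothetical construct for the MatMul -> Dropout -> MatMul cell above (assumed data flow).
def construct(self, x, w1, w2):
    out = self.matmul1(x, w1)    # (N, K) x (K, M) -> (N, M)
    out = self.dropout(out)      # identity in eval mode, random masking + rescaling in training
    out = self.matmul2(out, w2)  # (N, M) x (M, L) -> (N, L)
    return out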
def __init__(self, batch_size, from_tensor_width, to_tensor_width, from_seq_length, to_seq_length, num_attention_heads=1, size_per_head=512, query_act=None, key_act=None, value_act=None, has_attention_mask=False, attention_probs_dropout_prob=0.0, use_one_hot_embeddings=False, initializer_range=0.02, do_return_2d_tensor=False, use_relative_positions=False, compute_type=mstype.float32): super(BertAttention, self).__init__() self.batch_size = batch_size self.from_seq_length = from_seq_length self.to_seq_length = to_seq_length self.num_attention_heads = num_attention_heads self.size_per_head = size_per_head self.has_attention_mask = has_attention_mask self.use_relative_positions = use_relative_positions self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], dtype=compute_type) self.reshape = P.Reshape() self.shape_from_2d = (-1, from_tensor_width) self.shape_to_2d = (-1, to_tensor_width) weight = TruncatedNormal(initializer_range) units = num_attention_heads * size_per_head self.query_layer = nn.Dense(from_tensor_width, units, activation=query_act, weight_init=weight).to_float(compute_type) self.key_layer = nn.Dense(to_tensor_width, units, activation=key_act, weight_init=weight).to_float(compute_type) self.value_layer = nn.Dense(to_tensor_width, units, activation=value_act, weight_init=weight).to_float(compute_type) self.shape_from = (batch_size, from_seq_length, num_attention_heads, size_per_head) self.shape_to = (batch_size, to_seq_length, num_attention_heads, size_per_head) self.matmul_trans_b = P.BatchMatMul(transpose_b=True) self.multiply = P.Mul() self.transpose = P.Transpose() self.trans_shape = (0, 2, 1, 3) self.trans_shape_relative = (2, 0, 1, 3) self.trans_shape_position = (1, 2, 0, 3) self.multiply_data = Tensor([ -10000.0, ], dtype=compute_type) self.batch_num = batch_size * num_attention_heads self.matmul = P.BatchMatMul() self.softmax = nn.Softmax() self.dropout = nn.Dropout(1 - attention_probs_dropout_prob) if self.has_attention_mask: self.expand_dims = P.ExpandDims() self.sub = P.Sub() self.add = P.TensorAdd() self.cast = P.Cast() self.get_dtype = P.DType() if do_return_2d_tensor: self.shape_return = (batch_size * from_seq_length, num_attention_heads * size_per_head) else: self.shape_return = (batch_size, from_seq_length, num_attention_heads * size_per_head) self.cast_compute_type = SaturateCast(dst_type=compute_type) if self.use_relative_positions: self._generate_relative_positions_embeddings = \ RelaPosEmbeddingsGenerator(length=to_seq_length, depth=size_per_head, max_relative_position=16, initializer_range=initializer_range, use_one_hot_embeddings=use_one_hot_embeddings)
def __init__(self, num_classes=10, dropout_keep_prob=0.8):
    super(Logits, self).__init__()
    self.avg_pool = nn.AvgPool2d(8, pad_mode='valid')
    self.dropout = nn.Dropout(keep_prob=dropout_keep_prob)
    self.flatten = P.Flatten()
    self.fc = nn.Dense(2048, num_classes)
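A sketch of how a classification head like this is typically wired (pool, dropout, flatten, dense); the construct below is an assumption based on the attributes defined above, not copied from the original model.

# Assumed forward pass for the Logits head defined above.
def construct(self, x):
    x = self.avg_pool(x)   # N x 2048 x 8 x 8 -> N x 2048 x 1 x 1
    x = self.dropout(x)    # keeps dropout_keep_prob of the activations during training
    x = self.flatten(x)    # N x 2048
    return self.fc(x)      # N x num_classes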
def __init__(self, input_size, hidden_size, num_layers=1, has_bias=True, batch_first=False, dropout=0, bidirectional=False): super(LSTM, self).__init__() validator.check_value_type("batch_first", batch_first, [bool], self.cls_name) validator.check_positive_int(hidden_size, "hidden_size", self.cls_name) validator.check_positive_int(num_layers, "num_layers", self.cls_name) self.is_ascend = context.get_context("device_target") == "Ascend" self.batch_first = batch_first self.transpose = P.Transpose() self.num_layers = num_layers self.bidirectional = bidirectional self.dropout = dropout self.lstm = P.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, has_bias=has_bias, bidirectional=bidirectional, dropout=float(dropout)) weight_size = 0 gate_size = 4 * hidden_size stdv = 1 / math.sqrt(hidden_size) num_directions = 2 if bidirectional else 1 if self.is_ascend: self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0) self.concat = P.Concat(axis=0) self.concat_2dim = P.Concat(axis=2) self.cast = P.Cast() self.shape = P.Shape() if dropout != 0: self.dropout_op = nn.Dropout(float(dropout)) b0 = np.zeros(gate_size, dtype=np.float16) self.w_list = [] self.b_list = [] self.rnns_fw = P.DynamicRNN(forget_bias=0.0) self.rnns_bw = P.DynamicRNN(forget_bias=0.0) for layer in range(num_layers): w_shape = input_size if layer == 0 else (num_directions * hidden_size) w_np = np.random.uniform( -stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16) self.w_list.append( Parameter(initializer(Tensor(w_np), [w_shape + hidden_size, gate_size]), name='weight_fw' + str(layer))) if has_bias: b_np = np.random.uniform(-stdv, stdv, gate_size).astype(np.float16) self.b_list.append( Parameter(initializer(Tensor(b_np), [gate_size]), name='bias_fw' + str(layer))) else: self.b_list.append( Parameter(initializer(Tensor(b0), [gate_size]), name='bias_fw' + str(layer))) if bidirectional: w_bw_np = np.random.uniform( -stdv, stdv, (w_shape + hidden_size, gate_size)).astype(np.float16) self.w_list.append( Parameter( initializer(Tensor(w_bw_np), [w_shape + hidden_size, gate_size]), name='weight_bw' + str(layer))) b_bw_np = np.random.uniform( -stdv, stdv, (4 * hidden_size)).astype(np.float16) if has_bias else b0 self.b_list.append( Parameter(initializer(Tensor(b_bw_np), [gate_size]), name='bias_bw' + str(layer))) self.w_list = ParameterTuple(self.w_list) self.b_list = ParameterTuple(self.b_list) else: for layer in range(num_layers): input_layer_size = input_size if layer == 0 else hidden_size * num_directions increment_size = gate_size * input_layer_size increment_size += gate_size * hidden_size if has_bias: increment_size += 2 * gate_size weight_size += increment_size * num_directions w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32) self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
def __init__(self, num_classes=1000):
    """
    Constructor

    Args:
        num_classes: number of classes.
    """
    super(Xception, self).__init__()
    self.num_classes = num_classes

    self.conv1 = nn.Conv2d(3, 32, 3, 2, pad_mode='valid', weight_init='xavier_uniform')
    self.bn1 = nn.BatchNorm2d(32, momentum=0.9)
    self.relu = nn.ReLU()
    self.conv2 = nn.Conv2d(32, 64, 3, pad_mode='valid', weight_init='xavier_uniform')
    self.bn2 = nn.BatchNorm2d(64, momentum=0.9)

    # Entry flow
    self.block1 = Block(64, 128, 2, 2, start_with_relu=False, grow_first=True)
    self.block2 = Block(128, 256, 2, 2, start_with_relu=True, grow_first=True)
    self.block3 = Block(256, 728, 2, 2, start_with_relu=True, grow_first=True)

    # Middle flow
    self.block4 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block5 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block6 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block7 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block8 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block9 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block10 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)
    self.block11 = Block(728, 728, 3, 1, start_with_relu=True, grow_first=True)

    # Exit flow
    self.block12 = Block(728, 1024, 2, 2, start_with_relu=True, grow_first=False)
    self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)
    self.bn3 = nn.BatchNorm2d(1536, momentum=0.9)
    self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)
    self.bn4 = nn.BatchNorm2d(2048, momentum=0.9)
    self.avg_pool = nn.AvgPool2d(10)
    self.dropout = nn.Dropout()
    self.fc = nn.Dense(2048, num_classes)
def __init__(self, dropout_prob=0.1):
    super(ResidualConnection, self).__init__()
    self.add = P.TensorAdd()
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.use_dropout = dropout_prob > 0
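A hedged sketch of how such a residual-connection cell is usually applied: dropout on the sub-layer output, then the residual add. The signature and the use_dropout branch below are assumptions, not the original construct.

# Assumed usage of the ResidualConnection cell defined above.
def construct(self, hidden_states, residual):
    output = hidden_states
    if self.use_dropout:
        output = self.dropout(output)   # skipped entirely when dropout_prob == 0
    return self.add(output, residual)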
# limitations under the License.
# ============================================================================
"""xception_train_export"""
import sys
import numpy as np
from train_utils import SaveInOut, TrainWrap
from official.cv.xception.src.Xception import Xception
import mindspore.common.dtype as mstype
from mindspore import context, Tensor, nn
from mindspore.train.serialization import export

context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", save_graphs=False)

n = Xception(num_classes=1000)
n.dropout = nn.Dropout(keep_prob=1.0)

loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False)
optimizer = nn.SGD(n.trainable_params(), learning_rate=0.01, momentum=0.9, dampening=0.0,
                   weight_decay=0.0, nesterov=True, loss_scale=1.0)
net = TrainWrap(n, loss_fn, optimizer)

batch = 2
x = Tensor(np.random.randn(batch, 3, 299, 299), mstype.float32)
label = Tensor(np.zeros([batch, 1000]).astype(np.float32))
export(net, x, label, file_name="mindir/xception_train", file_format='MINDIR')

if len(sys.argv) > 1:
    SaveInOut(sys.argv[1] + "xception", x, label, n, net)
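The export script above replaces the model's dropout with nn.Dropout(keep_prob=1.0) so the exported graph is deterministic. A small self-contained check of that assumption (keep_prob=1.0 should pass the input through unchanged, even in training mode):

# Sanity check (assumption: keep_prob=1.0 makes Dropout an identity op).
import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor

drop = nn.Dropout(keep_prob=1.0)
drop.set_train(True)
inp = Tensor(np.random.randn(2, 8), mstype.float32)
out = drop(inp)
assert np.allclose(out.asnumpy(), inp.asnumpy())  # nothing is dropped or rescaled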
def __init__(self, config):
    super(SSD300VGG16, self).__init__()
    # VGG16 backbone: block1~5
    self.backbone = vgg16()
    # SSD blocks: block6~7
    self.b6_1 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, padding=6,
                          dilation=6, pad_mode='pad')
    self.b6_2 = nn.Dropout(0.5)
    self.b7_1 = nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=1)
    self.b7_2 = nn.Dropout(0.5)
    # Extra Feature Layers: block8~11
    self.b8_1 = nn.Conv2d(in_channels=1024, out_channels=256, kernel_size=1, padding=1,
                          pad_mode='pad')
    self.b8_2 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2,
                          pad_mode='valid')
    self.b9_1 = nn.Conv2d(in_channels=512, out_channels=128, kernel_size=1, padding=1,
                          pad_mode='pad')
    self.b9_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=2,
                          pad_mode='valid')
    self.b10_1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1)
    self.b10_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, pad_mode='valid')
    self.b11_1 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=1)
    self.b11_2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, pad_mode='valid')
    # boxes
    self.multi_box = MultiBox(config)
    if not self.training:
        self.activation = P.Sigmoid()
def __init__(self, outer_nc, inner_nc, in_planes=None, dropout=False, submodule=None, outermost=False, innermost=False, alpha=0.2, norm_mode='batch'): super(UnetSkipConnectionBlock, self).__init__() downnorm = nn.BatchNorm2d(inner_nc) upnorm = nn.BatchNorm2d(outer_nc) use_bias = False if norm_mode == 'instance': downnorm = nn.BatchNorm2d(inner_nc, affine=False) upnorm = nn.BatchNorm2d(outer_nc, affine=False) use_bias = True if in_planes is None: in_planes = outer_nc downconv = nn.Conv2d(in_planes, inner_nc, kernel_size=4, stride=2, padding=1, has_bias=use_bias, pad_mode='pad') downrelu = nn.LeakyReLU(alpha) uprelu = nn.ReLU() if outermost: upconv = nn.Conv2dTranspose(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1, pad_mode='pad') down = [downconv] up = [uprelu, upconv, nn.Tanh()] model = down + [submodule] + up elif innermost: upconv = nn.Conv2dTranspose(inner_nc, outer_nc, kernel_size=4, stride=2, padding=1, has_bias=use_bias, pad_mode='pad') down = [downrelu, downconv] up = [uprelu, upconv, upnorm] model = down + up else: upconv = nn.Conv2dTranspose(inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1, has_bias=use_bias, pad_mode='pad') down = [downrelu, downconv, downnorm] up = [uprelu, upconv, upnorm] model = down + [submodule] + up if dropout: model.append(nn.Dropout(0.5)) self.model = nn.SequentialCell(model) self.skip_connections = not outermost self.concat = ops.Concat(axis=1)
def __init__(self, num_classes, is_training=True, stem_filters=32, penultimate_filters=1056, filters_multiplier=2): super(NASNetAMobile, self).__init__() self.is_training = is_training self.stem_filters = stem_filters self.penultimate_filters = penultimate_filters self.filters_multiplier = filters_multiplier filters = self.penultimate_filters // 24 # 24 is default value for the architecture self.conv0 = nn.SequentialCell([ nn.Conv2d(in_channels=3, out_channels=self.stem_filters, kernel_size=3, stride=2, pad_mode='pad', padding=0, has_bias=False), nn.BatchNorm2d(num_features=self.stem_filters, eps=0.001, momentum=0.9, affine=True) ]) self.cell_stem_0 = CellStem0(self.stem_filters, num_filters=filters // (filters_multiplier**2)) self.cell_stem_1 = CellStem1(self.stem_filters, num_filters=filters // filters_multiplier) self.cell_0 = FirstCell( in_channels_left=filters, out_channels_left=filters // 2, # 1, 0.5 in_channels_right=2 * filters, out_channels_right=filters) # 2, 1 self.cell_1 = NormalCell( in_channels_left=2 * filters, out_channels_left=filters, # 2, 1 in_channels_right=6 * filters, out_channels_right=filters) # 6, 1 self.cell_2 = NormalCell( in_channels_left=6 * filters, out_channels_left=filters, # 6, 1 in_channels_right=6 * filters, out_channels_right=filters) # 6, 1 self.cell_3 = NormalCell( in_channels_left=6 * filters, out_channels_left=filters, # 6, 1 in_channels_right=6 * filters, out_channels_right=filters) # 6, 1 self.reduction_cell_0 = ReductionCell0( in_channels_left=6 * filters, out_channels_left=2 * filters, # 6, 2 in_channels_right=6 * filters, out_channels_right=2 * filters) # 6, 2 self.cell_6 = FirstCell( in_channels_left=6 * filters, out_channels_left=filters, # 6, 1 in_channels_right=8 * filters, out_channels_right=2 * filters) # 8, 2 self.cell_7 = NormalCell( in_channels_left=8 * filters, out_channels_left=2 * filters, # 8, 2 in_channels_right=12 * filters, out_channels_right=2 * filters) # 12, 2 self.cell_8 = NormalCell( in_channels_left=12 * filters, out_channels_left=2 * filters, # 12, 2 in_channels_right=12 * filters, out_channels_right=2 * filters) # 12, 2 self.cell_9 = NormalCell( in_channels_left=12 * filters, out_channels_left=2 * filters, # 12, 2 in_channels_right=12 * filters, out_channels_right=2 * filters) # 12, 2 if is_training: self.aux_logits = AuxLogits(in_channels=12 * filters, out_channels=num_classes) self.reduction_cell_1 = ReductionCell1( in_channels_left=12 * filters, out_channels_left=4 * filters, # 12, 4 in_channels_right=12 * filters, out_channels_right=4 * filters) # 12, 4 self.cell_12 = FirstCell( in_channels_left=12 * filters, out_channels_left=2 * filters, # 12, 2 in_channels_right=16 * filters, out_channels_right=4 * filters) # 16, 4 self.cell_13 = NormalCell( in_channels_left=16 * filters, out_channels_left=4 * filters, # 16, 4 in_channels_right=24 * filters, out_channels_right=4 * filters) # 24, 4 self.cell_14 = NormalCell( in_channels_left=24 * filters, out_channels_left=4 * filters, # 24, 4 in_channels_right=24 * filters, out_channels_right=4 * filters) # 24, 4 self.cell_15 = NormalCell( in_channels_left=24 * filters, out_channels_left=4 * filters, # 24, 4 in_channels_right=24 * filters, out_channels_right=4 * filters) # 24, 4 self.relu = nn.ReLU() self.dropout = nn.Dropout(keep_prob=0.5) self.classifier = nn.Dense(in_channels=24 * filters, out_channels=num_classes) self.shape = P.Shape() self.reshape = P.Reshape() self._initialize_weights()
def __init__(self, model_cfgs, num_classes=1000, multiplier=1., final_drop=0., round_nearest=8): super(MobileNetV3, self).__init__() self.cfgs = model_cfgs['cfg'] self.inplanes = 16 self.features = [] first_conv_in_channel = 3 first_conv_out_channel = _make_divisible(multiplier * self.inplanes) self.features.append( nn.Conv2d(in_channels=first_conv_in_channel, out_channels=first_conv_out_channel, kernel_size=3, padding=1, stride=2, has_bias=False, pad_mode='pad')) self.features.append(nn.BatchNorm2d(first_conv_out_channel)) self.features.append(Activation('hswish')) for layer_cfg in self.cfgs: self.features.append( self._make_layer( kernel_size=layer_cfg[0], exp_ch=_make_divisible(multiplier * layer_cfg[1]), out_channel=_make_divisible(multiplier * layer_cfg[2]), use_se=layer_cfg[3], act_func=layer_cfg[4], stride=layer_cfg[5])) output_channel = _make_divisible(multiplier * model_cfgs["cls_ch_squeeze"]) self.features.append( nn.Conv2d(in_channels=_make_divisible(multiplier * self.cfgs[-1][2]), out_channels=output_channel, kernel_size=1, padding=0, stride=1, has_bias=False, pad_mode='pad')) self.features.append(nn.BatchNorm2d(output_channel)) self.features.append(Activation('hswish')) self.features.append(GlobalAvgPooling(keep_dims=True)) self.features.append( nn.Conv2d(in_channels=output_channel, out_channels=model_cfgs['cls_ch_expand'], kernel_size=1, padding=0, stride=1, has_bias=False, pad_mode='pad')) self.features.append(Activation('hswish')) if final_drop > 0: self.features.append((nn.Dropout(final_drop))) # make it nn.CellList self.features = nn.SequentialCell(self.features) self.output = nn.Conv2d(in_channels=model_cfgs['cls_ch_expand'], out_channels=num_classes, kernel_size=1, has_bias=True, pad_mode='pad') self.squeeze = P.Squeeze(axis=(2, 3)) self._initialize_weights()
def __init__(self, src_dim, tgt_dim, attn_embed_dim, num_attn_heads=1, query_act=None, key_act=None, value_act=None, out_act=None, has_attention_mask=True, attention_dropout_prob=0.0, initializer_range=0.02, do_return_2d_tensor=True, compute_type=mstype.float32): super(MultiHeadAttention, self).__init__() if attn_embed_dim % num_attn_heads != 0: raise ValueError( f"The hidden size {attn_embed_dim} is not a multiple of the " f"number of attention heads {num_attn_heads}") self.attn_embed_dim = attn_embed_dim self.num_attn_heads = num_attn_heads self.size_per_head = attn_embed_dim // num_attn_heads self.src_dim = src_dim self.tgt_dim = tgt_dim self.has_attention_mask = has_attention_mask if attn_embed_dim != self.num_attn_heads * self.size_per_head: raise ValueError( "`attn_embed_dim` must be divided by num_attn_heads.") self.scores_mul = Tensor([1.0 / math.sqrt(float(self.size_per_head))], dtype=compute_type) self.reshape = P.Reshape() self.query_layer = nn.Dense( src_dim, attn_embed_dim, activation=query_act, has_bias=True, weight_init=TruncatedNormal(initializer_range)).to_float( compute_type) self.key_layer = nn.Dense( tgt_dim, attn_embed_dim, activation=key_act, has_bias=True, weight_init=TruncatedNormal(initializer_range)).to_float( compute_type) self.value_layer = nn.Dense( tgt_dim, attn_embed_dim, activation=value_act, has_bias=True, weight_init=TruncatedNormal(initializer_range)).to_float( compute_type) self.out_layer = nn.Dense( attn_embed_dim, attn_embed_dim, activation=out_act, has_bias=True, weight_init=TruncatedNormal(initializer_range)).to_float( compute_type) self.matmul_trans_b = P.BatchMatMul(transpose_b=True) self.multiply = P.Mul() self.transpose = P.Transpose() self.multiply_data = Tensor([-10000.0], dtype=compute_type) self.matmul = P.BatchMatMul() self.softmax = nn.Softmax() self.dropout = nn.Dropout(1.0 - attention_dropout_prob) if self.has_attention_mask: self.expand_dims = P.ExpandDims() self.sub = P.Sub() self.add = P.TensorAdd() self.cast = P.Cast() self.get_dtype = P.DType() self.do_return_2d_tensor = do_return_2d_tensor self.cast_compute_type = SaturateCast(dst_type=compute_type) self.softmax_cast = P.Cast() self.get_shape = P.Shape() self.transpose_orders = (0, 2, 1, 3)
def __init__(self, num_classes=1000, create_aux_logits=False): super(Inceptionv3, self).__init__() self.create_aux_logits = create_aux_logits # N x 3 x 299 x 299 self.Conv2d_1a_3x3 = Conv2dBlock(in_channels=3, out_channels=32, kernel_size=3, stride=2, pad_mode="valid") # N x 32 x 149 x 149 self.Conv2d_2a_3x3 = Conv2dBlock(in_channels=32, out_channels=32, kernel_size=3, pad_mode="valid") # N x 32 x 147 x 147 self.Conv2d_2b_3x3 = Conv2dBlock(in_channels=32, out_channels=64, kernel_size=3, pad_mode="same") # N x 64 x 147 x 147 self.MaxPool_3a_3x3 = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid") # N x 64 x 73 x 73 self.Conv2d_3b_1x1 = Conv2dBlock(in_channels=64, out_channels=80, kernel_size=1) # N x 80 x 73 x 73 self.Conv2d_4a_3x3 = Conv2dBlock(in_channels=80, out_channels=192, kernel_size=3, pad_mode="valid") # N x 192 x 71 x 71 self.MaxPool_5a_3x3 = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid") # N x 192 x 35 x 35 self.Mixed_5b = InceptionBlockA(in_channels=192, var_channels=32) # N x 256 x 35 x 35 self.Mixed_5c = InceptionBlockA(in_channels=256, var_channels=64) # N x 288 x 35 x 35 self.Mixed_5d = InceptionBlockA(in_channels=288, var_channels=64) # N x 288 x 35 x 35 self.Mixed_6a = InceptionBlockB_1(in_channels=288) # N x 768 x 17 x 17 self.Mixed_6b = InceptionBlockB_2(in_channels=768, var_channels=128) # N x 768 x 17 x 17 self.Mixed_6c = InceptionBlockB_2(in_channels=768, var_channels=160) # N x 768 x 17 x 17 self.Mixed_6d = InceptionBlockB_2(in_channels=768, var_channels=160) # N x 768 x 17 x 17 self.Mixed_6e = InceptionBlockB_2(in_channels=768, var_channels=192) # N x 768 x 17 x 17 if create_aux_logits: self.AuxLogits = InceptionBlockAux(in_channels=768, num_classes=num_classes) # N x 768 x 17 x 17 self.Mixed_7a = InceptionBlockC_1(in_channels=768) # N x 1280 x 8 x 8 self.Mixed_7b = InceptionBlockC_2(in_channels=1280) # N x 2048 x 8 x 8 self.Mixed_7c = InceptionBlockC_2(in_channels=2048) # N x 2048 x 8 x 8 self.mean = P.ReduceMean(keep_dims=True) # N x 2048 x 1 x 1 self.Dropout_last = nn.Dropout(keep_prob=0.8) # N x 2048 x 1 x 1 self.Conv2d_last = Conv2dBlock(in_channels=2048, out_channels=num_classes, kernel_size=1, with_relu=False, with_bn=False) # N x num_classes x 1 x 1 self.fc = nn.Dense(in_channels=2048, out_channels=num_classes) self.flatten = nn.Flatten()
def __init__(self, num_classes, input_nc=1, padding=1, pad_mode='pad', has_bias=False, use_dropout=False):
    super(DFCNN, self).__init__()
    if pad_mode == 'pad':
        assert padding >= 0, "when the pad_mode is 'pad', the padding must be greater than or equal to 0!"
    if pad_mode == 'same' or pad_mode == 'valid':
        assert padding == 0, "when the pad_mode is 'same' or 'valid', the padding must be equal to 0!"
    self.use_dropout = use_dropout
    # structure
    # seq 1
    self.conv11 = nn.Conv2d(in_channels=input_nc, out_channels=64, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn11 = nn.BatchNorm2d(64)
    self.relu11 = nn.ReLU()
    self.conv12 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn12 = nn.BatchNorm2d(64)
    self.relu12 = nn.ReLU()
    self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid')
    # seq 2
    self.conv21 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn21 = nn.BatchNorm2d(128)
    self.relu21 = nn.ReLU()
    self.conv22 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn22 = nn.BatchNorm2d(128)
    self.relu22 = nn.ReLU()
    self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid')
    # seq 3
    self.conv31 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn31 = nn.BatchNorm2d(256)
    self.relu31 = nn.ReLU()
    self.conv32 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn32 = nn.BatchNorm2d(256)
    self.relu32 = nn.ReLU()
    self.conv33 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn33 = nn.BatchNorm2d(256)
    self.relu33 = nn.ReLU()
    self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='valid')
    # seq 4
    self.conv41 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn41 = nn.BatchNorm2d(512)
    self.relu41 = nn.ReLU()
    self.conv42 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn42 = nn.BatchNorm2d(512)
    self.relu42 = nn.ReLU()
    self.conv43 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn43 = nn.BatchNorm2d(512)
    self.relu43 = nn.ReLU()
    self.maxpool4 = nn.MaxPool2d(kernel_size=1, stride=1, pad_mode='valid')
    # seq 5
    self.conv51 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn51 = nn.BatchNorm2d(512)
    self.relu51 = nn.ReLU()
    self.conv52 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn52 = nn.BatchNorm2d(512)
    self.relu52 = nn.ReLU()
    self.conv53 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1,
                            padding=padding, has_bias=has_bias, pad_mode=pad_mode)
    self.bn53 = nn.BatchNorm2d(512)
    self.relu53 = nn.ReLU()
    self.maxpool5 = nn.MaxPool2d(kernel_size=1, stride=1, pad_mode='valid')
    self.bn = nn.BatchNorm2d(512)
    if self.use_dropout:
        self.drop1 = nn.Dropout(0.8)
        self.drop2 = nn.Dropout(0.8)
        self.drop3 = nn.Dropout(0.8)
        self.drop4 = nn.Dropout(0.8)
        self.drop5 = nn.Dropout(0.8)
        self.drop_fc1 = nn.Dropout(0.5)
        self.drop_fc2 = nn.Dropout(0.5)
    self.fc1 = nn.Dense(25 * 512, 4096, activation='relu')
    self.fc2 = nn.Dense(4096, 4096, activation='relu')
    self.fc3 = nn.Dense(4096, num_classes, activation='relu')
    # operation
    self.transpose = ops.Transpose()
    self.reshape = ops.Reshape()
def __init__(self, weight, vocab_size, cell, batch_size): super(textrcnn, self).__init__() self.num_hiddens = 512 self.embed_size = 300 self.num_classes = 2 self.batch_size = batch_size k = (1 / self.num_hiddens)**0.5 self.embedding = nn.Embedding(vocab_size, self.embed_size, embedding_table=weight) self.embedding.embedding_table.requires_grad = False self.cell = cell self.cast = P.Cast() self.h1 = Tensor( np.zeros(shape=(self.batch_size, self.num_hiddens)).astype(np.float16)) self.c1 = Tensor( np.zeros(shape=(self.batch_size, self.num_hiddens)).astype(np.float16)) if cell == "lstm": self.lstm = P.DynamicRNN(forget_bias=0.0) self.w1_fw = Parameter(np.random.uniform( -k, k, (self.embed_size + self.num_hiddens, 4 * self.num_hiddens)).astype(np.float16), name="w1_fw") self.b1_fw = Parameter(np.random.uniform( -k, k, (4 * self.num_hiddens)).astype(np.float16), name="b1_fw") self.w1_bw = Parameter(np.random.uniform( -k, k, (self.embed_size + self.num_hiddens, 4 * self.num_hiddens)).astype(np.float16), name="w1_bw") self.b1_bw = Parameter(np.random.uniform( -k, k, (4 * self.num_hiddens)).astype(np.float16), name="b1_bw") self.h1 = Tensor( np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16)) self.c1 = Tensor( np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16)) if cell == "vanilla": self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens) self.rnnU_fw = nn.Dense(self.embed_size, self.num_hiddens) self.rnnW_bw = nn.Dense(self.num_hiddens, self.num_hiddens) self.rnnU_bw = nn.Dense(self.embed_size, self.num_hiddens) if cell == "gru": self.rnnWr_fw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens) self.rnnWz_fw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens) self.rnnWh_fw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens) self.rnnWr_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens) self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens) self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens) self.ones = Tensor( np.ones(shape=(self.batch_size, self.num_hiddens)).astype(np.float16)) self.rnnWr_fw.to_float(mstype.float16) self.rnnWz_fw.to_float(mstype.float16) self.rnnWh_fw.to_float(mstype.float16) self.rnnWr_bw.to_float(mstype.float16) self.rnnWz_bw.to_float(mstype.float16) self.rnnWh_bw.to_float(mstype.float16) self.transpose = P.Transpose() self.reduce_max = P.ReduceMax() self.expand_dims = P.ExpandDims() self.concat = P.Concat() self.reshape = P.Reshape() self.left_pad_tensor = Tensor( np.zeros( (1, self.batch_size, self.num_hiddens)).astype(np.float16)) self.right_pad_tensor = Tensor( np.zeros( (1, self.batch_size, self.num_hiddens)).astype(np.float16)) self.output_dense = nn.Dense(self.num_hiddens * 1, 2) self.concat0 = P.Concat(0) self.concat2 = P.Concat(2) self.concat1 = P.Concat(1) self.text_rep_dense = nn.Dense(2 * self.num_hiddens + self.embed_size, self.num_hiddens) self.mydense = nn.Dense(self.num_hiddens, 2) self.drop_out = nn.Dropout(keep_prob=0.7) self.tanh = P.Tanh() self.sigmoid = P.Sigmoid() self.slice = P.Slice() self.text_rep_dense.to_float(mstype.float16) self.mydense.to_float(mstype.float16) self.output_dense.to_float(mstype.float16)
        'desc_inputs': [[1, 512]],
        'desc_bprop': [[1, 512]]}),
    ('LogicalNot', {
        'block': P.LogicalNot(),
        'desc_inputs': [convert([256], np.bool_)],
        'desc_bprop': [[256]]}),  # Custom op: bool input is not converted; gongchen filed a ticket.
    ('Equal', {
        'block': P.Equal(),
        'desc_inputs': [convert([256], np.float16), convert([256], np.float16)],
        'desc_bprop': [[256]]}),
    ('Greater', {
        'block': P.Greater(),
        'desc_inputs': [convert([256], np.float16), convert([256], np.float16)],
        'desc_bprop': [[256]]}),
    ('Dropout', {
        'block': nn.Dropout(),
        'desc_inputs': [[1, 512, 7, 7]],
        'desc_bprop': [[1, 512, 7, 7]]}),  # A scalar input caused a segmentation fault in the plugin.
    ('MatMul', {
        'block': P.MatMul(),
        'desc_inputs': [[64, 512], [512, 64]],  # fp16 does not work here; quite problematic.
        'desc_bprop': [[64, 64]]}),
    ('Maximum', {
        'block': P.Maximum(),
        'desc_inputs': [[64, 1], [64, 1]],
        'desc_bprop': [[64, 1]]}),
]

test_case_lists = [test_case_reid_ops]
test_case = functools.reduce(lambda x, y: x + y, test_case_lists)
# use -k to select certain testcase
def test_check_dropout_1():
    x = Tensor(np.ones([20, 16, 50]), mstype.float32)
    m = nn.Dropout(0.8)
    m(x)
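A hedged companion check to the smoke test above: with the cell put in eval mode via set_train(False), Dropout should return its input unchanged, while in training mode roughly 1 - keep_prob of the elements are zeroed. The test name and the loose tolerance below are assumptions, not part of the original suite.

# Assumed behavioral check for nn.Dropout (keep_prob semantics, MindSpore 1.x-style API).
def test_dropout_train_vs_eval():
    x = Tensor(np.ones([20, 16, 50]), mstype.float32)
    m = nn.Dropout(0.8)             # keep 80% of the elements

    m.set_train(False)              # eval mode: identity
    assert np.allclose(m(x).asnumpy(), x.asnumpy())

    m.set_train(True)               # train mode: ~20% of elements zeroed, rest scaled by 1/0.8
    out = m(x).asnumpy()
    drop_ratio = (out == 0).mean()
    assert 0.05 < drop_ratio < 0.4  # loose bound; the mask is random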
def __init__(self, config):
    super(PANGUALPHA_Model, self).__init__()
    self.get_attention_mask = AttentionMask(config)
    self.word_embedding = EmbeddingLookup(config).set_comm_fusion(1)
    self.eod_reset = config.eod_reset
    if config.load_ckpt_path:
        # Loading the embedding table from the ckpt path:
        embedding_path = os.path.join(config.load_ckpt_path, 'position_embedding.npy')
        if os.path.exists(embedding_path):
            p_table = np.load(embedding_path)
            position_table_param = Tensor(p_table, mstype.float32)
        else:
            raise ValueError(f"{embedding_path} file does not exist, please check whether the "
                             f"position_embedding file exists.")
    else:
        position_table_param = TruncatedNormal(0.02)
    self.position_embedding = nn.Embedding(
        config.seq_length,
        config.embedding_size,
        embedding_table=position_table_param).set_comm_fusion(1)
    self.word_embedding.embedding_table.parallel_optimizer = False
    self.position_embedding.embedding_table.parallel_optimizer = False
    self.position_embedding.gather.shard(((1, 1), (config.dp,)))
    self.position_embedding.expand.shard(((config.dp, 1),))
    self.blocks = nn.CellList()
    fusion_group_num = 4
    fusion_group_size = config.num_layers // fusion_group_num
    fusion_group_size = max(fusion_group_size, 1)
    num_layers = config.num_layers - 1
    self.num_layers = num_layers
    for i in range(num_layers):
        per_block = Block(config, i + 1).set_comm_fusion(int(i / fusion_group_size) + 2)
        per_block.recompute()
        per_block.attention.dropout.dropout_gen_mask.recompute(False)
        per_block.attention.prob_dropout.dropout_gen_mask.recompute(False)
        per_block.output.dropout.dropout_gen_mask.recompute(False)
        per_block.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        per_block.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        per_block.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        self.blocks.append(per_block)
    if config.self_layernorm:
        self.layernorm = LayerNorm((config.embedding_size,), config.dp).to_float(
            mstype.float32).set_comm_fusion(int((num_layers - 1) / fusion_group_size) + 2)
    else:
        self.layernorm = nn.LayerNorm((config.embedding_size,)).to_float(
            mstype.float32).set_comm_fusion(int((num_layers - 1) / fusion_group_size) + 2)
        self.layernorm.layer_norm.shard(((config.dp, 1, 1), (1,), (1,)))
    self.layernorm.gamma.parallel_optimizer = False
    self.layernorm.beta.parallel_optimizer = False
    self.use_past = config.use_past
    self.past = tuple([None] * config.num_layers)
    self.add = P.TensorAdd().shard(((config.dp, 1, 1), (config.dp, 1, 1)))
    self.expand_dims = P.ExpandDims().shard(((config.dp, 1, 1),))
    self.dtype = config.compute_dtype
    self.dropout = nn.Dropout(1 - config.dropout_rate)
    self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
    self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))
    if config.load_ckpt_path:
        # Loading the embedding table from the ckpt path:
        embedding_path = os.path.join(config.load_ckpt_path, 'top_query_embedding.npy')
        if os.path.exists(embedding_path):
            top_query_table = np.load(embedding_path)
            top_query_table_param = Tensor(top_query_table, mstype.float32)
        else:
            raise ValueError(f"{embedding_path} file does not exist, please check whether the "
                             f"top_query_embedding file exists.")
    else:
        top_query_table_param = TruncatedNormal(0.02)
    self.top_query_embedding = nn.Embedding(
        config.seq_length, config.embedding_size,
        embedding_table=top_query_table_param).set_comm_fusion(
            int((config.num_layers - 1) / fusion_group_num) + 2)
    self.top_query_embedding.embedding_table.parallel_optimizer = False
    self.top_query_embedding.gather.shard(((1, 1), (config.dp,)))
    self.top_query_embedding.expand.shard(((config.dp, 1),))
    self.top_query_layer = QueryLayer(config)
    self.top_query_layer.recompute()
    self.top_query_layer.output.dropout.dropout_gen_mask.recompute(False)
    self.top_query_layer.attention.dropout.dropout_gen_mask.recompute(False)
    self.top_query_layer.attention.prob_dropout.dropout_gen_mask.recompute(False)
    self.top_query_layer.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
    self.top_query_layer.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
    self.top_query_layer.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
    self.top_query_layer.set_comm_fusion(int((config.num_layers - 1) / fusion_group_num) + 2)
def test_check_dropout_3():
    x = Tensor(np.ones([20, 16, 50]), mstype.float32)
    m = nn.Dropout(0.3, seed0=1, seed1=1)
    m(x)
def __init__(self):
    super(PReLUGradNet, self).__init__()
    self.prelu_grad = G.PReLUGrad()

def construct(self, dout, x, w):
    return self.prelu_grad(dout, x, w)


test_cases = [
    ('SoftMaxGrad', {
        'block': SoftMaxGrad(VirtualNetWithLoss(P.Softmax())),
        'desc_inputs': [[128, 32, 32, 64]],
        'desc_bprop': [[128, 32, 32, 64]],
    }),
    ('DropoutGrad', {
        'block': DropoutGrad(VirtualNetWithLoss(nn.Dropout())),
        'desc_inputs': [[128, 32, 32, 64]],
        'desc_bprop': [[128, 32, 32, 64]],
    }),
    ('ScalarSummary', {
        'block': ScalarSummaryNet(),
        'desc_inputs': [Tensor(2.2)],
    }),
    ('L2Normalize', {
        'block': L2NormalizeNet(),
        'desc_inputs': [
            Tensor(np.array([[1.0, 2, 3], [4.0, 5, 6], [7.0, 8, 9]]), mindspore.float32)
        ],
    }),
def __init__(self, num_classes, feature_shape, backbone, channel, depth, scale_sizes, atrous_rates, decoder_output_stride, output_stride, fine_tune_batch_norm=False): super(SingleDeepLabV3, self).__init__() self.num_classes = num_classes self.channel = channel self.depth = depth self.scale_sizes = [] for scale_size in np.sort(scale_sizes): self.scale_sizes.append(scale_size) self.net = backbone self.aspp = ASPP(channel=self.channel, depth=self.depth, feature_shape=[feature_shape[2], feature_shape[3]], scale_sizes=self.scale_sizes, atrous_rates=atrous_rates, output_stride=output_stride, fine_tune_batch_norm=fine_tune_batch_norm) atrous_rates_len = 0 if atrous_rates is not None: atrous_rates_len = len(atrous_rates) self.fc1 = _conv_bn_relu(depth * (2 + atrous_rates_len), depth, ksize=1, stride=1, use_batch_statistics=fine_tune_batch_norm) self.fc2 = nn.Conv2d(depth, num_classes, kernel_size=1, stride=1, has_bias=True) self.upsample = P.ResizeBilinear( (int(feature_shape[2]), int(feature_shape[3])), align_corners=True) self.samples = [] for scale_size in self.scale_sizes: self.samples.append(SampleBlock(feature_shape, scale_size)) self.samples = nn.CellList(self.samples) self.feature_shape = [ float(feature_shape[0]), float(feature_shape[1]), float(feature_shape[2]), float(feature_shape[3]) ] self.pad = P.Pad(((0, 0), (0, 0), (1, 1), (1, 1))) self.dropout = nn.Dropout(keep_prob=0.9) self.shape = P.Shape() self.decoder_output_stride = decoder_output_stride if decoder_output_stride is not None: self.decoder = Decoder( low_level_channel=depth, channel=depth, depth=depth, feature_shape=[feature_shape[2], feature_shape[3]], scale_sizes=self.scale_sizes, decoder_output_stride=decoder_output_stride, fine_tune_batch_norm=fine_tune_batch_norm)
        'desc_inputs': [[3, 2, 1, 3], Tensor(np.array([[0, 1], [0, 1], [0, 1]]).astype(np.int32))],
        'desc_bprop': [[4, 1, 3]],
        'skip': ['backward']}),
    ('DropoutGenMask', {
        'block': P.DropoutGenMask(),
        'desc_const': [(2, 2), Tensor(0.5, mstype.float32)],
        'desc_inputs': [],
        'desc_bprop': [Tensor(np.ones(1).astype(np.int8))],
        'skip': ['backward']}),
    ('DropoutDoMask', {
        'block': P.DropoutDoMask(),
        'desc_const': [Tensor(0.5)],
        'desc_inputs': [[64, 12, 128, 128], Tensor(np.ones(1572864).astype(np.uint8))],
        'desc_bprop': [[64, 12, 128, 128]]}),
    ('Dropout', {
        'block': nn.Dropout(0.5),
        'desc_inputs': [[64, 12, 128, 128]],
        'desc_bprop': [[64, 12, 128, 128]]}),
    ('ReduceMean0', {
        'block': P.ReduceMean(),
        'desc_const': [(2,)],
        'desc_inputs': [[3, 2, 2]],
        'desc_bprop': [[3, 2]]}),
    ('ReduceMean1', {
        'block': P.ReduceMean(),
        'desc_const': [2],
        'desc_inputs': [[3, 2, 2]],
        'desc_bprop': [[3, 2]]}),
    ('All', {
        'block': P.ReduceAll(),
        'desc_const': [(1,)],
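The two dropout primitives registered above are the low-level pieces MindSpore's dropout is built from on Ascend: DropoutGenMask produces a random bit mask for a given shape and keep probability, and DropoutDoMask applies that mask with 1/keep_prob rescaling. A hedged sketch of composing them by hand, with shapes chosen only for illustration and the argument order inferred from the test descriptors:

# Illustrative composition of DropoutGenMask + DropoutDoMask (Ascend-only primitives).
import numpy as np
import mindspore.common.dtype as mstype
import mindspore.ops.operations as P
from mindspore import Tensor

x = Tensor(np.ones((64, 12, 128, 128)), mstype.float32)
keep_prob = Tensor(0.5, mstype.float32)

gen_mask = P.DropoutGenMask()
do_mask = P.DropoutDoMask()

mask = gen_mask((64, 12, 128, 128), keep_prob)   # uint8 bit mask for the given shape
y = do_mask(x, mask, keep_prob)                  # masked and rescaled by 1 / keep_prob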
def __init__(self, model_settings, model_size_info): super(DSCNN, self).__init__() # N C H W label_count = model_settings['label_count'] input_frequency_size = model_settings['dct_coefficient_count'] input_time_size = model_settings['spectrogram_length'] t_dim = input_time_size f_dim = input_frequency_size num_layers = model_size_info[0] conv_feat = [None] * num_layers conv_kt = [None] * num_layers conv_kf = [None] * num_layers conv_st = [None] * num_layers conv_sf = [None] * num_layers i = 1 for layer_no in range(0, num_layers): conv_feat[layer_no] = model_size_info[i] i += 1 conv_kt[layer_no] = model_size_info[i] i += 1 conv_kf[layer_no] = model_size_info[i] i += 1 conv_st[layer_no] = model_size_info[i] i += 1 conv_sf[layer_no] = model_size_info[i] i += 1 seq_cell = [] in_channel = 1 for layer_no in range(0, num_layers): if layer_no == 0: seq_cell.append( nn.Conv2d(in_channels=in_channel, out_channels=conv_feat[layer_no], kernel_size=(conv_kt[layer_no], conv_kf[layer_no]), stride=(conv_st[layer_no], conv_sf[layer_no]), pad_mode="same", padding=0, has_bias=False)) seq_cell.append( nn.BatchNorm2d(num_features=conv_feat[layer_no], momentum=0.98)) in_channel = conv_feat[layer_no] else: seq_cell.append( DepthWiseConv(in_planes=in_channel, kernel_size=(conv_kt[layer_no], conv_kf[layer_no]), stride=(conv_st[layer_no], conv_sf[layer_no]), pad_mode='same', pad=0)) seq_cell.append( nn.BatchNorm2d(num_features=in_channel, momentum=0.98)) seq_cell.append(nn.ReLU()) seq_cell.append( nn.Conv2d(in_channels=in_channel, out_channels=conv_feat[layer_no], kernel_size=(1, 1), pad_mode="same")) seq_cell.append( nn.BatchNorm2d(num_features=conv_feat[layer_no], momentum=0.98)) seq_cell.append(nn.ReLU()) in_channel = conv_feat[layer_no] t_dim = math.ceil(t_dim / float(conv_st[layer_no])) f_dim = math.ceil(f_dim / float(conv_sf[layer_no])) seq_cell.append(nn.AvgPool2d(kernel_size=(t_dim, f_dim))) # to fix ? seq_cell.append(nn.Flatten()) seq_cell.append(nn.Dropout(model_settings['dropout1'])) seq_cell.append(nn.Dense(in_channel, label_count)) self.model = nn.SequentialCell(seq_cell)
def __init__(self):
    super(Net_dropout, self).__init__()
    self.dropout = nn.Dropout(0.5)
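The fragment above only shows the constructor; a complete, minimal version of such a dropout-only wrapper cell and a call to it might look like the sketch below. The construct body and the input shape are assumptions.

# Minimal assumed version of the dropout-only test cell and how it could be exercised.
import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore import Tensor

class Net_dropout(nn.Cell):
    def __init__(self):
        super(Net_dropout, self).__init__()
        self.dropout = nn.Dropout(0.5)

    def construct(self, x):
        return self.dropout(x)

net = Net_dropout()
net.set_train(True)
out = net(Tensor(np.ones([4, 4]), mstype.float32))  # ~half the entries zeroed, rest scaled by 2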
def __init__(self, device_target, num_classes=1000, width_mult=1., has_dropout=False, inverted_residual_setting=None, round_nearest=8): super(MobileNetV2, self).__init__() block = InvertedResidual input_channel = 32 last_channel = 1280 # setting of inverted residual blocks self.cfgs = inverted_residual_setting if inverted_residual_setting is None: self.cfgs = [ # t, c, n, s [1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2], [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2], [6, 320, 1, 1], ] # building first layer input_channel = _make_divisible(input_channel * width_mult, round_nearest) self.out_channels = _make_divisible( last_channel * max(1.0, width_mult), round_nearest) features = [ConvBNReLU(device_target, 3, input_channel, stride=2)] # building inverted residual blocks for t, c, n, s in self.cfgs: output_channel = _make_divisible(c * width_mult, round_nearest) for i in range(n): stride = s if i == 0 else 1 features.append( block(device_target, input_channel, output_channel, stride, expand_ratio=t)) input_channel = output_channel # building last several layers features.append( ConvBNReLU(device_target, input_channel, self.out_channels, kernel_size=1)) # make it nn.CellList self.features = nn.SequentialCell(features) # mobilenet head head = ([ GlobalAvgPooling(), nn.Dense(self.out_channels, num_classes, has_bias=True) ] if not has_dropout else [ GlobalAvgPooling(), nn.Dropout(0.2), nn.Dense(self.out_channels, num_classes, has_bias=True) ]) self.head = nn.SequentialCell(head) self._initialize_weights()
def __init__(self, hidden_size: int, hidden_dropout_prob: float = 0.1) -> None:
    super().__init__()
    self.layer_norm = nn.LayerNorm((hidden_size, ), epsilon=1e-12)
    self.dropout = nn.Dropout(1.0 - hidden_dropout_prob)
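A hedged sketch of how a dropout plus layer-norm block like this is commonly applied in transformer output layers: dropout on the sub-layer output, residual add, then layer norm. The construct below is an assumption, not the original code.

# Assumed post-layer pattern for the cell above: dropout -> residual add -> layer norm.
def construct(self, hidden_states, input_tensor):
    hidden_states = self.dropout(hidden_states)                    # active only in training mode
    hidden_states = self.layer_norm(hidden_states + input_tensor)  # normalize the residual sum
    return hidden_states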
def __init__(self, config: TransformerConfig, is_training: bool, use_one_hot_embeddings: bool = False, use_positional_embedding: bool = True): super(Transformer, self).__init__() self.use_positional_embedding = use_positional_embedding config = copy.deepcopy(config) self.is_training = is_training if not is_training: config.hidden_dropout_prob = 0.0 config.attention_dropout_prob = 0.0 self.input_mask_from_dataset = config.input_mask_from_dataset self.batch_size = config.batch_size self.max_positions = config.seq_length self.attn_embed_dim = config.hidden_size self.num_layers = config.num_hidden_layers self.word_embed_dim = config.hidden_size self.last_idx = self.num_layers - 1 self.embedding_lookup = EmbeddingLookup( vocab_size=config.vocab_size, embed_dim=self.word_embed_dim, use_one_hot_embeddings=use_one_hot_embeddings) if self.use_positional_embedding: self.positional_embedding = PositionalEmbedding( embedding_size=self.word_embed_dim, max_position_embeddings=config.max_position_embeddings) self.encoder = TransformerEncoder( attn_embed_dim=self.attn_embed_dim, encoder_layers=self.num_layers, num_attn_heads=config.num_attention_heads, intermediate_size=config.intermediate_size, attention_dropout_prob=config.attention_dropout_prob, initializer_range=config.initializer_range, hidden_dropout_prob=config.hidden_dropout_prob, hidden_act=config.hidden_act, compute_type=config.compute_type) self.decoder = TransformerDecoder( attn_embed_dim=self.attn_embed_dim, decoder_layers=self.num_layers, num_attn_heads=config.num_attention_heads, intermediate_size=config.intermediate_size, attn_dropout_prob=config.attention_dropout_prob, initializer_range=config.initializer_range, dropout_prob=config.hidden_dropout_prob, hidden_act=config.hidden_act, compute_type=config.compute_type) self.cast = P.Cast() self.dtype = config.dtype self.cast_compute_type = SaturateCast(dst_type=config.compute_type) self.slice = P.StridedSlice() self.dropout = nn.Dropout(keep_prob=1 - config.hidden_dropout_prob) self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config) self.scale = Tensor([math.sqrt(float(self.word_embed_dim))], dtype=mstype.float32) self.multiply = P.Mul()