Example #1
    def __init__(self, block, num_classes=100, batch_size=32):
        """init"""
        super(ResNet, self).__init__()
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.head = Head()

        self.layer1 = MakeLayer0(block,
                                 in_channels=64,
                                 out_channels=256,
                                 stride=1)
        self.layer2 = MakeLayer1(block,
                                 in_channels=256,
                                 out_channels=512,
                                 stride=2)
        self.layer3 = MakeLayer2(block,
                                 in_channels=512,
                                 out_channels=1024,
                                 stride=2)
        self.layer4 = MakeLayer3(block,
                                 in_channels=1024,
                                 out_channels=2048,
                                 stride=2)

        self.pool = ops.ReduceMean(keep_dims=True)
        self.squeeze = ops.Squeeze(axis=(2, 3))
        self.fc = fc_with_initialize(512 * block.expansion, num_classes)

        # Pipeline-parallel configuration: the stem and early stages run on
        # stage 0, the deeper stages and classifier on stage 1.
        self.head.pipeline_stage = 0
        self.layer1.pipeline_stage = 0
        self.layer2.pipeline_stage = 0
        self.layer3.pipeline_stage = 1
        self.layer4.pipeline_stage = 1
        self.fc.pipeline_stage = 1
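
A minimal standalone sketch of the pooling tail these ResNet variants share (shapes here are illustrative, not from the original code): ReduceMean over the spatial axes keeps them as size-1 dims, and Squeeze(axis=(2, 3)) then removes exactly those two, leaving a (batch, channels) tensor for the fully connected layer.

    import numpy as np
    import mindspore
    import mindspore.ops as ops
    from mindspore import Tensor

    x = Tensor(np.ones((2, 2048, 7, 7)), mindspore.float32)  # (batch, channels, H, W)
    pooled = ops.ReduceMean(keep_dims=True)(x, (2, 3))       # (2, 2048, 1, 1)
    flat = ops.Squeeze(axis=(2, 3))(pooled)                  # (2, 2048)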
Example #2
    def __init__(self, block, num_classes=100, batch_size=32):
        """init"""
        super(ResNet, self).__init__()
        self.batch_size = batch_size
        self.num_classes = num_classes

        self.conv1 = conv7x7(3, 64, stride=2, padding=0)

        self.bn1 = bn_with_initialize(64)
        self.relu = ops.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        self.layer1 = MakeLayer0(block,
                                 in_channels=64,
                                 out_channels=256,
                                 stride=1)
        self.layer2 = MakeLayer1(block,
                                 in_channels=256,
                                 out_channels=512,
                                 stride=2)
        self.layer3 = MakeLayer2(block,
                                 in_channels=512,
                                 out_channels=1024,
                                 stride=2)
        self.layer4 = MakeLayer3(block,
                                 in_channels=1024,
                                 out_channels=2048,
                                 stride=2)

        self.pool = ops.ReduceMean(keep_dims=True)
        self.squeeze = ops.Squeeze(axis=(2, 3))
        self.fc = fc_with_initialize(512 * block.expansion, num_classes)
Example #3
    def variable_recurrent(self, x, h, seq_length, w_ih, w_hh, b_ih, b_hh):
        '''recurrent steps with sequence length'''
        time_step = x.shape[0]
        h_t = h
        if self.is_lstm:
            hidden_size = h[0].shape[-1]
            zero_output = P.ZerosLike()(h_t[0])
        else:
            hidden_size = h.shape[-1]
            zero_output = P.ZerosLike()(h_t)
        # Broadcast seq_length to (batch, hidden_size) so it can be compared
        # element-wise against the step counter below.
        seq_length = P.Cast()(seq_length, mindspore.float32)
        seq_length = P.BroadcastTo((hidden_size, -1))(seq_length)
        seq_length = P.Cast()(seq_length, mindspore.int32)
        seq_length = P.Transpose()(seq_length, (1, 0))

        outputs = []
        state_t = h_t
        t = 0
        while t < time_step:
            # Slice step t as (1, batch, input_size) and drop the time axis.
            x_t = x[t:t + 1:1]
            x_t = P.Squeeze(0)(x_t)
            h_t = self.cell(x_t, state_t, w_ih, w_hh, b_ih, b_hh)
            # Past a sequence's true length, keep the previous state and emit
            # a zero output instead of the cell result.
            seq_cond = seq_length > t
            if self.is_lstm:
                state_t_0 = P.Select()(seq_cond, h_t[0], state_t[0])
                state_t_1 = P.Select()(seq_cond, h_t[1], state_t[1])
                output = P.Select()(seq_cond, h_t[0], zero_output)
                state_t = (state_t_0, state_t_1)
            else:
                state_t = P.Select()(seq_cond, h_t, state_t)
                output = P.Select()(seq_cond, h_t, zero_output)
            outputs.append(output)
            t += 1
        outputs = P.Stack()(outputs)
        return outputs, state_t
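
A minimal standalone sketch of the masking step above (shapes and values are illustrative): P.Select keeps the freshly computed state wherever the condition is True, i.e. for batch rows whose sequence is still running at step t, and falls back to the old state elsewhere.

    import numpy as np
    import mindspore
    from mindspore import Tensor
    import mindspore.ops.operations as P

    seq_cond = Tensor(np.array([[True] * 4, [False] * 4]))  # (batch=2, hidden=4)
    h_new = Tensor(np.ones((2, 4)), mindspore.float32)
    h_old = Tensor(np.zeros((2, 4)), mindspore.float32)
    h_t = P.Select()(seq_cond, h_new, h_old)  # row 0 from h_new, row 1 from h_old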
Example #4
    def construct(self, lstm_outputs, next_ids):
        total_loss = []
        for lstm_output, next_token_id in zip(lstm_outputs, next_ids):
            next_token_id_flat = next_token_id.view((-1, 1))
            if self.training and self.sample_softmax:
                lstm_output = lstm_output.view((-1, self.hidden_size))
                loss = self.sampled_softmax_loss(self.weight, self.bias, next_token_id_flat, lstm_output)
            else:
                # Sparse cross-entropy expects class ids of shape (N,), so drop
                # the trailing singleton axis from the (N, 1) id tensor.
                next_token_id_flat = P.Squeeze(1)(next_token_id_flat)
                output_scores = self.matmul(lstm_output, self.weight) + self.bias
                output_scores = output_scores.view((-1, output_scores.shape[-1]))
                loss = self.sparse_softmax_cross_entropy_with_logits(output_scores, next_token_id_flat)
            total_loss.append(self.reduce_mean(loss))

        # Average the forward and backward language-model losses.
        return 0.5 * (total_loss[0] + total_loss[1])
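
A minimal standalone sketch of the shape fix in the else-branch (names and shapes are illustrative; the loss op is assumed to be the one exposed as P.SparseSoftmaxCrossEntropyWithLogits in older MindSpore releases): sparse cross-entropy wants class ids of shape (N,), so P.Squeeze(1) drops the trailing singleton axis from the (N, 1) id tensor.

    import numpy as np
    import mindspore
    from mindspore import Tensor
    import mindspore.ops.operations as P

    labels = Tensor(np.array([[2], [0], [1]]), mindspore.int32)  # (3, 1)
    labels_flat = P.Squeeze(1)(labels)                           # (3,)
    logits = Tensor(np.random.randn(3, 5).astype(np.float32))    # (3, 5)
    loss = P.SparseSoftmaxCrossEntropyWithLogits()(logits, labels_flat)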
Example #5
    def recurrent(self, x, h_0, w_ih, w_hh, b_ih, b_hh):
        '''recurrent steps without sequence length'''
        time_step = x.shape[0]
        outputs = []
        t = 0
        h = h_0
        while t < time_step:
            # Slice step t as (1, batch, input_size) and drop the time axis.
            x_t = x[t:t + 1:1]
            x_t = P.Squeeze(0)(x_t)
            h = self.cell(x_t, h, w_ih, w_hh, b_ih, b_hh)
            if self.is_lstm:
                outputs.append(h[0])
            else:
                outputs.append(h)
            t += 1
        # Stack the per-step outputs back along a new time axis.
        outputs = P.Stack()(outputs)
        return outputs, h
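
A minimal standalone sketch of the slice/stack round trip above (shapes are illustrative): slicing x[t:t + 1:1] keeps a leading time axis of size 1, P.Squeeze(0) removes it before the cell call, and P.Stack reassembles the per-step outputs along a new time axis.

    import numpy as np
    import mindspore
    from mindspore import Tensor
    import mindspore.ops.operations as P

    x = Tensor(np.ones((4, 2, 8)), mindspore.float32)       # (time, batch, feature)
    steps = [P.Squeeze(0)(x[t:t + 1:1]) for t in range(4)]  # each (2, 8)
    y = P.Stack()(steps)                                    # back to (4, 2, 8)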
Example #6
    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides=(1, 2, 2, 2),
                 num_classes=100):
        super(ResNet, self).__init__()

        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError("the lengths of layer_nums, in_channels and "
                             "out_channels must all be 4!")

        self.conv1 = _conv7x7(3, 64, stride=2)
        self.bn1 = _fused_bn(64)
        self.relu = ops.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same')

        self.layer1 = self._make_layer(block,
                                       layer_nums[0],
                                       in_channel=in_channels[0],
                                       out_channel=out_channels[0],
                                       stride=strides[0])
        self.layer2 = self._make_layer(block,
                                       layer_nums[1],
                                       in_channel=in_channels[1],
                                       out_channel=out_channels[1],
                                       stride=strides[1])
        self.layer3 = self._make_layer(block,
                                       layer_nums[2],
                                       in_channel=in_channels[2],
                                       out_channel=out_channels[2],
                                       stride=strides[2])
        self.layer4 = self._make_layer(block,
                                       layer_nums[3],
                                       in_channel=in_channels[3],
                                       out_channel=out_channels[3],
                                       stride=strides[3])

        self.mean = ops.ReduceMean(keep_dims=True)
        self.end_point = nn.Dense(out_channels[3], num_classes, has_bias=True,
                                  weight_init=dense_weight_variable())
        self.squeeze = ops.Squeeze()
        self.cast = ops.Cast()
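
Note the contrast with Example #1: ops.Squeeze() without an axis removes every size-1 dimension, which also eats the batch axis when the batch size is 1. A minimal sketch (shapes are illustrative):

    import numpy as np
    import mindspore
    import mindspore.ops as ops
    from mindspore import Tensor

    x = Tensor(np.ones((1, 2048, 1, 1)), mindspore.float32)
    print(ops.Squeeze()(x).shape)             # (2048,)   batch axis squeezed away
    print(ops.Squeeze(axis=(2, 3))(x).shape)  # (1, 2048) batch axis preserved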
Example #7
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=False):
        super(BertModel, self).__init__()
        config = copy.deepcopy(config)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        self.input_mask_from_dataset = config.input_mask_from_dataset
        self.token_type_ids_from_dataset = config.token_type_ids_from_dataset
        self.batch_size = config.batch_size
        self.seq_length = config.seq_length
        self.hidden_size = config.hidden_size
        self.num_hidden_layers = config.num_hidden_layers
        self.embedding_size = config.hidden_size
        self.token_type_ids = None

        self.last_idx = self.num_hidden_layers - 1
        # Use -1 for the batch axis (instead of [self.batch_size, ...]) so the
        # embedding shape works with any batch size.
        output_embedding_shape = [-1, self.seq_length,
                                  self.embedding_size]

        if not self.token_type_ids_from_dataset:
            self.token_type_ids = initializer(
                "zeros", [self.batch_size, self.seq_length], mstype.int32).to_tensor()

        self.bert_embedding_lookup = EmbeddingLookup(
            vocab_size=config.vocab_size,
            embedding_size=self.embedding_size,
            embedding_shape=output_embedding_shape,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range)

        self.bert_embedding_postprocessor = EmbeddingPostprocessor(
            embedding_size=self.embedding_size,
            embedding_shape=output_embedding_shape,
            use_relative_positions=config.use_relative_positions,
            use_token_type=True,
            token_type_vocab_size=config.type_vocab_size,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=0.02,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

        self.bert_encoder = BertTransformer(
            batch_size=self.batch_size,
            hidden_size=self.hidden_size,
            seq_length=self.seq_length,
            num_attention_heads=config.num_attention_heads,
            num_hidden_layers=self.num_hidden_layers,
            intermediate_size=config.intermediate_size,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            use_relative_positions=config.use_relative_positions,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type,
            return_all_encoders=True,
            enable_fused_layernorm=config.enable_fused_layernorm)

        self.cast = ops.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = ops.StridedSlice()

        self.squeeze_1 = ops.Squeeze(axis=1)
        self.dense = nn.Dense(self.hidden_size, self.hidden_size,
                              activation="tanh",
                              weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
Example #8
    def __init__(self):
        super(SqueezeNet, self).__init__()
        self.squeeze = P.Squeeze()
Example #9
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=False):
        super(BertModel, self).__init__()
        self._bertconfig = BertConfig(
            seq_length=config["max_position_embeddings"],
            vocab_size=config["vocab_size"],
            hidden_size=config["hidden_size"],
            num_hidden_layers=config["num_hidden_layers"],
            num_attention_heads=config["num_attention_heads"],
            intermediate_size=config["hidden_size"] * 4,
            hidden_act=config["hidden_act"],
            hidden_dropout_prob=config["hidden_dropout_prob"],
            attention_probs_dropout_prob=config["attention_probs_dropout_prob"],
            max_position_embeddings=config["max_position_embeddings"],
            type_vocab_size=config["type_vocab_size"],
            initializer_range=config["initializer_range"],
            use_relative_positions=False,
            dtype=mstype.float32,
            compute_type=mstype.float32)
        config = copy.deepcopy(self._bertconfig)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        self.seq_length = config.seq_length
        self.hidden_size = config.hidden_size
        self.num_hidden_layers = config.num_hidden_layers
        self.embedding_size = config.hidden_size
        self.token_type_ids = None

        self.last_idx = self.num_hidden_layers - 1
        output_embedding_shape = [-1, self.seq_length, self.embedding_size]

        self.bert_embedding_lookup = nn.Embedding(
            vocab_size=config.vocab_size,
            embedding_size=self.embedding_size,
            use_one_hot=use_one_hot_embeddings)

        self.bert_embedding_postprocessor = EmbeddingPostprocessor(
            embedding_size=self.embedding_size,
            embedding_shape=output_embedding_shape,
            use_relative_positions=config.use_relative_positions,
            use_token_type=True,
            token_type_vocab_size=config.type_vocab_size,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=0.02,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

        self.bert_encoder = BertTransformer(
            hidden_size=self.hidden_size,
            seq_length=self.seq_length,
            num_attention_heads=config.num_attention_heads,
            num_hidden_layers=self.num_hidden_layers,
            intermediate_size=config.intermediate_size,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            use_relative_positions=config.use_relative_positions,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type,
            return_all_encoders=True)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = P.StridedSlice()

        self.squeeze_1 = P.Squeeze(axis=1)
        self.dense = nn.Dense(self.hidden_size, self.hidden_size,
                              activation="tanh",
                              weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)