Beispiel #1
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        """Create the layer's weight/bias, its operators and the second-order state.

        Args:
            in_channels: Input feature count, checked with ``check_positive_int``.
            out_channels: Output feature count, checked with ``check_positive_int``.
            weight_init: Initializer spec for the ``[out_channels, in_channels]``
                weight. When a ``Tensor`` is given it must have exactly that shape.
            bias_init: Initializer spec for the ``[out_channels]`` bias. When a
                ``Tensor`` is given it must have exactly that shape.
            damping: Wrapped into the ``damping`` tensor.
            loss_scale: Its reciprocal is stored as a float16 tensor.
            frequency: Stored as an int32 tensor in ``freq``.
            batch_size: Stored unchanged on the instance.
            has_bias: Whether a bias parameter is created, checked with ``check_bool``.
            activation: Forwarded to ``get_activation``.

        Raises:
            ValueError: If a ``Tensor`` initializer does not have the expected shape.
        """
        super(Dense_Thor, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.thor = True
        self.batch_size = batch_size
        if isinstance(weight_init, Tensor):
            if weight_init.dim() != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(initializer(weight_init, [out_channels, in_channels]), name="weight")

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.dim() != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]), name="bias")

        # Created once; the original built an identical MatMul a second time below.
        self.matmul = P.MatMul(transpose_b=True)
        self.bias_add = P.BiasAdd()

        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None

        # Fixed-size float16 buffers, excluded from gradient updates.
        # NOTE(review): 128 * 16 == 2048 and 63 * 16 == 1008 == 1001 + 7, so the
        # hard-coded sizes presumably target a 2048 -> 1001 layer - confirm.
        self.matrix_A_inv = Parameter(Tensor(np.zeros([128, 128, 16, 16]).astype(np.float16)), name='matrix_A_inv',
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16)), name="matrix_G_inv",
                                      requires_grad=False)
        self.fake_G = Tensor(np.zeros([63, 63, 16, 16]).astype(np.float16))

        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        # Integer counter parameter, initialised to zero.
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        # Stored as the reciprocal so that it can be applied by multiplication.
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.vector_matmul = P.CusBatchMatMul()
        # Zero padding appended on both axes: 1001 + 23 == 1024, 1001 + 7 == 1008.
        self.pad = P.Pad(((0, 23), (0, 23)))
        self.pad1 = P.Pad(((0, 7), (0, 7)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.assignadd = P.AssignAdd()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        self.fused_abs_max1 = P.CusFusedAbsMax1([1001, 1001])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        # Identity matrices sized 2048 and 1024.
        self.dampingA = Tensor(np.identity(2048), mstype.float32)
        self.dampingG = Tensor(np.identity(1024), mstype.float32)
        self.add = P.TensorAdd()
        self.sqrt = P.Sqrt()
        # Routes the gradient flowing through this point to ``self.save_gradient``.
        self.getG = P.InsertGradientOf(self.save_gradient)
Beispiel #2
0
 def __init__(self):
     """Create the cast, multiply and dtype operators held by this instance."""
     super().__init__()
     self.cast = P.Cast()
     self.mul = P.Mul()
     self.dtype = P.DType()
Beispiel #3
0
    def __init__(self, args, strategy):
        """Build every operator with its shard strategy, plus constants and the weight.

        Args:
            args: Supplies the scalars ``a`` .. ``e`` as well as ``num_classes``
                and ``emb_size``, which define the weight shape.
            strategy: Supplies the ``*_strategy`` attributes attached to the
                operators below.
        """
        super(SemiAutoOneHotNet, self).__init__()

        def with_strategy(op, op_strategy):
            # Attach the strategy to the operator and hand the same object back.
            op.set_strategy(strategy=op_strategy)
            return op

        # Scalars copied from ``args``.
        self.a = args.a
        self.b = args.b
        self.c = args.c
        self.d = args.d
        self.e = args.e

        # Cast operators.
        self.cast = with_strategy(P.Cast(), strategy.twod_strategy)
        self.cast1 = with_strategy(P.Cast(), strategy.twod_strategy)
        self.cast2 = with_strategy(P.Cast(), strategy.twod_strategy)
        self.cast3 = with_strategy(P.Cast(), strategy.scalar_strategy)
        self.cast4 = with_strategy(P.Cast(), strategy.scalar_strategy)

        # float32 constant tensors.
        self.a_const = Tensor(self.a, dtype=mstype.float32)
        self.b_const = Tensor(self.b, dtype=mstype.float32)
        self.c_const = Tensor(self.c, dtype=mstype.float32)
        self.d_const = Tensor(self.d, dtype=mstype.float32)
        self.e_const = Tensor(self.e, dtype=mstype.float32)
        self.m_const_zero = Tensor(0, dtype=mstype.float32)
        self.a_const_one = Tensor(1, dtype=mstype.float32)

        self.onehot = with_strategy(P.OneHot(), strategy.onehot_strategy)

        self.exp = with_strategy(P.Exp(), strategy.twod_strategy)
        self.exp2 = with_strategy(P.Exp(), strategy.twod_strategy)
        self.exp3 = with_strategy(P.Exp(), strategy.twod_strategy)

        # Element-wise arithmetic. The attribute names do not always match the
        # operator type (e.g. ``mul_const2`` is a TensorAdd).
        self.mul_const = with_strategy(P.Mul(), strategy.scalar_twod_strategy)
        self.mul_const2 = with_strategy(P.TensorAdd(), strategy.scalar_twod_strategy)
        self.mul_const3 = with_strategy(P.Sub(), strategy.twod_scalar_strategy)
        self.mul_const4 = with_strategy(P.Sub(), strategy.scalar_twod_strategy)
        self.mul_const5 = with_strategy(P.Mul(), strategy.twod_scalar_strategy)
        self.mul = with_strategy(P.Mul(), strategy.twod_twod_strategy)
        self.mul2 = with_strategy(P.Mul(), strategy.twod_twod_strategy)
        self.mul3 = with_strategy(P.TensorAdd(), strategy.twod_twod_strategy)
        self.mul4 = with_strategy(P.Sub(), strategy.twod_twodbc_strategy)
        self.mul5 = with_strategy(P.RealDiv(), strategy.twod_twodbc_strategy)
        self.mul6 = with_strategy(P.Mul(), strategy.twod_twod_strategy)
        self.mul7 = with_strategy(P.Mul(), strategy.twod_scalar_strategy)
        self.mul8 = with_strategy(P.RealDiv(), strategy.scalar_scalar_strategy)
        self.mul9 = with_strategy(P.TensorAdd(), strategy.twod_scalar_strategy)

        # Reductions.
        self.reduce_max = with_strategy(P.ReduceMax(keep_dims=True), strategy.twod_strategy)
        self.reduce_sum = with_strategy(P.ReduceSum(keep_dims=False), strategy.twod_strategy)
        self.reduce_sum_2 = with_strategy(P.ReduceSum(keep_dims=False), strategy.twod_strategy)
        self.reduce_sum_3 = with_strategy(P.ReduceSum(keep_dims=False), strategy.oned_strategy)

        # Reshape is the only operator left without a strategy.
        self.reshape = P.Reshape()
        self.log = with_strategy(P.Log(), strategy.twod_strategy)

        self.on_value = Tensor(1.0, mstype.float32)
        self.off_value = Tensor(0.0, mstype.float32)
        self.normalize = with_strategy(P.L2Normalize(axis=1), strategy.twod_strategy_m)
        self.normalize2 = with_strategy(P.L2Normalize(axis=1), strategy.twod_strategy_m)
        self.fc = with_strategy(P.MatMul(transpose_b=True), strategy.twodbc_twod_strategy)

        # Zero-initialised float32 weight of shape [num_classes, emb_size].
        zero_weight = np.zeros([args.num_classes, args.emb_size], np.float32)
        self.weight = Parameter(Tensor(zero_weight), name='model_parallel_weight')
Beispiel #4
0
 def __init__(self, strategy1):
     """Create a MatMul configured with ``strategy1`` and a Cast operator."""
     super().__init__()
     matmul_op = P.MatMul()
     # Keep whatever ``set_strategy`` returns, exactly as the chained call did.
     self.matmul = matmul_op.set_strategy(strategy1)
     self.cast = P.Cast()
def _tensors_allreduce_mean(mul, degree, grad):
    """Run ``grad`` through ``_all_reduce_G`` and scale the result by ``1 / degree``.

    Args:
        mul: Callable used to multiply the reduced gradient by the scale factor.
        degree: Divisor; cast to the gradient's dtype before the reciprocal is taken.
        grad: Gradient to reduce.

    Returns:
        The result of ``mul(reduced_grad, scale)``.
    """
    typed_degree = F.scalar_cast(degree, F.dtype(grad))
    reduced_grad = _all_reduce_G(grad)
    cast_op = P.Cast()
    reciprocal = F.scalar_to_array(1.0 / typed_degree)
    scale = cast_op(reciprocal, F.dtype(reduced_grad))
    return mul(reduced_grad, scale)
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     """Pair a ``TransformerModel`` with its ``TransformerTrainingLoss``.

     Args:
         config: Passed to both the model and the loss.
         is_training: Passed to the model.
         use_one_hot_embeddings: Passed to the model.
     """
     super().__init__(auto_prefix=False)
     model = TransformerModel(config, is_training, use_one_hot_embeddings)
     self.transformer = model
     self.loss = TransformerTrainingLoss(config)
     self.cast = P.Cast()
Beispiel #7
0
    def __init__(self, config):
        """Create the embedding tables, wide weights, deep layers and operators.

        Args:
            config: Read for ``input_emb_dim``, ``batch_size``, ``deep_layers_act``,
                ``init_args``, ``weight_bias_init``, ``emb_init``, ``keep_prob``
                and ``dropout_flag``.
        """
        super(WideDeepModel, self).__init__()

        # Settings taken from the config object.
        self.input_emb_dim = config.input_emb_dim
        self.batch_size = config.batch_size
        self.deep_layer_act = config.deep_layers_act
        self.init_args = config.init_args
        self.weight_init, self.bias_init = config.weight_bias_init
        self.weight_bias_init = config.weight_bias_init
        self.emb_init = config.emb_init
        self.keep_prob = config.keep_prob

        # Five hidden widths of 1024 followed by a single output unit.
        self.layer_dims = [1024] * 5 + [1]
        self.all_dim_list = [self.input_emb_dim] + self.layer_dims

        # Fixed table sizes.
        self.continue_field_size = 32
        self.emb_128_size = 650000
        self.emb64_single_size = 17300
        self.emb64_multi_size = 20900
        self.indicator_size = 16

        # Regularisation coefficients and learning rates.
        self.wide_l1_coef, self.wide_l2_coef = 0.0, 0.0
        self.deep_l1_coef, self.deep_l2_coef = 0.0, 0.0
        self.wide_lr = 0.2
        self.deep_lr = 1.0

        # Embedding tables: every entry becomes an attribute of the same name.
        init_acts_embedding_metrix = [
            ('emb128_embedding', [self.emb_128_size, 128], self.emb_init),
            ('emb64_single', [self.emb64_single_size, 64], self.emb_init),
            ('emb64_multi', [self.emb64_multi_size, 64], self.emb_init),
            ('emb64_indicator', [self.indicator_size, 64], self.emb_init)
        ]
        var_map = init_var_dict(self.init_args, init_acts_embedding_metrix)
        for var_name, _, _ in init_acts_embedding_metrix:
            setattr(self, var_name, var_map[var_name])

        # Wide-part weights and bias, exposed the same way.
        init_acts_wide_weight = [
            ('wide_continue_w', [self.continue_field_size], self.emb_init),
            ('wide_emb128_w', [self.emb_128_size], self.emb_init),
            ('wide_emb64_single_w', [self.emb64_single_size], self.emb_init),
            ('wide_emb64_multi_w', [self.emb64_multi_size], self.emb_init),
            ('wide_indicator_w', [self.indicator_size], self.emb_init),
            ('wide_bias', [1], self.emb_init)
        ]
        var_map = init_var_dict(self.init_args, init_acts_wide_weight)
        for var_name, _, _ in init_acts_wide_weight:
            setattr(self, var_name, var_map[var_name])

        # dense_layer_1 .. dense_layer_5 connect consecutive entries of all_dim_list.
        dims = self.all_dim_list
        for idx in range(1, 6):
            hidden_layer = DenseLayer(dims[idx - 1],
                                      dims[idx],
                                      self.weight_bias_init,
                                      self.deep_layer_act,
                                      drop_out=config.dropout_flag,
                                      convert_dtype=True)
            setattr(self, "dense_layer_%d" % idx, hidden_layer)

        # Output layer: same settings, but built with use_activation=False.
        self.deep_predict = DenseLayer(dims[5],
                                       dims[6],
                                       self.weight_bias_init,
                                       self.deep_layer_act,
                                       drop_out=config.dropout_flag,
                                       convert_dtype=True,
                                       use_activation=False)

        # Operators.
        self.gather_v2 = P.GatherV2()
        self.mul = P.Mul()
        self.reduce_sum_false = P.ReduceSum(keep_dims=False)
        self.reduce_sum_true = P.ReduceSum(keep_dims=True)
        self.reshape = P.Reshape()
        self.square = P.Square()
        self.shape = P.Shape()
        self.tile = P.Tile()
        self.concat = P.Concat(axis=1)
        self.cast = P.Cast()
        self.reduceMean_false = P.ReduceMean(keep_dims=False)
        self.Concat = P.Concat(axis=1)
        self.BiasAdd = P.BiasAdd()
        self.expand_dims = P.ExpandDims()
        self.flatten = Flatten()
Beispiel #8
0
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False):
        """Create the LSTM operator and its randomly initialised weights.

        On an "Ascend" device target one float16 weight/bias pair is created per
        layer and direction; otherwise a single flat float32 weight is created.

        Args:
            input_size: Feature size of the first layer's input.
            hidden_size: Hidden state size, checked with ``check_positive_int``.
            num_layers: Number of layers, checked with ``check_positive_int``.
            has_bias: Whether biases are sampled randomly (Ascend) or counted in
                the flat weight size (other targets).
            batch_first: Stored on the instance; must be a bool.
            dropout: Stored on the instance and passed to ``P.LSTM`` as a float.
            bidirectional: Whether a second direction is created.
        """
        super(LSTM, self).__init__()
        validator.check_value_type("batch_first", batch_first, [bool], self.cls_name)
        validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
        validator.check_positive_int(num_layers, "num_layers", self.cls_name)
        self.is_ascend = context.get_context("device_target") == "Ascend"

        self.batch_first = batch_first
        self.transpose = P.Transpose()
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.lstm = P.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           has_bias=has_bias,
                           bidirectional=bidirectional,
                           dropout=float(dropout))

        gate_size = 4 * hidden_size
        # Every random value is drawn uniformly from [-stdv, stdv).
        stdv = 1 / math.sqrt(hidden_size)
        num_directions = 2 if bidirectional else 1

        if not self.is_ascend:
            # One flat weight holding, per layer and direction, the input block,
            # the hidden block and (optionally) two bias vectors.
            bias_terms = 2 * gate_size if has_bias else 0
            weight_size = 0
            for layer in range(num_layers):
                layer_in = input_size if layer == 0 else hidden_size * num_directions
                per_direction = gate_size * layer_in + gate_size * hidden_size + bias_terms
                weight_size += per_direction * num_directions
            flat_shape = [weight_size, 1, 1]
            w_np = np.random.uniform(-stdv, stdv, tuple(flat_shape)).astype(np.float32)
            self.weight = Parameter(initializer(Tensor(w_np), flat_shape), name='weight')
        else:
            def sample_fp16(shape):
                # Uniform float16 sample of the requested shape.
                return np.random.uniform(-stdv, stdv, shape).astype(np.float16)

            def make_param(values, shape, name):
                return Parameter(initializer(Tensor(values), shape), name=name)

            self.reverse_seq = P.ReverseSequence(batch_dim=1, seq_dim=0)
            self.concat = P.Concat(axis=0)
            self.concat_2dim = P.Concat(axis=2)
            self.cast = P.Cast()
            self.shape = P.Shape()
            if dropout != 0:
                self.dropout_op = nn.Dropout(float(dropout))
            # Shared all-zero bias used whenever has_bias is False.
            zero_bias = np.zeros(gate_size, dtype=np.float16)
            self.w_list = weights = []
            self.b_list = biases = []
            self.rnns_fw = P.DynamicRNN(forget_bias=0.0)
            self.rnns_bw = P.DynamicRNN(forget_bias=0.0)

            directions = ('_fw', '_bw') if bidirectional else ('_fw',)
            for layer in range(num_layers):
                layer_in = input_size if layer == 0 else num_directions * hidden_size
                rows = layer_in + hidden_size
                # Forward first, then backward; the weight is drawn before the
                # bias so the random stream is consumed in a fixed order.
                for tag in directions:
                    weights.append(
                        make_param(sample_fp16((rows, gate_size)), [rows, gate_size],
                                   'weight' + tag + str(layer)))
                    bias_values = sample_fp16(gate_size) if has_bias else zero_bias
                    biases.append(
                        make_param(bias_values, [gate_size], 'bias' + tag + str(layer)))
            self.w_list = ParameterTuple(weights)
            self.b_list = ParameterTuple(biases)
 def __init__(self, config, is_training, use_one_hot_embeddings=False):
     """Pair a ``BertPreTraining`` network with its ``BertPretrainingLoss``.

     Args:
         config: Passed to both the network and the loss.
         is_training: Passed to the network.
         use_one_hot_embeddings: Passed to the network.
     """
     super().__init__()
     pretraining_net = BertPreTraining(config, is_training, use_one_hot_embeddings)
     self.bert = pretraining_net
     self.loss = BertPretrainingLoss(config)
     self.cast = P.Cast()
Beispiel #10
0
    def __init__(self, config):
        """Assemble the detector: anchors, backbone, FPN, RPN, proposals, RoI and RCNN.

        Args:
            config: Configuration object. This method reads the batch sizes,
                class count, anchor settings, feature shapes, RCNN target
                means/stds, image size and the channel settings of each
                sub-network from it, and also passes it on to the sub-networks
                and to the ``roi_init``, ``test_mode_init`` and ``init_tensor``
                helpers.

        Raises:
            AssertionError: If ``config.feature_shapes`` and
                ``config.anchor_strides`` differ in length.
        """
        super(Faster_Rcnn_Resnet50, self).__init__()
        self.dtype = np.float32
        self.ms_type = mstype.float32
        self.train_batch_size = config.batch_size
        self.num_classes = config.num_classes
        self.anchor_scales = config.anchor_scales
        self.anchor_ratios = config.anchor_ratios
        self.anchor_strides = config.anchor_strides
        self.target_means = tuple(config.rcnn_target_means)
        self.target_stds = tuple(config.rcnn_target_stds)

        # Anchor generator: the base sizes are always the anchor strides (the
        # former override local was hard-coded to None, so its branch was dead).
        self.anchor_base_sizes = list(self.anchor_strides)

        self.anchor_generators = []
        for anchor_base in self.anchor_base_sizes:
            self.anchor_generators.append(
                AnchorGenerator(anchor_base, self.anchor_scales,
                                self.anchor_ratios))

        self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)

        featmap_sizes = config.feature_shapes
        # One feature-map size is required per anchor generator.
        assert len(featmap_sizes) == len(self.anchor_generators), \
            "config.feature_shapes must have one entry per anchor stride"

        self.anchor_list = self.get_anchors(featmap_sizes)

        # Backbone resnet50
        self.backbone = ResNetFea(ResidualBlockUsing, config.resnet_block,
                                  config.resnet_in_channels,
                                  config.resnet_out_channels, False)

        # Fpn
        self.fpn_ncek = FeatPyramidNeck(config.fpn_in_channels,
                                        config.fpn_out_channels,
                                        config.fpn_num_outs)

        # Rpn and rpn loss; the stage-one labels are an all-ones uint8 tensor.
        self.gt_labels_stage1 = Tensor(
            np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8))
        self.rpn_with_loss = RPN(config, self.train_batch_size,
                                 config.rpn_in_channels,
                                 config.rpn_feat_channels, config.num_anchors,
                                 config.rpn_cls_out_channels)

        # Proposal: separate generators for the training and test batch sizes.
        self.proposal_generator = Proposal(config, self.train_batch_size,
                                           config.activate_num_classes,
                                           config.use_sigmoid_cls)
        self.proposal_generator.set_train_local(config, True)
        self.proposal_generator_test = Proposal(config, config.test_batch_size,
                                                config.activate_num_classes,
                                                config.use_sigmoid_cls)
        self.proposal_generator_test.set_train_local(config, False)

        # Assign and sampler stage two
        self.bbox_assigner_sampler_for_rcnn = BboxAssignSampleForRcnn(
            config, self.train_batch_size, config.num_bboxes_stage2, True)
        self.decode = P.BoundingBoxDecode(max_shape=(config.img_height, config.img_width),
                                          means=self.target_means,
                                          stds=self.target_stds)
        # Roi
        self.roi_init(config)

        # Rcnn: the second argument is channels * out_size * out_size.
        self.rcnn = Rcnn(
            config, config.rcnn_in_channels * config.roi_layer['out_size'] *
            config.roi_layer['out_size'], self.train_batch_size,
            self.num_classes)

        # Op declare
        self.squeeze = P.Squeeze()
        self.cast = P.Cast()

        self.concat = P.Concat(axis=0)
        self.concat_1 = P.Concat(axis=1)
        self.concat_2 = P.Concat(axis=2)
        self.reshape = P.Reshape()
        self.select = P.Select()
        self.greater = P.Greater()
        self.transpose = P.Transpose()

        # Improve speed
        self.concat_start = min(self.num_classes - 2, 55)
        self.concat_end = (self.num_classes - 1)

        # Test mode
        self.test_mode_init(config)

        # Init tensor
        self.init_tensor(config)
Beispiel #11
0
 def __init__(self):
     """Create the AllReduce and Cast operators held by this instance."""
     super().__init__()
     self.reduce = P.AllReduce()
     self.cast = P.Cast()
Beispiel #12
0
    def __init__(self,
                 vocab_size,
                 embedding_size,
                 field_size,
                 param_init='normal',
                 target='CPU',
                 slice_mode='batch_slice',
                 feature_num_list=None,
                 max_norm=None,
                 sparse=True,
                 operator='SUM'):
        """Create the lookup's operators and, in auto-parallel modes, shard them.

        Args:
            vocab_size: Forwarded to the parent constructor.
            embedding_size: Forwarded to the parent constructor.
            field_size: Must be an ``int``; stored as ``self.field_size``.
            param_init: Forwarded to the parent constructor.
            target: Forwarded to the parent constructor.
            slice_mode: Forwarded to the parent constructor and used here to
                select the shard layout in auto-parallel modes.
            feature_num_list: Forwarded to the parent constructor.
            max_norm: Forwarded to the parent constructor.
            sparse: Forwarded to the parent constructor.
            operator: One of the class constants ``OPERATOR_SUM``,
                ``OPERATOR_MAX`` or ``OPERATOR_MEAN``; selects the merge operator.

        Raises:
            ValueError: If ``operator`` is not supported, or if an auto-parallel
                mode is active and ``slice_mode`` is not one of the handled values.
        """
        super(MultiFieldEmbeddingLookup,
              self).__init__(vocab_size, embedding_size, param_init, target,
                             slice_mode, feature_num_list, max_norm, sparse)
        self.field_size = validator.check_value_type('field_size', field_size,
                                                     [int], self.cls_name)
        self.operator = operator

        self.mul = P.Mul()
        self.inf_mask_mul = P.Mul()
        self.bias_add = P.TensorAdd()
        self.inf_add = P.TensorAdd()
        self.merge_op = None
        self.count_op = P.UnsortedSegmentSum()
        self.abs = P.Abs()
        self.equal = P.Equal()
        self.add = P.TensorAdd()
        self.cast = P.Cast()
        self.div_no_nan = P.DivNoNan()
        self.expand = P.ExpandDims()
        self.max_mask_mul = P.Mul()
        self.max_no_equal = P.NotEqual()

        # SUM and MEAN both merge with a segment sum; MAX uses a segment max.
        if operator in (MultiFieldEmbeddingLookup.OPERATOR_SUM,
                        MultiFieldEmbeddingLookup.OPERATOR_MEAN):
            self.merge_op = P.UnsortedSegmentSum()
        elif operator == MultiFieldEmbeddingLookup.OPERATOR_MAX:
            self.merge_op = P.UnsortedSegmentMax()
        else:
            raise ValueError(
                "The operator supports ['SUM', 'MAX', 'MEAN'], but found: " +
                str(operator))

        parallel_mode = _get_parallel_mode()
        is_auto_parallel = parallel_mode in (ParallelMode.SEMI_AUTO_PARALLEL,
                                             ParallelMode.AUTO_PARALLEL)
        is_max = operator == MultiFieldEmbeddingLookup.OPERATOR_MAX
        if slice_mode in ["table_row_slice", "batch_slice"
                          ] and is_auto_parallel:
            # The group size is only queried inside the auto-parallel branches.
            group_size = get_group_size()
            self.merge_op.shard(((group_size, 1, 1), (group_size, 1)))
            self.expand.shard(((group_size, ), ))
            self.bias_add.shard(((1, 1), (1, 1)))
            self.mul.shard(((group_size, 1, 1), (group_size, 1, 1)))
            self.count_op.shard(((group_size, 1), (group_size, 1)))
            self.add.shard(((group_size, ), (group_size, )))
            self.div_no_nan.shard(((group_size, 1), (group_size, 1)))
            self.max_mask_mul.shard(((group_size, 1), (group_size, 1)))
            self.max_no_equal.shard(((1, ), ()))
            if is_max:
                # MAX replaces the merge/count layouts set just above.
                self.equal.shard(((group_size, 1, 1), ()))
                self.inf_mask_mul.shard(((group_size, 1, 1), ()))
                self.merge_op.shard(((group_size, 1), (group_size, )))
                self.count_op.shard(((group_size, ), (group_size, )))
                self.inf_add.shard(((group_size, 1, 1), (group_size, 1, 1)))
        elif slice_mode == "table_column_slice" and is_auto_parallel:
            group_size = get_group_size()
            self.merge_op.shard(((1, 1, group_size), (1, 1)))
            self.div_no_nan.shard(((1, group_size), (1, 1)))
            self.bias_add.shard(((1, 1), (1, 1)))
            self.mul.shard(((1, 1, 1), (1, 1, group_size)))
            self.count_op.shard(((1, 1), (1, 1)))
            self.add.shard(((1, ), (1, )))
            self.max_mask_mul.shard(((1, group_size), (1, 1)))
            self.expand.shard(((1, ), ))
            self.max_no_equal.shard(((1, ), ()))
            if is_max:
                self.equal.shard(((1, 1, 1), ()))
                self.inf_mask_mul.shard(((1, 1, 1), ()))
                self.merge_op.shard(((1, group_size), (1, )))
                self.count_op.shard(((1, ), (1, )))
                self.inf_add.shard(((1, 1, group_size), (1, 1, 1)))
        elif is_auto_parallel:
            # Adjacent literals keep the message on one line; the former
            # backslash continuation inside the string leaked the source
            # indentation into the message text.
            raise ValueError(
                "slice_mode should be ['table_row_slice', 'batch_slice' and "
                "'table_column_slice'], but get " + str(slice_mode))

        # Min value for fp32
        self.negative_inf_value = -3.402823466E+38
Beispiel #13
0
    def __init__(self, weights_update=False):
        """
        VGG16 feature extraction

        Args:
            weights_update(bool): forwarded to the conv1_x and conv2_x layers only;
                the conv3_x to conv5_x layers are built without it. Default is False.
        """
        super(VGG16FeatureExtraction, self).__init__()
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, pad_mode="same")
        self.avg_pool = nn.AvgPool2d(kernel_size=2, stride=2)

        # (attribute name, in_channels, out_channels, forward weights_update?)
        conv_table = (
            ("conv1_1", 3, 64, True),
            ("conv1_2", 64, 64, True),
            ("conv2_1", 64, 128, True),
            ("conv2_2", 128, 128, True),
            ("conv3_1", 128, 256, False),
            ("conv3_2", 256, 256, False),
            ("conv3_3", 256, 256, False),
            ("conv4_1", 256, 512, False),
            ("conv4_2", 512, 512, False),
            ("conv4_3", 512, 512, False),
            ("conv5_1", 512, 512, False),
            ("conv5_2", 512, 512, False),
            ("conv5_3", 512, 512, False),
        )
        # Every convolution uses a 3x3 kernel with padding 1; layers are created
        # in table order.
        for attr_name, n_in, n_out, forward_flag in conv_table:
            conv_kwargs = {"in_channels": n_in,
                           "out_channels": n_out,
                           "kernel_size": 3,
                           "padding": 1}
            if forward_flag:
                conv_kwargs["weights_update"] = weights_update
            setattr(self, attr_name, _conv(**conv_kwargs))
        self.cast = P.Cast()
 def __init__(self):
     """Create the add, bias-add, ReLU and cast operators held by this instance."""
     super().__init__()
     self.add = P.TensorAdd()
     self.biasadd = P.BiasAdd()
     self.relu = P.ReLU()
     self.cast = P.Cast()
    def __init__(self, config, use_one_hot_embeddings=False):
        """Build the inference-time network: embeddings, encoder and beam-search decoder.

        Args:
            config: Model configuration. It is deep-copied first and both
                dropout probabilities on the copy are set to 0.0.
            use_one_hot_embeddings (bool): Passed through to `EmbeddingLookup`.
                Default: False.
        """
        super(TransformerInferModel, self).__init__()
        # Operate on a private copy so the dropout override stays local.
        config = copy.deepcopy(config)
        config.hidden_dropout_prob = 0.0
        config.attention_dropout_prob = 0.0

        hidden = config.hidden_size
        layer_count = config.num_hidden_layers

        self.input_mask_from_dataset = config.input_mask_from_dataset
        self.batch_size = config.batch_size
        self.seq_length = config.seq_length
        self.hidden_size = hidden
        self.num_hidden_layers = layer_count
        # Embedding and attention widths both follow the hidden size.
        self.embedding_size = hidden
        self.attn_embed_dim = hidden
        self.num_layers = layer_count
        self.last_idx = layer_count - 1

        token_embedding = EmbeddingLookup(
            vocab_size=config.vocab_size,
            embed_dim=hidden,
            use_one_hot_embeddings=use_one_hot_embeddings)
        self.embedding_lookup = token_embedding

        position_embedding = PositionalEmbedding(
            embedding_size=hidden,
            max_position_embeddings=config.max_position_embeddings)
        self.positional_embedding = position_embedding

        # Used for inference.
        log_probs = PredLogProbs(
            batch_size=config.batch_size * config.beam_width,
            seq_length=1,
            width=hidden,
            compute_type=config.compute_type)
        self.projection = log_probs

        self.encoder = TransformerEncoder(
            attn_embed_dim=hidden,
            encoder_layers=layer_count,
            num_attn_heads=config.num_attention_heads,
            intermediate_size=config.intermediate_size,
            attention_dropout_prob=config.attention_dropout_prob,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type)

        # Single decoding step; it reuses the embedding and projection cells
        # created above.
        step_cell = TransformerDecoderStep(
            config=config,
            num_hidden_layers=config.num_hidden_layers,
            attn_embed_dim=hidden,
            seq_length=config.seq_length,
            num_attn_heads=config.num_attention_heads,
            intermediate_size=config.intermediate_size,
            hidden_dropout_prob=config.hidden_dropout_prob,
            compute_type=config.compute_type,
            initializer_range=config.initializer_range,
            hidden_act="relu",
            embedding_lookup=token_embedding,
            positional_embedding=position_embedding,
            attn_dropout_prob=config.attention_dropout_prob,
            projection=log_probs)

        # Beam search wraps the step cell.
        self.decoder = BeamSearchDecoder(
            batch_size=config.batch_size,
            seq_length=config.seq_length,
            vocab_size=config.vocab_size,
            decoder=step_cell,
            beam_width=config.beam_width,
            length_penalty_weight=config.length_penalty_weight,
            max_decode_length=config.max_decode_length)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.expand = P.ExpandDims()
        self.multiply = P.Mul()

        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(
            config)

        # Used for inference.
        self.tile_beam = TileBeam(beam_width=config.beam_width)
        self.encode_mask = Tensor(
            np.ones(shape=(config.batch_size, config.max_decode_length)),
            dtype=mstype.float32)

        # sqrt(embedding_size) kept as a one-element float32 tensor.
        self.scale = Tensor([math.sqrt(float(hidden))], dtype=mstype.float32)
        self.reshape = P.Reshape()
 def __init__(self, dtype):
     """Keep `dtype` on the instance and create a `P.Cast` primitive as `self.op`."""
     super(Cast, self).__init__()
     self.dtype = dtype
     self.op = P.Cast()
Beispiel #17
0
    def __init__(self,
                 num_features,
                 eps=1e-5,
                 momentum=0.9,
                 affine=True,
                 gamma_init='ones',
                 beta_init='zeros',
                 moving_mean_init='zeros',
                 moving_var_init='ones',
                 use_batch_statistics=None,
                 device_num_each_group=1):
        """Validate the arguments and create the parameters and operators of the layer.

        Args:
            num_features: Size passed to every parameter initializer; must be >= 1.
            eps: Epsilon handed to the batch-norm primitives. Default: 1e-5.
            momentum: Must lie in [0, 1]; stored as ``1.0 - momentum``. Default: 0.9.
            affine: Used as ``requires_grad`` for gamma and beta. Default: True.
            gamma_init: Initializer for gamma. Default: 'ones'.
            beta_init: Initializer for beta. Default: 'zeros'.
            moving_mean_init: Initializer for the moving mean. Default: 'zeros'.
            moving_var_init: Initializer for the moving variance. Default: 'ones'.
            use_batch_statistics: Stored unchanged on the instance. Default: None.
            device_num_each_group: Group size; when it differs from 1 a
                communication group and an AllReduce primitive are set up.
                Default: 1.

        Raises:
            ValueError: If `num_features` < 1 or `momentum` is outside [0, 1].
        """
        super(_BatchNorm, self).__init__()
        if num_features < 1:
            raise ValueError("num_features must be at least 1")
        if momentum < 0 or momentum > 1:
            raise ValueError(
                "momentum should be a number in range [0, 1], but got {}".
                format(momentum))

        self.use_batch_statistics = use_batch_statistics
        self.num_features = num_features
        self.eps = eps

        # Running statistics are never trained.
        mean_init = initializer(moving_mean_init, num_features)
        self.moving_mean = Parameter(mean_init, name="mean", requires_grad=False)
        var_init = initializer(moving_var_init, num_features)
        self.moving_variance = Parameter(var_init, name="variance", requires_grad=False)
        # Scale and shift are trained only when `affine` is set.
        self.gamma = Parameter(initializer(gamma_init, num_features),
                               name="gamma", requires_grad=affine)
        self.beta = Parameter(initializer(beta_init, num_features),
                              name="beta", requires_grad=affine)

        self.group = check_int_positive(device_num_each_group)
        self.is_global = False
        if self.group != 1:
            self.rank_id = get_rank()
            self.rank_size = get_group_size()
            self.device_list = list(range(self.rank_size))
            self.rank_list = self.list_group(self.device_list, self.group)
            self.rank_list_idx = len(self.rank_list)
            for idx, ranks in enumerate(self.rank_list):
                if self.rank_id not in ranks:
                    continue
                # This rank belongs to group `idx`: register the group and
                # build the matching AllReduce.
                group_name = 'group' + str(idx)
                self.is_global = True
                management.create_group(group_name, ranks)
                self.all_reduce = P.AllReduce(
                    P.ReduceOp.SUM, group_name).add_prim_attr('fusion', 1)

        self.shape = P.Shape()
        self.reduce_mean = P.ReduceMean(keep_dims=True)
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        self.cast = P.Cast()
        self.dtype = P.DType()
        self.reshape = P.Reshape()

        self.is_ascend = context.get_context("device_target") == "Ascend"
        self.is_graph_mode = context.get_context("mode") == context.GRAPH_MODE
        self.momentum = 1.0 - momentum
        self.is_ge_backend = bool(context.get_context("enable_ge"))

        # Graph mode on GE/Ascend uses the plain BatchNorm primitive for
        # training; every other combination uses FusedBatchNorm.
        if self.is_graph_mode and (self.is_ge_backend or self.is_ascend):
            self.bn_train = P.BatchNorm(is_training=True, epsilon=self.eps)
        else:
            self.bn_train = P.FusedBatchNorm(mode=1,
                                             epsilon=self.eps,
                                             momentum=self.momentum)
        self.bn_infer = P.BatchNorm(is_training=False, epsilon=self.eps)

        vector_pair_strategy = ((1, ), (1, ))
        vector_scalar_strategy = ((1, ), ())
        self.sub_mean = P.Sub().set_strategy(vector_pair_strategy)
        self.sub_var = P.Sub().set_strategy(vector_pair_strategy)
        self.mul_mean = P.Mul().set_strategy(vector_scalar_strategy)
        self.mul_var = P.Mul().set_strategy(vector_scalar_strategy)
        self.assign_sub_mean = P.AssignSub().set_strategy(vector_pair_strategy)
        self.assign_sub_var = P.AssignSub().set_strategy(vector_pair_strategy)
Beispiel #18
0
    def __init__(self, config, is_training, use_one_hot_embeddings=False):
        """Create the embedding, encoder and pooling components of the model.

        Args:
            config: Model configuration; deep-copied before use.
            is_training (bool): When false, both dropout probabilities on the
                copied config are set to 0.0.
            use_one_hot_embeddings (bool): Forwarded to the embedding and
                encoder cells. Default: False.
        """
        super(BertModel, self).__init__()
        config = copy.deepcopy(config)
        if not is_training:
            # No dropout outside training.
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        batch = config.batch_size
        seq_len = config.seq_length
        hidden = config.hidden_size
        layer_count = config.num_hidden_layers

        self.input_mask_from_dataset = config.input_mask_from_dataset
        self.token_type_ids_from_dataset = config.token_type_ids_from_dataset
        self.batch_size = batch
        self.seq_length = seq_len
        self.hidden_size = hidden
        self.num_hidden_layers = layer_count
        self.embedding_size = hidden
        self.token_type_ids = None

        self.last_idx = layer_count - 1
        embedding_shape = [batch, seq_len, hidden]

        if not config.token_type_ids_from_dataset:
            # The dataset supplies no token type ids: use an all-zero tensor.
            self.token_type_ids = initializer(
                "zeros", [batch, seq_len], mstype.int32).init_data()

        self.bert_embedding_lookup = EmbeddingLookup(
            vocab_size=config.vocab_size,
            embedding_size=hidden,
            embedding_shape=embedding_shape,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range)

        self.bert_embedding_postprocessor = EmbeddingPostprocessor(
            embedding_size=hidden,
            embedding_shape=embedding_shape,
            use_relative_positions=config.use_relative_positions,
            use_token_type=True,
            token_type_vocab_size=config.type_vocab_size,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=0.02,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

        self.bert_encoder = BertTransformer(
            batch_size=batch,
            hidden_size=hidden,
            seq_length=seq_len,
            num_attention_heads=config.num_attention_heads,
            num_hidden_layers=layer_count,
            intermediate_size=config.intermediate_size,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            use_relative_positions=config.use_relative_positions,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type,
            return_all_encoders=True,
            enable_fused_layernorm=config.enable_fused_layernorm)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = P.StridedSlice()
        self.squeeze_1 = P.Squeeze(axis=1)

        # hidden -> hidden dense layer with tanh, run in the compute type.
        pooler = nn.Dense(hidden, hidden,
                          activation="tanh",
                          weight_init=TruncatedNormal(config.initializer_range))
        self.dense = pooler.to_float(config.compute_type)
        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
 def __init__(self):
     """Create the clip-by-norm cell plus the cast and dtype-query primitives."""
     super(ClipGradients, self).__init__()
     self.clip_by_norm = nn.ClipByNorm()
     self.cast, self.dtype = P.Cast(), P.DType()
Beispiel #20
0
 def __init__(self):
     """Instantiate the softmax (axis 1), cast, ReLU and bias-add primitives."""
     super(Net, self).__init__()
     # Activation-style primitives.
     self.softmax, self.relu = P.Softmax(axis=1), P.ReLU()
     # Type-conversion and bias primitives.
     self.cast, self.biasadd = P.Cast(), P.BiasAdd()
    def __init__(self,
                 config,
                 batch_size,
                 num_classes,
                 use_sigmoid_cls,
                 target_means=(.0, .0, .0, .0),
                 target_stds=(1.0, 1.0, 1.0, 1.0)):
        """Store the proposal settings and create the operators used by the cell.

        Args:
            config: Configuration object; the ``rpn_proposal_*`` fields,
                ``anchor_strides``, ``feature_shapes``, ``img_height`` and
                ``img_width`` are read from it.
            batch_size: Stored on the instance.
            num_classes: Number of classes; reduced by one for the output
                channel count when `use_sigmoid_cls` is true.
            use_sigmoid_cls (bool): Selects Sigmoid (true) or Softmax over
                axis 1 (false) as the activation.
            target_means (tuple): Means given to the box decoder.
            target_stds (tuple): Standard deviations given to the box decoder.

        Raises:
            ValueError: If the resulting number of output channels is <= 0.
        """
        super(Proposal, self).__init__()
        cfg = config
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.target_means = target_means
        self.target_stds = target_stds
        self.use_sigmoid_cls = use_sigmoid_cls

        # Pick channel count, activation and reshape target in one place.
        if use_sigmoid_cls:
            out_channels, activation, reshape_shape = num_classes - 1, P.Sigmoid(), (-1, 1)
        else:
            out_channels, activation, reshape_shape = num_classes, P.Softmax(axis=1), (-1, 2)
        self.cls_out_channels = out_channels
        self.activation = activation
        self.reshape_shape = reshape_shape

        if out_channels <= 0:
            raise ValueError('num_classes={} is too small'.format(num_classes))

        # Proposal settings copied from the configuration.
        self.num_pre = cfg.rpn_proposal_nms_pre
        self.min_box_size = cfg.rpn_proposal_min_bbox_size
        self.nms_thr = cfg.rpn_proposal_nms_thr
        self.nms_post = cfg.rpn_proposal_nms_post
        self.nms_across_levels = cfg.rpn_proposal_nms_across_levels
        self.max_num = cfg.rpn_proposal_max_num
        self.num_levels = len(cfg.anchor_strides)

        # Operator definitions.
        self.squeeze = P.Squeeze()
        self.reshape = P.Reshape()
        self.cast = P.Cast()

        self.feature_shapes = cfg.feature_shapes
        self.transpose_shape = (1, 2, 0)

        image_shape = (cfg.img_height, cfg.img_width)
        self.decode = P.BoundingBoxDecode(max_shape=image_shape,
                                          means=self.target_means,
                                          stds=self.target_stds)

        self.nms = P.NMSWithMask(self.nms_thr)
        self.concat_axis0 = P.Concat(axis=0)
        self.concat_axis1 = P.Concat(axis=1)
        self.split = P.Split(axis=1, output_num=5)
        self.min = P.Minimum()
        self.gatherND = P.GatherNd()
        self.slice = P.Slice()
        self.select = P.Select()
        self.greater = P.Greater()
        self.transpose = P.Transpose()
        self.tile = P.Tile()
        # Called with training=True by default, after the attributes above exist.
        self.set_train_local(config, training=True)

        self.multi_10 = Tensor(10.0, mstype.float16)
Beispiel #22
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 has_bias=True,
                 activation=None):
        """Create the weights, second-order buffers and operators of the layer.

        Args:
            in_channels (int): Input width; validated as a positive int.
            out_channels (int): Output width; validated as a positive int.
            weight_init: Initializer for the weight. A Tensor must have shape
                ``(out_channels, in_channels)``. Default: 'normal'.
            bias_init: Initializer for the bias. A Tensor must have shape
                ``(out_channels,)``. Default: 'zeros'.
            damping: Stored as a non-trainable Parameter. Default: 0.03.
            loss_scale: Its reciprocal is stored as a float16 Tensor. Default: 1.
            frequency: Stored as an int32 Tensor in ``self.freq``. Default: 278.
            batch_size: Stored as a float16 Tensor and used as the size of the
                ``dampingA`` / ``dampingG`` identity matrices. Default: 32.
            has_bias (bool): Whether a bias Parameter is created. Default: True.
            activation: Resolved through `get_activation`. Default: None.

        Raises:
            ValueError: If a Tensor `weight_init` or `bias_init` has the wrong
                shape.
        """
        super(Dense_SKFAC_GPU, self).__init__()
        self.in_channels = Validator.check_positive_int(in_channels)
        self.out_channels = Validator.check_positive_int(out_channels)
        self.has_bias = Validator.check_bool(has_bias)
        self.skfac = True
        if isinstance(weight_init, Tensor):
            if weight_init.ndim != 2 or weight_init.shape[0] != out_channels or \
                    weight_init.shape[1] != in_channels:
                raise ValueError("weight_init shape error")

        self.weight = Parameter(
            initializer(weight_init, [out_channels, in_channels]))

        if self.has_bias:
            if isinstance(bias_init, Tensor):
                if bias_init.ndim != 1 or bias_init.shape[0] != out_channels:
                    raise ValueError("bias_init shape error")

            self.bias = Parameter(initializer(bias_init, [out_channels]))

        # `self.matmul` is assigned exactly once, further down, with
        # transpose_a=True. An earlier transpose_b=True assignment was always
        # overwritten by that one and has been dropped; the transpose_b
        # variant remains available as `self.matmul_B`.
        self.bias_add = P.BiasAdd()
        split_dim = 128
        self.activation = get_activation(activation)
        self.activation_flag = self.activation is not None
        # Square float32 buffers sized by in_channels / out_channels; not trained.
        self.matrix_A_inv = Parameter(Tensor(
            np.zeros((in_channels, in_channels)).astype(np.float32)),
                                      requires_grad=False)
        self.matrix_G_inv = Parameter(Tensor(
            np.zeros((out_channels, out_channels)).astype(np.float32)),
                                      requires_grad=False)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                                  requires_grad=False)
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.mul = P.Mul()
        self.matmul = P.MatMul(transpose_a=True)
        self.matmul_B = P.MatMul(transpose_b=True)
        self.matmul_ = P.MatMul()
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        # Hook `save_gradient` into the backward pass.
        self.getG = P.InsertGradientOf(self.save_gradient)
        self.damping = Parameter(Tensor(damping), requires_grad=False)
        self.dampingA = Tensor(np.identity(batch_size), mstype.float32)
        self.dampingG = Tensor(np.identity(batch_size), mstype.float32)
        self.I_G = Tensor(np.identity(out_channels), mstype.float32)
        self.I_A = Tensor(np.identity(in_channels), mstype.float32)
        self.cast = P.Cast()
        self.gather = P.Gather()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.add = P.Add()
        self.sqrt = P.Sqrt()
        self.cholesky = P.CholeskyTrsm(split_dim=split_dim)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
        # NOTE(review): the 32 below is a literal and does not follow the
        # `batch_size` argument - confirm whether that is intended.
        self.batch_coefficient = Tensor((1 / 32)**0.5, mstype.float32)
Beispiel #23
0
 def __init__(self, strategy1, strategy2, strategy3):
     """Create two Mul and two Cast primitives with the given strategies.

     `strategy1` and `strategy2` go to the two Mul primitives; both Cast
     primitives use `strategy3`.
     """
     super().__init__()
     self.mul, self.mul2 = (P.Mul().set_strategy(strategy1),
                            P.Mul().set_strategy(strategy2))
     self.cast, self.cast2 = (P.Cast().set_strategy(strategy3),
                              P.Cast().set_strategy(strategy3))
Beispiel #24
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 bias_init='zeros'):
        """Initialise the convolution and the extra second-order buffers and operators.

        The first twelve convolution arguments are forwarded positionally to
        the parent initialiser. `damping`, `loss_scale`, `frequency` and
        `batch_size` are stored as a Parameter / Tensors on the instance
        (`loss_scale` as its reciprocal).
        """
        # Assigned before the parent initialiser runs; this ordering is kept.
        self.skfac = True
        self.hw = kernel_size * kernel_size
        kernel_size = twice(kernel_size)
        super(Conv2d_SKFAC_GPU, self).__init__(
            in_channels, out_channels, kernel_size, stride,
            pad_mode, padding, dilation, group,
            data_format, has_bias, weight_init, bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group)

        # Side lengths of the two square buffers.
        a_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
        g_dim = self.out_channels
        self.matrix_A_dim = a_dim
        self.matrix_G_dim = g_dim
        self.matrix_A_inv = Parameter(
            np.zeros((a_dim, a_dim)).astype(np.float32), requires_grad=False)
        self.matrix_G_inv = Parameter(
            np.zeros((g_dim, g_dim)).astype(np.float32), requires_grad=False)
        self.cov_step = Parameter(initializer(0, [1], mstype.int32),
                                  requires_grad=False)

        # Primitive operators.
        self.img2col = P.Im2Col(kernel_size=kernel_size,
                                stride=stride,
                                pad_mode="same")
        self.matmul = P.MatMul(transpose_a=True)
        self.matmul_ = P.MatMul()
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.mul = P.Mul()
        # Hook `save_gradient` into the backward pass.
        self.getG = P.InsertGradientOf(self.save_gradient)

        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.batch_size = Tensor(batch_size, mstype.float16)
        self.transpose = P.Transpose()
        self.cast = P.Cast()
        self.gather = P.Gather()
        self.freq = Tensor(frequency, mstype.int32)
        self.axis = 0
        self.sqrt = P.Sqrt()
        self.reduce_mean = P.ReduceMean(keep_dims=False)

        self.damping = Parameter(Tensor(damping), requires_grad=False)
        # Identity matrices of the sizes used by the layer.
        self.dampingA = Tensor(np.identity(batch_size), mstype.float32)
        self.dampingG = Tensor(np.identity(batch_size), mstype.float32)
        self.I_G = Tensor(np.identity(out_channels), mstype.float32)
        self.I_A = Tensor(np.identity(a_dim), mstype.float32)
        self.cholesky = P.CholeskyTrsm(split_dim=128)
        self.vector_matmul = P.BatchMatMul(transpose_a=True)
        self.batch_coefficient = Tensor((1 / 32)**0.5, mstype.float32)
import mindspore.common.dtype as mstype

# Plain operator primitives.
Conv = P.Conv2D(
    out_channel=64,
    kernel_size=7,
    mode=1,
    pad_mode="valid",
    pad=0,
    stride=1,
    dilation=1,
    group=1,
)
Relu = P.ReLU()
Reduce = P.ReduceOp()
Biasadd = P.BiasAdd()
Biasaddgrad = G.BiasAddGrad()
Cast = P.Cast()
Add = P.TensorAdd()
Sub = P.Sub()

# Primitives created by name only.
Fusion = Primitive('FusionOp')
Fusion_relu_relu = Primitive('FusionOp_ReLU_ReLU')
Fusion_biasadd = Primitive('FusionOp_ReLU_ReLU_ReLU_BiasAdd_ReLU_ReLU_ReLU')
Fusion_biasaddgrad = Primitive(
    'FusionOp_ReLU_ReLU_ReLU_BiasAddGrad_ReLU_ReLU_ReLU')
make_tuple = Primitive('make_tuple')


class FnDict:
    """Holder object whose `fnDict` attribute starts out as an empty dict."""

    def __init__(self):
        """Create the empty `fnDict` mapping for this instance."""
        self.fnDict = dict()
Beispiel #26
0
 def __init__(self,
              from_tensor_width,
              to_tensor_width,
              from_seq_length,
              to_seq_length,
              num_attention_heads=1,
              size_per_head=512,
              query_act=None,
              key_act=None,
              value_act=None,
              has_attention_mask=False,
              attention_probs_dropout_prob=0.0,
              use_one_hot_embeddings=False,
              initializer_range=0.02,
              do_return_2d_tensor=False,
              use_relative_positions=False,
              compute_type=mstype.float32):
     """Create the projections, constants and operators of the attention cell.

     Args:
         from_tensor_width: Input width of the query projection.
         to_tensor_width: Input width of the key and value projections.
         from_seq_length: Sequence length used in the "from" reshape targets.
         to_seq_length: Sequence length used in the "to" reshape target.
         num_attention_heads: Number of heads. Default: 1.
         size_per_head: Width of each head. Default: 512.
         query_act / key_act / value_act: Activations of the three Dense
             projections. Default: None.
         has_attention_mask (bool): When true, the mask-related primitives
             are created. Default: False.
         attention_probs_dropout_prob: ``nn.Dropout`` receives one minus
             this value. Default: 0.0.
         use_one_hot_embeddings (bool): Forwarded to the relative-position
             generator. Default: False.
         initializer_range: Range of the TruncatedNormal initializer.
             Default: 0.02.
         do_return_2d_tensor (bool): Chooses the 2-D or 3-D return shape.
             Default: False.
         use_relative_positions (bool): When true, a relative-position
             embedding generator is created. Default: False.
         compute_type: dtype for the constants and Dense layers.
             Default: mstype.float32.
     """
     super(BertAttention, self).__init__()
     units = num_attention_heads * size_per_head

     self.from_seq_length = from_seq_length
     self.to_seq_length = to_seq_length
     self.num_attention_heads = num_attention_heads
     self.size_per_head = size_per_head
     self.has_attention_mask = has_attention_mask
     self.use_relative_positions = use_relative_positions

     # 1 / sqrt(size_per_head) as a one-element tensor.
     inv_sqrt_head = 1.0 / math.sqrt(float(size_per_head))
     self.scores_mul = Tensor([inv_sqrt_head], dtype=compute_type)
     self.reshape = P.Reshape()
     self.shape_from_2d = (-1, from_tensor_width)
     self.shape_to_2d = (-1, to_tensor_width)

     # One initializer instance is shared by all three projections.
     weight = TruncatedNormal(initializer_range)

     def _dense(width, act):
         """Build a `width -> units` Dense layer in the compute type."""
         layer = nn.Dense(width, units, activation=act, weight_init=weight)
         return layer.to_float(compute_type)

     self.query_layer = _dense(from_tensor_width, query_act)
     self.key_layer = _dense(to_tensor_width, key_act)
     self.value_layer = _dense(to_tensor_width, value_act)

     self.shape_from = (-1, from_seq_length, num_attention_heads, size_per_head)
     self.shape_to = (-1, to_seq_length, num_attention_heads, size_per_head)
     self.matmul_trans_b = P.BatchMatMul(transpose_b=True)
     self.multiply = P.Mul()
     self.transpose = P.Transpose()
     # Axis permutations used with the transpose primitive.
     self.trans_shape = (0, 2, 1, 3)
     self.trans_shape_relative = (2, 0, 1, 3)
     self.trans_shape_position = (1, 2, 0, 3)
     self.multiply_data = Tensor([-10000.0], dtype=compute_type)
     self.matmul = P.BatchMatMul()
     self.softmax = nn.Softmax()
     self.dropout = nn.Dropout(1 - attention_probs_dropout_prob)

     if has_attention_mask:
         # Primitives that exist only when a mask is used.
         self.expand_dims = P.ExpandDims()
         self.sub = P.Sub()
         self.add = P.Add()
         self.cast = P.Cast()
         self.get_dtype = P.DType()

     self.shape_return = ((-1, units) if do_return_2d_tensor
                          else (-1, from_seq_length, units))
     self.cast_compute_type = SaturateCast(dst_type=compute_type)

     if use_relative_positions:
         self._generate_relative_positions_embeddings = RelaPosEmbeddingsGenerator(
             length=to_seq_length,
             depth=size_per_head,
             max_relative_position=16,
             initializer_range=initializer_range,
             use_one_hot_embeddings=use_one_hot_embeddings)
Beispiel #27
0
def hard_swish(x):
    """Return ``x * clip(x + 3, 0, 6) / 6`` computed after casting `x` to float32."""
    x_f32 = P.Cast()(x, ms.float32)
    # Gate value: (x + 3) clipped to [0, 6], then divided by 6.
    gate = clip_by_value(x_f32 + 3.0, 0.0, 6.0) / 6.0
    return x_f32 * gate
Beispiel #28
0
 def __init__(self, config):
     """Create the cast / reshape primitives and the reshape target shape.

     Args:
         config: Configuration object; only ``seq_length`` is read here.
     """
     super(CreateAttentionMaskFromInputMask, self).__init__()
     self.input_mask = None
     self.cast, self.reshape = P.Cast(), P.Reshape()
     # Target shape: (-1, 1, seq_length).
     self.shape = (-1, 1, config.seq_length)
Beispiel #29
0
 def __init__(self, type0, type1):
     """Keep both type arguments and create the cast primitive as `self.Cast`."""
     super(Net, self).__init__()
     self.type0, self.type1 = type0, type1
     self.Cast = P.Cast()
Beispiel #30
0
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 pad_mode='same',
                 padding=0,
                 dilation=1,
                 group=1,
                 data_format='NCHW',
                 has_bias=False,
                 weight_init='normal',
                 damping=0.03,
                 loss_scale=1,
                 frequency=278,
                 batch_size=32,
                 bias_init='zeros'):
        """Initialise the convolution and the extra buffers and operators of the layer.

        The first twelve convolution arguments are forwarded positionally to
        the parent initialiser. `damping`, `frequency` and `loss_scale` are
        stored as Tensors (`loss_scale` as its reciprocal), and `batch_size`
        is stored unchanged.
        """
        # Assigned before the parent initialiser runs.
        self.thor = True
        # 4-tuples handed to CusImg2Col further down; built from the scalar
        # kernel_size / stride before kernel_size is expanded by twice().
        ksizes = (1, kernel_size, kernel_size, 1)
        self.hw = kernel_size * kernel_size
        strides = (1, stride, stride, 1)
        kernel_size = twice(kernel_size)
        super(Conv2d_Thor, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            pad_mode,
            padding,
            dilation,
            group,
            data_format,
            has_bias,
            weight_init,
            bias_init,
        )
        self.conv2d = P.Conv2D(out_channel=self.out_channels,
                               kernel_size=self.kernel_size,
                               mode=1,
                               pad_mode=self.pad_mode,
                               pad=self.padding,
                               stride=self.stride,
                               dilation=self.dilation,
                               group=self.group
                               )
        self.batch_size = batch_size
        self.img2col = P.CusImg2Col(ksizes=ksizes, strides=strides)
        self.cube_matmul = P.CusMatMulCube(transpose_a=True)
        self.matrix_combine = P.CusMatrixCombine()
        self.cholesky = P.CusCholeskyTrsm()
        self.transpose02314 = P.CusTranspose02314()
        # Logical side lengths of the A and G matrices.
        self.matrix_A_dim = self.in_channels * self.kernel_size[0] * self.kernel_size[1]
        self.matrix_G_dim = self.out_channels
        # Device-side shape and dimension come from a helper defined outside
        # this block; both calls pass in_channels as the second argument.
        self.matrix_A_device_shape, self.matrix_A_device_dim = caculate_device_shape(self.matrix_A_dim,
                                                                                     self.in_channels, True)
        self.matrix_G_device_shape, self.matrix_G_device_dim = caculate_device_shape(self.matrix_G_dim,
                                                                                     self.in_channels, False)
        # Same four extents as the device shape with the two middle axes swapped.
        self.matrix_A_device_temp_shape = (
            self.matrix_A_device_shape[0], self.matrix_A_device_shape[2], self.matrix_A_device_shape[1],
            self.matrix_A_device_shape[3])
        self.matrix_G_device_temp_shape = (
            self.matrix_G_device_shape[0], self.matrix_G_device_shape[2], self.matrix_G_device_shape[1],
            self.matrix_G_device_shape[3])
        # Both inverse buffers start as a float16 identity reshaped to the
        # device shape and are not trained.
        self.matrix_A_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_A_device_dim).astype(np.float16), self.matrix_A_device_shape)),
            name='matrix_A_inv', requires_grad=False)
        self.A_inv_max = Parameter(initializer(0, [1], mstype.float32), name="A_inv_max", requires_grad=False)
        self.matrix_G_inv = Parameter(
            Tensor(np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape)),
            name="matrix_G_inv", requires_grad=False)

        self.G_inv_max = Parameter(initializer(0, [1], mstype.float32), name="G_inv_max", requires_grad=False)
        # Constant tensor with the same contents as the initial matrix_G_inv.
        self.fake_G = Tensor(
            np.reshape(np.identity(self.matrix_G_device_dim).astype(np.float16), self.matrix_G_device_shape))

        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cov_step = Parameter(initializer(0, [1], mstype.int32), name="cov_step", requires_grad=False)
        self.mul = P.Mul()
        self.cast = P.Cast()
        self.damping = Tensor(damping)
        self.vector_matmul = P.CusBatchMatMul()
        self.diag_block_dim = 128
        # True when in_channels is not a multiple of C0 (C0 is defined outside this block).
        self.channels_slice_flag = False
        if self.in_channels % C0 != 0:
            self.channels_slice_flag = True

        # When matrix_A_dim exceeds one diagonal block and is not a multiple
        # of it, pad both axes up to the next multiple of diag_block_dim.
        self.padA_flag = False
        if (self.matrix_A_dim // self.diag_block_dim) * self.diag_block_dim != self.matrix_A_dim \
            and self.matrix_A_dim > self.diag_block_dim:
            self.padA_flag = True
            pad_dim = self.diag_block_dim - self.matrix_A_dim % self.diag_block_dim
            self.padA = P.Pad(((0, pad_dim), (0, pad_dim)))
        # When the logical and device dimensions differ, pad axes 1 and 3 by
        # (C0 - in_channels).
        self.device_shape_pad_flag = False
        if self.matrix_A_dim != self.matrix_A_device_dim:
            self.device_shape_pad_flag = True
            self.device_shape_pad = P.Pad(((0, 0), (0, C0 - self.in_channels), (0, 0), (0, C0 - self.in_channels)))
        self.slice = P.Slice()
        self.gather = P.GatherV2()
        self.freq = Tensor(frequency, mstype.int32)
        self.loss_scale = Tensor(1 / loss_scale, mstype.float16)
        self.axis = 0

        # Sizes of the damping identities: the matrix dimension, rounded up to
        # the next multiple of diag_block_dim when it exceeds one block and is
        # not already a multiple.
        dampingA_dim = self.matrix_A_dim
        if (self.matrix_A_dim % self.diag_block_dim) != 0 and self.matrix_A_dim > self.diag_block_dim:
            dampingA_dim = (self.matrix_A_dim // self.diag_block_dim + 1) * self.diag_block_dim
        dampingG_dim = self.matrix_G_dim
        if (self.matrix_G_dim % self.diag_block_dim) != 0 and self.matrix_G_dim > self.diag_block_dim:
            dampingG_dim = (self.matrix_G_dim // self.diag_block_dim + 1) * self.diag_block_dim

        self.dampingA = Tensor(np.identity(dampingA_dim), mstype.float32)
        self.dampingG = Tensor(np.identity(dampingG_dim), mstype.float32)
        self.fused_abs_max1 = P.CusFusedAbsMax1([self.matrix_A_dim, self.matrix_A_dim])
        self.fused_abs_max2 = P.CusFusedAbsMax1()
        self.log = P.Log()
        self.exp = P.Exp()
        self.sqrt = P.Sqrt()
        # Hook `save_gradient` into the backward pass.
        self.getG = P.InsertGradientOf(self.save_gradient)