def __init__(
        self,
        ninteractions,
        n_atom_basis,
        n_hidden=0,
        activation=Swish(),
):
    """Create one projection cell per interaction.

    The output widths of the cells add up to ``n_atom_basis``: every cell
    except the last gets ``n_atom_basis // ninteractions`` features and the
    last one receives whatever is left.

    Args:
        ninteractions: Number of projection cells to create.
        n_atom_basis: Input width of every cell and total output width.
        n_hidden: When positive, each cell is an ``MLP`` with ``n_hidden``
            hidden layers of width ``n_atom_basis``; otherwise each cell is
            a single ``Dense`` layer.
        activation: Activation forwarded to every ``MLP``/``Dense``.
    """
    super().__init__()

    self.n_atom_basis = n_atom_basis
    self.ninteractions = ninteractions

    # Floor share for all but the last cell; the last one absorbs the remainder.
    base_width = n_atom_basis // ninteractions
    remainder_width = n_atom_basis - (base_width * (ninteractions - 1))
    widths = [base_width] * (ninteractions - 1)
    widths.append(remainder_width)

    if n_hidden > 0:
        hidden_layers = [n_atom_basis] * n_hidden
        cells = [
            MLP(n_atom_basis, widths[k], hidden_layers, activation=activation)
            for k in range(ninteractions)
        ]
    else:
        cells = [
            Dense(n_atom_basis, widths[k], activation=activation)
            for k in range(ninteractions)
        ]
    self.mcr = nn.CellList(cells)

    self.concat = P.Concat(-1)
    self.reduce_sum = P.ReduceSum()
def __init__(self, startp, channels, last_level):
    """Build the IDA up-sampling cells.

    Args:
        startp: Index of the first level handled by this cell.
        channels: Channel count per level; one ``IDAUp`` is created for each
            adjacent pair ``(channels[i], channels[i - 1])``.
        last_level: Final level; must lie within
            ``[startp, startp + len(channels)]``.

    Raises:
        ValueError: If ``last_level`` is outside the accepted range.
    """
    super(DLAUp, self).__init__()
    self.startp = startp
    self.channels = channels
    self.last_level = last_level
    self.num_levels = len(self.channels)

    highest_level = self.startp + len(self.channels)
    if not self.startp <= self.last_level <= highest_level:
        raise ValueError("Invalid last level value.")

    # First stage: one IDAUp per pair of neighbouring levels.
    self.idaup_fns = nn.CellList(
        [IDAUp(channels[k], channels[k - 1]) for k in range(1, len(channels))]
    )

    # Final stage exists only when last_level differs from startp.
    self.final_up = self.last_level != self.startp
    if self.final_up:
        final_cells = [
            IDAUp(channels[k], channels[0], up_f=2**k)
            for k in range(1, self.last_level - self.startp)
        ]
        self.final_idaup_fns = nn.CellList(final_cells)
def __init__(self, img_size_min, num_scale, scale_factor=4 / 3):
    """Set up the scale schedule and the first sub-generator.

    Args:
        img_size_min: Image size at scale 0.
        num_scale: Index of the last scale; ``size_list`` holds
            ``num_scale + 1`` entries.
        scale_factor: Multiplicative growth of the image size per scale.
    """
    super(Generator, self).__init__()

    self.img_size_min = img_size_min
    self.scale_factor = scale_factor
    self.num_scale = num_scale
    self.nf = 32
    self.current_scale = 0

    self.size_list = [
        int(self.img_size_min * scale_factor**level)
        for level in range(num_scale + 1)
    ]
    print(self.size_list)

    def _conv_bn_lrelu(in_channels):
        # Conv -> BatchNorm -> LeakyReLU block producing ``self.nf`` channels.
        return nn.SequentialCell(nn.Conv2d(in_channels, self.nf, 3, 1),
                                 nn.BatchNorm2d(self.nf),
                                 nn.LeakyReLU(2e-1))

    self.sub_generators = nn.CellList()

    stages = nn.CellList()
    stages.append(_conv_bn_lrelu(3))
    for _ in range(3):
        stages.append(_conv_bn_lrelu(self.nf))
    # Output block maps back to 3 channels and ends in Tanh.
    stages.append(nn.SequentialCell(nn.Conv2d(self.nf, 3, 3, 1), nn.Tanh()))

    first_generator = nn.SequentialCell(*stages)
    self.sub_generators.append(first_generator)
def __init__(self, net_config):
    """Build the stacked backbone and the per-stack detection heads.

    Args:
        net_config: Configuration object providing ``heads``, ``num_stacks``,
            ``n``, ``cnv_dim``, ``dims`` and ``modules``.
    """
    super(GatherDetectionFeatureCell, self).__init__()

    self.heads = net_config.heads
    self.nstack = net_config.num_stacks
    self.n = net_config.n
    self.cnv_dim = net_config.cnv_dim
    self.dims = net_config.dims
    self.modules = net_config.modules

    curr_dim = self.dims[0]

    def _pointwise_bn(in_channels):
        # 1x1 convolution (no bias) into ``curr_dim`` channels, then BatchNorm.
        return nn.SequentialCell(
            nn.Conv2d(in_channels, curr_dim, kernel_size=1, has_bias=False),
            nn.BatchNorm2d(curr_dim, momentum=BN_MOMENTUM)
        )

    def _head(head_name):
        # One feature extractor per stack for the given head.
        return _generate_feature(cin=self.cnv_dim, cout=curr_dim, kernel_size=3,
                                 head_name=head_name, head=self.heads[head_name],
                                 num_stacks=self.nstack, with_bn=False)

    self.pre = nn.SequentialCell(
        Convolution(3, 128, 7, stride=2),
        Residual(128, 256, 3, stride=2)
    )

    self.kps = nn.CellList(
        [Kp_module(self.n, self.dims, self.modules) for _ in range(self.nstack)]
    )
    self.cnvs = nn.CellList(
        [Convolution(curr_dim, self.cnv_dim, 3) for _ in range(self.nstack)]
    )

    # The cells below link consecutive stacks, so there is one fewer than stacks.
    self.inters = nn.CellList(
        [Residual(curr_dim, curr_dim, 3) for _ in range(self.nstack - 1)]
    )
    self.inters_ = nn.CellList(
        [_pointwise_bn(curr_dim) for _ in range(self.nstack - 1)]
    )
    self.cnvs_ = nn.CellList(
        [_pointwise_bn(self.cnv_dim) for _ in range(self.nstack - 1)]
    )

    self.relu = nn.ReLU()

    self.hm_fn = _head('hm')
    self.wh_fn = _head('wh')
    self.reg_fn = _head('reg')
def __init__(self, config, loc_cls_shared_addition=False):
    """Build the weight-shared multi-box head.

    The same ``_conv2d`` instances are handed to the ``ConvBNReLU`` stack of
    every feature level.

    Args:
        config: Configuration providing ``num_classes``,
            ``extras_out_channels``, ``num_default``, ``feature_size`` and
            ``num_addition_layers``.
        loc_cls_shared_addition: When False, localisation and classification
            get separate additional stacks; when True, one stack is built
            and stored as ``addition_layer``.
    """
    super(WeightSharedMultiBox, self).__init__()

    num_classes = config.num_classes
    out_channels = config.extras_out_channels[0]
    num_default = config.num_default[0]
    num_features = len(config.feature_size)
    num_addition_layers = config.num_addition_layers
    self.loc_cls_shared_addition = loc_cls_shared_addition

    def _shared_convs():
        # Convolutions reused by every feature level.
        return [_conv2d(out_channels, out_channels, 3, 1)
                for _ in range(num_addition_layers)]

    def _stack(shared):
        # One ConvBNReLU per additional layer, each wrapping a shared conv.
        return [ConvBNReLU(out_channels, out_channels, 3, 1, 1, shared[idx])
                for idx in range(num_addition_layers)]

    if not loc_cls_shared_addition:
        loc_convs = _shared_convs()
        cls_convs = _shared_convs()
        loc_stacks = []
        cls_stacks = []
        for _ in range(num_features):
            loc_stack = _stack(loc_convs)
            cls_stack = _stack(cls_convs)
            loc_stacks.append(nn.SequentialCell(loc_stack))
            cls_stacks.append(nn.SequentialCell(cls_stack))
        self.addition_layer_loc = nn.CellList(loc_stacks)
        self.addition_layer_cls = nn.CellList(cls_stacks)
    else:
        convs = _shared_convs()
        stacks = []
        for _ in range(num_features):
            stacks.append(nn.SequentialCell(_stack(convs)))
        self.addition_layer = nn.SequentialCell(stacks)

    loc_layers = [_conv2d(out_channels, 4 * num_default,
                          kernel_size=3, stride=1, pad_mod='same')]
    cls_layers = [_conv2d(out_channels, num_classes * num_default,
                          kernel_size=3, stride=1, pad_mod='same')]
    self.loc_layers = nn.SequentialCell(loc_layers)
    self.cls_layers = nn.SequentialCell(cls_layers)

    self.flatten_concat = FlattenConcat(config)
def __init__(self):
    """Create the discriminator with its first sub-discriminator.

    The first sub-discriminator is five 3x3 convolution blocks: an input
    block (3 -> ``nf`` channels), three ``nf`` -> ``nf`` blocks with
    BatchNorm, and a final block reducing to a single channel.
    """
    super(Discriminator, self).__init__()
    self.nf = 32
    self.current_scale = 0

    def _conv3x3(in_channels, out_channels):
        # 3x3 convolution, stride 1, explicit padding of 1, with bias.
        return nn.Conv2d(in_channels, out_channels, 3, 1,
                         pad_mode='pad', padding=1, has_bias=True)

    self.sub_discriminators = nn.CellList()

    stages = nn.CellList()
    stages.append(nn.SequentialCell([
        _conv3x3(3, self.nf),
        nn.LeakyReLU(2e-1)
    ]))
    for _ in range(3):
        stages.append(nn.SequentialCell([
            _conv3x3(self.nf, self.nf),
            nn.BatchNorm2d(self.nf),
            nn.LeakyReLU(2e-1)
        ]))
    stages.append(nn.SequentialCell([
        _conv3x3(self.nf, 1)
    ]))

    first_discriminator = nn.SequentialCell(*stages)
    self.sub_discriminators.append(first_discriminator)
def __init__(self, levels, channels, kernel_size, stride):
    """Create ``levels`` identical ``conv_bn_relu`` blocks.

    Args:
        levels: Number of blocks to create.
        channels: Input and output channel count of each block.
        kernel_size: Kernel size forwarded to ``conv_bn_relu``.
        stride: Stride forwarded to ``conv_bn_relu``.
    """
    super(BottomUp, self).__init__()
    self.levels = levels

    stages = []
    for _ in range(self.levels):
        stages.append(conv_bn_relu(channels, channels, kernel_size, stride, False))
    self.blocks = nn.CellList(stages)
def __init__(self, in_channels, out_channels, kernel_size, stride,
             pad_mode="pad", pad=0, groups=1, has_bias=False):
    """Build a grouped convolution out of ``groups`` ordinary convolutions.

    Args:
        in_channels: Total input channels; must be divisible by ``groups``.
        out_channels: Total output channels; must be divisible by ``groups``.
        kernel_size: Kernel size of every per-group convolution.
        stride: Stride of every per-group convolution.
        pad_mode: Padding mode forwarded to ``nn.Conv2d``.
        pad: Padding amount forwarded to ``nn.Conv2d``.
        groups: Number of groups, i.e. number of convolutions created.
        has_bias: Whether each convolution has a bias term.
    """
    super(GroupConv, self).__init__()
    assert in_channels % groups == 0 and out_channels % groups == 0

    self.groups = groups
    self.convs = nn.CellList()
    # Split along the channel axis into one slice per group, concat afterwards.
    self.op_split = P.Split(axis=1, output_num=self.groups)
    self.op_concat = P.Concat(axis=1)
    self.cast = P.Cast()

    group_in = in_channels // groups
    group_out = out_channels // groups
    for _ in range(groups):
        per_group_conv = nn.Conv2d(group_in, group_out,
                                   kernel_size=kernel_size, stride=stride,
                                   has_bias=has_bias, padding=pad,
                                   pad_mode=pad_mode, group=1,
                                   weight_init='xavier_uniform')
        self.convs.append(per_group_conv)
def __init__(self, upsample_scales, mode="nearest", freq_axis_kernel_size=1,
             cin_pad=0, cin_channels=80):
    """Build alternating ``Resize`` / ``Conv2d`` layers, one pair per scale.

    Args:
        upsample_scales: Iterable of per-stage scale factors.
        mode: Mode forwarded to ``Resize``.
        freq_axis_kernel_size: Kernel extent along the first kernel axis.
        cin_pad: Multiplied by the product of all scales to give
            ``self.indent``.
        cin_channels: Not used by this constructor.
    """
    super(UpsampleNetwork, self).__init__()
    self.expand_op = P.ExpandDims()
    self.squeeze_op = P.Squeeze(1)

    total_scale = np.prod(upsample_scales)
    self.indent = cin_pad * total_scale

    stages = []
    for factor in upsample_scales:
        freq_pad = (freq_axis_kernel_size - 1) // 2
        kernel = (freq_axis_kernel_size, factor * 2 + 1)
        # Four-element padding: ``freq_pad`` twice, then ``factor`` twice.
        pads = (freq_pad, freq_pad, factor, factor)
        resize = Resize(factor, 1, mode)
        smooth = nn.Conv2d(1, 1, kernel_size=kernel, has_bias=False,
                           pad_mode='pad', padding=pads)
        # No activation is appended after the convolution.
        stages.extend([resize, smooth])
    self.up_layers = nn.CellList(stages)
def __init__(self, block, inplanes, planes, blocks, args, stride=1):
    """Stack ``blocks`` instances of ``block`` into one layer.

    Args:
        block: Block class; must expose an ``expansion`` attribute.
        inplanes: Input channels of the first block.
        planes: Channel argument passed to every block.
        blocks: Total number of blocks in the layer.
        args: Options object providing ``inference``, ``use_se``, ``pre_bn``
            and ``act_type``.
        stride: Stride of the first block; later blocks use the block's
            default.
    """
    super(MakeLayer, self).__init__()
    self.inplanes = inplanes

    # A DownSample cell is needed when the first block changes stride or width.
    self.downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
        self.downsample = DownSample(self.inplanes, planes, block.expansion,
                                     stride, use_inference=args.inference)

    stacked = [
        block(self.inplanes, planes, stride, self.downsample,
              use_se=args.use_se, pre_bn=args.pre_bn,
              use_inference=args.inference, act_type=args.act_type)
    ]
    self.inplanes = planes
    for _ in range(1, blocks):
        stacked.append(
            block(self.inplanes, planes,
                  use_se=args.use_se, pre_bn=args.pre_bn,
                  use_inference=args.inference, act_type=args.act_type))
    self.layers = nn.CellList(stacked)
def progress(self):
    """Advance one scale and append a new sub-generator.

    ``nf`` doubles whenever the new ``current_scale`` is a multiple of 4.
    At every other scale the new sub-generator has the same layout as the
    previous one and is initialised from the previous one's parameters.
    """
    self.current_scale += 1

    if self.current_scale % 4 == 0:
        self.nf *= 2

    tmp_generator = nn.CellList()
    tmp_generator.append(
        nn.SequentialCell(nn.Conv2d(3, self.nf, 3, 1),
                          nn.BatchNorm2d(self.nf),
                          nn.LeakyReLU(2e-1)))
    for _ in range(3):
        tmp_generator.append(
            nn.SequentialCell(nn.Conv2d(self.nf, self.nf, 3, 1),
                              nn.BatchNorm2d(self.nf),
                              nn.LeakyReLU(2e-1)))
    tmp_generator.append(
        nn.SequentialCell(nn.Conv2d(self.nf, 3, 3, 1), nn.Tanh()))
    tmp_generator = nn.SequentialCell(*tmp_generator)

    if self.current_scale % 4 != 0:
        prev_generator = self.sub_generators[-1]

        # Initialize layers via copy
        if self.current_scale >= 1:
            # load_param_into_net updates the network in place and does not
            # return it, so the result must not be assigned back to
            # tmp_generator. parameters_dict is a method and has to be
            # called to obtain the name -> Parameter dictionary.
            # NOTE(review): loading matches parameters by name - confirm the
            # names of the previous and the new sub-generator line up.
            mindspore.load_param_into_net(tmp_generator,
                                          prev_generator.parameters_dict())

    self.sub_generators.append(tmp_generator)
    print("GENERATOR PROGRESSION DONE")
def progress(self):
    """Advance one scale and append a new sub-discriminator.

    ``nf`` doubles whenever the new ``current_scale`` is a multiple of 4.
    At every other scale the new sub-discriminator has the same layout as
    the previous one and is initialised from the previous one's parameters.
    """
    self.current_scale += 1

    # Lower scale discriminators are not used in later ... replace append to assign?
    if self.current_scale % 4 == 0:
        self.nf *= 2

    # Temporary container for the blocks of the new sub-discriminator.
    tmp_discriminator = nn.CellList()
    tmp_discriminator.append(
        nn.SequentialCell(nn.Conv2d(3, self.nf, 3, 1, padding=1,
                                    has_bias=True, pad_mode='pad'),
                          nn.LeakyReLU(2e-1)))
    for _ in range(3):
        tmp_discriminator.append(
            nn.SequentialCell(nn.Conv2d(self.nf, self.nf, 3, 1, padding=1,
                                        has_bias=True, pad_mode='pad'),
                              nn.BatchNorm2d(self.nf),
                              nn.LeakyReLU(2e-1)))
    tmp_discriminator.append(
        nn.SequentialCell(nn.Conv2d(self.nf, 1, 3, 1, padding=1,
                                    has_bias=True, pad_mode='pad')))
    tmp_discriminator = nn.SequentialCell(*tmp_discriminator)

    if self.current_scale % 4 != 0:
        prev_discriminator = self.sub_discriminators[-1]

        # Initialize layers via copy
        if self.current_scale >= 1:
            # load_param_into_net updates the network in place and does not
            # return it, so the result must not be assigned back to
            # tmp_discriminator. parameters_dict is a method and has to be
            # called to obtain the name -> Parameter dictionary.
            # NOTE(review): loading matches parameters by name - confirm the
            # names of the previous and the new sub-discriminator line up.
            mindspore.load_param_into_net(tmp_discriminator,
                                          prev_discriminator.parameters_dict())

    self.sub_discriminators.append(tmp_discriminator)
    print("DISCRIMINATOR PROGRESSION DONE")
def __init__(self,
             attn_embed_dim,
             encoder_layers,
             num_attn_heads=12,
             intermediate_size=3072,
             attention_dropout_prob=0.1,
             initializer_range=0.02,
             hidden_dropout_prob=0.1,
             hidden_act="relu",
             compute_type=mstype.float32):
    """Stack ``encoder_layers`` ``EncoderCell`` instances plus a LayerNorm.

    Args:
        attn_embed_dim: Embedding width given to every ``EncoderCell`` and
            to the ``LayerNorm``.
        encoder_layers: Number of encoder cells.
        num_attn_heads: Forwarded to ``EncoderCell``.
        intermediate_size: Forwarded to ``EncoderCell``.
        attention_dropout_prob: Forwarded to ``EncoderCell``.
        initializer_range: Forwarded to ``EncoderCell``.
        hidden_dropout_prob: Forwarded to ``EncoderCell``.
        hidden_act: Forwarded to ``EncoderCell``.
        compute_type: Forwarded to ``EncoderCell``.
    """
    super(TransformerEncoder, self).__init__()
    self.num_layers = encoder_layers

    # Every encoder cell is created with the same configuration.
    cell_kwargs = dict(attn_embed_dim=attn_embed_dim,
                       num_attn_heads=num_attn_heads,
                       intermediate_size=intermediate_size,
                       attention_dropout_prob=attention_dropout_prob,
                       initializer_range=initializer_range,
                       hidden_dropout_prob=hidden_dropout_prob,
                       hidden_act=hidden_act,
                       compute_type=compute_type)
    stack = [EncoderCell(**cell_kwargs) for _ in range(encoder_layers)]

    self.layers = nn.CellList(stack)
    self.layer_norm = LayerNorm(in_channels=attn_embed_dim)
def __init__(self,
             batch_size=512,
             d_model=768,
             seq_length=1024,
             num_hidden_layers=12,
             num_attention_heads=12,
             intermediate_size=3072,
             has_attention_mask=True,
             attention_dropout=0.1,
             hidden_dropout=0.1,
             compute_type=mstype.float32):
    """Stack ``num_hidden_layers`` ``DecoderBlock`` instances.

    Args:
        batch_size: Forwarded to ``DecoderBlock``; first entry of
            ``out_shape``.
        d_model: Forwarded to ``DecoderBlock``; last entry of both stored
            shapes.
        seq_length: Forwarded to ``DecoderBlock``; middle entry of
            ``out_shape``.
        num_hidden_layers: Number of decoder blocks.
        num_attention_heads: Forwarded to ``DecoderBlock``.
        intermediate_size: Forwarded to ``DecoderBlock``.
        has_attention_mask: Forwarded to ``DecoderBlock``.
        attention_dropout: Forwarded to ``DecoderBlock``.
        hidden_dropout: Forwarded to ``DecoderBlock``.
        compute_type: Forwarded to ``DecoderBlock``.
    """
    super(GPT2Transformer, self).__init__()

    # Every decoder block is created with the same configuration.
    block_kwargs = dict(batch_size=batch_size,
                        seq_length=seq_length,
                        d_model=d_model,
                        num_attention_heads=num_attention_heads,
                        intermediate_size=intermediate_size,
                        attention_dropout=attention_dropout,
                        hidden_dropout=hidden_dropout,
                        has_attention_mask=has_attention_mask,
                        compute_type=compute_type)
    stack = [DecoderBlock(**block_kwargs) for _ in range(num_hidden_layers)]
    self.layers = nn.CellList(stack)

    self.reshape = P.Reshape()
    self.new_shape = (-1, d_model)
    self.out_shape = (batch_size, seq_length, d_model)
def __init__(self, strategy1, strategy2, param=None):
    """Create two ``MatMulCell`` instances tagged with stage 0 and stage 1.

    Args:
        strategy1: First argument forwarded to ``MatMulCell``.
        strategy2: Second argument forwarded to ``MatMulCell``.
        param: Third argument forwarded to ``MatMulCell``.
    """
    super().__init__()
    self.block = nn.CellList()
    for stage_id in (0, 1):
        stage_cell = MatMulCell(strategy1, strategy2, param)
        stage_cell.stage = stage_id
        self.block.append(stage_cell)
def __init__(self,
             in_channels,
             out_channels,
             stride=1,
             down_sample=None,
             base_width=64,
             groups=1,
             use_se=False,
             **kwargs):
    """Build a 1x1 -> grouped 3x3 -> 1x1 bottleneck.

    Args:
        in_channels: Input channels of the first 1x1 convolution.
        out_channels: Base output width; the last convolution emits
            ``out_channels * self.expansion`` channels.
        stride: Stride of the grouped 3x3 convolution.
        down_sample: Optional cell stored as ``self.down_sample``.
        base_width: Scales the intermediate width relative to 64.
        groups: Number of groups of the 3x3 convolution.
        use_se: When true, an ``SEBlock`` is created as ``self.se``.
        **kwargs: Accepted and ignored by this constructor.
    """
    super(Bottleneck, self).__init__()

    mid_channels = int(out_channels * (base_width / 64.0)) * groups
    self.groups = groups

    self.conv1 = conv1x1(in_channels, mid_channels, stride=1)
    self.bn1 = nn.BatchNorm2d(mid_channels)
    self.relu = P.ReLU()

    self.conv3x3s = nn.CellList()

    self.conv2 = GroupConv(mid_channels, mid_channels, 3, stride, pad=1, groups=groups)
    self.op_split = Split(axis=1, output_num=self.groups)
    self.op_concat = Concat(axis=1)
    self.bn2 = nn.BatchNorm2d(mid_channels)

    self.conv3 = conv1x1(mid_channels, out_channels * self.expansion, stride=1)
    self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)

    self.use_se = use_se
    if self.use_se:
        self.se = SEBlock(out_channels * self.expansion)

    # The flag records whether a down-sample cell was supplied.
    has_down_sample = down_sample is not None
    self.down_sample_flag = has_down_sample
    if has_down_sample:
        self.down_sample = down_sample

    self.cast = P.Cast()
    self.add = TensorAdd()
def __init__(self,
             hidden_size,
             seq_length,
             num_hidden_layers,
             num_attention_heads=12,
             intermediate_size=3072,
             attention_probs_dropout_prob=0.1,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             hidden_dropout_prob=0.1,
             use_relative_positions=False,
             hidden_act="gelu",
             compute_type=mstype.float32,
             return_all_encoders=False):
    """Stack ``num_hidden_layers`` ``BertEncoderCell`` instances.

    Args:
        hidden_size: Forwarded to ``BertEncoderCell``; last entry of both
            stored shapes.
        seq_length: Forwarded to ``BertEncoderCell``; middle entry of
            ``out_shape``.
        num_hidden_layers: Number of encoder cells.
        num_attention_heads: Forwarded to ``BertEncoderCell``.
        intermediate_size: Forwarded to ``BertEncoderCell``.
        attention_probs_dropout_prob: Forwarded to ``BertEncoderCell``.
        use_one_hot_embeddings: Forwarded to ``BertEncoderCell``.
        initializer_range: Forwarded to ``BertEncoderCell``.
        hidden_dropout_prob: Forwarded to ``BertEncoderCell``.
        use_relative_positions: Forwarded to ``BertEncoderCell``.
        hidden_act: Forwarded to ``BertEncoderCell``.
        compute_type: Forwarded to ``BertEncoderCell``.
        return_all_encoders: Stored on the instance.
    """
    super(BertTransformer, self).__init__()
    self.return_all_encoders = return_all_encoders

    # Every encoder cell is created with the same configuration.
    cell_kwargs = dict(
        hidden_size=hidden_size,
        seq_length=seq_length,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        use_one_hot_embeddings=use_one_hot_embeddings,
        initializer_range=initializer_range,
        hidden_dropout_prob=hidden_dropout_prob,
        use_relative_positions=use_relative_positions,
        hidden_act=hidden_act,
        compute_type=compute_type,
    )
    stack = [BertEncoderCell(**cell_kwargs) for _ in range(num_hidden_layers)]
    self.layers = nn.CellList(stack)

    self.reshape = P.Reshape()
    self.shape = (-1, hidden_size)
    self.out_shape = (-1, seq_length, hidden_size)
def __init__(self,
             config: GNMTConfig,
             is_training: bool,
             use_one_hot_embeddings: bool = False,
             initializer_range=0.1,
             infer_beam_width=1,
             compute_type=mstype.float16):
    """Build the decoder RNN stack, the attention RNN and the classifier.

    Args:
        config: Provides ``hidden_size``, ``num_hidden_layers``,
            ``hidden_dropout_prob``, ``vocab_size``, ``max_decode_length``,
            ``batch_size`` and ``attention_dropout_prob``.
        is_training: Stored and forwarded to ``RecurrentAttention``.
        use_one_hot_embeddings: Not used by this constructor.
        initializer_range: Range of the ``Uniform`` initialisers of the
            classifier weight and bias.
        infer_beam_width: Multiplies ``config.batch_size`` to give the
            effective batch size.
        compute_type: Type passed to ``to_float`` on the classifier.
    """
    super(GNMTDecoder, self).__init__()

    self.is_training = is_training
    self.attn_embed_dim = config.hidden_size
    self.num_layers = config.num_hidden_layers
    self.hidden_dropout_prob = config.hidden_dropout_prob
    self.vocab_size = config.vocab_size
    self.seq_length = config.max_decode_length
    # batchsize* beam_width for beam_search.
    self.batch_size = config.batch_size * infer_beam_width
    self.word_embed_dim = config.hidden_size

    self.transpose = P.Transpose()
    self.transpose_orders = (1, 0, 2)
    self.reshape = P.Reshape()
    self.concat = P.Concat(axis=-1)
    self.state_concat = P.Concat(axis=0)

    state_shape = [self.num_layers, 2, self.batch_size, config.hidden_size]
    self.all_decoder_state = Tensor(np.zeros(state_shape), mstype.float32)

    rnn_stack = []
    for depth in range(0, self.num_layers):
        # the inputs is [T,D,N] for the first layer and [T,D,2N] afterwards
        scaler = 1 if depth == 0 else 2
        rnn_stack.append(
            DynamicRNNNet(seq_length=self.seq_length,
                          batchsize=self.batch_size,
                          word_embed_dim=scaler * self.word_embed_dim,
                          hidden_size=self.word_embed_dim))
    self.decoder_layers = nn.CellList(rnn_stack)

    # The attention RNN wraps the first decoder layer.
    self.att_rnn = RecurrentAttention(rnn=self.decoder_layers[0],
                                      is_training=is_training,
                                      input_size=self.word_embed_dim,
                                      context_size=self.attn_embed_dim,
                                      hidden_size=self.attn_embed_dim,
                                      num_layers=1,
                                      dropout=config.attention_dropout_prob)

    self.dropout = nn.Dropout(keep_prob=1.0 - config.hidden_dropout_prob)

    classifier = nn.Dense(config.hidden_size,
                          config.vocab_size,
                          has_bias=True,
                          weight_init=Uniform(initializer_range),
                          bias_init=Uniform(initializer_range))
    self.classifier = classifier.to_float(compute_type)

    self.cast = P.Cast()
    self.shape_op = P.Shape()
    self.expand = P.ExpandDims()
def __init__(self):
    """Create two all-ones 8x8x8x8 parameters and three ``SubNet`` layers."""
    super(Net, self).__init__()
    self.mul = P.Mul()
    self.relu = P.ReLU()

    weight_shape = [8, 8, 8, 8]
    self.wd = Parameter(Tensor(np.ones(weight_shape).astype(np.float32)), name="wide")
    self.wt = Parameter(Tensor(np.ones(weight_shape).astype(np.float32)), name="l")

    self.layers = nn.CellList()
    for layer_id in (0, 1, 2):
        self.layers.append(SubNet(layer_id))
def __init__(self):
    """Create the Exp/ReduceMean primitives and three ``LoopLayer`` cells."""
    super(Net, self).__init__()
    self.exp = P.Exp()
    self.mean = P.ReduceMean()
    self.layers = nn.CellList([LoopLayer() for _ in range(3)])
def __init__(self, mul_weight, num_layers, strategy1=None, strategy2=None):
    """Create sharded Mul/Neg primitives and ``num_layers`` ``SubNet`` cells.

    Args:
        mul_weight: Value wrapped in the ``Parameter`` named ``"w1"``.
        num_layers: Number of ``SubNet`` cells; each receives its index.
        strategy1: Shard strategy applied to ``P.Mul``.
        strategy2: Shard strategy applied to ``P.Neg``.
    """
    super().__init__()
    self.mul = P.Mul().shard(strategy1)
    self.neg = P.Neg().shard(strategy2)
    self.mul_weight = Parameter(mul_weight, "w1")
    self.num_layers = num_layers

    self.layers = nn.CellList()
    for layer_id in range(num_layers):
        sub_net = SubNet(layer_id)
        self.layers.append(sub_net)
def __init__(self, args, conv=default_conv):
    """Build per-scale heads and tails around a shared ``VisionTransformer``.

    Args:
        args: Options object providing ``n_feats``, ``rgb_range``,
            ``n_colors``, ``scale``, ``patch_size``, ``patch_dim``,
            ``num_heads``, ``num_layers``, ``num_queries``,
            ``dropout_rate``, ``no_mlp``, ``pos_every`` and ``no_pos``.
        conv: Convolution factory used by heads, tails and ``ResBlock``.
    """
    super(IPT, self).__init__()

    self.scale_idx = 0
    self.args = args

    n_feats = args.n_feats
    kernel_size = 3
    act = nn.ReLU()
    token_dim = n_feats * args.patch_dim * args.patch_dim

    self.sub_mean = MeanShift(args.rgb_range)
    self.add_mean = MeanShift(args.rgb_range, sign=1)

    # One head per entry of args.scale.
    heads = []
    for _ in args.scale:
        heads.append(
            nn.SequentialCell(conv(args.n_colors, n_feats, kernel_size),
                              ResBlock(conv, n_feats, 5, act=act),
                              ResBlock(conv, n_feats, 5, act=act)))
    self.head = nn.CellList(heads)

    self.body = VisionTransformer(
        img_dim=args.patch_size,
        patch_dim=args.patch_dim,
        num_channels=n_feats,
        embedding_dim=token_dim,
        num_heads=args.num_heads,
        num_layers=args.num_layers,
        hidden_dim=token_dim * 4,
        num_queries=args.num_queries,
        dropout_rate=args.dropout_rate,
        mlp=args.no_mlp,
        pos_every=args.pos_every,
        no_pos=args.no_pos)

    # One tail per entry of args.scale, up-sampling by that entry.
    tails = []
    for factor in args.scale:
        tails.append(
            nn.SequentialCell(Upsampler(conv, factor, n_feats, act=False),
                              conv(n_feats, args.n_colors, kernel_size)))
    self.tail = nn.CellList(tails)

    self.reshape = P.Reshape()
    self.tile = P.Tile()
    self.transpose = P.Transpose()
def _generate_feature(cin, cout, kernel_size, head_name, head, num_stacks, with_bn=True):
    """
    Generate feature extraction function of each target head.

    Args:
        cin: Input channels of the leading ``Convolution``.
        cout: Output channels of the leading ``Convolution``.
        kernel_size: Kernel size of the leading ``Convolution``.
        head_name: Head identifier; when it contains ``'hm'`` the final 1x1
            convolution's bias is initialised with ``Constant(-2.19)``.
        head: Output channels of the final 1x1 convolution.
        num_stacks: Number of sequential cells in the returned list.
        with_bn: Forwarded to ``Convolution``.

    Returns:
        An ``nn.CellList`` holding ``num_stacks`` sequential cells.
    """
    is_heatmap = 'hm' in head_name

    def _output_conv():
        # Only the heat-map head overrides the default bias initialiser.
        if is_heatmap:
            return nn.Conv2d(cout, head, kernel_size=1, has_bias=True,
                             bias_init=Constant(-2.19), pad_mode='pad')
        return nn.Conv2d(cout, head, kernel_size=1, has_bias=True, pad_mode='pad')

    stacks = []
    for _ in range(num_stacks):
        stacks.append(
            nn.SequentialCell(
                Convolution(cin, cout, kernel_size, with_bn=with_bn),
                _output_conv()
            )
        )
    return nn.CellList(stacks)
def __init__(self, layer):
    """Turn a list of layer specifications into a ``CellList``.

    Args:
        layer: Iterable of indexable specs. A spec of length 4 becomes a
            ``Conv2dBatchReLU``; any other spec becomes a ``Conv2dBatch``
            built from its first five items.
    """
    super(MakeYoloLayer, self).__init__()
    built = []
    for spec in layer:
        if len(spec) == 4:
            cell = Conv2dBatchReLU(spec[0], spec[1], spec[2], spec[3])
        else:
            cell = Conv2dBatch(spec[0], spec[1], spec[2], spec[3], spec[4])
        built.append(cell)
    self.layers = nn.CellList(built)
def __init__(self, block, inplanes, planes, blocks, stride=2):
    """Create a 3x3 conv + BN + ReLU stem followed by ``blocks`` blocks.

    Args:
        block: Callable building one block from ``planes``.
        inplanes: Input channels of the 3x3 convolution.
        planes: Output channels of the convolution and argument of every
            block.
        blocks: Number of blocks to create.
        stride: Stride of the 3x3 convolution.
    """
    super(MakeLayer, self).__init__()
    self.conv = conv3x3(inplanes, planes, stride=stride, padding=1, bias=True)
    self.bn = bn_with_initialize(planes)
    self.relu = P.ReLU()

    stacked = [block(planes) for _ in range(0, blocks)]
    self.layers = nn.CellList(stacked)
def __init__(self, 词库总数, 向量维度, 层数, 头数, 丢弃率, 辞数, 最大长度=1024):
    """Build a multi-layer decoder with two embedding tables.

    Args:
        词库总数: Size of the first embedding table (``self.embed``).
        向量维度: Embedding width, also used by the decoder layers and the
            ``LayerNorm``.
        层数: Number of decoder layers.
        头数: Head count forwarded to every decoder layer.
        丢弃率: Forwarded to every decoder layer; ``1 - 丢弃率`` is the first
            positional argument of ``nn.Dropout``.
        辞数: Length of the index row stored in ``self.po``.
        最大长度: Size of the second embedding table (``self.embedP``).
    """
    super(多层解码, self).__init__()
    self.N = 层数
    self.embed = 词向量印刻(词库总数, 向量维度)
    self.embedP = 词向量印刻(最大长度, 向量维度)

    decoder_stack = []
    for _ in range(层数):
        decoder_stack.append(解码层(向量维度, 头数, 丢弃率))
    self.decoders = nn.CellList(decoder_stack)

    self.norm = nn.LayerNorm((向量维度,), epsilon=1e-6)

    # Row vector 0 .. 辞数-1 with shape (1, 辞数), stored as an int32 tensor.
    index_row = np.array(list(range(辞数))).reshape(1, 辞数)
    self.po = Tensor(index_row, mindspore.int32)

    self.dropout = nn.Dropout(1 - 丢弃率)
def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate):
    """Create ``num_layers`` dense layers with growing input width.

    Args:
        num_layers: Number of ``_DenseLayer`` cells.
        num_input_features: Input width of the first layer; layer ``i``
            receives ``num_input_features + i * growth_rate``.
        bn_size: Forwarded to ``_DenseLayer``.
        growth_rate: Forwarded to ``_DenseLayer`` and used to widen each
            layer's input.
        drop_rate: Forwarded to ``_DenseLayer``.
    """
    super(_DenseBlock, self).__init__()
    self.cell_list = nn.CellList()
    for depth in range(num_layers):
        layer_inputs = num_input_features + depth * growth_rate
        self.cell_list.append(
            _DenseLayer(layer_inputs,
                        growth_rate=growth_rate,
                        bn_size=bn_size,
                        drop_rate=drop_rate))
    self.concate = P.Concat(axis=1)
def __init__(self, args, conv=default_conv):
    """Build six float16 heads and tails around a ``VisionTransformer``.

    Args:
        args: Options object providing ``con_loss``, ``n_feats``,
            ``n_colors``, ``patch_size``, ``patch_dim``, ``num_heads``,
            ``num_layers``, ``num_queries``, ``dropout_rate``, ``no_mlp``,
            ``pos_every`` and ``no_pos``.
        conv: Convolution factory used by heads, tails and ``ResBlock``.
    """
    super(IPT, self).__init__()

    self.dytpe = mstype.float16
    self.scale_idx = 0
    self.args = args
    self.con_loss = args.con_loss

    n_feats = args.n_feats
    kernel_size = 3
    act = nn.ReLU()
    token_dim = n_feats * args.patch_dim * args.patch_dim

    # Six heads, every sub-cell switched to the stored float type.
    heads = []
    for _ in range(6):
        heads.append(
            nn.SequentialCell(
                conv(args.n_colors, n_feats, kernel_size).to_float(self.dytpe),
                ResBlock(conv, n_feats, 5, act=act).to_float(self.dytpe),
                ResBlock(conv, n_feats, 5, act=act).to_float(self.dytpe)))
    self.head = nn.CellList(heads)

    body = VisionTransformer(img_dim=args.patch_size,
                             patch_dim=args.patch_dim,
                             num_channels=n_feats,
                             embedding_dim=token_dim,
                             num_heads=args.num_heads,
                             num_layers=args.num_layers,
                             hidden_dim=token_dim * 4,
                             num_queries=args.num_queries,
                             dropout_rate=args.dropout_rate,
                             mlp=args.no_mlp,
                             pos_every=args.pos_every,
                             no_pos=args.no_pos,
                             con_loss=args.con_loss)
    self.body = body.to_float(self.dytpe)

    # Six tails with fixed up-sampling factors.
    tails = []
    for factor in [2, 3, 4, 1, 1, 1]:
        tails.append(
            nn.SequentialCell(
                Upsampler(conv, factor, n_feats).to_float(self.dytpe),
                conv(n_feats, args.n_colors, kernel_size).to_float(self.dytpe)))
    self.tail = nn.CellList(tails)

    self.reshape = P.Reshape()
    self.tile = P.Tile()
    self.transpose = P.Transpose()
    self.s2t = P.ScalarToTensor()
    self.cast = P.Cast()
def __init__(self, config):
    """Build embeddings, the block stack and the final LayerNorm.

    Args:
        config: Provides ``seq_length``, ``embedding_size``, ``num_layers``,
            ``compute_dtype`` and ``use_past``; it is also passed whole to
            ``AttentionMask``, ``EmbeddingLookup`` and every ``Block``.
    """
    super(GPT_Model, self).__init__()
    self.get_attention_mask = AttentionMask(config)
    self.word_embedding = EmbeddingLookup(config)
    self.position_embedding = nn.Embedding(
        config.seq_length,
        config.embedding_size,
        embedding_table=TruncatedNormal(0.02))

    self.blocks = nn.CellList()
    # Blocks receive a 1-based index as their second argument.
    for block_no in range(1, config.num_layers + 1):
        self.blocks.append(Block(config, block_no))

    layernorm = LayerNorm((config.embedding_size,))
    self.layernorm = layernorm.to_float(config.compute_dtype)

    self.use_past = config.use_past
    self.past = (None,) * config.num_layers
    self.num_layers = config.num_layers
def __init__(self,
             in_channels,
             out_channels,
             stride=1,
             down_sample=None,
             base_width=64,
             groups=1,
             use_se=False,
             platform="Ascend",
             **kwargs):
    """Build a 1x1 -> grouped 3x3 -> 1x1 bottleneck.

    Args:
        in_channels: Input channels of the first 1x1 convolution.
        out_channels: Base output width; the last convolution emits
            ``out_channels * self.expansion`` channels.
        stride: Stride of the 3x3 convolution.
        down_sample: Optional cell stored as ``self.down_sample``.
        base_width: Scales the intermediate width relative to 64.
        groups: Number of groups of the 3x3 convolution.
        use_se: When true, an ``SEBlock`` is created as ``self.se``.
        platform: ``"GPU"`` selects ``nn.Conv2d`` with ``group=groups``;
            any other value selects ``GroupConv``.
        **kwargs: Accepted and ignored by this constructor.
    """
    super(Bottleneck, self).__init__()

    mid_channels = int(out_channels * (base_width / 64.0)) * groups
    self.groups = groups

    self.conv1 = conv1x1(in_channels, mid_channels, stride=1)
    self.bn1 = nn.BatchNorm2d(mid_channels)
    self.relu = P.ReLU()

    self.conv3x3s = nn.CellList()

    if platform != "GPU":
        self.conv2 = GroupConv(mid_channels, mid_channels, 3, stride, pad=1, groups=groups)
    else:
        self.conv2 = nn.Conv2d(mid_channels, mid_channels, 3, stride,
                               pad_mode='pad', padding=1, group=groups)
    self.bn2 = nn.BatchNorm2d(mid_channels)

    self.conv3 = conv1x1(mid_channels, out_channels * self.expansion, stride=1)
    self.bn3 = nn.BatchNorm2d(out_channels * self.expansion)

    self.use_se = use_se
    if self.use_se:
        self.se = SEBlock(out_channels * self.expansion)

    # The flag records whether a down-sample cell was supplied.
    has_down_sample = down_sample is not None
    self.down_sample_flag = has_down_sample
    if has_down_sample:
        self.down_sample = down_sample

    self.cast = P.Cast()
    self.add = TensorAdd()