def __init__(self, num_users, num_items, num_factors, model_layers,
             mf_regularization, mlp_reg_layers, mf_dim):
    super(NCFModel, self).__init__()
    self.data_path = ""
    self.model_path = ""
    self.num_users = num_users
    self.num_items = num_items
    self.num_factors = num_factors
    self.model_layers = model_layers
    self.mf_regularization = mf_regularization
    self.mlp_reg_layers = mlp_reg_layers
    self.mf_dim = mf_dim

    self.num_layers = len(self.model_layers)  # Number of layers in the MLP

    if self.model_layers[0] % 2 != 0:
        raise ValueError("The first layer size should be a multiple of 2!")

    # Initializer for embedding layers
    self.embedding_initializer = "normal"

    self.embedding_user = nn.Embedding(
        self.num_users,
        self.num_factors + self.model_layers[0] // 2,
        embedding_table=self.embedding_initializer)
    self.embedding_item = nn.Embedding(
        self.num_items,
        self.num_factors + self.model_layers[0] // 2,
        embedding_table=self.embedding_initializer)

    self.mlp_dense1 = DenseLayer(in_channels=self.model_layers[0],
                                 out_channels=self.model_layers[1],
                                 activation="relu")
    self.mlp_dense2 = DenseLayer(in_channels=self.model_layers[1],
                                 out_channels=self.model_layers[2],
                                 activation="relu")

    # Logit dense layer
    self.logits_dense = DenseLayer(in_channels=self.model_layers[1],
                                   out_channels=1,
                                   weight_init="normal",
                                   activation=None)

    # ops definition
    self.mul = P.Mul()
    self.squeeze = P.Squeeze(axis=1)
    self.concat = P.Concat(axis=1)
def __init__(self, args, embedding_table=None):
    super(NewsEncoder, self).__init__()
    # categories
    self.category_embedding = nn.Embedding(args.n_categories, args.category_embedding_dim)
    self.category_dense = nn.Dense(args.category_embedding_dim, args.n_filters,
                                   has_bias=True, activation="relu")
    self.sub_category_embedding = nn.Embedding(args.n_sub_categories, args.category_embedding_dim)
    self.subcategory_dense = nn.Dense(args.category_embedding_dim, args.n_filters,
                                      has_bias=True, activation="relu")
    # title and abstract
    if embedding_table is None:
        word_embedding = [nn.Embedding(args.n_words, args.word_embedding_dim)]
    else:
        word_embedding = [nn.Embedding(args.n_words, args.word_embedding_dim,
                                       embedding_table=embedding_table)]
    title_CNN = [
        nn.Conv1d(args.word_embedding_dim, args.n_filters, kernel_size=args.window_size,
                  pad_mode='same', has_bias=True),
        nn.ReLU()
    ]
    abstract_CNN = [
        nn.Conv1d(args.word_embedding_dim, args.n_filters, kernel_size=args.window_size,
                  pad_mode='same', has_bias=True),
        nn.ReLU()
    ]
    if args.phase == "train":
        word_embedding.append(nn.Dropout(keep_prob=(1 - args.dropout_ratio)))
        title_CNN.append(nn.Dropout(keep_prob=(1 - args.dropout_ratio)))
        abstract_CNN.append(nn.Dropout(keep_prob=(1 - args.dropout_ratio)))
    self.word_embedding = nn.SequentialCell(word_embedding)
    self.title_CNN = nn.SequentialCell(title_CNN)
    self.abstract_CNN = nn.SequentialCell(abstract_CNN)
    self.title_attention = Attention(args.query_vector_dim, args.n_filters)
    self.abstract_attention = Attention(args.query_vector_dim, args.n_filters)
    self.total_attention = Attention(args.query_vector_dim, args.n_filters)
    self.pack = ops.Stack(axis=1)
    self.title_shape = (-1, args.n_words_title)
    self.abstract_shape = (-1, args.n_words_abstract)
def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
    super(AttnDecoderRNN, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.dropout_p = dropout_p
    self.max_length = max_length

    self.embedding = nn.Embedding(self.output_size, self.hidden_size)
    self.attn = nn.Dense(in_channels=self.hidden_size * 2,
                         out_channels=self.max_length).to_float(mstype.float16)
    self.attn_combine = nn.Dense(in_channels=self.hidden_size * 2,
                                 out_channels=self.hidden_size).to_float(mstype.float16)
    self.dropout = nn.Dropout(keep_prob=1.0 - self.dropout_p)
    self.gru = GRU(hidden_size, hidden_size).to_float(mstype.float16)
    self.out = nn.Dense(in_channels=self.hidden_size,
                        out_channels=self.output_size).to_float(mstype.float16)

    self.transpose = P.Transpose()
    self.concat = P.Concat(axis=2)
    self.concat1 = P.Concat(axis=1)
    self.softmax = P.Softmax(axis=1)
    self.relu = P.ReLU()
    self.log_softmax = P.LogSoftmax(axis=1)
    self.bmm = P.BatchMatMul()
    self.unsqueeze = P.ExpandDims()
    self.squeeze = P.Squeeze(1)
    self.squeeze1 = P.Squeeze(0)
    self.cast = P.Cast()
def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
             bidirectional, weight, labels, batch_size):
    super(SentimentNet, self).__init__()
    self.num_hiddens = num_hiddens
    self.num_layers = num_layers
    self.bidirectional = bidirectional
    self.batch_size = batch_size

    self.embedding = nn.Embedding(vocab_size, embed_size, use_one_hot=False,
                                  embedding_table=Tensor(weight))
    self.embedding.embedding_table.requires_grad = False
    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.h, self.c, self.w = InitialLstmWeight(embed_size, num_hiddens,
                                               num_layers, bidirectional)
    self.encoder = P.LSTM(input_size=embed_size,
                          hidden_size=self.num_hiddens,
                          num_layers=num_layers,
                          has_bias=False,
                          bidirectional=self.bidirectional,
                          dropout=0.0)
    self.concat = P.Concat(2)
    if self.bidirectional:
        self.decoder = nn.Dense(num_hiddens * 4, labels)
    else:
        self.decoder = nn.Dense(num_hiddens * 2, labels)

    self.slice1 = P.Slice()
    self.slice2 = P.Slice()
    self.reshape = P.Reshape()

    self.num_direction = 1
    if bidirectional:
        self.num_direction = 2
def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
             bidirectional, num_classes, weight, batch_size):
    super(SentimentNet, self).__init__()
    # Map words to vectors
    self.embedding = nn.Embedding(vocab_size, embed_size, embedding_table=weight)
    self.embedding.embedding_table.requires_grad = False
    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.encoder = nn.LSTM(input_size=embed_size,
                           hidden_size=num_hiddens,
                           num_layers=num_layers,
                           has_bias=True,
                           bidirectional=bidirectional,
                           dropout=0.0)
    w_init = init_lstm_weight(embed_size, num_hiddens, num_layers, bidirectional)
    self.encoder.weight = w_init
    self.h, self.c = lstm_default_state(batch_size, num_hiddens, num_layers, bidirectional)

    self.concat = P.Concat(1)
    if bidirectional:
        self.decoder = nn.Dense(num_hiddens * 4, num_classes)
    else:
        self.decoder = nn.Dense(num_hiddens * 2, num_classes)
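# lstm_default_state is referenced above but not shown here. A minimal sketch, assuming it
# just builds zero-initialized (h, c) states shaped
# (num_layers * num_directions, batch_size, hidden_size) for nn.LSTM; the exact shapes and
# dtype are assumptions, not taken from the source.
def lstm_default_state(batch_size, hidden_size, num_layers, bidirectional):
    """Sketch: zero initial hidden and cell states for an nn.LSTM encoder."""
    num_directions = 2 if bidirectional else 1
    h = Tensor(np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
    c = Tensor(np.zeros((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32))
    return h, c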
def __init__(self, config, is_training, use_one_hot_embeddings=False):
    super(BertModel, self).__init__()
    config = copy.deepcopy(config)
    if not is_training:
        config.hidden_dropout_prob = 0.0
        config.attention_probs_dropout_prob = 0.0

    self.seq_length = config.seq_length
    self.hidden_size = config.hidden_size
    self.num_hidden_layers = config.num_hidden_layers
    self.embedding_size = config.hidden_size
    self.token_type_ids = None

    self.last_idx = self.num_hidden_layers - 1
    output_embedding_shape = [-1, self.seq_length, self.embedding_size]

    self.bert_embedding_lookup = nn.Embedding(
        vocab_size=config.vocab_size,
        embedding_size=self.embedding_size,
        use_one_hot=use_one_hot_embeddings)

    self.bert_embedding_postprocessor = EmbeddingPostprocessor(
        embedding_size=self.embedding_size,
        embedding_shape=output_embedding_shape,
        use_relative_positions=config.use_relative_positions,
        use_token_type=True,
        token_type_vocab_size=config.type_vocab_size,
        use_one_hot_embeddings=use_one_hot_embeddings,
        initializer_range=0.02,
        max_position_embeddings=config.max_position_embeddings,
        dropout_prob=config.hidden_dropout_prob)

    self.bert_encoder = BertTransformer(
        hidden_size=self.hidden_size,
        seq_length=self.seq_length,
        num_attention_heads=config.num_attention_heads,
        num_hidden_layers=self.num_hidden_layers,
        intermediate_size=config.intermediate_size,
        attention_probs_dropout_prob=config.attention_probs_dropout_prob,
        use_one_hot_embeddings=use_one_hot_embeddings,
        initializer_range=config.initializer_range,
        hidden_dropout_prob=config.hidden_dropout_prob,
        use_relative_positions=config.use_relative_positions,
        hidden_act=config.hidden_act,
        compute_type=config.compute_type,
        return_all_encoders=True)

    self.cast = P.Cast()
    self.dtype = config.dtype
    self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
    self.slice = P.StridedSlice()

    self.squeeze_1 = P.Squeeze(axis=1)
    self.dense = nn.Dense(self.hidden_size, self.hidden_size,
                          activation="tanh",
                          weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
    self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
def __init__(self, config, is_training=True):
    super(Decoder, self).__init__()
    self.hidden_size = config.hidden_size
    self.vocab_size = config.trg_vocab_size
    self.embedding_size = config.decoder_embedding_size
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
    self.rnn = GRU(input_size=self.embedding_size + self.hidden_size * 2,
                   hidden_size=self.hidden_size).to_float(config.compute_type)
    self.text_len = config.max_length
    self.shape = P.Shape()
    self.transpose = P.Transpose()
    self.p = P.Print()
    self.cast = P.Cast()
    self.concat = P.Concat(axis=2)
    self.squeeze = P.Squeeze(axis=0)
    self.expandims = P.ExpandDims()
    self.log_softmax = P.LogSoftmax(axis=1)
    weight, bias = dense_default_state(self.embedding_size + self.hidden_size * 3,
                                       self.vocab_size)
    self.fc = nn.Dense(self.embedding_size + self.hidden_size * 3, self.vocab_size,
                       weight_init=weight, bias_init=bias).to_float(config.compute_type)
    self.attention = Attention(config)
    self.bmm = P.BatchMatMul()
    self.dropout = nn.Dropout(0.7)
    self.dtype = config.dtype
def __init__(self, img_dim, patch_dim, num_channels, embedding_dim, num_heads,
             num_layers, hidden_dim, num_queries, dropout_rate=0, norm=False,
             mlp=False, pos_every=False, no_pos=False, con_loss=False):
    super(VisionTransformer, self).__init__()
    self.norm = norm
    self.mlp = mlp
    self.embedding_dim = embedding_dim
    self.num_heads = num_heads
    self.patch_dim = patch_dim
    self.num_channels = num_channels
    self.img_dim = img_dim
    self.pos_every = pos_every
    self.num_patches = int((img_dim // patch_dim) ** 2)
    self.seq_length = self.num_patches
    self.flatten_dim = patch_dim * patch_dim * num_channels
    self.out_dim = patch_dim * patch_dim * num_channels
    self.no_pos = no_pos
    self.unf = _unfold_(patch_dim)
    self.fold = _fold_(patch_dim, output_shape=(img_dim, img_dim))

    if self.mlp is not True:
        self.linear_encoding = nn.Dense(self.flatten_dim, embedding_dim)
        self.mlp_head = nn.SequentialCell(
            nn.Dense(embedding_dim, hidden_dim),
            nn.Dropout(1. - dropout_rate),
            nn.ReLU(),
            nn.Dense(hidden_dim, self.out_dim),
            nn.Dropout(1. - dropout_rate))
        self.query_embed = nn.Embedding(num_queries, embedding_dim * self.seq_length)

    encoder_layer = TransformerEncoderLayer(embedding_dim, num_heads, hidden_dim, dropout_rate)
    self.encoder = TransformerEncoder(encoder_layer, num_layers)

    decoder_layer = TransformerDecoderLayer(embedding_dim, num_heads, hidden_dim, dropout_rate)
    self.decoder = TransformerDecoder(decoder_layer, num_layers)

    self.reshape = P.Reshape()
    self.tile = P.Tile()
    self.transpose = P.Transpose()
    if not self.no_pos:
        self.position_encoding = LearnedPositionalEncoding(
            self.seq_length, self.embedding_dim, self.seq_length)
    self.dropout_layer1 = nn.Dropout(1. - dropout_rate)
    self.con_loss = con_loss
def __init__(self, max_position_embeddings, embedding_dim, seq_length):
    super(LearnedPositionalEncoding, self).__init__()
    self.pe = nn.Embedding(max_position_embeddings, embedding_dim)
    self.seq_length = seq_length

    self.position_ids = Tensor(np.arange(self.seq_length).astype(np.int32))
    self.reshape = P.Reshape()
    self.position_ids = self.reshape(self.position_ids, (1, self.seq_length))
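# A minimal sketch of how this learned positional table might be queried in construct;
# the signature and the choice to return the raw embeddings (rather than add them to the
# input) are assumptions, not taken from the source.
def construct(self, x, position_ids=None):
    if position_ids is None:
        position_ids = self.position_ids
    # Look up one learned vector per position index, shape (1, seq_length, embedding_dim)
    return self.pe(position_ids)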
def __init__(self, config: Callable[..., None]) -> None:
    super().__init__()
    self.token_embeddings = nn.Embedding(
        config.vocab_size,
        config.hidden_size,
        embedding_table=TruncatedNormal(config.initializer_range),
        padding_idx=0,
    )
    self.position_embeddings = nn.Embedding(
        config.max_position_embeddings,
        config.hidden_size,
        embedding_table=TruncatedNormal(config.initializer_range),
    )
    self.token_type_embeddings = nn.Embedding(
        config.type_vocab_size,
        config.hidden_size,
        embedding_table=TruncatedNormal(config.initializer_range),
    )
    self.layer_norm = nn.LayerNorm((config.hidden_size,), epsilon=1e-12)
    self.dropout = nn.Dropout(1.0 - config.hidden_dropout_prob)
def __init__(self, config):
    super(PANGUALPHA_EmbeddingPipeLine, self).__init__()
    self.word_embedding = EmbeddingLookupPipeline(config)
    self.position_embedding = nn.Embedding(config.seq_length,
                                           config.embedding_size,
                                           embedding_table=Normal(0.02))
    self.position_embedding.gather.shard(((1, 1), (config.dp,)))
    self.position_embedding.expand.shard(((config.dp, 1),))
    self.add = P.TensorAdd().shard(((config.dp, 1, 1), (config.dp, 1, 1)))
    self.dropout = nn.Dropout(1 - config.dropout_rate)
    self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
    self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))
def __init__(self, elements, onehot=True, trainable=False):
    super().__init__()
    self.trainable = trainable

    # Get the number of elements, as well as the highest nuclear charge,
    # to use in the embedding vector
    self.nelems = len(elements)
    maxelem = int(max(elements) + 1)

    self.gate = nn.Embedding(maxelem, self.nelems, onehot)

    # Set trainable flag
    if not trainable:
        self.gate.embedding_table.requires_grad = False
def __init__(self, config, is_training=True):
    super(Decoder, self).__init__()
    self.vocab_size = config.ch_vocab_size
    self.hidden_size = config.hidden_size

    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
    self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
    self.dense = nn.Dense(self.hidden_size, self.vocab_size)
    self.softmax = nn.LogSoftmax(axis=2)
    self.cast = P.Cast()
def __init__(self,
             use_relative_positions,
             embedding_size,
             embedding_shape,
             use_token_type=False,
             token_type_vocab_size=16,
             use_one_hot_embeddings=False,
             initializer_range=0.02,
             max_position_embeddings=512,
             dropout_prob=0.1):
    super(EmbeddingPostprocessor, self).__init__()
    self.use_token_type = use_token_type
    self.token_type_vocab_size = token_type_vocab_size
    self.use_one_hot_embeddings = use_one_hot_embeddings
    self.max_position_embeddings = max_position_embeddings
    self.token_type_embedding = nn.Embedding(
        vocab_size=token_type_vocab_size,
        embedding_size=embedding_size,
        use_one_hot=use_one_hot_embeddings)
    self.shape_flat = (-1,)
    self.one_hot = P.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.1, mstype.float32)
    self.array_mul = P.MatMul()
    self.reshape = P.Reshape()
    self.shape = tuple(embedding_shape)
    self.dropout = nn.Dropout(1 - dropout_prob)
    self.gather = P.Gather()
    self.use_relative_positions = use_relative_positions
    self.slice = P.StridedSlice()
    _, seq, _ = self.shape
    self.full_position_embedding = nn.Embedding(
        vocab_size=max_position_embeddings,
        embedding_size=embedding_size,
        use_one_hot=False)
    self.layernorm = nn.LayerNorm((embedding_size,))
    self.position_ids = Tensor(np.arange(seq).reshape(-1, seq).astype(np.int32))
    self.add = P.Add()
def __init__(self, config):
    super(GPT_Model, self).__init__()
    self.get_attention_mask = AttentionMask(config)
    self.word_embedding = EmbeddingLookup(config)
    self.position_embedding = nn.Embedding(config.seq_length,
                                           config.embedding_size,
                                           embedding_table=TruncatedNormal(0.02))
    self.blocks = nn.CellList()
    for i in range(config.num_layers):
        self.blocks.append(Block(config, i + 1))
    self.layernorm = LayerNorm((config.embedding_size,)).to_float(config.compute_dtype)
    self.use_past = config.use_past
    self.past = tuple([None] * config.num_layers)
    self.num_layers = config.num_layers
def __init__(self, config, is_training=True):
    super(Encoder, self).__init__()
    self.vocab_size = config.en_vocab_size
    self.hidden_size = config.hidden_size
    if is_training:
        self.batch_size = config.batch_size
    else:
        self.batch_size = config.eval_batch_size

    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
    self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
    self.h = Tensor(np.zeros((self.batch_size, self.hidden_size)).astype(np.float16))
def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
             bidirectional, num_classes, weight, batch_size):
    super(SentimentNet, self).__init__()
    # Map words to vectors
    self.embedding = nn.Embedding(vocab_size, embed_size, embedding_table=weight)
    self.embedding.embedding_table.requires_grad = False
    self.trans = P.Transpose()
    self.perm = (1, 0, 2)

    if context.get_context("device_target") in STACK_LSTM_DEVICE:
        # stack lstm by user
        self.encoder = StackLSTM(input_size=embed_size,
                                 hidden_size=num_hiddens,
                                 num_layers=num_layers,
                                 has_bias=True,
                                 bidirectional=bidirectional,
                                 dropout=0.0)
        self.h, self.c = stack_lstm_default_state(batch_size, num_hiddens,
                                                  num_layers, bidirectional)
    elif context.get_context("device_target") == "GPU":
        # standard lstm
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=num_hiddens,
                               num_layers=num_layers,
                               has_bias=True,
                               bidirectional=bidirectional,
                               dropout=0.0)
        self.h, self.c = lstm_default_state(batch_size, num_hiddens,
                                            num_layers, bidirectional)
    else:
        self.encoder = StackLSTMAscend(input_size=embed_size,
                                       hidden_size=num_hiddens,
                                       num_layers=num_layers,
                                       has_bias=True,
                                       bidirectional=bidirectional)
        self.h, self.c = stack_lstm_default_state_ascend(batch_size, num_hiddens,
                                                         num_layers, bidirectional)

    self.concat = P.Concat(1)
    self.squeeze = P.Squeeze(axis=0)
    if bidirectional:
        self.decoder = nn.Dense(num_hiddens * 4, num_classes)
    else:
        self.decoder = nn.Dense(num_hiddens * 2, num_classes)
def __init__(self, config, is_training=True):
    super(Encoder, self).__init__()
    self.hidden_size = config.hidden_size
    self.vocab_size = config.src_vocab_size
    self.embedding_size = config.encoder_embedding_size
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
    self.rnn = BidirectionGRU(config, is_training=is_training).to_float(mstype.float16)
    self.fc = nn.Dense(2 * self.hidden_size, self.hidden_size).to_float(mstype.float16)
    self.shape = P.Shape()
    self.transpose = P.Transpose()
    self.p = P.Print()
    self.cast = P.Cast()
    self.text_len = config.max_length
    self.squeeze = P.Squeeze(axis=0)
    self.tanh = P.Tanh()
def __init__(self, config, is_training=True):
    super(Encoder, self).__init__()
    self.hidden_size = config.hidden_size
    self.vocab_size = config.src_vocab_size
    self.embedding_size = config.encoder_embedding_size
    self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
    self.rnn = GRU(input_size=self.embedding_size,
                   hidden_size=self.hidden_size,
                   bidirectional=True).to_float(config.compute_type)
    self.fc = nn.Dense(2 * self.hidden_size, self.hidden_size).to_float(config.compute_type)
    self.shape = P.Shape()
    self.transpose = P.Transpose()
    self.p = P.Print()
    self.cast = P.Cast()
    self.text_len = config.max_length
    self.squeeze = P.Squeeze(axis=0)
    self.tanh = P.Tanh()
    self.concat = P.Concat(2)
    self.dtype = config.dtype
def __init__(self, vocab_size, embedding_dims, num_class):
    super(FastText, self).__init__()
    self.vocab_size = vocab_size
    self.embeding_dims = embedding_dims
    self.num_class = num_class
    self.embeding_func = nn.Embedding(vocab_size=self.vocab_size,
                                      embedding_size=self.embeding_dims,
                                      padding_idx=0,
                                      embedding_table='Zeros')
    self.fc = nn.Dense(self.embeding_dims, out_channels=self.num_class,
                       weight_init=XavierUniform(1)).to_float(mstype.float16)
    self.reducesum = P.ReduceSum()
    self.expand_dims = P.ExpandDims()
    self.squeeze = P.Squeeze(axis=1)
    self.cast = P.Cast()
    self.tile = P.Tile()
    self.realdiv = P.RealDiv()
    self.fill = P.Fill()
    self.log_softmax = nn.LogSoftmax(axis=1)
def __init__(self, config):
    super(PANGUALPHA_ModelPipeline, self).__init__()
    self.pangu_alpha_embedding = PANGUALPHA_EmbeddingPipeLine(config).set_comm_fusion(1)
    self.pangu_alpha_embedding.stage = 0
    self.pangu_alpha_mask = PANGUALPHA_Mask(config)
    self.blocks = nn.CellList()
    dropout_recompute = False
    self.top_query_embedding = nn.Embedding(config.seq_length, config.embedding_size,
                                            embedding_table=TruncatedNormal(0.02))
    self.top_query_embedding.gather.shard(((1, 1), (config.dp,)))
    self.top_query_embedding.expand.shard(((config.dp, 1),))
    for i in range(config.num_layers):
        if i == config.num_layers - 1:
            self.top_query_embedding.set_comm_fusion(2)
            self.top_query_embedding.stage = i * config.stage_num // config.num_layers
            per_block = QueryLayer(config).set_comm_fusion(2)
        else:
            per_block = Block(config, i + 1).set_comm_fusion(2)
        per_block.stage = i * config.stage_num // config.num_layers
        per_block.recompute()
        self.blocks.append(per_block)
        if not dropout_recompute:
            per_block.attention.dropout.dropout_gen_mask.recompute(False).add_prim_attr("_side_effect", True)
            per_block.attention.prob_dropout.dropout_gen_mask.recompute(False).add_prim_attr("_side_effect", True)
            per_block.output.dropout.dropout_gen_mask.recompute(False).add_prim_attr("_side_effect", True)

    if config.self_layernorm:
        self.layernorm = LayerNorm((config.embedding_size,), config.dp).to_float(mstype.float32)
    else:
        self.layernorm = nn.LayerNorm((config.embedding_size,)).to_float(mstype.float32)
        self.layernorm.layer_norm.shard(((config.dp, 1, 1), (1,), (1,)))
    self.layernorm.set_comm_fusion(2)
    # self.layernorm.set_comm_fusion(3)
    self.layernorm.stage = config.stage_num - 1
    self.use_past = config.use_past
    self.past = tuple([None] * config.num_layers)
    self.dtype = config.compute_dtype
    self.num_layers = config.num_layers
def __init__(self, config, is_training=True, dropout=0.1):
    super(Decoder, self).__init__()
    self.vocab_size = config.ch_vocab_size
    self.hidden_size = config.hidden_size
    self.max_len = config.max_seq_length

    self.trans = P.Transpose()
    self.perm = (1, 0, 2)
    self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
    self.dropout = nn.Dropout(1 - dropout)
    self.attn = nn.Dense(self.hidden_size, self.max_len)
    self.softmax = nn.Softmax(axis=2)
    self.bmm = P.BatchMatMul()
    self.concat = P.Concat(axis=2)
    self.attn_combine = nn.Dense(self.hidden_size * 2, self.hidden_size)

    self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
    self.out = nn.Dense(self.hidden_size, self.vocab_size)
    self.logsoftmax = nn.LogSoftmax(axis=2)
    self.cast = P.Cast()
def __init__(self, vocab_len, word_len, num_classes, vec_length):
    super(TextCNN, self).__init__()
    self.vec_length = vec_length
    self.word_len = word_len
    self.num_classes = num_classes

    self.unsqueeze = P.ExpandDims()
    self.embedding = nn.Embedding(vocab_len, self.vec_length, embedding_table='normal')

    self.slice = P.Slice()
    self.layer1 = self.make_layer(kernel_height=3)
    self.layer2 = self.make_layer(kernel_height=4)
    self.layer3 = self.make_layer(kernel_height=5)

    self.concat = P.Concat(1)

    self.fc = nn.Dense(96 * 3, self.num_classes)
    self.drop = nn.Dropout(keep_prob=0.5)
    self.print = P.Print()
    self.reducemean = P.ReduceMax(keep_dims=False)  # note: despite the name, this op is ReduceMax
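# make_layer is referenced above but not shown here. A plausible sketch, assuming the usual
# TextCNN branch of Conv2d -> ReLU -> max-pool over time with 96 output channels (to match
# the 96 * 3 input of self.fc); the pad_mode and pooling window are assumptions, not taken
# from the source.
def make_layer(self, kernel_height):
    return nn.SequentialCell([
        # convolve a (kernel_height x vec_length) window over the 1-channel word/vector map
        nn.Conv2d(1, 96, kernel_size=(kernel_height, self.vec_length), pad_mode="valid"),
        nn.ReLU(),
        # pool over the remaining word positions so each filter yields one feature
        nn.MaxPool2d(kernel_size=(self.word_len - kernel_height + 1, 1)),
    ])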
def __init__(self, config):
    super(PANGUALPHA_Model, self).__init__()
    self.get_attention_mask = AttentionMask(config)
    self.word_embedding = EmbeddingLookup(config).set_comm_fusion(1)
    self.eod_reset = config.eod_reset
    if config.load_ckpt_path:
        # Loading the embedding table from the ckpt path:
        embedding_path = os.path.join(config.load_ckpt_path, 'position_embedding.npy')
        if os.path.exists(embedding_path):
            p_table = np.load(embedding_path)
            position_table_param = Tensor(p_table, mstype.float32)
        else:
            raise ValueError(f"{embedding_path} does not exist, please check "
                             "whether the position_embedding file exists.")
    else:
        position_table_param = TruncatedNormal(0.02)
    self.position_embedding = nn.Embedding(
        config.seq_length,
        config.embedding_size,
        embedding_table=position_table_param).set_comm_fusion(1)
    self.word_embedding.embedding_table.parallel_optimizer = False
    self.position_embedding.embedding_table.parallel_optimizer = False
    self.position_embedding.gather.shard(((1, 1), (config.dp,)))
    self.position_embedding.expand.shard(((config.dp, 1),))
    self.blocks = nn.CellList()
    fusion_group_num = 4
    fusion_group_size = config.num_layers // fusion_group_num
    fusion_group_size = max(fusion_group_size, 1)
    num_layers = config.num_layers - 1
    self.num_layers = num_layers

    for i in range(num_layers):
        per_block = Block(config, i + 1).set_comm_fusion(int(i / fusion_group_size) + 2)
        per_block.recompute()
        per_block.attention.dropout.dropout_gen_mask.recompute(False)
        per_block.attention.prob_dropout.dropout_gen_mask.recompute(False)
        per_block.output.dropout.dropout_gen_mask.recompute(False)
        per_block.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        per_block.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        per_block.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        self.blocks.append(per_block)

    if config.self_layernorm:
        self.layernorm = LayerNorm((config.embedding_size,), config.dp).to_float(
            mstype.float32).set_comm_fusion(int((num_layers - 1) / fusion_group_size) + 2)
    else:
        self.layernorm = nn.LayerNorm((config.embedding_size,)).to_float(
            mstype.float32).set_comm_fusion(int((num_layers - 1) / fusion_group_size) + 2)
        self.layernorm.layer_norm.shard(((config.dp, 1, 1), (1,), (1,)))
    self.layernorm.gamma.parallel_optimizer = False
    self.layernorm.beta.parallel_optimizer = False
    self.use_past = config.use_past
    self.past = tuple([None] * config.num_layers)
    self.add = P.TensorAdd().shard(((config.dp, 1, 1), (config.dp, 1, 1)))
    self.expand_dims = P.ExpandDims().shard(((config.dp, 1, 1),))
    self.dtype = config.compute_dtype
    self.dropout = nn.Dropout(1 - config.dropout_rate)
    self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
    self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))

    if config.load_ckpt_path:
        # Loading the embedding table from the ckpt path:
        embedding_path = os.path.join(config.load_ckpt_path, 'top_query_embedding.npy')
        if os.path.exists(embedding_path):
            top_query_table = np.load(embedding_path)
            top_query_table_param = Tensor(top_query_table, mstype.float32)
        else:
            raise ValueError(f"{embedding_path} does not exist, please check "
                             "whether the top_query_embedding file exists.")
    else:
        top_query_table_param = TruncatedNormal(0.02)
    self.top_query_embedding = nn.Embedding(
        config.seq_length,
        config.embedding_size,
        embedding_table=top_query_table_param).set_comm_fusion(
            int((config.num_layers - 1) / fusion_group_num) + 2)
    self.top_query_embedding.embedding_table.parallel_optimizer = False
    self.top_query_embedding.gather.shard(((1, 1), (config.dp,)))
    self.top_query_embedding.expand.shard(((config.dp, 1),))
    self.top_query_layer = QueryLayer(config)
    self.top_query_layer.recompute()
    self.top_query_layer.output.dropout.dropout_gen_mask.recompute(False)
    self.top_query_layer.attention.dropout.dropout_gen_mask.recompute(False)
    self.top_query_layer.attention.prob_dropout.dropout_gen_mask.recompute(False)
    self.top_query_layer.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
    self.top_query_layer.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
    self.top_query_layer.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
    self.top_query_layer.set_comm_fusion(int((config.num_layers - 1) / fusion_group_num) + 2)
def __init__(self,
             num_atomtypes,
             dim_atomembedding,
             min_rbf_dis,
             max_rbf_dis,
             num_rbf,
             output_dim=1,
             rbf_sigma=None,
             trainable_rbf=False,
             distance_expansion=None,
             cutoff=None,
             cutoff_network=None,
             rescale_rbf=False,
             use_all_interactions=False,
             ):
    super().__init__()
    self.num_atomtypes = num_atomtypes
    self.dim_atomembedding = dim_atomembedding
    self.num_rbf = num_rbf
    self.distance_expansion = distance_expansion
    self.rescale_rbf = rescale_rbf
    self.output_dim = output_dim
    self.network_name = 'GNN_Model'

    # Make a lookup table to store embeddings for each element (up to atomic
    # number max_z), each of which is a vector of size dim_atomembedding.
    self.embedding = nn.Embedding(num_atomtypes, dim_atomembedding,
                                  use_one_hot=True, embedding_table=Normal(1.0))

    self.filter = None
    self.fixed_atoms = False

    # Layer for expanding interatomic distances in a basis
    if distance_expansion is not None:
        self.distance_expansion = distance_expansion(
            d_min=min_rbf_dis,
            d_max=max_rbf_dis,
            num_rbf=num_rbf,
            sigma=rbf_sigma,
            trainable=trainable_rbf)
    else:
        self.distance_expansion = None

    if cutoff_network is None:
        self.cutoff_network = None
        self.cutoff = None
    else:
        if cutoff is None:
            self.cutoff_network = cutoff_network(max_rbf_dis)
            self.cutoff = max_rbf_dis
        else:
            self.cutoff_network = cutoff_network(cutoff)
            self.cutoff = cutoff

    self.interactions = None
    self.readout = None
    self.use_all_interactions = use_all_interactions
    self.gather_interactions = None
    self.debug_fun = None
    self.ones = P.Ones()
def Embedding(num_embeddings, embedding_dim, padding_idx, std=0.01):
    # Note: `std` is currently unused in this body; the embedding table keeps
    # MindSpore's default initialization.
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    return m
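# Illustrative usage of the factory above; the sizes are hypothetical, not from the source:
#   word_embed = Embedding(num_embeddings=32000, embedding_dim=512, padding_idx=0)
# MindSpore's nn.Embedding zero-initializes the row at padding_idx, which is the usual way
# to reserve index 0 for padding tokens.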
def __init__(self, weight, vocab_size, cell, batch_size):
    super(textrcnn, self).__init__()
    self.num_hiddens = 512
    self.embed_size = 300
    self.num_classes = 2
    self.batch_size = batch_size
    k = (1 / self.num_hiddens) ** 0.5

    self.embedding = nn.Embedding(vocab_size, self.embed_size, embedding_table=weight)
    self.embedding.embedding_table.requires_grad = False
    self.cell = cell

    self.cast = P.Cast()

    self.h1 = Tensor(np.zeros(shape=(self.batch_size, self.num_hiddens)).astype(np.float16))
    self.c1 = Tensor(np.zeros(shape=(self.batch_size, self.num_hiddens)).astype(np.float16))

    if cell == "lstm":
        self.lstm = P.DynamicRNN(forget_bias=0.0)
        self.w1_fw = Parameter(
            np.random.uniform(-k, k, (self.embed_size + self.num_hiddens,
                                      4 * self.num_hiddens)).astype(np.float16),
            name="w1_fw")
        self.b1_fw = Parameter(
            np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float16),
            name="b1_fw")
        self.w1_bw = Parameter(
            np.random.uniform(-k, k, (self.embed_size + self.num_hiddens,
                                      4 * self.num_hiddens)).astype(np.float16),
            name="w1_bw")
        self.b1_bw = Parameter(
            np.random.uniform(-k, k, (4 * self.num_hiddens)).astype(np.float16),
            name="b1_bw")
        self.h1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.c1 = Tensor(np.zeros(shape=(1, self.batch_size, self.num_hiddens)).astype(np.float16))

    if cell == "vanilla":
        self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens)
        self.rnnU_fw = nn.Dense(self.embed_size, self.num_hiddens)
        self.rnnW_bw = nn.Dense(self.num_hiddens, self.num_hiddens)
        self.rnnU_bw = nn.Dense(self.embed_size, self.num_hiddens)

    if cell == "gru":
        self.rnnWr_fw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
        self.rnnWz_fw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
        self.rnnWh_fw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
        self.rnnWr_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
        self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
        self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size, self.num_hiddens)
        self.ones = Tensor(np.ones(shape=(self.batch_size, self.num_hiddens)).astype(np.float16))
        self.rnnWr_fw.to_float(mstype.float16)
        self.rnnWz_fw.to_float(mstype.float16)
        self.rnnWh_fw.to_float(mstype.float16)
        self.rnnWr_bw.to_float(mstype.float16)
        self.rnnWz_bw.to_float(mstype.float16)
        self.rnnWh_bw.to_float(mstype.float16)

    self.transpose = P.Transpose()
    self.reduce_max = P.ReduceMax()
    self.expand_dims = P.ExpandDims()
    self.concat = P.Concat()
    self.reshape = P.Reshape()
    self.left_pad_tensor = Tensor(np.zeros((1, self.batch_size, self.num_hiddens)).astype(np.float16))
    self.right_pad_tensor = Tensor(np.zeros((1, self.batch_size, self.num_hiddens)).astype(np.float16))
    self.output_dense = nn.Dense(self.num_hiddens * 1, 2)
    self.concat0 = P.Concat(0)
    self.concat2 = P.Concat(2)
    self.concat1 = P.Concat(1)
    self.text_rep_dense = nn.Dense(2 * self.num_hiddens + self.embed_size, self.num_hiddens)
    self.mydense = nn.Dense(self.num_hiddens, 2)
    self.drop_out = nn.Dropout(keep_prob=0.7)
    self.tanh = P.Tanh()
    self.sigmoid = P.Sigmoid()
    self.slice = P.Slice()
    self.text_rep_dense.to_float(mstype.float16)
    self.mydense.to_float(mstype.float16)
    self.output_dense.to_float(mstype.float16)