Example No. 1
    def __init__(self,
                 num_users,
                 num_items,
                 num_factors,
                 model_layers,
                 mf_regularization,
                 mlp_reg_layers,
                 mf_dim):
        super(NCFModel, self).__init__()

        self.data_path = ""
        self.model_path = ""

        self.num_users = num_users
        self.num_items = num_items
        self.num_factors = num_factors
        self.model_layers = model_layers

        self.mf_regularization = mf_regularization
        self.mlp_reg_layers = mlp_reg_layers

        self.mf_dim = mf_dim

        self.num_layers = len(self.model_layers)  # Number of layers in the MLP

        if self.model_layers[0] % 2 != 0:
            raise ValueError("The first layer size should be multiple of 2!")

        # Initializer for embedding layers
        self.embedding_initializer = "normal"

        self.embedding_user = nn.Embedding(
            self.num_users,
            self.num_factors + self.model_layers[0] // 2,
            embedding_table=self.embedding_initializer
        )
        self.embedding_item = nn.Embedding(
            self.num_items,
            self.num_factors + self.model_layers[0] // 2,
            embedding_table=self.embedding_initializer
        )

        self.mlp_dense1 = DenseLayer(in_channels=self.model_layers[0],
                                     out_channels=self.model_layers[1],
                                     activation="relu")
        self.mlp_dense2 = DenseLayer(in_channels=self.model_layers[1],
                                     out_channels=self.model_layers[2],
                                     activation="relu")

        # Logit dense layer
        self.logits_dense = DenseLayer(in_channels=self.model_layers[1],
                                       out_channels=1,
                                       weight_init="normal",
                                       activation=None)

        # ops definition
        self.mul = P.Mul()
        self.squeeze = P.Squeeze(axis=1)
        self.concat = P.Concat(axis=1)
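
For context, the two lookup tables above follow the standard MindSpore embedding pattern; a minimal, self-contained sketch with illustrative sizes (not the hyperparameters NCF actually uses):

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

num_users, num_items, dim = 100, 200, 16
embedding_user = nn.Embedding(num_users, dim, embedding_table="normal")
embedding_item = nn.Embedding(num_items, dim, embedding_table="normal")

user_ids = Tensor(np.array([3, 7], dtype=np.int32))
item_ids = Tensor(np.array([5, 9], dtype=np.int32))
user_vec = embedding_user(user_ids)  # shape (2, dim)
item_vec = embedding_item(item_ids)  # shape (2, dim)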
Example No. 2
    def __init__(self, args, embedding_table=None):
        super(NewsEncoder, self).__init__()
        # categories
        self.category_embedding = nn.Embedding(args.n_categories,
                                               args.category_embedding_dim)
        self.category_dense = nn.Dense(args.category_embedding_dim,
                                       args.n_filters,
                                       has_bias=True,
                                       activation="relu")

        self.sub_category_embedding = nn.Embedding(args.n_sub_categories,
                                                   args.category_embedding_dim)
        self.subcategory_dense = nn.Dense(args.category_embedding_dim,
                                          args.n_filters,
                                          has_bias=True,
                                          activation="relu")

        # title and abstract
        if embedding_table is None:
            word_embedding = [
                nn.Embedding(args.n_words, args.word_embedding_dim)
            ]
        else:
            word_embedding = [
                nn.Embedding(args.n_words,
                             args.word_embedding_dim,
                             embedding_table=embedding_table)
            ]
        title_CNN = [
            nn.Conv1d(args.word_embedding_dim,
                      args.n_filters,
                      kernel_size=args.window_size,
                      pad_mode='same',
                      has_bias=True),
            nn.ReLU()
        ]
        abstract_CNN = [
            nn.Conv1d(args.word_embedding_dim,
                      args.n_filters,
                      kernel_size=args.window_size,
                      pad_mode='same',
                      has_bias=True),
            nn.ReLU()
        ]
        if args.phase == "train":
            word_embedding.append(
                nn.Dropout(keep_prob=(1 - args.dropout_ratio)))
            title_CNN.append(nn.Dropout(keep_prob=(1 - args.dropout_ratio)))
            abstract_CNN.append(nn.Dropout(keep_prob=(1 - args.dropout_ratio)))
        self.word_embedding = nn.SequentialCell(word_embedding)
        self.title_CNN = nn.SequentialCell(title_CNN)
        self.abstract_CNN = nn.SequentialCell(abstract_CNN)
        self.title_attention = Attention(args.query_vector_dim, args.n_filters)
        self.abstract_attention = Attention(args.query_vector_dim,
                                            args.n_filters)
        self.total_attention = Attention(args.query_vector_dim, args.n_filters)
        self.pack = ops.Stack(axis=1)
        self.title_shape = (-1, args.n_words_title)
        self.abstract_shape = (-1, args.n_words_abstract)
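
The title and abstract branches above share one pattern: embed words, then run a same-padded 1-D convolution over the embedding channels and, during training, a dropout. A minimal sketch of that pattern with illustrative sizes, assuming a MindSpore version where nn.Dropout takes keep_prob as in the snippet:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

word_embedding_dim, n_filters, window_size = 300, 128, 3
title_cnn = nn.SequentialCell([
    nn.Conv1d(word_embedding_dim, n_filters, kernel_size=window_size,
              pad_mode="same", has_bias=True),
    nn.ReLU(),
    nn.Dropout(keep_prob=0.8),  # appended only in the training phase above
])
# nn.Conv1d expects (batch, channels, length), so embedded text is laid out channel-first.
x = Tensor(np.random.randn(2, word_embedding_dim, 20).astype(np.float32))
y = title_cnn(x)  # shape (2, n_filters, 20)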
Example No. 3
 def __init__(self, hidden_size, output_size, max_length, dropout_p=0.1):
     super(AttnDecoderRNN, self).__init__()
     self.hidden_size = hidden_size
     self.output_size = output_size
     self.dropout_p = dropout_p
     self.max_length = max_length
     self.embedding = nn.Embedding(self.output_size, self.hidden_size)
     self.attn = nn.Dense(in_channels=self.hidden_size * 2,
                          out_channels=self.max_length).to_float(
                              mstype.float16)
     self.attn_combine = nn.Dense(in_channels=self.hidden_size * 2,
                                  out_channels=self.hidden_size).to_float(
                                      mstype.float16)
     self.dropout = nn.Dropout(keep_prob=1.0 - self.dropout_p)
     self.gru = GRU(hidden_size, hidden_size).to_float(mstype.float16)
     self.out = nn.Dense(in_channels=self.hidden_size,
                         out_channels=self.output_size).to_float(
                             mstype.float16)
     self.transpose = P.Transpose()
     self.concat = P.Concat(axis=2)
     self.concat1 = P.Concat(axis=1)
     self.softmax = P.Softmax(axis=1)
     self.relu = P.ReLU()
     self.log_softmax = P.LogSoftmax(axis=1)
     self.bmm = P.BatchMatMul()
     self.unsqueeze = P.ExpandDims()
     self.squeeze = P.Squeeze(1)
     self.squeeze1 = P.Squeeze(0)
     self.cast = P.Cast()
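
The repeated .to_float(mstype.float16) calls above make each wrapped cell cast its inputs to half precision before computing, which is MindSpore's per-cell mixed-precision hook; a one-line illustration with arbitrary sizes:

import mindspore.nn as nn
import mindspore.common.dtype as mstype

dense_fp16 = nn.Dense(in_channels=8, out_channels=4).to_float(mstype.float16)
# inputs are cast to float16 at run time; the float32 parameters are cast alongside them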
Example No. 4
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, weight, labels, batch_size):
        super(SentimentNet, self).__init__()
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.batch_size = batch_size

        self.embedding = nn.Embedding(vocab_size, embed_size, use_one_hot=False, embedding_table=Tensor(weight))
        self.embedding.embedding_table.requires_grad = False
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.h, self.c, self.w = InitialLstmWeight(embed_size, num_hiddens, num_layers, bidirectional)
        self.encoder = P.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                              num_layers=num_layers, has_bias=False,
                              bidirectional=self.bidirectional, dropout=0.0)
        self.concat = P.Concat(2)
        if self.bidirectional:
            self.decoder = nn.Dense(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Dense(num_hiddens * 2, labels)

        self.slice1 = P.Slice()
        self.slice2 = P.Slice()
        self.reshape = P.Reshape()

        self.num_direction = 1
        if bidirectional:
            self.num_direction = 2
Example No. 5
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, num_classes, weight, batch_size):
        super(SentimentNet, self).__init__()
        # Map words to vectors
        self.embedding = nn.Embedding(vocab_size,
                                      embed_size,
                                      embedding_table=weight)
        self.embedding.embedding_table.requires_grad = False
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.encoder = nn.LSTM(input_size=embed_size,
                               hidden_size=num_hiddens,
                               num_layers=num_layers,
                               has_bias=True,
                               bidirectional=bidirectional,
                               dropout=0.0)
        w_init = init_lstm_weight(embed_size, num_hiddens, num_layers,
                                  bidirectional)
        self.encoder.weight = w_init
        self.h, self.c = lstm_default_state(batch_size, num_hiddens,
                                            num_layers, bidirectional)

        self.concat = P.Concat(1)
        if bidirectional:
            self.decoder = nn.Dense(num_hiddens * 4, num_classes)
        else:
            self.decoder = nn.Dense(num_hiddens * 2, num_classes)
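
Both SentimentNet variants in this collection freeze a pretrained word-embedding table the same way; a small self-contained sketch in which a random matrix stands in for the real pretrained vectors:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

vocab_size, embed_size = 1000, 50
weight = Tensor(np.random.randn(vocab_size, embed_size).astype(np.float32))
embedding = nn.Embedding(vocab_size, embed_size, embedding_table=weight)
embedding.embedding_table.requires_grad = False  # keep the pretrained vectors fixed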
Example No. 6
    def __init__(self,
                 config,
                 is_training,
                 use_one_hot_embeddings=False):
        super(BertModel, self).__init__()
        config = copy.deepcopy(config)
        if not is_training:
            config.hidden_dropout_prob = 0.0
            config.attention_probs_dropout_prob = 0.0

        self.seq_length = config.seq_length
        self.hidden_size = config.hidden_size
        self.num_hidden_layers = config.num_hidden_layers
        self.embedding_size = config.hidden_size
        self.token_type_ids = None

        self.last_idx = self.num_hidden_layers - 1
        output_embedding_shape = [-1, self.seq_length, self.embedding_size]

        self.bert_embedding_lookup = nn.Embedding(
            vocab_size=config.vocab_size,
            embedding_size=self.embedding_size,
            use_one_hot=use_one_hot_embeddings)

        self.bert_embedding_postprocessor = EmbeddingPostprocessor(
            embedding_size=self.embedding_size,
            embedding_shape=output_embedding_shape,
            use_relative_positions=config.use_relative_positions,
            use_token_type=True,
            token_type_vocab_size=config.type_vocab_size,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=0.02,
            max_position_embeddings=config.max_position_embeddings,
            dropout_prob=config.hidden_dropout_prob)

        self.bert_encoder = BertTransformer(
            hidden_size=self.hidden_size,
            seq_length=self.seq_length,
            num_attention_heads=config.num_attention_heads,
            num_hidden_layers=self.num_hidden_layers,
            intermediate_size=config.intermediate_size,
            attention_probs_dropout_prob=config.attention_probs_dropout_prob,
            use_one_hot_embeddings=use_one_hot_embeddings,
            initializer_range=config.initializer_range,
            hidden_dropout_prob=config.hidden_dropout_prob,
            use_relative_positions=config.use_relative_positions,
            hidden_act=config.hidden_act,
            compute_type=config.compute_type,
            return_all_encoders=True)

        self.cast = P.Cast()
        self.dtype = config.dtype
        self.cast_compute_type = SaturateCast(dst_type=config.compute_type)
        self.slice = P.StridedSlice()

        self.squeeze_1 = P.Squeeze(axis=1)
        self.dense = nn.Dense(self.hidden_size, self.hidden_size,
                              activation="tanh",
                              weight_init=TruncatedNormal(config.initializer_range)).to_float(config.compute_type)
        self._create_attention_mask_from_input_mask = CreateAttentionMaskFromInputMask(config)
Example No. 7
 def __init__(self, config, is_training=True):
     super(Decoder, self).__init__()
     self.hidden_size = config.hidden_size
     self.vocab_size = config.trg_vocab_size
     self.embedding_size = config.decoder_embedding_size
     self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
     self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2, \
         hidden_size=self.hidden_size).to_float(config.compute_type)
     self.text_len = config.max_length
     self.shape = P.Shape()
     self.transpose = P.Transpose()
     self.p = P.Print()
     self.cast = P.Cast()
     self.concat = P.Concat(axis=2)
     self.squeeze = P.Squeeze(axis=0)
     self.expandims = P.ExpandDims()
     self.log_softmax = P.LogSoftmax(axis=1)
     weight, bias = dense_default_state(
         self.embedding_size + self.hidden_size * 3, self.vocab_size)
     self.fc = nn.Dense(self.embedding_size + self.hidden_size * 3,
                        self.vocab_size,
                        weight_init=weight,
                        bias_init=bias).to_float(config.compute_type)
     self.attention = Attention(config)
     self.bmm = P.BatchMatMul()
     self.dropout = nn.Dropout(0.7)
     self.expandims = P.ExpandDims()
     self.dtype = config.dtype
Example No. 8
    def __init__(self,
                 img_dim,
                 patch_dim,
                 num_channels,
                 embedding_dim,
                 num_heads,
                 num_layers,
                 hidden_dim,
                 num_queries,
                 dropout_rate=0,
                 norm=False,
                 mlp=False,
                 pos_every=False,
                 no_pos=False,
                 con_loss=False):
        super(VisionTransformer, self).__init__()
        self.norm = norm
        self.mlp = mlp
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.patch_dim = patch_dim
        self.num_channels = num_channels
        self.img_dim = img_dim
        self.pos_every = pos_every
        self.num_patches = int((img_dim // patch_dim) ** 2)
        self.seq_length = self.num_patches
        self.flatten_dim = patch_dim * patch_dim * num_channels
        self.out_dim = patch_dim * patch_dim * num_channels
        self.no_pos = no_pos
        self.unf = _unfold_(patch_dim)
        self.fold = _fold_(patch_dim, output_shape=(img_dim, img_dim))

        if self.mlp is not True:
            self.linear_encoding = nn.Dense(
                self.flatten_dim, embedding_dim)
            self.mlp_head = nn.SequentialCell(
                nn.Dense(embedding_dim, hidden_dim),
                nn.Dropout(1. - dropout_rate),
                nn.ReLU(),
                nn.Dense(hidden_dim, self.out_dim),
                nn.Dropout(1. - dropout_rate))

        self.query_embed = nn.Embedding(
            num_queries, embedding_dim * self.seq_length)
        encoder_layer = TransformerEncoderLayer(
            embedding_dim, num_heads, hidden_dim, dropout_rate)
        self.encoder = TransformerEncoder(encoder_layer, num_layers)

        decoder_layer = TransformerDecoderLayer(
            embedding_dim, num_heads, hidden_dim, dropout_rate)
        self.decoder = TransformerDecoder(decoder_layer, num_layers)

        self.reshape = P.Reshape()
        self.tile = P.Tile()
        self.transpose = P.Transpose()
        if not self.no_pos:
            self.position_encoding = LearnedPositionalEncoding(self.seq_length, self.embedding_dim, self.seq_length)

        self.dropout_layer1 = nn.Dropout(1. - dropout_rate)
        self.con_loss = con_loss
Example No. 9
    def __init__(self, max_position_embeddings, embedding_dim, seq_length):
        super(LearnedPositionalEncoding, self).__init__()
        self.pe = nn.Embedding(max_position_embeddings, embedding_dim)
        self.seq_length = seq_length

        self.position_ids = Tensor(np.arange(self.seq_length).astype(np.int32))
        self.reshape = P.Reshape()
        self.position_ids = self.reshape(self.position_ids,
                                         (1, self.seq_length))
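
The position ids above are simply 0 .. seq_length-1 reshaped to a (1, seq_length) row so they broadcast over the batch; a quick check with seq_length = 4:

import numpy as np
from mindspore import Tensor

seq_length = 4
position_ids = Tensor(np.arange(seq_length).astype(np.int32)).reshape((1, seq_length))
# position_ids is [[0, 1, 2, 3]], shape (1, 4)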
Example No. 10
    def __init__(self, config: Callable[..., None]) -> None:
        super().__init__()
        self.token_embeddings = nn.Embedding(
            config.vocab_size,
            config.hidden_size,
            embedding_table=TruncatedNormal(config.initializer_range),
            padding_idx=0,
        )
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings,
            config.hidden_size,
            embedding_table=TruncatedNormal(config.initializer_range),
        )
        self.token_type_embeddings = nn.Embedding(
            config.type_vocab_size,
            config.hidden_size,
            embedding_table=TruncatedNormal(config.initializer_range),
        )

        self.layer_norm = nn.LayerNorm((config.hidden_size, ), epsilon=1e-12)
        self.dropout = nn.Dropout(1.0 - config.hidden_dropout_prob)
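
The construct method is not part of this snippet, but tables like these are normally combined by summing the three lookups and then applying the LayerNorm and Dropout defined above; a hedged, standalone sketch of that pattern with illustrative sizes and ids:

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

hidden_size = 32
token_emb = nn.Embedding(100, hidden_size)    # vocab_size = 100
position_emb = nn.Embedding(16, hidden_size)  # max_position_embeddings = 16
type_emb = nn.Embedding(2, hidden_size)       # type_vocab_size = 2
layer_norm = nn.LayerNorm((hidden_size,), epsilon=1e-12)
dropout = nn.Dropout(0.9)                     # keep_prob = 1 - hidden_dropout_prob

input_ids = Tensor(np.array([[5, 6, 7]], dtype=np.int32))
position_ids = Tensor(np.array([[0, 1, 2]], dtype=np.int32))
token_type_ids = Tensor(np.array([[0, 0, 1]], dtype=np.int32))
out = dropout(layer_norm(token_emb(input_ids)
                         + position_emb(position_ids)
                         + type_emb(token_type_ids)))  # shape (1, 3, hidden_size)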
Example No. 11
 def __init__(self, config):
     super(PANGUALPHA_EmbeddingPipeLine, self).__init__()
     self.word_embedding = EmbeddingLookupPipeline(config)
     self.position_embedding = nn.Embedding(config.seq_length,
                                            config.embedding_size,
                                            embedding_table=Normal(0.02))
      self.position_embedding.gather.shard(((1, 1), (config.dp,)))
     self.position_embedding.expand.shard(((config.dp, 1),))
     self.add = P.TensorAdd().shard(((config.dp, 1, 1), (config.dp, 1, 1)))
     self.dropout = nn.Dropout(1 - config.dropout_rate)
     self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
     self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))
Example No. 12
    def __init__(self, elements, onehot=True, trainable=False):
        super().__init__()
        self.trainable = trainable

        # Get the number of elements, as well as the highest nuclear charge to use in the embedding vector
        self.nelems = len(elements)
        maxelem = int(max(elements) + 1)

        self.gate = nn.Embedding(maxelem, self.nelems, onehot)

        # Set trainable flag
        if not trainable:
            self.gate.embedding_table.requires_grad = False
Example No. 13
    def __init__(self, config, is_training=True):
        super(Decoder, self).__init__()

        self.vocab_size = config.ch_vocab_size
        self.hidden_size = config.hidden_size

        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
        self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
        self.dense = nn.Dense(self.hidden_size, self.vocab_size)
        self.softmax = nn.LogSoftmax(axis=2)
        self.cast = P.Cast()
Example No. 14
 def __init__(self,
              use_relative_positions,
              embedding_size,
              embedding_shape,
              use_token_type=False,
              token_type_vocab_size=16,
              use_one_hot_embeddings=False,
              initializer_range=0.02,
              max_position_embeddings=512,
              dropout_prob=0.1):
     super(EmbeddingPostprocessor, self).__init__()
     self.use_token_type = use_token_type
     self.token_type_vocab_size = token_type_vocab_size
     self.use_one_hot_embeddings = use_one_hot_embeddings
     self.max_position_embeddings = max_position_embeddings
     self.token_type_embedding = nn.Embedding(
         vocab_size=token_type_vocab_size,
         embedding_size=embedding_size,
         use_one_hot=use_one_hot_embeddings)
     self.shape_flat = (-1, )
     self.one_hot = P.OneHot()
     self.on_value = Tensor(1.0, mstype.float32)
     self.off_value = Tensor(0.1, mstype.float32)
     self.array_mul = P.MatMul()
     self.reshape = P.Reshape()
     self.shape = tuple(embedding_shape)
     self.dropout = nn.Dropout(1 - dropout_prob)
     self.gather = P.Gather()
     self.use_relative_positions = use_relative_positions
     self.slice = P.StridedSlice()
     _, seq, _ = self.shape
     self.full_position_embedding = nn.Embedding(
         vocab_size=max_position_embeddings,
         embedding_size=embedding_size,
         use_one_hot=False)
     self.layernorm = nn.LayerNorm((embedding_size, ))
     self.position_ids = Tensor(
         np.arange(seq).reshape(-1, seq).astype(np.int32))
     self.add = P.Add()
Example No. 15
 def __init__(self, config):
     super(GPT_Model, self).__init__()
     self.get_attention_mask = AttentionMask(config)
     self.word_embedding = EmbeddingLookup(config)
     self.position_embedding = nn.Embedding(config.seq_length, config.embedding_size,
                                            embedding_table=TruncatedNormal(0.02))
     self.blocks = nn.CellList()
     for i in range(config.num_layers):
         self.blocks.append(Block(config, i+1))
     self.layernorm = LayerNorm((config.embedding_size,)).to_float(config.compute_dtype)
     self.use_past = config.use_past
     self.past = tuple([None]*config.num_layers)
     self.num_layers = config.num_layers
Example No. 16
    def __init__(self, config, is_training=True):
        super(Encoder, self).__init__()
        self.vocab_size = config.en_vocab_size
        self.hidden_size = config.hidden_size
        if is_training:
            self.batch_size = config.batch_size
        else:
            self.batch_size = config.eval_batch_size

        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
        self.gru = GRU(config, is_training=is_training).to_float(mstype.float16)
        self.h = Tensor(np.zeros((self.batch_size, self.hidden_size)).astype(np.float16))
Example No. 17
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, num_classes, weight, batch_size):
        super(SentimentNet, self).__init__()
        # Map words to vectors
        self.embedding = nn.Embedding(vocab_size,
                                      embed_size,
                                      embedding_table=weight)
        self.embedding.embedding_table.requires_grad = False
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)

        if context.get_context("device_target") in STACK_LSTM_DEVICE:
            # stack lstm by user
            self.encoder = StackLSTM(input_size=embed_size,
                                     hidden_size=num_hiddens,
                                     num_layers=num_layers,
                                     has_bias=True,
                                     bidirectional=bidirectional,
                                     dropout=0.0)
            self.h, self.c = stack_lstm_default_state(batch_size, num_hiddens,
                                                      num_layers,
                                                      bidirectional)
        elif context.get_context("device_target") == "GPU":
            # standard lstm
            self.encoder = nn.LSTM(input_size=embed_size,
                                   hidden_size=num_hiddens,
                                   num_layers=num_layers,
                                   has_bias=True,
                                   bidirectional=bidirectional,
                                   dropout=0.0)
            self.h, self.c = lstm_default_state(batch_size, num_hiddens,
                                                num_layers, bidirectional)
        else:
            self.encoder = StackLSTMAscend(input_size=embed_size,
                                           hidden_size=num_hiddens,
                                           num_layers=num_layers,
                                           has_bias=True,
                                           bidirectional=bidirectional)
            self.h, self.c = stack_lstm_default_state_ascend(
                batch_size, num_hiddens, num_layers, bidirectional)

        self.concat = P.Concat(1)
        self.squeeze = P.Squeeze(axis=0)
        if bidirectional:
            self.decoder = nn.Dense(num_hiddens * 4, num_classes)
        else:
            self.decoder = nn.Dense(num_hiddens * 2, num_classes)
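
The branch above keys off MindSpore's active backend; for reference, that check is just the following (STACK_LSTM_DEVICE is a constant defined elsewhere in the source project):

from mindspore import context

device_target = context.get_context("device_target")  # e.g. "CPU", "GPU" or "Ascend"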
Example No. 18
 def __init__(self, config, is_training=True):
     super(Encoder, self).__init__()
     self.hidden_size = config.hidden_size
     self.vocab_size = config.src_vocab_size
     self.embedding_size = config.encoder_embedding_size
     self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
     self.rnn = BidirectionGRU(config, is_training=is_training).to_float(
         mstype.float16)
     self.fc = nn.Dense(2 * self.hidden_size,
                        self.hidden_size).to_float(mstype.float16)
     self.shape = P.Shape()
     self.transpose = P.Transpose()
     self.p = P.Print()
     self.cast = P.Cast()
     self.text_len = config.max_length
     self.squeeze = P.Squeeze(axis=0)
     self.tanh = P.Tanh()
Example No. 19
 def __init__(self, config, is_training=True):
     super(Encoder, self).__init__()
     self.hidden_size = config.hidden_size
     self.vocab_size = config.src_vocab_size
     self.embedding_size = config.encoder_embedding_size
     self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
     self.rnn = GRU(input_size=self.embedding_size, \
         hidden_size=self.hidden_size, bidirectional=True).to_float(config.compute_type)
     self.fc = nn.Dense(2 * self.hidden_size,
                        self.hidden_size).to_float(config.compute_type)
     self.shape = P.Shape()
     self.transpose = P.Transpose()
     self.p = P.Print()
     self.cast = P.Cast()
     self.text_len = config.max_length
     self.squeeze = P.Squeeze(axis=0)
     self.tanh = P.Tanh()
     self.concat = P.Concat(2)
     self.dtype = config.dtype
Example No. 20
 def __init__(self, vocab_size, embedding_dims, num_class):
     super(FastText, self).__init__()
     self.vocab_size = vocab_size
     self.embeding_dims = embedding_dims
     self.num_class = num_class
     self.embeding_func = nn.Embedding(vocab_size=self.vocab_size,
                                       embedding_size=self.embeding_dims,
                                       padding_idx=0,
                                       embedding_table='Zeros')
     self.fc = nn.Dense(self.embeding_dims,
                        out_channels=self.num_class,
                        weight_init=XavierUniform(1)).to_float(
                            mstype.float16)
     self.reducesum = P.ReduceSum()
     self.expand_dims = P.ExpandDims()
     self.squeeze = P.Squeeze(axis=1)
     self.cast = P.Cast()
     self.tile = P.Tile()
     self.realdiv = P.RealDiv()
     self.fill = P.Fill()
     self.log_softmax = nn.LogSoftmax(axis=1)
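
FastText's construct method is not shown, but the ops above (ReduceSum, RealDiv, Cast, ...) are the usual ingredients of an averaged bag-of-words sentence vector; a hedged sketch of that idea with illustrative shapes, not the project's exact code:

import numpy as np
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore import Tensor

embed = nn.Embedding(vocab_size=100, embedding_size=16, padding_idx=0,
                     embedding_table="normal")  # the snippet itself initializes with 'Zeros'
ids = Tensor(np.array([[3, 7, 0, 0]], dtype=np.int32))   # 0 is the padding index
vecs = embed(ids)                                        # shape (1, 4, 16)
lengths = Tensor(np.array([[2.0]], dtype=np.float32))    # real (non-padding) tokens per row
mean_vec = P.RealDiv()(P.ReduceSum()(vecs, 1), lengths)  # shape (1, 16)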
Example No. 21
    def __init__(self, config):
        super(PANGUALPHA_ModelPipeline, self).__init__()
        self.pangu_alpha_embedding = PANGUALPHA_EmbeddingPipeLine(config).set_comm_fusion(1)
        self.pangu_alpha_embedding.stage = 0
        self.pangu_alpha_mask = PANGUALPHA_Mask(config)
        self.blocks = nn.CellList()
        dropout_recompute = False
        self.top_query_embedding = nn.Embedding(config.seq_length, config.embedding_size,
                                                embedding_table=TruncatedNormal(0.02))
        self.top_query_embedding.gather.shard(((1, 1), (config.dp,)))
        self.top_query_embedding.expand.shard(((config.dp, 1),))
        for i in range(config.num_layers):
            if i == config.num_layers - 1:
                self.top_query_embedding.set_comm_fusion(2)
                self.top_query_embedding.stage = i * config.stage_num // config.num_layers
                per_block = QueryLayer(config).set_comm_fusion(2)
            else:
                per_block = Block(config, i + 1).set_comm_fusion(2)
            per_block.stage = i * config.stage_num // config.num_layers
            per_block.recompute()
            self.blocks.append(per_block)
            if not dropout_recompute:
                per_block.attention.dropout.dropout_gen_mask.recompute(False).add_prim_attr("_side_effect", True)
                per_block.attention.prob_dropout.dropout_gen_mask.recompute(False).add_prim_attr("_side_effect", True)
                per_block.output.dropout.dropout_gen_mask.recompute(False).add_prim_attr("_side_effect", True)

        if config.self_layernorm:
            self.layernorm = LayerNorm((config.embedding_size,), config.dp).to_float(mstype.float32)
        else:
            self.layernorm = nn.LayerNorm(
                (config.embedding_size,)).to_float(mstype.float32)
            self.layernorm.layer_norm.shard(((config.dp, 1, 1), (1,), (1,)))
        self.layernorm.set_comm_fusion(2)
        #self.layernorm.set_comm_fusion(3)
        self.layernorm.stage = config.stage_num - 1
        self.use_past = config.use_past
        self.past = tuple([None] * config.num_layers)
        self.dtype = config.compute_dtype
        self.num_layers = config.num_layers
Example No. 22
    def __init__(self, config, is_training=True, dropout=0.1):
        super(Decoder, self).__init__()

        self.vocab_size = config.ch_vocab_size
        self.hidden_size = config.hidden_size
        self.max_len = config.max_seq_length

        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.embedding = nn.Embedding(self.vocab_size, self.hidden_size)
        self.dropout = nn.Dropout(1 - dropout)
        self.attn = nn.Dense(self.hidden_size, self.max_len)
        self.softmax = nn.Softmax(axis=2)
        self.bmm = P.BatchMatMul()
        self.concat = P.Concat(axis=2)
        self.attn_combine = nn.Dense(self.hidden_size * 2, self.hidden_size)

        self.gru = GRU(config,
                       is_training=is_training).to_float(mstype.float16)
        self.out = nn.Dense(self.hidden_size, self.vocab_size)
        self.logsoftmax = nn.LogSoftmax(axis=2)
        self.cast = P.Cast()
Example No. 23
    def __init__(self, vocab_len, word_len, num_classes, vec_length):
        super(TextCNN, self).__init__()
        self.vec_length = vec_length
        self.word_len = word_len
        self.num_classes = num_classes

        self.unsqueeze = P.ExpandDims()
        self.embedding = nn.Embedding(vocab_len,
                                      self.vec_length,
                                      embedding_table='normal')

        self.slice = P.Slice()
        self.layer1 = self.make_layer(kernel_height=3)
        self.layer2 = self.make_layer(kernel_height=4)
        self.layer3 = self.make_layer(kernel_height=5)

        self.concat = P.Concat(1)

        self.fc = nn.Dense(96 * 3, self.num_classes)
        self.drop = nn.Dropout(keep_prob=0.5)
        self.print = P.Print()
        self.reducemean = P.ReduceMax(keep_dims=False)
Example No. 24
    def __init__(self, config):
        super(PANGUALPHA_Model, self).__init__()
        self.get_attention_mask = AttentionMask(config)
        self.word_embedding = EmbeddingLookup(config).set_comm_fusion(1)
        self.eod_reset = config.eod_reset
        if config.load_ckpt_path:
            # Loading the embedding table from the ckpt path:
            embedding_path = os.path.join(config.load_ckpt_path, 'position_embedding.npy')
            if os.path.exists(embedding_path):
                p_table = np.load(embedding_path)
                position_table_param = Tensor(p_table, mstype.float32)
            else:
                raise ValueError(f"{embedding_path} file not exits, please check whether position_embedding file exit.")
        else:
            position_table_param = TruncatedNormal(0.02)
            
        self.position_embedding = nn.Embedding(
            config.seq_length,
            config.embedding_size,
            embedding_table=position_table_param).set_comm_fusion(1)
        self.word_embedding.embedding_table.parallel_optimizer = False
        self.position_embedding.embedding_table.parallel_optimizer = False
        self.position_embedding.gather.shard(((1, 1), (config.dp,)))
        self.position_embedding.expand.shard(((config.dp, 1),))
        self.blocks = nn.CellList()
        fusion_group_num = 4
        fusion_group_size = config.num_layers // fusion_group_num
        fusion_group_size = max(fusion_group_size, 1)

        num_layers = config.num_layers - 1
        self.num_layers = num_layers

        for i in range(num_layers):
            per_block = Block(config, i + 1).set_comm_fusion(int(i / fusion_group_size) + 2)
            per_block.recompute()
            per_block.attention.dropout.dropout_gen_mask.recompute(False)
            per_block.attention.prob_dropout.dropout_gen_mask.recompute(False)
            per_block.output.dropout.dropout_gen_mask.recompute(False)
            per_block.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
            per_block.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
            per_block.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
            self.blocks.append(per_block)

        if config.self_layernorm:
            self.layernorm = LayerNorm((config.embedding_size,), config.dp).to_float(
                mstype.float32).set_comm_fusion(
                int((num_layers - 1) / fusion_group_size) + 2)
        else:
            self.layernorm = nn.LayerNorm((config.embedding_size,)).to_float(
                mstype.float32).set_comm_fusion(
                int((num_layers - 1) / fusion_group_size) + 2)
            self.layernorm.layer_norm.shard(((config.dp, 1, 1), (1,), (1,)))
        self.layernorm.gamma.parallel_optimizer = False
        self.layernorm.beta.parallel_optimizer = False
        self.use_past = config.use_past
        self.past = tuple([None] * config.num_layers)
        self.add = P.TensorAdd().shard(((config.dp, 1, 1), (config.dp, 1, 1)))
        self.expand_dims = P.ExpandDims().shard(((config.dp, 1, 1),))
        self.dtype = config.compute_dtype
        self.dropout = nn.Dropout(1 - config.dropout_rate)
        self.dropout.dropout_gen_mask.shard(((config.dp, 1, 1),))
        self.dropout.dropout_do_mask.shard(((config.dp, 1, 1),))

        if config.load_ckpt_path:
            # Loading the embedding table from the ckpt path:
            embedding_path = os.path.join(config.load_ckpt_path, 'top_query_embedding.npy')
            if os.path.exists(embedding_path):
                top_query_table = np.load(embedding_path)
                top_query_table_param = Tensor(top_query_table, mstype.float32)
            else:
                raise ValueError(f"{embedding_path} file not exits, please check whether top_query_embedding file exist.")
        else:
            top_query_table_param = TruncatedNormal(0.02)
            
        self.top_query_embedding = nn.Embedding(config.seq_length, config.embedding_size, \
                                                embedding_table=top_query_table_param).set_comm_fusion(
            int((config.num_layers - 1) / fusion_group_num) + 2)
        self.top_query_embedding.embedding_table.parallel_optimizer = False
        self.top_query_embedding.gather.shard(((1, 1), (config.dp,)))
        self.top_query_embedding.expand.shard(((config.dp, 1),))
        self.top_query_layer = QueryLayer(config)

        self.top_query_layer.recompute()

        self.top_query_layer.output.dropout.dropout_gen_mask.recompute(False)
        self.top_query_layer.attention.dropout.dropout_gen_mask.recompute(False)
        self.top_query_layer.attention.prob_dropout.dropout_gen_mask.recompute(False)

        self.top_query_layer.output.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        self.top_query_layer.attention.dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)
        self.top_query_layer.attention.prob_dropout.dropout_gen_mask.add_prim_attr("_side_effect", True)

        self.top_query_layer.set_comm_fusion(int((config.num_layers - 1) / fusion_group_num) + 2)
Example No. 25
    def __init__(
        self,
        num_atomtypes,
        dim_atomembedding,
        min_rbf_dis,
        max_rbf_dis,
        num_rbf,
        output_dim=1,
        rbf_sigma=None,
        trainable_rbf=False,
        distance_expansion=None,
        cutoff=None,
        cutoff_network=None,
        rescale_rbf=False,
        use_all_interactions=False,
    ):
        super().__init__()
        self.num_atomtypes = num_atomtypes
        self.dim_atomembedding = dim_atomembedding
        self.num_rbf = num_rbf
        self.distance_expansion = distance_expansion
        self.rescale_rbf = rescale_rbf
        self.output_dim = output_dim
        # ~ self.n_interactions=n_interactions

        self.network_name = 'GNN_Model'

        # make a lookup table to store embeddings for each element (up to atomic
        # number max_z) each of which is a vector of size dim_atomembedding
        self.embedding = nn.Embedding(num_atomtypes,
                                      dim_atomembedding,
                                      use_one_hot=True,
                                      embedding_table=Normal(1.0))

        self.filter = None

        self.fixed_atoms = False

        # layer for expanding interatomic distances in a basis
        if distance_expansion is not None:
            self.distance_expansion = distance_expansion(
                d_min=min_rbf_dis,
                d_max=max_rbf_dis,
                num_rbf=num_rbf,
                sigma=rbf_sigma,
                trainable=trainable_rbf)
        else:
            self.distance_expansion = None

        if cutoff_network is None:
            self.cutoff_network = None
            self.cutoff = None
        else:
            if cutoff is None:
                self.cutoff_network = cutoff_network(max_rbf_dis)
                self.cutoff = max_rbf_dis
            else:
                self.cutoff_network = cutoff_network(cutoff)
                self.cutoff = cutoff

        self.interactions = None

        self.readout = None
        self.use_all_interactions = use_all_interactions
        self.gather_interactions = None

        self.debug_fun = None

        self.ones = P.Ones()
Example No. 26
def Embedding(num_embeddings, embedding_dim, padding_idx, std=0.01):
    m = nn.Embedding(num_embeddings, embedding_dim, padding_idx=padding_idx)
    return m
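
Note that std is accepted but not used in this wrapper as shown. A minimal usage sketch with illustrative sizes:

import numpy as np
from mindspore import Tensor

emb = Embedding(num_embeddings=100, embedding_dim=16, padding_idx=0)
ids = Tensor(np.array([[1, 5, 0]], dtype=np.int32))
vectors = emb(ids)  # shape (1, 3, 16); index 0 is the padding entry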
Example No. 27
    def __init__(self, weight, vocab_size, cell, batch_size):
        super(textrcnn, self).__init__()
        self.num_hiddens = 512
        self.embed_size = 300
        self.num_classes = 2
        self.batch_size = batch_size
        k = (1 / self.num_hiddens)**0.5

        self.embedding = nn.Embedding(vocab_size,
                                      self.embed_size,
                                      embedding_table=weight)
        self.embedding.embedding_table.requires_grad = False
        self.cell = cell

        self.cast = P.Cast()

        self.h1 = Tensor(
            np.zeros(shape=(self.batch_size,
                            self.num_hiddens)).astype(np.float16))
        self.c1 = Tensor(
            np.zeros(shape=(self.batch_size,
                            self.num_hiddens)).astype(np.float16))

        if cell == "lstm":
            self.lstm = P.DynamicRNN(forget_bias=0.0)
            self.w1_fw = Parameter(np.random.uniform(
                -k, k, (self.embed_size + self.num_hiddens,
                        4 * self.num_hiddens)).astype(np.float16),
                                   name="w1_fw")
            self.b1_fw = Parameter(np.random.uniform(
                -k, k, (4 * self.num_hiddens)).astype(np.float16),
                                   name="b1_fw")
            self.w1_bw = Parameter(np.random.uniform(
                -k, k, (self.embed_size + self.num_hiddens,
                        4 * self.num_hiddens)).astype(np.float16),
                                   name="w1_bw")
            self.b1_bw = Parameter(np.random.uniform(
                -k, k, (4 * self.num_hiddens)).astype(np.float16),
                                   name="b1_bw")
            self.h1 = Tensor(
                np.zeros(shape=(1, self.batch_size,
                                self.num_hiddens)).astype(np.float16))
            self.c1 = Tensor(
                np.zeros(shape=(1, self.batch_size,
                                self.num_hiddens)).astype(np.float16))

        if cell == "vanilla":
            self.rnnW_fw = nn.Dense(self.num_hiddens, self.num_hiddens)
            self.rnnU_fw = nn.Dense(self.embed_size, self.num_hiddens)
            self.rnnW_bw = nn.Dense(self.num_hiddens, self.num_hiddens)
            self.rnnU_bw = nn.Dense(self.embed_size, self.num_hiddens)

        if cell == "gru":
            self.rnnWr_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWz_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWh_fw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWr_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWz_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.rnnWh_bw = nn.Dense(self.num_hiddens + self.embed_size,
                                     self.num_hiddens)
            self.ones = Tensor(
                np.ones(shape=(self.batch_size,
                               self.num_hiddens)).astype(np.float16))
            self.rnnWr_fw.to_float(mstype.float16)
            self.rnnWz_fw.to_float(mstype.float16)
            self.rnnWh_fw.to_float(mstype.float16)
            self.rnnWr_bw.to_float(mstype.float16)
            self.rnnWz_bw.to_float(mstype.float16)
            self.rnnWh_bw.to_float(mstype.float16)

        self.transpose = P.Transpose()
        self.reduce_max = P.ReduceMax()
        self.expand_dims = P.ExpandDims()
        self.concat = P.Concat()

        self.reshape = P.Reshape()
        self.left_pad_tensor = Tensor(
            np.zeros(
                (1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.right_pad_tensor = Tensor(
            np.zeros(
                (1, self.batch_size, self.num_hiddens)).astype(np.float16))
        self.output_dense = nn.Dense(self.num_hiddens * 1, 2)
        self.concat0 = P.Concat(0)
        self.concat2 = P.Concat(2)
        self.concat1 = P.Concat(1)
        self.text_rep_dense = nn.Dense(2 * self.num_hiddens + self.embed_size,
                                       self.num_hiddens)
        self.mydense = nn.Dense(self.num_hiddens, 2)
        self.drop_out = nn.Dropout(keep_prob=0.7)
        self.tanh = P.Tanh()
        self.sigmoid = P.Sigmoid()
        self.slice = P.Slice()
        self.text_rep_dense.to_float(mstype.float16)
        self.mydense.to_float(mstype.float16)
        self.output_dense.to_float(mstype.float16)