def __init__(self):
    super(log_sum_exp, self).__init__()
    self.maxi = P.ReduceMax()
    self.maxi_dim = P.ReduceMax(keep_dims=True)
    self.log = P.Log()
    self.sums = P.ReduceSum()
    self.exp = P.Exp()
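# A minimal sketch (an assumption, not the original code) of a construct method that
# combines the ops declared above into a numerically stable log-sum-exp over the last
# axis: log(sum(exp(x))) = max(x) + log(sum(exp(x - max(x)))).
def construct(self, logits):
    max_keep = self.maxi_dim(logits, -1)             # [..., 1], kept for broadcasting
    shifted = self.exp(logits - max_keep)            # exponentials stay in a safe range
    summed = self.sums(shifted, -1)                  # reduced axis dropped
    return self.log(summed) + self.maxi(logits, -1)  # add the subtracted maximum back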
def construct(self, input_ids, attn_mask, token_type_ids, context_mask, square_mask,
              packing_mask, cache_mask, para_start_mapping, sent_end_mapping):
    """construct function"""
    state = self.encoder(attn_mask, input_ids, token_type_ids)
    # dst_type / dst_type2 are cast targets defined elsewhere in the module
    para_state = self.bmm(ops.Cast()(para_start_mapping, dst_type),
                          ops.Cast()(state, dst_type))  # [B, 2, D]
    sent_state = self.bmm(ops.Cast()(sent_end_mapping, dst_type),
                          ops.Cast()(state, dst_type))  # [B, max_sent, D]

    q_type, start, end, para_logit, sent_logit = self.downstream(
        ops.Cast()(para_state, dst_type2),
        ops.Cast()(sent_state, dst_type2),
        state,
        context_mask)

    outer = start[:, :, None] + end[:, None]

    outer_mask = cache_mask
    outer_mask = square_mask * outer_mask[None]
    outer = outer - 1e30 * (1 - outer_mask)
    outer = outer - 1e30 * packing_mask[:, :, None]
    max_row = ops.ReduceMax()(outer, 2)
    y1 = ops.Argmax()(max_row)
    max_col = ops.ReduceMax()(outer, 1)
    y2 = ops.Argmax()(max_col)

    return start, end, q_type, para_logit, sent_logit, y1, y2
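# A small self-contained illustration (assumed shapes and values, separate from the
# model above) of the span-selection step: entry (i, j) of `outer` scores the span
# starting at token i and ending at token j, invalid spans are pushed to -1e30, and
# y1 / y2 are the selected start and end positions.
import numpy as np

start = np.array([[0.1, 2.0, 0.3]])       # [B=1, L=3] start logits
end = np.array([[0.2, 0.1, 1.5]])         # [B=1, L=3] end logits
outer = start[:, :, None] + end[:, None]  # [B, L, L] pairwise span scores
mask = np.triu(np.ones((3, 3)))[None]     # only spans with end >= start are valid
outer = outer - 1e30 * (1 - mask)
y1 = outer.max(axis=2).argmax(axis=1)     # best start position -> [1]
y2 = outer.max(axis=1).argmax(axis=1)     # best end position   -> [2]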
def __init__(self):
    super(log_softmax, self).__init__()
    self.maxi = P.ReduceMax()
    self.log = P.Log()
    self.sums = P.ReduceSum()
    self.exp = P.Exp()
    self.axis = -1
    self.concat = P.Concat(-1)
    self.expanddims = P.ExpandDims()
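# A minimal sketch (an assumption, not the original code) of how these ops could form
# a numerically stable log-softmax along self.axis; the declared concat op is left out
# here because its role depends on the surrounding model.
def construct(self, logits):
    max_val = self.expanddims(self.maxi(logits, self.axis), self.axis)  # restore reduced axis
    shifted = logits - max_val                                          # avoid overflow in exp
    log_norm = self.log(self.sums(self.exp(shifted), self.axis))        # log of the partition sum
    return shifted - self.expanddims(log_norm, self.axis)               # x - max - logsumexp(x - max)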
def __init__(self, sparse=False):
    super(SoftmaxCrossEntropyExpand, self).__init__()
    self.exp = ops.Exp()
    self.sum = ops.ReduceSum(keep_dims=True)
    self.onehot = ops.OneHot()
    self.on_value = Tensor(1.0, mstype.float32)
    self.off_value = Tensor(0.0, mstype.float32)
    self.div = ops.RealDiv()
    self.log = ops.Log()
    self.sum_cross_entropy = ops.ReduceSum(keep_dims=False)
    self.mul = ops.Mul()
    self.mul2 = ops.Mul()
    self.mean = ops.ReduceMean(keep_dims=False)
    self.sparse = sparse
    self.max = ops.ReduceMax(keep_dims=True)
    self.sub = ops.Sub()
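# A hedged sketch of a construct method that expands softmax cross-entropy from the
# primitives declared above; this is an assumed composition of the declared ops, not
# the original body.
def construct(self, logit, label):
    logit_max = self.max(logit, -1)                   # [N, 1] per-row maximum
    exp = self.exp(self.sub(logit, logit_max))        # shifted exponentials
    softmax = self.div(exp, self.sum(exp, -1))        # normalized probabilities
    if self.sparse:                                   # expand class indices to one-hot
        label = self.onehot(label, ops.shape(logit)[1], self.on_value, self.off_value)
    log_prob = self.log(softmax)
    loss = self.sum_cross_entropy(self.mul(log_prob, label), -1)  # per-sample log p(y)
    loss = self.mul2(loss, Tensor(-1.0, mstype.float32))          # negate to get the loss
    return self.mean(loss, -1)                                    # average over the batch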
def __init__(self, filters, n_filters, max_chars_per_token, char_embed_dim,
             n_chars, n_highway, output_dim, activation):
    super().__init__()
    self.max_chars_per_token = max_chars_per_token

    # activation for convolutions
    if activation == 'tanh':
        self._activation = nn.Tanh()
    elif activation == 'relu':
        self._activation = nn.ReLU()
    else:
        raise ValueError("Unknown activation")

    # init char_embedding
    self.char_embedding = Embedding(n_chars + 1, char_embed_dim,
                                    embedding_table=Uniform(1.0), padding_idx=0)

    # run convolutions
    convolutions = []
    for (width, num) in filters:
        if activation == 'tanh':
            # Glorot-style std based on the conv fan-in (kernel width * embedding dim)
            cnn_weight_init = Normal(np.sqrt(1.0 / (width * char_embed_dim)))
        elif activation == 'relu':
            cnn_weight_init = Uniform(0.05)
        conv = nn.Conv1d(in_channels=char_embed_dim,
                         out_channels=num,
                         kernel_size=width,
                         has_bias=True,
                         weight_init=cnn_weight_init,
                         pad_mode='valid')
        convolutions.append(conv)
    self._convolutions = nn.CellList(convolutions)

    # highway layers
    self._highways = HighWay(n_filters, n_highway, 'relu')

    # projection layer
    self._projection = nn.Dense(n_filters, output_dim, has_bias=True,
                                weight_init=Normal(np.sqrt(1.0 / n_filters)))

    # array operations
    self.transpose = P.Transpose()
    self.concat = P.Concat(-1)
    self.max = P.ReduceMax()
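# A hedged sketch (assumed, not the original body) of a construct method for this
# character-CNN encoder: embed characters, run each width-specific convolution,
# max-pool over character positions, concatenate, then apply highways and projection.
def construct(self, char_ids):
    # [B*T, max_chars, char_embed_dim] -> channels-first for Conv1d
    char_embedding = self.char_embedding(char_ids)
    char_embedding = self.transpose(char_embedding, (0, 2, 1))
    convs = ()
    for conv in self._convolutions:
        convolved = conv(char_embedding)       # [B*T, num_filters, out_len]
        convolved = self.max(convolved, -1)    # max pooling over character positions
        convolved = self._activation(convolved)
        convs += (convolved,)
    token_embedding = self.concat(convs)       # [B*T, n_filters]
    token_embedding = self._highways(token_embedding)
    return self._projection(token_embedding)   # [B*T, output_dim]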