Example #1
def gru_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Single GRU step: returns the next hidden state."""
    if b_ih is None:
        gi = P.MatMul(False, True)(input, w_ih)    # input projection (w_ih transposed)
        gh = P.MatMul(False, True)(hidden, w_hh)   # hidden projection (w_hh transposed)
    else:
        gi = P.MatMul(False, True)(input, w_ih) + b_ih
        gh = P.MatMul(False, True)(hidden, w_hh) + b_hh
    i_r, i_i, i_n = P.Split(1, 3)(gi)   # reset / update / new-gate slices
    h_r, h_i, h_n = P.Split(1, 3)(gh)

    resetgate = P.Sigmoid()(i_r + h_r)
    inputgate = P.Sigmoid()(i_i + h_i)
    newgate = P.Tanh()(i_n + resetgate * h_n)
    hy = newgate + inputgate * (hidden - newgate)   # interpolate between new and old state

    return hy
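A minimal smoke test for the cell above; the import alias and the batch/feature sizes are assumptions, not part of the source:

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

x = Tensor(np.random.randn(2, 8).astype(np.float32))      # (batch, input_size)
h = Tensor(np.random.randn(2, 4).astype(np.float32))      # (batch, hidden_size)
w_ih = Tensor(np.random.randn(12, 8).astype(np.float32))  # (3*hidden_size, input_size)
w_hh = Tensor(np.random.randn(12, 4).astype(np.float32))  # (3*hidden_size, hidden_size)
h_next = gru_cell(x, h, w_ih, w_hh, None, None)           # bias-free branch
print(h_next.shape)                                       # (2, 4)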
Example #2
    def __init__(self, inc, outc, kernel_size=3, padding=1, stride=1, has_bias=False, modulation=True):
        super(DeformConv2d, self).__init__()
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.zero_padding = nn.Pad(((0, 0), (0, 0), (padding, padding), (padding, padding)))
        # final conv runs with stride == kernel_size over the resampled K*K grid of features
        self.conv = nn.Conv2d(inc, outc, kernel_size=kernel_size, pad_mode='valid', padding=0,
                              stride=kernel_size, has_bias=has_bias)

        # offset branch: predicts an (x, y) offset for each of the K*K kernel positions
        self.p_conv = nn.Conv2d(inc, 2*kernel_size*kernel_size, kernel_size=self.kernel_size,
                                pad_mode='pad', padding=self.padding, stride=self.stride)

        self.modulation = modulation
        if modulation:
            # modulation branch: predicts a scalar mask per kernel position (DCNv2)
            self.m_conv = nn.Conv2d(inc, kernel_size*kernel_size, kernel_size=self.kernel_size,
                                    pad_mode='valid', padding=0, stride=self.stride)
        if kernel_size % 2 == 0:
            raise ValueError("Only odd kernel sizes are supported, but the current kernel size is {}".format(kernel_size))
        self.N = kernel_size * kernel_size
        self.begin = kernel_size // 2
        self.sigmoid = ops.Sigmoid()
        self.dtype = ops.DType()
        self.perm_list = (0, 2, 3, 1)
        self.transpose = ops.Transpose()
        self.floor = ops.Floor()
        self.half = ops.Split(axis=-1, output_num=2)
        self.clip_value = ClipByValue()
        self.expand_dims = ops.ExpandDims()
        self.shape = ops.Shape()
        self.cast = ops.Cast()
        self._get_offset = GetOffsetPosition(self.begin, self.stride)
        self._get_surround = GetSurroundFeature()
        self._generate_fm = RegenerateFeatureMap(self.kernel_size)
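Assuming the helper cells referenced above (ClipByValue, GetOffsetPosition, GetSurroundFeature, RegenerateFeatureMap) are defined elsewhere in the same file, a hypothetical instantiation looks like:

# Illustrative channel counts; not from the source.
net = DeformConv2d(inc=64, outc=128, kernel_size=3, padding=1, stride=1, modulation=True)
# For a (N, 64, H, W) input, p_conv emits 2*3*3 = 18 offset channels and
# m_conv emits 3*3 = 9 modulation channels per spatial position.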
Example #3
def lstm_cell(input, hidden, w_ih, w_hh, b_ih, b_hh):
    """Single LSTM step: returns the next hidden and cell states."""
    hx, cx = hidden
    if b_ih is None:
        gates = P.MatMul(False, True)(input, w_ih) + P.MatMul(False, True)(hx, w_hh)
    else:
        gates = P.MatMul(False, True)(input, w_ih) + P.MatMul(False, True)(hx, w_hh) + b_ih + b_hh
    ingate, forgetgate, cellgate, outgate = P.Split(1, 4)(gates)

    ingate = P.Sigmoid()(ingate)
    forgetgate = P.Sigmoid()(forgetgate)
    cellgate = P.Tanh()(cellgate)
    outgate = P.Sigmoid()(outgate)

    cy = (forgetgate * cx) + (ingate * cellgate)   # new cell state
    hy = outgate * P.Tanh()(cy)                    # new hidden state

    return hy, cy
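The same kind of shape check works for the LSTM cell; the sizes and the `P` alias are again illustrative assumptions:

import numpy as np
from mindspore import Tensor
from mindspore.ops import operations as P

x = Tensor(np.random.randn(2, 8).astype(np.float32))      # (batch, input_size)
hx = Tensor(np.random.randn(2, 4).astype(np.float32))     # (batch, hidden_size)
cx = Tensor(np.random.randn(2, 4).astype(np.float32))     # (batch, hidden_size)
w_ih = Tensor(np.random.randn(16, 8).astype(np.float32))  # (4*hidden_size, input_size)
w_hh = Tensor(np.random.randn(16, 4).astype(np.float32))  # (4*hidden_size, hidden_size)
hy, cy = lstm_cell(x, (hx, cx), w_ih, w_hh, None, None)
print(hy.shape, cy.shape)                                  # (2, 4) (2, 4)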
Example #4
    def construct(self, inputs):
        current_input = inputs
        for layer in self._layers:
            carry_gate = layer[0](current_input)
            carry_gate = P.Sigmoid()(carry_gate)            # gate values in (0, 1)
            transform_gate = layer[1](current_input)
            transform_gate = self._activation(transform_gate)

            # highway-style blend of the transformed signal and the raw inputs
            current_input = carry_gate * transform_gate + (1 - carry_gate) * inputs
        return current_input
Example #5
    def __init__(self, num_classes=256, log_scale_min=-7.0, reduce=True):
        super(discretized_mix_logistic_loss, self).__init__()
        self.num_classes = num_classes
        self.log_scale_min = log_scale_min
        self.reduce = reduce
        self.transpose_op = P.Transpose()
        self.exp = P.Exp()
        self.sigmoid = P.Sigmoid()
        self.softplus = Stable_softplus()
        self.log = P.Log()
        self.cast = P.Cast()
        self.logsoftmax = P.LogSoftmax(-1)
        self.expand_dims = P.ExpandDims()
        self.tile = P.Tile()
        self.maximum = P.Maximum()
        self.sums = P.ReduceSum()
        self.lse = log_sum_exp()
        self.reshape = P.Reshape()
        self.factor = self.log(Tensor((self.num_classes - 1) / 2, ms.float32))
Example #6
    def construct(self, y_t_1, s_t_1, encoder_outputs, encoder_feature,
                  enc_padding_mask, c_t_1, extra_zeros, enc_batch_extend_vocab,
                  coverage, step):
        if not self.training and step == 0:
            h_decoder, c_decoder = s_t_1
            h_decoder = h_decoder.view(-1, self.hidden_dim)
            c_decoder = c_decoder.view(-1, self.hidden_dim)
            s_t_hat = P.Concat(1)(
                (h_decoder, c_decoder))  # (B, 2 * hidden_dim)
            c_t, _, coverage_next = self.attention_network(
                s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask,
                coverage)
            coverage = coverage_next

        y_t_1_embed = self.embedding(y_t_1)
        x = self.x_content(P.Concat(1)((c_t_1, y_t_1_embed)))
        lstm_out, s_t = self.lstm(P.ExpandDims()(x, 1), s_t_1)

        h_decoder, c_decoder = s_t
        h_decoder = h_decoder.view(-1, self.hidden_dim)
        c_decoder = c_decoder.view(-1, self.hidden_dim)
        s_t_hat = P.Concat(1)((h_decoder, c_decoder))

        c_t, attn_dist, coverage_next = self.attention_network(
            s_t_hat, encoder_outputs, encoder_feature, enc_padding_mask,
            coverage)

        if self.training or step > 0:
            coverage = coverage_next

        p_gen = None
        if self.pointer_gen:
            p_gen_input = P.Concat(1)(
                (c_t, s_t_hat, x))  # (B, 2 * 2 * hidden_dim + embed_dim)
            p_gen = self.p_gen_linear(p_gen_input)
            p_gen = P.Sigmoid()(p_gen)

        output = P.Concat(1)(
            (lstm_out.view(-1, self.hidden_dim), c_t))  # (B, hidden_dim * 3)
        output = self.out1(output)  # (B, hidden_dim)

        output = self.out2(output)  # (B, vocab_size)
        vocab_dist = P.Softmax(1)(output)

        if self.pointer_gen:
            vocab_dist_ = p_gen * vocab_dist
            attn_dist_ = (1 - p_gen) * attn_dist

            if extra_zeros is not None:
                vocab_dist_ = P.Concat(1)((vocab_dist_, extra_zeros))

            # like pytorch scatter_add
            batch_size, attn_len = enc_batch_extend_vocab.shape
            batch_num = range_tensor(0, batch_size)
            batch_num = P.ExpandDims()(batch_num, 1)
            batch_num = P.Tile()(batch_num, (1, attn_len))
            indices = P.Pack(2)((batch_num, enc_batch_extend_vocab))
            shape = (batch_size, vocab_dist_.shape[1])
            attn_dist_ = P.ScatterNd()(indices, attn_dist_, shape)
            final_dist = vocab_dist_ + attn_dist_
        else:
            final_dist = vocab_dist

        return final_dist, s_t, c_t, attn_dist, p_gen, coverage
Example #7
def read(args):
    """reader function"""
    db_file = args.wiki_db_file
    reader_feature_file = args.reader_feature_file
    reader_example_file = args.reader_example_file
    encoder_ck_file = args.reader_encoder_ck_file
    downstream_ck_file = args.reader_downstream_ck_file
    albert_model_path = args.albert_model_path
    reader_result_file = args.reader_result_file
    seed = args.seed
    sp_threshold = args.sp_threshold
    seq_len = args.seq_len
    batch_size = args.reader_batch_size
    para_limit = args.max_para_num
    sent_limit = args.max_sent_num

    random.seed(seed)
    np.random.seed(seed)

    t1 = time()

    doc_db = DocDB(db_file)

    generator = DataGenerator(feature_file_path=reader_feature_file,
                              example_file_path=reader_example_file,
                              batch_size=batch_size,
                              seq_len=seq_len,
                              para_limit=para_limit,
                              sent_limit=sent_limit,
                              task_type="reader")
    example_dict = generator.example_dict
    feature_dict = generator.feature_dict
    answer_dict = defaultdict(lambda: defaultdict(list))
    new_answer_dict = {}
    total_sp_dict = defaultdict(list)
    new_total_sp_dict = defaultdict(list)

    tokenizer = AlbertTokenizer.from_pretrained(albert_model_path)
    new_tokens = ['[q]', '[/q]', '<t>', '</t>', '[s]']
    tokenizer.add_tokens(new_tokens)

    reader = Reader(batch_size=batch_size,
                    encoder_ck_file=encoder_ck_file,
                    downstream_ck_file=downstream_ck_file)

    print("start reading ...")

    for _, batch in tqdm(enumerate(generator)):
        input_ids = Tensor(batch["context_idxs"], mstype.int32)
        attn_mask = Tensor(batch["context_mask"], mstype.int32)
        token_type_ids = Tensor(batch["segment_idxs"], mstype.int32)
        context_mask = Tensor(batch["context_mask"], mstype.float32)
        square_mask = Tensor(batch["square_mask"], mstype.float32)
        packing_mask = Tensor(batch["query_mapping"], mstype.float32)
        para_start_mapping = Tensor(batch["para_start_mapping"],
                                    mstype.float32)
        sent_end_mapping = Tensor(batch["sent_end_mapping"], mstype.float32)
        unique_ids = batch["unique_ids"]
        sent_names = batch["sent_names"]
        cache_mask = Tensor(
            np.tril(np.triu(np.ones((seq_len, seq_len)), 0), 30),
            mstype.float32)

        _, _, q_type, _, sent_logit, y1, y2 = reader(
            input_ids, attn_mask, token_type_ids, context_mask, square_mask,
            packing_mask, cache_mask, para_start_mapping, sent_end_mapping)

        type_prob = ops.Softmax()(q_type).asnumpy()

        answer_dict_ = convert_to_tokens(example_dict, feature_dict,
                                         batch['ids'],
                                         y1.asnumpy().tolist(),
                                         y2.asnumpy().tolist(),
                                         type_prob, tokenizer,
                                         sent_logit.asnumpy(), sent_names,
                                         unique_ids)
        for q_id in answer_dict_:
            answer_dict[q_id] = answer_dict_[q_id]

    for q_id in answer_dict:
        res = answer_dict[q_id]
        answer_text_ = res[0]
        sent_ = res[1]
        sent_names_ = res[2]
        new_answer_dict[q_id] = answer_text_

        predict_support_np = ops.Sigmoid()(Tensor(sent_,
                                                  mstype.float32)).asnumpy()

        for j in range(predict_support_np.shape[0]):
            if j >= len(sent_names_):
                break
            if predict_support_np[j] > sp_threshold:
                total_sp_dict[q_id].append(sent_names_[j])

    for _id in total_sp_dict:
        _sent_names = total_sp_dict[_id]
        for para in _sent_names:
            title = make_wiki_id(para[0], 0)
            para_original_title = doc_db.get_doc_info(title)[-1]
            para[0] = para_original_title
            new_total_sp_dict[_id].append(para)

    prediction = {'answer': new_answer_dict, 'sp': new_total_sp_dict}

    with open(reader_result_file, 'w') as f:
        json.dump(prediction, f, indent=4)

    t2 = time()

    print(f"reader cost time: {t2-t1} s")
Example #8
def glu(input, dim=-1):
    """Gated linear unit: split `input` along `dim` and gate one half with the sigmoid of the other."""
    a, b = ops.Split(dim, 2)(input)
    return a * ops.Sigmoid()(b)
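A quick check of the split-and-gate behavior; the input shape is an assumption:

import numpy as np
import mindspore.ops as ops
from mindspore import Tensor

x = Tensor(np.random.randn(2, 6).astype(np.float32))
y = glu(x)          # splits the last axis into two halves of width 3
print(y.shape)      # (2, 3)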
Example #9
    def __init__(self, dim: int = -1):
        super().__init__()
        self.split = ops.Split(dim, 2)
        self.sigmoid = ops.Sigmoid()
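The source shows only the constructor; a plausible construct pairing the two primitives (a sketch, not the source's code) would be:

    def construct(self, x):
        # Hypothetical forward pass matching the constructor above.
        a, b = self.split(x)          # halve x along the configured axis
        return a * self.sigmoid(b)    # gate one half with the other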
Example #10
    def __init__(self):
        super().__init__()
        self.sigmoid = ops.Sigmoid()
Example #11
    def __init__(self, hidden_size):
        super(Net, self).__init__()
        self.hidden_size = hidden_size
        self.fc1 = nn.Dense(2, hidden_size)
        self.fc2 = nn.Dense(hidden_size, 1)
        self.sig = ops.Sigmoid()
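Again only the constructor is shown; one plausible forward pass for this small MLP (hypothetical, including where the sigmoid sits) would be:

    def construct(self, x):
        # Hypothetical construct; the source does not show where the sigmoid is applied.
        x = self.sig(self.fc1(x))     # (N, 2) -> (N, hidden_size), squashed into (0, 1)
        return self.fc2(x)            # (N, hidden_size) -> (N, 1)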