def forward(self, cur_input, state, encoder_outputs):
        # When the RNN has multiple hidden layers, take the single-layer hidden state closest to the output layer
        single_layer_state = [state[0][-1].expand_dims(0)]
        
        
        encoder_outputs = encoder_outputs.reshape((self.max_seq_len, -1,
                                                   self.encoder_num_hiddens))

        hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
                                             size=self.max_seq_len)
        encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
                                                hidden_broadcast, dim=2)

        energy = self.attention(encoder_outputs_and_hiddens)

        batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))
        batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)
        decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)
        # concatenate the embedded current input with the attention context
        input_and_context = nd.concat(
            nd.expand_dims(self.embedding(cur_input), axis=1),
            decoder_context, dim=2)
        concat_input = self.rnn_concat_input(input_and_context).reshape((1, -1, 0))

        concat_input = self.dropout(concat_input)

        state = [nd.broadcast_axis(single_layer_state[0], axis=0,size=self.num_layers)]

        output, state = self.rnn(concat_input, state)

        output = self.dropout(output)
        output = self.out(output).reshape((-3, -1))
        return output, state
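For reference, a tiny standalone illustration (mine, not part of the snippet above) of what nd.broadcast_axis does to the single-layer decoder state: an axis of size 1 is repeated to the requested size.

from mxnet import nd

state = nd.ones((1, 4, 16))                        # (1, batch_size, num_hiddens)
tiled = nd.broadcast_axis(state, axis=0, size=10)  # repeat along axis 0
print(tiled.shape)                                 # (10, 4, 16)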
Example #2
    def _forward_alg(self, feats):
        '''
        Forward algorithm for CRF probability computation.
        feats: a list whose length equals the sentence length; each element is an
        nd.array containing the feature vectors of every word in the batch,
        with shape (batch_size, tagset_size).
        '''
        # initialize the forward variables
        batch_size = feats[0].shape[0]
        alphas = [[-10000.] * self.tagset_size]
        alphas[0][self.tag2idx[START_TAG]] = 0.
        alphas = nd.array(alphas, ctx=self.ctx)
        alphas = nd.broadcast_axis(alphas, axis=0, size=batch_size)

        for feat in feats:
            alphas_t = []
            for next_tag in range(self.tagset_size):
                emit_score = feat[:, next_tag].reshape((batch_size, -1))
                # each entry of trans_score is the score of transitioning from tag i to next_tag
                trans_score = nd.broadcast_axis(
                    self.transitions.data()[next_tag].reshape((1, -1)),
                    axis=0,
                    size=batch_size)
                next_tag_var = alphas + emit_score + trans_score

                # log_sum_exp(next_tag_var) returns shape (batch_size, 1)
                alphas_t.append(log_sum_exp(next_tag_var))

            alphas = nd.concat(*alphas_t, dim=1)
        terminal_var = alphas + self.transitions.data()[self.tag2idx[STOP_TAG]]
        alpha = log_sum_exp(terminal_var)
        alpha = alpha.reshape((-1, ))
        assert alpha.shape == (batch_size, )
        return alpha
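This snippet relies on a log_sum_exp helper that is not shown in the listing. A minimal sketch of such a helper, assuming a numerically stable reduction over the last axis that keeps the reduced axis (matching the (batch_size, 1) shape comment above); other examples below, such as the update_alphas snippet, appear to assume a variant without keepdims.

from mxnet import nd

def log_sum_exp(vec):
    # vec: (..., tagset_size); returns (..., 1)
    max_score = nd.max(vec, axis=-1, keepdims=True)
    return max_score + nd.log(nd.sum(nd.exp(vec - max_score),
                                     axis=-1, keepdims=True))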
Example #3
 def forward(self, x_num, x_cat):
     # preprocess
     store_embed = self.store_embedding(x_cat[:, :, 0])
     embed_concat = nd.concat(
         store_embed,
         #x_cat[:,:,1:2],
         self.nYear_embedding(x_cat[:, :, 2]),
         self.nMonth_embedding(x_cat[:, :, 3]),
         self.mDay_embedding(x_cat[:, :, 4]),
         self.wday_embedding(x_cat[:, :, 5]),
         self.nHour_embedding(x_cat[:, :, 6]),
         dim=2)
     input_store = nd.broadcast_axis(store_embed[:, 0:1, :],
                                     axis=1,
                                     size=168)
     output = nd.concat(input_store,
                        x_num.reshape((x_num.shape[0], x_num.shape[1], 1)),
                        dim=2)
     output = nd.transpose(output, axes=(0, 2, 1))
     #skip_connections = []
     for sub_TCN in self.TCN:
         output = self.residue_forward(output, sub_TCN)
     output = nd.transpose(output, axes=(0, 2, 1))
     output = nd.reshape(output, (output.shape[0], 1, -1))
     #print(output.shape)
     output = nd.broadcast_axis(output, axis=1, size=24)
     #post_concat = nd.concat(output, embed_concat, dim=2)
     output = self.net(self.post_res(output, embed_concat))
     return output
Example #4
    def _viterbi_decode(self, feats):
        '''
        Viterbi decoding for the CRF: find the best tag path given the features.
        feats: a list whose length equals the sentence length; each element is an
        nd.array containing the feature vectors of every word in the batch,
        with shape (batch_size, tagset_size).
        '''
        backpointers = []
        batch_size = feats[0].shape[0]
        vvars = nd.full((1, self.tagset_size), -10000., ctx=self.ctx)
        vvars[0, self.tag2idx[START_TAG]] = 0
        # vvars shape: (batch_size, tagset_size)
        vvars = nd.broadcast_axis(vvars, axis=0, size=batch_size)

        for feat in feats:
            bptrs_t = []
            viterbivars_t = []

            for next_tag in range(self.tagset_size):
                next_tag_var = vvars + nd.broadcast_axis(
                    self.transitions.data()[next_tag].reshape((1, -1)),
                    axis=0,
                    size=batch_size)
                # best_tag_id shape: (batch_size, 1)
                best_tag_id = nd.argmax(next_tag_var, axis=1, keepdims=True)
                bptrs_t.append(best_tag_id)
                # each element of viterbivars_t has shape (batch_size, 1)
                viterbivars_t.append(
                    nd.pick(next_tag_var, best_tag_id, axis=1, keepdims=True))
            vvars = (nd.concat(*viterbivars_t, dim=1) + feat)
            # bptrs_t shape: (batch_size, tagset_size)
            bptrs_t = nd.concat(*bptrs_t, dim=1)
            backpointers.append(bptrs_t)

        # transition to STOP_TAG
        terminal_var = vvars + self.transitions.data()[self.tag2idx[STOP_TAG]]
        best_tag_id = nd.argmax(terminal_var, axis=1)
        # path_score shape: (batch_size,)
        path_score = nd.pick(terminal_var, best_tag_id, axis=1)

        # follow the backpointers to decode the best path
        best_path = [best_tag_id]
        for bptrs_t in reversed(backpointers):
            best_tag_id = nd.pick(bptrs_t, best_tag_id, axis=1)
            best_path.append(best_tag_id)
        # remove the start symbol
        # start shape: (batch_size,)
        start = best_path.pop()
        # check that start is the START_TAG
        for i in range(batch_size):
            assert start[i].asscalar() == self.tag2idx[START_TAG]
        best_path.reverse()

        # build the matrix of best paths
        new_best_path = []
        for best_tag_id in best_path:
            best_tag_id = best_tag_id.reshape((-1, 1))
            new_best_path.append(best_tag_id)
        best_path_matrix = nd.concat(*new_best_path, dim=1)

        return path_score, best_path_matrix
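The backpointer bookkeeping leans on the nd.argmax / nd.pick pair. A toy illustration (mine, not from the snippet) of how pick gathers one stored pointer per row:

from mxnet import nd

scores = nd.array([[0.1, 2.0, 0.3],
                   [1.5, 0.2, 0.9]])
best = nd.argmax(scores, axis=1)       # [1. 0.]
ptrs = nd.array([[7, 8, 9],
                 [4, 5, 6]])
print(nd.pick(ptrs, best, axis=1))     # [8. 4.]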
Example #5
    def forward(self, cur_input, state, encoder_outputs):
        # When the RNN has multiple layers, take the single-layer hidden state closest to the output layer.
        # state.shape is [(1, batch_size, decoder_hidden_dim)]
        single_layer_state = [state[0][-1].expand_dims(0)]
        # encoder_outputs.shape is (max_seq_len, batch_size * encoder_hidden_dim)
        encoder_outputs = encoder_outputs.reshape(
            (self.max_seq_len, -1, self.encoder_hidden_dim))
        # single_layer_state shape: [(1, batch_size, decoder_hidden_dim)]
        # hidden_broadcast shape: (max_seq_len, batch_size, decoder_hidden_dim)
        hidden_broadcast = nd.broadcast_axis(single_layer_state[0],
                                             axis=0,
                                             size=self.max_seq_len)

        # encoder_outputs_and_hiddens shape:
        # (max_seq_len, batch_size, encoder_hidden_dim + decoder_hidden_dim)
        encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
                                                hidden_broadcast,
                                                dim=2)

        # energy shape: (max_seq_len, batch_size, 1)
        energy = self.attention(encoder_outputs_and_hiddens)

        # batch_attention shape: (batch_size, 1, max_seq_len)
        batch_attention = nd.softmax(energy, axis=0).transpose((1, 2, 0))

        # batch_encoder_outputs shape: (batch_size, max_seq_len, encoder_hidden_dim)
        batch_encoder_outputs = encoder_outputs.swapaxes(0, 1)

        # decoder_context shape: (batch_size, 1, encoder_hidden_dim)
        decoder_context = nd.batch_dot(batch_attention, batch_encoder_outputs)

        # cur_input shape: (batch_size,)
        # input_and_context shape: (batch_size, 1, decoder_hidden_dim + encoder_hidden_dim)
        input_and_context = nd.concat(nd.expand_dims(self.embedding(cur_input),
                                                     axis=1),
                                      decoder_context,
                                      dim=2)
        # concat_input shape: (1, batch_size, decoder_hidden_dim)
        concat_input = self.rnn_concat_input(input_and_context).reshape(
            (1, -1, 0))
        concat_input = self.dropout(concat_input)

        # When the RNN has multiple layers, use the single-layer hidden state to initialize every layer's hidden state.
        state = [
            nd.broadcast_axis(single_layer_state[0],
                              axis=0,
                              size=self.num_layers)
        ]

        # note: state is a list containing a single nd.NDArray
        output, state = self.rnn(concat_input, state)
        output = self.dropout(output)
        output = self.out(output)
        output = nd.reshape(output, (-3, -1))
        # output shape: (batch_size * 1, output_dim)
        return output, state
Example #6
    def simple_broadcast(self, *args):
        """
        Broadcast a sequence of 1 dimensional arrays.

        Example::

            >>> simple_broadcast(
                astensor([1]),
                astensor([2, 2]),
                astensor([3, 3, 3]))
            [[1. 1. 1.]
             [2. 2. 2.]
             [3. 3. 3.]]

        Args:
            args (Array of Tensors): Sequence of arrays

        Returns:
            MXNet NDArray: The sequence broadcast together.
        """
        max_dim = max(map(len, args))
        broadcast = []
        for arg in args:
            if len(arg) < max_dim:
                broadcast.append(
                    nd.broadcast_axis(arg[0],
                                      axis=len(arg.shape) - 1,
                                      size=max_dim))
            else:
                broadcast.append(arg)
        return nd.stack(*broadcast)
Example #7
 def forward(self, xNum, xCat):
     # embed the auxiliary variables
     embedConcat = nd.concat(
             self.stationEmbedding(xCat[:,:,0]),
             self.nYearEmbedding(xCat[:,:,1]),
             self.nMonthEmbedding(xCat[:,:,2]),
             self.mDayEmbedding(xCat[:,:,3]),
             self.wdayEmbedding(xCat[:,:,4]),
             self.nHourEmbedding(xCat[:,:,5]),
                          dim=2)
     # The training and testing
     embedTrain = embedConcat[:,0:168,:]
     embedTest = embedConcat[:,168:,:]
     # The input series for encoding
     xNum = xNum.reshape((xNum.shape[0],xNum.shape[1],1))
     inputSeries = nd.concat(xNum, embedTrain, dim=2)
     inputSeries = nd.transpose(inputSeries, axes=(0,2,1))
     for subTCN in self.encoder:
         inputSeries = subTCN(inputSeries)
     # The output 
     output = inputSeries
     output = nd.transpose(output, axes=(0,2,1))
     output = nd.reshape(output,(output.shape[0], 1,-1))
     output = nd.broadcast_axis(output, axis=1, size=self.outputSize)
     # the decoder
     output=self.outputLayer(self.decoder(output, embedTest))
     #output = nd.sum_axis(output, axis=2)
     mu = nd.sum_axis(self.mu(output),  axis=2)
     sigma = nd.sum_axis(self.sigma(output),  axis=2)
     return mu, sigma
Example #8
 def forward(self, xNum, xCat):
     # embed the auxiliary variables
     embedConcat = nd.concat(self.stationEmbedding(xCat[:, :, 0]),
                             self.nYearEmbedding(xCat[:, :, 1]),
                             self.nMonthEmbedding(xCat[:, :, 2]),
                             self.mDayEmbedding(xCat[:, :, 3]),
                             self.wdayEmbedding(xCat[:, :, 4]),
                             self.nHourEmbedding(xCat[:, :, 5]),
                             dim=2)
     # The training and testing
     embedTrain = embedConcat[:, 0:self.inputSize, :]  # only consider the id for the input series
     embedTest = embedConcat[:, self.inputSize:, :]
     # The input series for encoding
     xNum = xNum.reshape((xNum.shape[0], xNum.shape[1], 1))
     #inputSeries = nd.concat(xNum, embedTrain, dim=2)
     inputSeries = xNum
     inputSeries = nd.transpose(inputSeries, axes=(0, 2, 1))
     for subTCN in self.encoder:
         inputSeries = subTCN(inputSeries)
     # The output
     output = inputSeries
     output = nd.transpose(output, axes=(0, 2, 1))
     output = nd.reshape(output, (output.shape[0], 1, -1))
     output = nd.broadcast_axis(output, axis=1, size=self.outputSize)
     # the decoder
     output = self.outputLayer(self.decoder(output, embedTest))
     #output = nd.sum_axis(output, axis=2)
     # The quantile outputs
     outputQ10 = nd.sum_axis(self.Q10(output), axis=2)
     outputQ50 = nd.sum_axis(self.Q50(output), axis=2)
     outputQ90 = nd.sum_axis(self.Q90(output), axis=2)
     return outputQ10, outputQ50, outputQ90
Example #9
        def update_alphas(data, alphas):
            """Calculate the batch update alpha for each time step

            Args:
                data (NDArray): NDArray shape: (seq_len, batch_size, self.tagset_size)
                alphas (NDArray): NDArray shape: (batch_size, self.tagset_size)
            """

            # alphas_t shape: (self.tagset_size, batch_size, self.tagset_size)
            alphas_t = nd.broadcast_axis(nd.expand_dims(alphas, axis=0), axis=0,
                                         size=self.tagset_size)
            # emit_score shape: (self.tagset_size, batch_size, 1)
            emit_score = nd.transpose(nd.expand_dims(data, axis=0), axes=(2, 1, 0))
            # trans_score shape: (self.tagset_size, 1, self.tagset_size)
            trans_score = nd.expand_dims(self.transitions.data(), axis=1)

            # next_tag_var shape: (self.tagset_size, batch_size, self.tagset_size)
            next_tag_var = alphas_t + emit_score + trans_score

            # alphas shape: (self.tagset_size, batch_size)
            alphas = log_sum_exp(next_tag_var)
            # alphas shape: (batch_size, self.tagset_size)
            alphas = nd.transpose(alphas, axes=(1, 0))

            return data, alphas
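update_alphas follows the (data, states) -> (output, new_states) contract of MXNet's control-flow scan operator nd.contrib.foreach, which the surrounding (unshown) code presumably uses to run it over the time axis. A self-contained toy scan with the same contract, offered as an assumption rather than the original driver:

from mxnet import nd

def step(x, state):
    new_state = state + x              # accumulate per time step, like the alphas
    return new_state, new_state

data = nd.arange(6).reshape((3, 2))    # (seq_len=3, batch_size=2)
init = nd.zeros((2,))
outs, final = nd.contrib.foreach(step, data, init)
print(final)                           # running sum over the sequence axis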
Example #10
 def forward(self, x_num, x_cat):
     # preprocess
     embed_concat = nd.concat(self.id_embedding(x_cat[:, :, 0]),
                              self.nYear_embedding(x_cat[:, :, 1]),
                              self.nMonth_embedding(x_cat[:, :, 2]),
                              self.mDay_embedding(x_cat[:, :, 3]),
                              self.wday_embedding(x_cat[:, :, 4]),
                              self.nHour_embedding(x_cat[:, :, 5]),
                              dim=2)
     embed_train = embed_concat[:, 0:168, :]
     embed_test = embed_concat[:, 168:, :]
     x_num = x_num.reshape(x_num.shape[0], x_num.shape[1], -1)
     conv_x = nd.concat(x_num, embed_train, dim=2)
     conv_x = nd.transpose(conv_x, axes=(0, 2, 1))
     output = conv_x
     #skip_connections = []
     for sub_TCN in self.TCN:
         output = self.residue_forward(output, sub_TCN)
         #skip_connections.append(skip)
     #print(skip_connections)
     #output1 = sum([s[:,:,-1] for s in skip_connections]
     output = output[:, :, -1:]
     output = nd.transpose(output, axes=(0, 2, 1))
     output = nd.broadcast_axis(output, axis=1, size=24)
     post_concat = nd.concat(output, embed_test, dim=2)
     output = self.net(self.post_res(post_concat))
     output = output.reshape(output.shape[0], -1)
     return output
Example #11
def attention_forward(model, enc_states, dec_state):
    # Broadcast the decoder hidden state to the same shape as the encoder
    # hidden states, then concatenate them
    dec_states = nd.broadcast_axis(
        dec_state.expand_dims(0), axis=0, size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    e = model(enc_and_dec_states)  # shape: (num_steps, batch_size, 1)
    alpha = nd.softmax(e, axis=0)  # softmax over the time-step dimension
    return (alpha * enc_states).sum(axis=0)  # return the context variable
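These attention_forward snippets expect a small scoring network passed in as model. A sketch of one plausible choice, an MLP over the concatenated states in the style of the d2l.ai attention example (the exact layers are an assumption, not taken from this listing):

from mxnet import nd
from mxnet.gluon import nn

def attention_model(attention_size):
    model = nn.Sequential()
    model.add(nn.Dense(attention_size, activation='tanh',
                       use_bias=False, flatten=False),
              nn.Dense(1, use_bias=False, flatten=False))
    return model

model = attention_model(10)
model.initialize()
enc_states = nd.zeros((7, 4, 8))   # (num_steps, batch_size, num_hiddens)
dec_state = nd.zeros((4, 8))       # (batch_size, num_hiddens)
print(attention_forward(model, enc_states, dec_state).shape)   # (4, 8)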
Example #12
def make_values_L(range_min, range_max, L, batch_size):

    logs_L = np.linspace(0, np.log(range_max * 1.0 / range_min), num=L // 2)  # num must be an integer

    values_L = nd.array(1.0 / range_min * np.exp(-logs_L))

    values_L = nd.expand_dims(nd.expand_dims(values_L, axis=0), axis=2)

    return nd.broadcast_axis(values_L, axis=0, size=batch_size)
Example #13
 def begin_state(self,*args,**kwargs):
     # usage: decoder.begin_state(batch_size=4, func=nd.zeros, vid_feat=features)
     video_feat = kwargs['vid_feat']
     init_state = self.vid_init_state(video_feat)
     init_state = init_state.reshape(1, *(init_state.shape))  # LNC layout with a single layer
     kwargs.pop('vid_feat')
     states = self.rnn.begin_state(*args,**kwargs)
     states[0] = nd.broadcast_axis(init_state,size=self.num_layers,axis=0)
     return states
Example #14
def attention_forward(model, enc_states, dec_state):
    # Broadcast the decoder hidden state to the same shape as the encoder
    # hidden states, then concatenate them.
    dec_states = nd.broadcast_axis(dec_state.expand_dims(0),
                                   axis=0,
                                   size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    e = model(enc_and_dec_states)  # shape: (num_steps, batch_size, 1)
    alpha = nd.softmax(e, axis=0)  # softmax over the time-step dimension
    return (alpha * enc_states).sum(axis=0)  # return the context variable
Example #15
def attention_forward(model, enc_states, dec_state):
    # Broadcast the decoder hidden state to match the encoder hidden states, then concatenate
    dec_states = nd.broadcast_axis(dec_state.expand_dims(0),
                                   axis=0,
                                   size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    e = model(enc_and_dec_states)  # shape: (num_steps, batch_size, 1)
    alpha = nd.softmax(e, axis=0)  # softmax over the time-step dimension
    return (alpha * enc_states).sum(axis=0)  # return the context variable
Example #16
 def postprocess(self, x, embed_test):
     output = nd.relu(x)
     output = self.conv_post_1(output)
     output = nd.relu(output)
     output = self.conv_post_2(output)
     output = nd.broadcast_axis(output, axis=1, size=24)
     embed_result = nd.concat(output, embed_test, dim=2)
     output = self.outputLayer(self.net(embed_result))
     output = output.reshape(output.shape[0], -1)
     return output
Example #17
    def forward(self, x_num, x_cat):
        # preprocess
        store_embed = self.store_embedding(x_cat[:, :, 0])
        embed_concat = nd.concat(
            store_embed,
            #x_cat[:,:,1:2],
            self.nYear_embedding(x_cat[:, :, 2]),
            self.nMonth_embedding(x_cat[:, :, 3]),
            self.mDay_embedding(x_cat[:, :, 4]),
            self.wday_embedding(x_cat[:, :, 5]),
            self.nHour_embedding(x_cat[:, :, 6]),
            dim=2)
        input_store = nd.broadcast_axis(store_embed[:, 0:1, :],
                                        axis=1,
                                        size=168)

        # the store id (a static, time-independent feature) is added as an extra channel to the input time series:
        # add an extra dimension to the series (x_num) and concatenate it with the store id
        output = nd.concat(input_store,
                           x_num.reshape((x_num.shape[0], x_num.shape[1], 1)),
                           dim=2)

        # reshape to (m, channels, width)
        output = nd.transpose(output, axes=(0, 2, 1))

        #skip_connections = []
        for sub_TCN in self.TCN:  # iterate over the ResidualTCN blocks
            output = self.residue_forward(
                output, sub_TCN
            )  # residue_forward is a method defined further down in this class

        output = nd.transpose(output, axes=(0, 2, 1))
        output = nd.reshape(output, (output.shape[0], 1, -1))
        #print(output.shape)

        output = nd.broadcast_axis(output, axis=1, size=24)
        #post_concat = nd.concat(output, embed_concat, dim=2)

        output = self.net(self.post_res(output, embed_concat))
        return output
Example #18
File: Kernels.py, Project: dyukha/KernelGCN
    def rbf_kernels(self, x: NDArray, y: NDArray):
        """
        Computes exp(-c ||x - y||^2).
        ||x - y||^2 = x . x + y . y - 2 x . y
        Compute each term separately. x are the original features, y are the features used for similarity.
        """

        cross_products = nd.dot(x, y)

        x_products = nd.sum(sqr(x), axis=1, keepdims=True)
        x_products = nd.broadcast_axis(x_products, axis=1, size=y.shape[1])

        y_products = nd.sum(sqr(y), axis=0, keepdims=True)
        y_products = nd.broadcast_axis(y_products, axis=0, size=x.shape[0])

        sqr_difs = x_products + y_products - 2 * cross_products
        print(nd.mean(x_products), nd.mean(y_products),
              nd.mean(cross_products))
        print(nd.mean(sqr_difs))
        res = nd.exp(-0.5 * sqr_difs)
        print(res.shape)
        return res
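A quick numerical check (mine) of the expansion used above, where x has shape (n, d) and y has shape (d, m) so that nd.dot(x, y) gives all pairwise inner products:

import numpy as np
from mxnet import nd

x = nd.random.uniform(shape=(3, 5))
y = nd.random.uniform(shape=(5, 4))
expanded = (nd.sum(x * x, axis=1, keepdims=True)
            + nd.sum(y * y, axis=0, keepdims=True)
            - 2 * nd.dot(x, y))
direct = nd.sum(nd.square(x.expand_dims(2) - y.expand_dims(0)), axis=1)
print(np.allclose(expanded.asnumpy(), direct.asnumpy(), atol=1e-5))   # True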
Example #19
    def positional(x):
        batch_size, length, model_dim = x.shape
        # (length, 1)
        pos = nd.arange(length).expand_dims(1)

        # (1, model_dim/2),  10000^(2i/model_dim)
        div = nd.power(10000, nd.arange(model_dim / 2) * 2 / model_dim)

        out = nd.zeros((length, model_dim))

        out[:, 0::2] = nd.sin(pos / div)
        out[:, 1::2] = nd.cos(pos / div)

        return nd.broadcast_axis(out.expand_dims(0), axis=0, size=batch_size)
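A small shape check of the encoding above (assuming positional is reachable as a plain function; in the original it sits inside a class). For model_dim = 4 the divisors are 10000^0 = 1 and 10000^0.5 = 100, so the first sine/cosine pair oscillates much faster than the second:

from mxnet import nd

x = nd.zeros((2, 6, 4))   # (batch_size, length, model_dim)
pe = positional(x)
print(pe.shape)           # (2, 6, 4)
print(pe[0, :3, :2])      # fast-varying first pair across positions 0..2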
Example #20
def make_dynamic_dec(T, values_L):

    values_T = nd.array(np.linspace(1, T, num=T), ctx=values_L.context)

    values_T = nd.expand_dims(nd.expand_dims(values_T, axis=0), axis=2)

    values_T = nd.broadcast_axis(values_T, axis=0, size=values_L.shape[0])

    values_TL = nd.batch_dot(values_T, values_L, transpose_b=True)

    values_sin = nd.sin(values_TL)
    values_cos = nd.cos(values_TL)

    return nd.concat(values_sin, values_cos, dim=2)
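A shape walk-through (mine) of how the two helpers fit together, assuming make_values_L from Example #12 and make_dynamic_dec just above live in the same module: make_values_L produces per-batch frequencies of shape (batch_size, L/2, 1), and make_dynamic_dec pairs them with time steps 1..T to build sine/cosine features of shape (batch_size, T, L).

values_L = make_values_L(range_min=1, range_max=100, L=8, batch_size=2)
print(values_L.shape)                   # (2, 4, 1)
enc = make_dynamic_dec(T=5, values_L=values_L)
print(enc.shape)                        # (2, 5, 8)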
Example #21
 def forward(self, x_num, x_cat):
     # preprocess
     store_embed = self.store_embedding(x_cat[:, :, 0])
     embed_concat = nd.concat(store_embed,
                              self.nYear_embedding(x_cat[:, :, 2]),
                              self.nMonth_embedding(x_cat[:, :, 3]),
                              self.mDay_embedding(x_cat[:, :, 4]),
                              self.wday_embedding(x_cat[:, :, 5]),
                              self.nHour_embedding(x_cat[:, :, 6]),
                              self.holiday_embedding(x_cat[:, :, 7]),
                              dim=2)
     input_store = nd.broadcast_axis(store_embed[:, 0:1, :],
                                     axis=1,
                                     size=168)
     output = nd.concat(input_store,
                        x_num.reshape((x_num.shape[0], x_num.shape[1], 1)),
                        dim=2)
     output = nd.transpose(output, axes=(0, 2, 1))
     #skip_connections = []
     for sub_TCN in self.TCN:
         output = self.residue_forward(output, sub_TCN)
         #skip_connections.append(output)
     #output =  sum([s[:,:,-1] for s in skip_connections])
     output = nd.transpose(output, axes=(0, 2, 1))
     output = nd.reshape(output, (output.shape[0], 1, -1))
     #print(output.shape)
     output = nd.broadcast_axis(output, axis=1, size=self.output_ax)
     #post_concat = nd.concat(output, embed_concat, dim=2)
     output = self.net(self.post_res(output, embed_concat))
     output_Q10 = self.Q10(output)
     output_Q10 = output_Q10.reshape(output_Q10.shape[0], -1)
     output_Q50 = self.Q50(output)
     output_Q50 = output_Q50.reshape(output_Q50.shape[0], -1)
     output_Q90 = self.Q90(output)
     output_Q90 = output_Q90.reshape(output_Q90.shape[0], -1)
     return output_Q10, output_Q50, output_Q90
Example #22
 def forward(self, query, key, value, mask):
     # Project and transpose from (batch_size, num_items, units) to
     # (batch_size * num_heads, num_items, p), where units = p * num_heads.
     query, key, value = [
         transpose_qkv(X, self.num_heads)
         for X in (self.W_q(query), self.W_k(key), self.W_v(value))
     ]
     if mask is not None:
         # Replicate mask for each of the num_heads heads
         mask = nd.broadcast_axis(nd.expand_dims(mask, axis=1),
                                 axis=1, size=self.num_heads)\
                 .reshape(shape=(-1, 0, 0), reverse=True)
     output = self.attention(query, key, value, mask)
     # Transpose from (batch_size * num_heads, num_items, p) back to
     # (batch_size, num_items, units)
     return transpose_output(output, self.num_heads)
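A quick illustration (not from the snippet) of the mask replication above: a (batch_size, num_queries, num_keys) mask is copied once per head, and reshape(..., reverse=True) folds the batch and head axes together so the mask lines up with the (batch_size * num_heads, ...) layout of the projected queries, keys and values:

from mxnet import nd

num_heads = 3
mask = nd.ones((2, 4, 6))                            # (batch_size, q_items, k_items)
mask = nd.broadcast_axis(nd.expand_dims(mask, axis=1),
                         axis=1, size=num_heads)     # (2, 3, 4, 6)
mask = mask.reshape(shape=(-1, 0, 0), reverse=True)  # (6, 4, 6)
print(mask.shape)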
Example #23
    def forward(self, cur_input, state, encoder_outputs):
        # When the RNN has multiple hidden layers, take the single-layer
        # hidden state closest to the output layer
        single_layer_state = [state[0][-1].expand_dims(0)]

        # encoder_outputs shape: (max_seq_len, -1, encoder_num_hiddens)
        encoder_outputs = encoder_outputs.reshape(
            (self.max_seq_len, -1, self.encoder_num_hiddens))
        # use the encoder output at the last time step: (batch_size, encoder_num_hiddens)
        last_outputs = encoder_outputs[-1, :, :]
        # add a time axis: (batch_size, 1, encoder_num_hiddens)
        last_outputs = nd.expand_dims(last_outputs, axis=1)

        #         last_outputs.swapaxes(0,1) # [16, 1, 256]
        #         hidden_broadcast = nd.broadcast_axis(single_layer_state[0], axis=0,
        #                                              size=self.max_seq_len)
        #         encoder_outputs_and_hiddens = nd.concat(encoder_outputs,
        #                                                 hidden_broadcast, dim=2)
        #print("after swap: " , last_outputs.shape)
        #print(nd.expand_dims(self.embedding(cur_input), axis=1).shape)
        input_and_context = nd.concat(nd.expand_dims(self.embedding(cur_input),
                                                     axis=1),
                                      last_outputs,
                                      dim=2)
        concat_input = self.rnn_concat_input(input_and_context).reshape(
            (1, -1, 0))

        concat_input = self.dropout(concat_input)

        state = [
            nd.broadcast_axis(single_layer_state[0],
                              axis=0,
                              size=self.num_layers)
        ]

        output, state = self.rnn(concat_input, state)

        output = self.dropout(output)
        #print('output.shape:\n')
        #print(output.shape)
        output = self.out(output)
        #print('dense shape:\n')
        #print(output.shape)
        output = output.reshape((-3, -1))
        return output, state
Example #24
def attention_forward(attention, enc_states, dec_state):
    """
    enc_states: (max_length, batch_size, num_hiddens)
    dec_state: (batch_size, num_hidden)
    
    """
    dec_state = dec_state.expand_dims(0)
    dec_states = nd.broadcast_axis(dec_state, axis=0, size=enc_states.shape[0])
    enc_and_dec_states = nd.concat(enc_states, dec_states, dim=2)
    """
    enc_and_dec_states: (max_length, batch_size, 2*num_hiddens)
    attention(enc_and_dec_states): (max_length, batch_size, 1)
    alpha_prob: (max_length, batch_size, 1)
    
    """

    alpha_prob = nd.softmax(attention(enc_and_dec_states), axis=0)
    return (alpha_prob * enc_states).sum(axis=0)
Example #25
 def forward(self, x_num, x_cat):
     # preprocess
     embed_concat = nd.concat(
         self.id_embedding(x_cat[:,:,0]),
         self.nYear_embedding(x_cat[:,:,1]),
         self.nMonth_embedding(x_cat[:,:,2]), dim=2)
     output = self.preprocess(x_num)
     for sub_TCN in self.TCN:
         output = self.residue_forward(output, sub_TCN)
     #output=nd.transpose(output, axes=(0,2,1))
     #print(output.shape)
     output = nd.broadcast_axis(output, axis=1, size=12)
     post_concat = nd.concat(output, embed_concat, dim=2)
     output=self.net(self.post_res(post_concat))
     #
     output_mu = self.mu(output)
     output_mu = output_mu.reshape(output_mu.shape[0],-1)
     output_sigma = self.sigma(output)
     output_sigma = output_sigma.reshape(output_sigma.shape[0],-1)
     return output_mu, output_sigma
Example #26
 def forward(self, x_num, x_cat):
     # preprocess
     embed_concat = nd.concat(
             self.store_embedding(x_cat[:,:,0]),
             #x_cat[:,:,1:2],
             self.nYear_embedding(x_cat[:,:,2]),
             self.nMonth_embedding(x_cat[:,:,3]),
             self.mDay_embedding(x_cat[:,:,4]),
             self.wday_embedding(x_cat[:,:,5]),
             self.nHour_embedding(x_cat[:,:,6]), dim=2)
     output = self.preprocess(x_num)
     for sub_TCN in self.TCN:
         output = self.residue_forward(output, sub_TCN)
     output=nd.transpose(output, axes=(0,2,1))
     output = nd.reshape(output,(output.shape[0], 1,-1))
     #print(output.shape)
     output = nd.broadcast_axis(output, axis=1, size=24)
     #post_concat = nd.concat(output, embed_concat, dim=2)
     output=self.net(self.post_res(output,embed_concat))
     return output
Example #27
        def update_decode(data, states):
            feat = data
            vvars_iner = states

            # vvars_t shape: (self.tagset_size, batch_size, self.tagset_size)
            vvars_t = nd.broadcast_axis(nd.expand_dims(vvars_iner, axis=0), axis=0,
                                        size=self.tagset_size)
            # trans shape: (self.tagset_size, 1, self.tagset_size)
            trans = nd.expand_dims(self.transitions.data(), axis=1)
            next_tag_var = vvars_t + trans

            # best_tag_id shape: (self.tagset_size, batch_size)
            best_tag_id = nd.argmax(next_tag_var, axis=-1)

            # bptrs_t, viterbivars_t  shape :(batch_size, tagset_size)
            viterbivars_t = nd.transpose(nd.pick(next_tag_var, best_tag_id, axis=-1), axes=(1, 0))
            bptrs_t = nd.transpose(best_tag_id, axes=(1, 0))

            vvars_iner = viterbivars_t + feat

            return bptrs_t, vvars_iner
Example #28
 def forward(self, x_num, x_cat):
     # preprocess
     embed_concat = nd.concat(self.store_embedding(x_cat[:, :, 0]),
                              x_cat[:, :, 1:2],
                              self.nYear_embedding(x_cat[:, :, 2]),
                              self.nMonth_embedding(x_cat[:, :, 3]),
                              self.mDay_embedding(x_cat[:, :, 4]),
                              self.wday_embedding(x_cat[:, :, 5]),
                              self.nHour_embedding(x_cat[:, :, 6]),
                              dim=2)
     output = self.preprocess(x_num)
     conv_result = self.pool1(self.conv2(self.conv1(output)))
     #conv_result = conv_result.reshape((conv_result.shape[0], conv_result.shape[2]))
     #output=nd.transpose(output, axes=(0,2,1))
     #output = nd.reshape(output,(output.shape[0], 1,-1))
     #print(output.shape)
     output = nd.broadcast_axis(conv_result, axis=1, size=24)
     post_concat = nd.concat(output, embed_concat, dim=2)
     #output=self.net(self.post_res(post_concat))
     output = self.net(self.post_res(output, embed_concat))
     return output
Example #29
    def simple_broadcast(self, *args):
        """
        Broadcast a sequence of 1 dimensional arrays.

        Example:

            >>> import pyhf
            >>> pyhf.set_backend(pyhf.tensor.mxnet_backend())
            >>> pyhf.tensorlib.simple_broadcast(
            ...   pyhf.tensorlib.astensor([1]),
            ...   pyhf.tensorlib.astensor([2, 3, 4]),
            ...   pyhf.tensorlib.astensor([5, 6, 7]))
            <BLANKLINE>
            [[1. 1. 1.]
             [2. 3. 4.]
             [5. 6. 7.]]
            <NDArray 3x3 @cpu(0)>

        Args:
            args (Array of Tensors): Sequence of arrays

        Returns:
            MXNet NDArray: The sequence broadcast together.
        """
        args = [self.astensor(arg) for arg in args]
        max_dim = max(map(len, args))
        try:
            assert not [arg for arg in args if 1 < len(arg) < max_dim]
        except AssertionError as error:
            log.error(
                'ERROR: The arguments must be of compatible size: 1 or %i',
                max_dim)
            raise error

        broadcast = [
            arg if len(arg) > 1 else nd.broadcast_axis(
                arg[0], axis=len(arg.shape) - 1, size=max_dim) for arg in args
        ]
        return nd.stack(*broadcast)
Example #30
def attention_forward(attention, cur_features, cur_state):
    """
    cur_features: (batch_size, num_features)
    cur_state: (batch_size, num_hidden)
    
    """
    
    cur_features = cur_features.T # (num_features, batch_size)
    cur_state = cur_state.expand_dims(0) # (1, batch_size, num_hidden)
    cur_states = nd.broadcast_axis(cur_state, axis=0, size=cur_features.shape[0])
    cur_features = cur_features.expand_dims(2)
    features_and_cur_states = nd.concat(cur_features, cur_states, dim=2)
    
    """
    features_and_cur_states: (num_features, batch_size, num_hiddens + 1)
    attention(features_and_cur_states): (num_features, batch_size, 1)
    alpha_prob: (num_features, batch_size, 1)
    
    """
    
    alpha_prob = nd.softmax(attention(features_and_cur_states), axis=0)
    return (alpha_prob * cur_states).sum(axis=0)