Code example #1
File: encdec.py Project: pcgreat/RNNLG
    def _gen(self, node):

        # input word embedding
        wv_t = sigmoid(self.Wemb_np[node.wordid, :])
        # attention
        b_t = np.zeros((node.sv.shape[0]))
        for j in range(node.sv.shape[0]):
            b_t[j] = np.dot(
                tanh(
                    np.dot(np.concatenate([wv_t, node.h, node.sv[j]], axis=0),
                           self.Wha_np)), self.Vha_np)
        b_t = softmax(b_t)
        sv_emb_t = np.dot(b_t, node.sv)
        da_emb_t = tanh(node.a + sv_emb_t)
        # compute ig, fg, og together and slice it
        gates_t = np.dot(np.concatenate([wv_t, node.h, da_emb_t], axis=0),
                         self.Wgate_np)
        ig = sigmoid(gates_t[:self.dh])
        fg = sigmoid(gates_t[self.dh:self.dh * 2])
        og = sigmoid(gates_t[self.dh * 2:self.dh * 3])
        cx_t = tanh(gates_t[self.dh * 3:])
        # update lstm internal state
        c_t = np.multiply(ig, cx_t) + np.multiply(fg, node.c)
        # obtain new hidden layer
        h_t = np.multiply(og, tanh(c_t))
        # compute output distribution target word prob
        o_t = softmax(np.dot(h_t, self.Who_np))
        # make sure we won't sample unknown word
        o_t[0] = 0.0
        selected_words = np.argsort(o_t)[::-1][:self.beamwidth].tolist()
        # return results
        return selected_words, o_t[selected_words], c_t, h_t
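All of the snippets on this page call a handful of helpers (np from NumPy, plus sigmoid, tanh and softmax) without defining them; each source project ships its own utility module. As a reading aid, here is a minimal sketch using the conventional NumPy definitions the snippets appear to assume for 1-D inputs; this is a reconstruction, not the projects' actual code.

    # Hedged sketch: conventional NumPy stand-ins for the helpers the
    # snippets assume. RNNLG / NNDIAL define their own versions.
    import numpy as np
    from numpy import tanh  # element-wise hyperbolic tangent

    def sigmoid(x):
        # element-wise logistic function
        return 1.0 / (1.0 + np.exp(-x))

    def softmax(x):
        # numerically stable softmax over a 1-D score vector
        e = np.exp(x - np.max(x))
        return e / e.sum()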
Code example #2
File: encdec.py Project: jtraviesor/tf-playground
    def _gen(self, node):

        # input word embedding
        wv_t = sigmoid(self.Wemb_np[node.wordid, :])
        # attention
        b_t = np.zeros((node.sv.shape[0]))
        for j in range(node.sv.shape[0]):
            b_t[j] = np.dot(
                tanh(np.dot(
                    np.concatenate([wv_t, node.h, node.sv[j]], axis=0),
                    self.Wha_np)), self.Vha_np)
        b_t = softmax(b_t)
        sv_emb_t = np.dot(b_t, node.sv)
        da_emb_t = tanh(node.a + sv_emb_t)
        # compute ig, fg, og together and slice it
        gates_t = np.dot(np.concatenate([wv_t, node.h, da_emb_t], axis=0),
                         self.Wgate_np)
        ig = sigmoid(gates_t[:self.dh])
        fg = sigmoid(gates_t[self.dh:self.dh * 2])
        og = sigmoid(gates_t[self.dh * 2:self.dh * 3])
        cx_t = tanh(gates_t[self.dh * 3:])
        # update lstm internal state
        c_t = np.multiply(ig, cx_t) + np.multiply(fg, node.c)
        # obtain new hidden layer
        h_t = np.multiply(og, tanh(c_t))
        # compute output distribution target word prob
        o_t = softmax(np.dot(h_t, self.Who_np))
        # make sure we won't sample unknown word
        o_t[0] = 0.0
        selected_words = np.argsort(o_t)[::-1][:self.beamwidth].tolist()
        # return results
        return selected_words, o_t[selected_words], c_t, h_t
Code example #3
File: policy.py Project: jungle-cat/NNDIAL
    def _sample_from_posterior(self, belief_t, degree_t, intent_t,
                               masked_source_t, masked_target_t):

        # Posterior
        # response encoding
        target_intent_t = bidirectional_read(self.tfEncoder, self.tbEncoder,
                                             masked_target_t)
        source_intent_t = bidirectional_read(self.sfEncoder, self.sbEncoder,
                                             masked_source_t)
        # posterior parameterisation
        q_logit_t = np.dot(
            tanh(np.dot(belief_t, self.Wq1_backup) +
                 np.dot(degree_t, self.Wq2_backup) +
                 np.dot(source_intent_t, self.Wq3_backup) +
                 np.dot(target_intent_t, self.Wq4_backup)), self.Wq5_backup)

        # sampling from a scaled posterior
        sortedIndex = np.argsort(q_logit_t)[::-1][:self.topN]
        topN_posterior_t = softmax(q_logit_t[sortedIndex])
        z_t = sortedIndex[np.argmax(
            np.random.multinomial(n=1, pvals=topN_posterior_t))]
        #z_t = sortedIndex[0]
        z_t = np.expand_dims(z_t, axis=0)
        print sortedIndex[:3]
        print softmax(q_logit_t)[sortedIndex][:3]
        print 'Posterior  : %s' % sortedIndex
        print 'probability: %s' % topN_posterior_t

        return z_t, softmax(q_logit_t)
Code example #4
File: tracker.py Project: jungle-cat/NNDIAL
    def track(self, b_jm1, ngs_j, ngt_j):

        # padding dummy
        Wfbs = np.concatenate([self.Wfbs_backup,
                               np.zeros_like(self.Wfbs_backup[-1:, :])], axis=0)
        Wfbt = np.concatenate([self.Wfbt_backup,
                               np.zeros_like(self.Wfbt_backup[-1:, :])], axis=0)

        # new belief
        g_j = np.zeros(self.dbm1)
        for v in range(self.dbm1):
            ngsidx = ngs_j[v]
            ngtidx = ngt_j[v]

            fembs_v = np.sum(Wfbs[ngsidx, :], axis=0)
            fembt_v = np.sum(Wfbt[ngtidx, :], axis=0)

            g_jv = np.dot(
                self.Whb_backup,
                sigmoid(fembs_v + fembt_v + b_jm1[v] * self.Wrec_backup +
                        b_jm1[-1] * self.Wnon_backup + self.B0_backup))
            g_j[v] = g_jv

        g_j = np.concatenate([g_j, self.B_backup], axis=0)
        b_j = softmax(g_j)

        return b_j
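The track() methods on this page share one pattern: each candidate slot value v receives a scalar activation built from its n-gram features (plus, in some variants, recurrent belief terms such as b_jm1[v] * self.Wrec_backup and b_jm1[-1] * self.Wnon_backup, the latter suggesting a special "none" entry), the activations are extended with the trainable bias block self.B_backup, and a softmax turns the result into a belief distribution. A toy-sized sketch of that final step, with invented dimensions and random stand-ins for the trained weights:

    import numpy as np

    rng = np.random.default_rng(0)
    dbm1, dh = 4, 8                      # invented sizes: 4 values, hidden dim 8
    Whb = rng.normal(size=dh)            # stand-in for self.Whb_backup
    B = rng.normal(size=2)               # stand-in for self.B_backup
    feats = rng.normal(size=(dbm1, dh))  # per-value feature activations

    def sigm(x):
        return 1.0 / (1.0 + np.exp(-x))

    # one scalar activation per value, then append the bias block
    g = np.array([np.dot(Whb, sigm(feats[v])) for v in range(dbm1)])
    b = np.exp(np.concatenate([g, B]))
    b = b / b.sum()                      # belief over values plus specials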
Code example #5
File: tracker.py Project: jungle-cat/NNDIAL
    def track(self, ms_j, mt_jm1,
              ssrcpos_js, vsrcpos_js, starpos_js, vtarpos_js):

        # cnn encoding
        ngms_j, uttms_j = self.sCNN.read(ms_j)
        ngmt_jm1, uttmt_jm1 = self.tCNN.read(mt_jm1)

        # padding dummy vector
        ngms_j = np.concatenate([ngms_j, np.zeros_like(ngms_j[-1:, :])],
                                axis=0)
        ngmt_jm1 = np.concatenate([ngmt_jm1, np.zeros_like(ngmt_jm1[-1:, :])],
                                  axis=0)

        # source features
        ssrcemb_js = np.sum(ngms_j[ssrcpos_js, :], axis=0)
        vsrcemb_js = np.sum(ngms_j[vsrcpos_js, :], axis=0)
        src_js = np.concatenate([ssrcemb_js, vsrcemb_js, uttms_j], axis=0)

        # target features
        staremb_js = np.sum(ngmt_jm1[starpos_js, :], axis=0)
        vtaremb_js = np.sum(ngmt_jm1[vtarpos_js, :], axis=0)
        tar_js = np.concatenate([staremb_js, vtaremb_js, uttmt_jm1], axis=0)

        # update g_j
        g_j = np.dot(
            self.Whb_backup,
            sigmoid(np.dot(src_js, self.Wfbs_backup) +
                    np.dot(tar_js, self.Wfbt_backup) + self.B0_backup))

        # update b_j
        g_j = np.array([g_j, self.B_backup])
        b_j = softmax(g_j)

        return b_j
Code example #6
File: sclstm.py Project: jtraviesor/tf-playground
    def _gen(self, node):

        # input word embedding
        wv_t = sigmoid(self.Wemb_np[node.wordid, :])
        # compute ig, fg, og together and slice it
        gates_t = np.dot(np.concatenate([wv_t, node.h, node.sv], axis=0),
                         self.Wgate_np)
        ig = sigmoid(gates_t[:self.dh])
        fg = sigmoid(gates_t[self.dh:self.dh * 2])
        og = sigmoid(gates_t[self.dh * 2:self.dh * 3])
        # compute reading rg
        rg = sigmoid(np.dot(np.concatenate([wv_t, node.h, node.sv], axis=0),
                            self.Wrgate_np))
        # compute proposed cell value
        cx_t = np.tanh(np.dot(np.concatenate([wv_t, node.h], axis=0),
                              self.Wcx_np))
        # update DA 1-hot vector
        sv_t = np.multiply(rg, node.sv)
        # update lstm internal state
        c_t = np.multiply(ig, cx_t) + \
              np.multiply(fg, node.c) + \
              tanh(np.dot(np.concatenate([node.a, sv_t], axis=0), self.Wfc_np))
        # obtain new hidden layer
        h_t = np.multiply(og, tanh(c_t))
        # compute output distribution target word prob
        o_t = softmax(np.dot(h_t, self.Who_np))
        # make sure we won't sample unknown word
        o_t[0] = 0.0
        selected_words = np.argsort(o_t)[::-1][:self.beamwidth].tolist()
        # return results
        return selected_words, o_t[selected_words], sv_t, c_t, h_t
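The _gen above is an SC-LSTM-style cell: the reading gate rg multiplicatively decays the dialogue-act vector sv at every step, so features that have already been expressed in the output gradually stop contributing to the cell update. The decay step in isolation, with an invented DA vector:

    import numpy as np

    sv = np.array([1.0, 1.0, 0.0, 1.0])    # invented 1-hot DA feature vector
    rg = np.array([0.1, 0.9, 0.5, 0.8])    # reading gate output for one step
    sv_next = rg * sv                       # "read" features decay toward zero
    print(sv_next)                          # [0.1 0.9 0.  0.8]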
Code example #7
File: policy.py Project: jungle-cat/NNDIAL
    def _sample_from_prior(self, belief_t, degree_t, intent_t):

        # prior parameterisation
        hidden_t = tanh(np.dot(belief_t, self.Ws1_backup) +
                        np.dot(degree_t, self.Ws2_backup) +
                        np.dot(intent_t, self.Ws3_backup))
        p_logit_t = np.dot(
            tanh(np.dot(hidden_t, self.Wp1_backup) + self.bp1_backup),
            self.Wp2_backup)

        # sampling from prior
        sortedIndex = np.argsort(p_logit_t)[::-1][:self.topN]
        topN_prior_t = softmax(p_logit_t[sortedIndex])
        z_t = sortedIndex[np.argmax(
            np.random.multinomial(n=1, pvals=topN_prior_t))]
        z_t = np.expand_dims(z_t, axis=0)
        # choose the top N samples
        print 'Sample     : %s' % z_t
        print 'Prior dist.: %s' % sortedIndex
        print 'probability: %s' % topN_prior_t
        print
        return z_t, softmax(p_logit_t)
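Both _sample_from_prior above and _sample_from_posterior earlier use the same sampling trick: softmax-renormalise only the topN logits, then draw a single index with np.random.multinomial. A self-contained sketch of that step; the logit values and topN here are invented for illustration:

    import numpy as np

    def sample_top_n(logit_t, topN):
        # keep the topN highest-scoring indices, best first
        sortedIndex = np.argsort(logit_t)[::-1][:topN]
        # softmax over just those logits: the "scaled" distribution
        e = np.exp(logit_t[sortedIndex] - np.max(logit_t[sortedIndex]))
        pvals = e / e.sum()
        # one multinomial draw yields a 1-hot count vector; argmax reads it off
        return sortedIndex[np.argmax(np.random.multinomial(n=1, pvals=pvals))]

    # invented logits, for illustration only
    z = sample_top_n(np.array([0.1, 2.0, -0.5, 1.3]), topN=3)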
Code example #8
File: policy.py Project: jungle-cat/NNDIAL
    def decide(self, belief_t, degree_t, intent_t, ohidden_tjm1, wemb_tj):
        # embed
        degree_t = tanh(np.dot(degree_t, self.Ws2_backup))
        intent_t = tanh(np.dot(intent_t, self.Ws3_backup))

        # score bias
        score_t = np.dot(ohidden_tjm1, self.Wa1_backup) + \
                  np.dot(wemb_tj, self.Wa2_backup) + \
                  np.dot(belief_t, self.Wa3_backup)

        # attention mechanism
        atten_t = softmax(np.dot(sigmoid(score_t), self.Va1_backup))
        actEmb = tanh(np.dot(atten_t, belief_t) + degree_t + intent_t)
        return np.expand_dims(actEmb, axis=0)
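decide() is a single-query attention step: the three linear score terms are squashed by a sigmoid, projected to one logit per belief row by Va1, softmax-normalised, and used as weights for a convex combination of the belief rows. A toy sketch of those last operations; the shapes are guesses for illustration, not taken from the projects:

    import numpy as np

    rng = np.random.default_rng(1)
    nslots, dbelief, dscore = 3, 5, 4            # invented sizes
    belief = rng.normal(size=(nslots, dbelief))
    score = rng.normal(size=(nslots, dscore))    # stands in for score_t
    Va1 = rng.normal(size=dscore)                # stands in for self.Va1_backup

    logits = np.dot(1.0 / (1.0 + np.exp(-score)), Va1)   # one logit per row
    atten = np.exp(logits - logits.max())
    atten = atten / atten.sum()                  # attention weights sum to 1
    actEmb = np.tanh(np.dot(atten, belief))      # convex combination of rows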
Code example #9
File: tracker.py Project: jungle-cat/NNDIAL
    def track(self, ngs_j, ngt_j):

        # padding dummy
        Wfbs = np.concatenate([self.Wfbs_backup,
                               np.zeros_like(self.Wfbs_backup[-1:, :])], axis=0)
        Wfbt = np.concatenate([self.Wfbt_backup,
                               np.zeros_like(self.Wfbt_backup[-1:, :])], axis=0)

        # new belief
        fembs_v = np.sum(Wfbs[ngs_j, :], axis=0)
        fembt_v = np.sum(Wfbt[ngt_j, :], axis=0)

        g_j = np.dot(self.Whb_backup,
                     sigmoid(fembs_v + fembt_v + self.B0_backup))

        g_j = np.array([g_j, self.B_backup])
        b_j = softmax(g_j)

        return b_j
Code example #10
File: tracker.py Project: jungle-cat/NNDIAL
    def track(self, b_jm1, ms_j, mt_jm1, ssrcpos_js, vsrcpos_js, starpos_js,
              vtarpos_js):

        # cnn encoding
        ngms_j, uttms_j = self.sCNN.read(ms_j)
        ngmt_jm1, uttmt_jm1 = self.tCNN.read(mt_jm1)

        # padding dummy vector
        ngms_j = np.concatenate([ngms_j, np.zeros_like(ngms_j[-1:, :])],
                                axis=0)
        ngmt_jm1 = np.concatenate([ngmt_jm1, np.zeros_like(ngmt_jm1[-1:, :])],
                                  axis=0)

        # new belief
        g_j = np.zeros(self.dbm1)
        for v in range(self.dbm1):
            # source features
            ssrcemb_jsv = np.sum(ngms_j[ssrcpos_js[v], :], axis=0)
            vsrcemb_jsv = np.sum(ngms_j[vsrcpos_js[v], :], axis=0)
            src_jsv = np.concatenate([ssrcemb_jsv, vsrcemb_jsv, uttms_j],
                                     axis=0)
            # target features
            staremb_jsv = np.sum(ngmt_jm1[starpos_js[v], :], axis=0)
            vtaremb_jsv = np.sum(ngmt_jm1[vtarpos_js[v], :], axis=0)
            tar_jsv = np.concatenate([staremb_jsv, vtaremb_jsv, uttmt_jm1],
                                     axis=0)

            # update g_jv value
            g_jv = np.dot(
                self.Whb_backup,
                sigmoid(np.dot(src_jsv, self.Wfbs_backup) +
                        np.dot(tar_jsv, self.Wfbt_backup) +
                        b_jm1[v] * self.Wrec_backup +
                        b_jm1[-1] * self.Wnon_backup + self.B0_backup))
            g_j[v] = g_jv

        # produce new belief b_j
        g_j = np.concatenate([g_j, self.B_backup], axis=0)
        b_j = softmax(g_j)

        return b_j
Code example #11
File: decoder.py Project: jungle-cat/NNDIAL
    def _forwardpass(self, n, intent_t, belief_vec_t, degree_t, actEmb_t,
                     scoreTable):

        # forward pass
        in_j = sigmoid(self.Wemb_backup[n.wordid])

        # action embedding
        if self.ply == 'attention':
            actEmb_tj = self.policy.decide(belief_vec_t, degree_t, intent_t,
                                           n.h, in_j)[0]
        else:  # fixed action embedding
            actEmb_tj = actEmb_t

        # syntactic memory cell and gate
        # compute i, f, o, c together and slice it
        bundle_j = np.dot(in_j, self.oWgate_backup) + \
                   np.dot(n.h, self.oUgate_backup)
        bundle_aj = np.dot(actEmb_tj, self.Wzh_backup)
        # input gate
        ig = sigmoid(bundle_j[:self.doh] + bundle_aj[:self.doh] +
                     self.b_backup[:self.doh])
        # use forget bias or not
        fg = sigmoid(bundle_j[self.doh:self.doh * 2] +
                     bundle_aj[self.doh:self.doh * 2] +
                     self.b_backup[self.doh:self.doh * 2])
        # output gate
        og = sigmoid(bundle_j[self.doh * 2:self.doh * 3] +
                     bundle_aj[self.doh * 2:self.doh * 3] +
                     self.b_backup[self.doh * 2:self.doh * 3])
        # proposed memory cell
        # reading gate, memory cell, hidden layer
        if self.struct == 'lstm_cond':  # reading gate control signal
            rg = sigmoid(bundle_j[self.doh * 4:self.doh * 5] +
                         bundle_aj[self.doh * 4:self.doh * 5] +
                         self.b_backup[self.doh * 3:])
            cx_j = tanh(bundle_j[self.doh * 3:self.doh * 4])
            oc_j = np.multiply(ig, cx_j) + \
                   np.multiply(fg, n.c) + \
                   np.multiply(rg, tanh(bundle_aj[self.doh * 3:self.doh * 4]))
            oh_j = np.multiply(og, tanh(oc_j))
            o_j = softmax(np.dot(oh_j, self.Who_backup))
        elif self.struct == 'lstm_mix':  # two signals
            rg = sigmoid(bundle_j[self.doh * 4:self.doh * 5] +
                         bundle_aj[self.doh * 4:self.doh * 5] +
                         self.b_backup[self.doh * 3:])
            cx_j = tanh(bundle_j[self.doh * 3:self.doh * 4])
            oc_j = np.multiply(ig, cx_j) + \
                   np.multiply(fg, n.c)
            oh_j = np.multiply(og, tanh(oc_j)) + \
                   np.multiply(rg, tanh(bundle_aj[self.doh * 3:self.doh * 4]))
            o_j = softmax(np.dot(oh_j, self.Who_backup))
        elif self.struct == 'lstm_lm':  # lm style
            cx_j = tanh(bundle_j[self.doh * 3:self.doh * 4] +
                        bundle_aj[self.doh * 3:self.doh * 4])
            oc_j = np.multiply(ig, cx_j) + \
                   np.multiply(fg, n.c)
            oh_j = np.multiply(og, tanh(oc_j))
            o_j = softmax(np.dot(oh_j, self.Who_backup))
        else:
            sys.exit('[ERROR]: Unseen decoder structure ' + self.struct)

        # compute output distribution, logp, and sample

        # make sure we won't sample unknown word
        o_j[0] = 0.0
        selected_words = np.argsort(o_j)[::-1][:self.beamwidth]

        # expand nodes and add additional reward
        nextnodes = []
        for wid in selected_words:  # ignore <unk> token
            # loglikelihood of current word
            logp = np.log10(o_j[wid])

            # update record for new node
            new_record = deepcopy(n.record)
            if new_record['s'].has_key(wid):
                new_record['s'][wid] += 1
            if new_record['v'].has_key(wid):
                new_record['v'][wid] += 1

            # create new node and score it
            node = BeamSearchNode(oh_j, oc_j, n, wid,
                                  n.logp + logp, n.leng + 1, new_record)

            # store nodes
            nextnodes.append(
                (-node.eval(self.repeat_penalty, self.token_reward,
                            scoreTable, self.alpha), node))

        return nextnodes
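_forwardpass assumes a BeamSearchNode class that carries the decoder state and a running score. The sketch below is a hypothetical reconstruction inferred only from the attributes used above (h, c, logp, leng, record and the eval call); NNDIAL's actual class, and in particular its eval scoring, may differ.

    class BeamSearchNode(object):
        # Hypothetical reconstruction; the project's real class may differ.
        def __init__(self, h, c, prevNode, wordid, logp, leng, record):
            self.h = h                  # decoder hidden state at this node
            self.c = c                  # decoder memory cell at this node
            self.prevNode = prevNode    # back-pointer to recover the hypothesis
            self.wordid = wordid        # word chosen at this step
            self.logp = logp            # accumulated log-probability
            self.leng = leng            # hypothesis length so far
            self.record = record        # per-hypothesis slot/value counts

        def eval(self, repeat_penalty, token_reward, scoreTable, alpha):
            # One plausible score: length-normalised log-probability. The
            # penalty/reward/scoreTable terms are ignored here because their
            # exact use is not visible in the snippets on this page.
            return self.logp / float(max(self.leng, 1))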