def get_crf_training_loss(Inference_Layer,t_out,numTags,params,x_in,u_in,y_in,mask_in,l_in,l_u_in,l_mask):
    """Build the symbolic CRF structured loss for one batch (constant transitions).

    Returns the symbolic vector ``logZ - (pairwise_score + unary_score)``,
    one entry per batch element, where ``logZ`` is read from column 0 of the
    log-sum-exp (over tags) of the inference layer's unnormalized output.

    Inference_Layer: CRF inference layer; queried with ``unary=True`` for raw
        unary potentials, ``unary=False`` for unnormalized log scores, and
        ``get_CRF_params()`` for the shared transition matrix.
    t_out: symbolic gold-tag tensor; multiplied elementwise with the unary
        potentials and summed over axes [1, 2] (presumably one-hot
        (batch, seq, tags) -- confirm against callers).
    numTags, params: unused in this variant (kept for a uniform signature).
    x_in, u_in, y_in, mask_in: concrete numpy batches, used only to evaluate
        the debug-only compiled functions below.
    l_in, l_u_in, l_mask: lasagne input layers whose ``input_var`` feeds the graph.
    """
    unary_potential = lasagne.layers.get_output(Inference_Layer,deterministic = False,unary=True)
    unnormalized_log = lasagne.layers.get_output(Inference_Layer,deterministic = False,unary=False)
    Wp=Inference_Layer.get_CRF_params()  # CRF transition matrix (shared parameter)
    logger.info('Initializing CRF structured loss')
    # Gold-sequence unary score per batch element.
    # NOTE(review): no mask is applied here, unlike the masked variants of this
    # function -- confirm padded positions are really meant to contribute.
    unary_score=T.sum(unary_potential*t_out,axis=[1,2])
    unary_sequence = t_out.dimshuffle(1,0,2) #Reshuffling the batched unary potential shape so that it can be used for word level iterations in theano.scan
    def pairwise_collector(yprev,ycurrent,Wp):
        # Bilinear transition score yprev^T * Wp * ycurrent for each batch element.
        yip = yprev.dimshuffle(0,1,'x')*Wp.dimshuffle('x',0,1)
        yip = yip*ycurrent.dimshuffle(0,'x',1)
        yip = T.sum(yip,axis=[1,2])
        return yip
    # taps=[-1,0] hands each consecutive (previous, current) tag pair to the collector.
    pairwise_score,_=theano.scan(fn=pairwise_collector,sequences=[dict(input=unary_sequence,taps=[-1,0])],non_sequences=[Wp])
    # Compiled solely to log the scan output shape below.
    pairwise_score_result = theano.function([t_out],pairwise_score)
    logger.debug('Scan pairwise score is calculated.{0}'.format(pairwise_score_result(y_in.astype('float32')).shape))
    pairwise_score=T.sum(pairwise_score,axis=[0])  # total transition score over time steps
    # Debug-only compiled functions to sanity-check shapes/values of each loss term.
    total_score=theano.function([t_out,l_in.input_var,l_u_in.input_var,l_mask.input_var],pairwise_score+unary_score)
    zee_ = theano.function([l_in.input_var,l_u_in.input_var,l_mask.input_var],theano_logsumexp(unnormalized_log,axis=2))
    cost_ = theano.function([t_out,l_in.input_var,l_u_in.input_var,l_mask.input_var],theano_logsumexp(unnormalized_log,axis=2)[:,0]-(pairwise_score+unary_score))
    logger.debug('Sentence score is calculated.The vector size {0} should be the same as the batch size.'.format(total_score(y_in.astype('float32'),x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    logger.debug('Zee score is calculated.The vector size {0} should be the same as the batch size.'.format(zee_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    logger.debug('Zee is calculated for sample 0. This should be same for all label instances.{0}'.format(zee_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32'))[0,:][mask_in[0]!=0]))
    logger.debug('Zee is calculated for sample 10. This should be same for all label instances.{0}'.format(zee_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32'))[10,:][mask_in[10]!=0]))
    logger.debug('Zee score across the sample batch. This should not be same for all batch instances unless there is no embedding initialization.{0}'.format(zee_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32'))[:,0]))
    logger.debug('Cost score is calculated.The vector size {0} should be the same as the batch size.'.format(cost_(y_in.astype('float32'),x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    # The actual loss expression handed back to the training graph.
    return theano_logsumexp(unnormalized_log,axis=2)[:,0]-(pairwise_score+unary_score)
def get_output_for(self,net_input,**kwargs):
    """Sum-product (forward-backward) message passing over a chain CRF.

    net_input: batched unary potentials; dimshuffled to time-major
        (seq, batch, tags assumed -- confirm against callers) for the scans.
    kwargs['unary']: if True, return the raw unary input unchanged.
    kwargs['normalized']: if True, return per-position log-marginals
        (normalized over tags); otherwise the unnormalized log scores.
    Returns a batch-major tensor (dimshuffled back with (1, 0, 2)).
    """
    if 'unary' in kwargs and kwargs['unary']==True:
        return net_input
    logger.info('Initializing the messages')
    Wp=self.W  # CRF transition matrix (layer parameter)
    unary_sequence = net_input.dimshuffle(1,0,2) #Reshuffling the batched unary potential shape so that it can be used for word level iterations in theano.scan
    def forward_scan1(unary_sequence,forward_sm,Wp):
        # Forward recursion: fold in this step's unaries, then
        # log-sum-exp over the previous tag via the transition matrix.
        forward_sm=forward_sm+unary_sequence
        forward_sm=theano_logsumexp(forward_sm.dimshuffle(0,1,'x')+Wp,1)
        return forward_sm
    def backward_scan1(unary_sequence,forward_sm,Wp):
        # Same recursion with transposed transitions for the reverse direction.
        forward_sm=forward_sm+unary_sequence
        forward_sm=theano_logsumexp(forward_sm.dimshuffle(0,1,'x')+Wp.T,1)
        return forward_sm
    # n_steps = seq_len - 1: messages only exist between adjacent positions.
    forward_results,_=theano.scan(fn=forward_scan1,sequences=[unary_sequence],outputs_info=T.zeros_like(unary_sequence[0]),non_sequences=[Wp],n_steps=unary_sequence.shape[0]-1)
    backward_results,_=theano.scan(fn=backward_scan1,sequences=[unary_sequence[::-1]],outputs_info=T.zeros_like(unary_sequence[0]),non_sequences=[Wp],n_steps=unary_sequence.shape[0]-1)
    # Pad so position t combines the forward message from t-1 and the
    # backward message from t+1 (zeros at the chain ends).
    backward_results=T.concatenate([backward_results[::-1],T.zeros_like(backward_results[:1])],axis=0)
    forward_results=T.concatenate([T.zeros_like(forward_results[:1]),forward_results],axis=0)
    unnormalized_prob = forward_results+unary_sequence+backward_results
    marginal_results = theano_logsumexp(unnormalized_prob,axis=2)  # per-position log-normalizer
    normalized_prob = unnormalized_prob - marginal_results.dimshuffle(0,1,'x')
    # provided for debugging purposes.
    #marginal_all = theano.function([l_in.input_var,l_mask.input_var],marginal_results)
    #probs=theano.function([l_in.input_var,l_mask.input_var],normalized_prob.dimshuffle(1,0,2))
    if 'normalized' in kwargs and kwargs['normalized']==True:
        return normalized_prob.dimshuffle(1,0,2)
    else:
        return unnormalized_prob.dimshuffle(1,0,2)
def get_crf_training_loss(Inference_Layer,pairwise,t_out,numTags,params,x_in,u_in,y_in,mask_in,l_in,l_u_in,l_mask):
    """Masked CRF structured loss with position-dependent transitions.

    Unlike the constant-transition variant, the pairwise potentials come from
    a separate ``pairwise`` layer (one numTags x numTags matrix per position),
    and both unary and pairwise scores are multiplied by the sequence mask so
    padded positions do not contribute.

    Returns the symbolic vector ``logZ - (pairwise_score + unary_score)``,
    one entry per batch element. The concrete arrays (x_in, u_in, y_in,
    mask_in) feed debug-only compiled functions; ``params`` is unused here.
    """
    # Position-dependent transitions: move time to the front and reshape the
    # flat layer output into (seq, batch, numTags, numTags).
    pairwise_sequence=lasagne.layers.get_output(pairwise,deterministic = False)
    pairwise_sequence=pairwise_sequence.dimshuffle(1,0,2)
    pair_shape=pairwise_sequence.shape
    pairwise_potential=T.reshape(pairwise_sequence,(pair_shape[0],pair_shape[1],numTags,numTags))
    unary_potential = lasagne.layers.get_output(Inference_Layer,deterministic = False,unary=True)
    unnormalized_log = lasagne.layers.get_output(Inference_Layer,deterministic = False,unary=False)
    mask_out=lasagne.layers.get_output(l_mask)
    logger.info('Initializing CRF structured loss')
    # Masked gold-sequence unary score per batch element.
    unary_score=T.sum(unary_potential*t_out*mask_out.dimshuffle(0,1,'x'),axis=[1,2])
    unary_sequence = t_out.dimshuffle(1,0,2) #Reshuffling the batched unary potential shape so that it can be used for word level iterations in theano.scan
    mask_out=mask_out.dimshuffle(1,0)  # time-major mask to align with the scan output
    # Debug-only: compiled just to log the rearranged mask shape.
    m_out_f=theano.function([l_mask.input_var],mask_out)
    logger.info("rearranged output shape for mask {0}".format(m_out_f(mask_in.astype('float32')).shape))
    def pairwise_collector(yprev,ycurrent,Wp):
        # Bilinear transition score yprev^T * Wp * ycurrent; Wp here is the
        # per-position matrix supplied as a scanned sequence, not a shared param.
        yip = yprev.dimshuffle(0,1,'x')*Wp
        yip = yip*ycurrent.dimshuffle(0,'x',1)
        yip = T.sum(yip,axis=[1,2])
        return yip
    # taps=[-1,0] pairs consecutive tag vectors; the pairwise potentials are
    # scanned in lockstep via tap 0.
    pairwise_score,_=theano.scan(fn=pairwise_collector,sequences=[dict(input=unary_sequence,taps=[-1,0]),dict(input=pairwise_potential,taps=[0])],non_sequences=None)
    # Compiled solely for the debug shape log below.
    pairwise_score_result = theano.function([t_out,l_in.input_var,l_u_in.input_var,l_mask.input_var],pairwise_score)
    logger.debug('Scan pairwise score is calculated.{0}'.format(pairwise_score_result(y_in.astype('float32'),x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    # Mask the transition scores (no transition into padded steps), sum over time.
    pairwise_score=T.sum(pairwise_score*mask_out[1:,:],axis=[0])
    # Debug-only compiled functions to sanity-check each loss term.
    total_score=theano.function([t_out,l_in.input_var,l_u_in.input_var,l_mask.input_var],pairwise_score+unary_score)
    zee_ = theano.function([l_in.input_var,l_u_in.input_var,l_mask.input_var],theano_logsumexp(unnormalized_log,axis=2))
    up_ = theano.function([l_in.input_var,l_u_in.input_var,l_mask.input_var],unnormalized_log)
    cost_ = theano.function([t_out,l_in.input_var,l_u_in.input_var,l_mask.input_var],theano_logsumexp(unnormalized_log,axis=2)[:,0]-(pairwise_score+unary_score))
    logger.debug('Unary potential score is calculated.The vector size {0} should be the same as the batch size.'.format(up_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    logger.debug('Sentence score is calculated.The vector size {0} should be the same as the batch size.'.format(total_score(y_in.astype('float32'),x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    visible_zee=zee_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32'))
    logger.debug('Zee score shape is calculated.The vector size {0} should be the same as the batch size.'.format(visible_zee.shape))
    logger.debug('Zee score is calculated for sample 0. Zee should be the same for all label instances. \nZee :{0} \nMask: {1}'.format(visible_zee[0,:][mask_in[0]!=0],mask_in[0]))
    #logger.debug('Zee score shape for sample 0 {0}'.format(visible_zee[0,:].shape))
    logger.debug('Zee score is calculated for sample 10. Zee should be the same for all label instances. \nZee :{0} \nMask: {1}'.format(visible_zee[10,:][mask_in[10]!=0],mask_in[10]))
    #logger.debug('Zee score shape for sample 11 {0}'.format(visible_zee[11,:].shape))
    logger.debug('Zee score across the sample batch.This should not be same for all batch instances unless there is no embedding initialization.{0}'.format(zee_(x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32'))[:,0]))
    logger.debug('Cost score is calculated.The vector size {0} should be the same as the batch size.'.format(cost_(y_in.astype('float32'),x_in.astype('int32'),u_in.astype('float32'),mask_in.astype('float32')).shape))
    # Loss expression handed back to the training graph.
    return theano_logsumexp(unnormalized_log,axis=2)[:,0]-(pairwise_score+unary_score)
def get_output_for(self, net_input, **kwargs):
    """Forward-backward (sum-product) message passing for the chain CRF.

    Dimshuffles the batched unary scores to time-major, runs one scan in
    each direction sharing a single step helper (transposing the transition
    matrix for the backward pass), pads the messages so each position sees
    its left and right neighbours, and returns either normalized
    log-marginals or the raw log scores, batch-major again.
    """
    if kwargs.get('unary') == True:
        return net_input
    logger.info('Initializing the messages')
    trans = self.W
    potentials = net_input.dimshuffle(1, 0, 2)  # time-major for theano.scan

    def _message_step(step_unary, state, W):
        # Absorb this step's unaries, then log-sum-exp over the source tag.
        combined = state + step_unary
        return theano_logsumexp(combined.dimshuffle(0, 1, 'x') + W, 1)

    steps = potentials.shape[0] - 1  # messages exist only between neighbours
    init = T.zeros_like(potentials[0])
    fwd, _ = theano.scan(fn=_message_step,
                         sequences=[potentials],
                         outputs_info=init,
                         non_sequences=[trans],
                         n_steps=steps)
    bwd, _ = theano.scan(fn=_message_step,
                         sequences=[potentials[::-1]],
                         outputs_info=init,
                         non_sequences=[trans.T],
                         n_steps=steps)
    # Pad: no backward message at the last position, none forward at the first.
    bwd = T.concatenate([bwd[::-1], T.zeros_like(bwd[:1])], axis=0)
    fwd = T.concatenate([T.zeros_like(fwd[:1]), fwd], axis=0)
    log_scores = fwd + potentials + bwd
    log_norm = theano_logsumexp(log_scores, axis=2)
    log_marginals = log_scores - log_norm.dimshuffle(0, 1, 'x')
    if kwargs.get('normalized') == True:
        return log_marginals.dimshuffle(1, 0, 2)
    return log_scores.dimshuffle(1, 0, 2)
def get_crf_training_loss(Inference_Layer,pairwise,t_out,numTags,params,x_in,y_in,mask_in,l_in,l_mask):
    """Masked CRF structured loss with position-dependent transitions
    (variant without the extra unary-feature input ``u_in``).

    Returns the symbolic vector ``logZ - (pairwise_score + unary_score)``,
    one entry per batch element. ``params`` is unused here; the concrete
    arrays (x_in, y_in, mask_in) only feed debug-time compiled functions.
    """
    # Per-position transitions: time-major, reshaped to (seq, batch, numTags, numTags).
    pairwise_sequence=lasagne.layers.get_output(pairwise,deterministic = False)
    pairwise_sequence=pairwise_sequence.dimshuffle(1,0,2)
    pair_shape=pairwise_sequence.shape
    pairwise_potential=T.reshape(pairwise_sequence,(pair_shape[0],pair_shape[1],numTags,numTags))
    unary_potential = lasagne.layers.get_output(Inference_Layer,deterministic = False,unary=True)
    unnormalized_log = lasagne.layers.get_output(Inference_Layer,deterministic = False,unary=False)
    mask_out=lasagne.layers.get_output(l_mask)
    logger.info('Initializing CRF structured loss')
    # Masked gold-sequence unary score per batch element.
    unary_score=T.sum(unary_potential*t_out*mask_out.dimshuffle(0,1,'x'),axis=[1,2])
    unary_sequence = t_out.dimshuffle(1,0,2) #Reshuffling the batched unary potential shape so that it can be used for word level iterations in theano.scan
    mask_out=mask_out.dimshuffle(1,0)  # time-major mask to align with the scan output
    m_out_f=theano.function([l_mask.input_var],mask_out)
    # Fixed: use the module logger instead of a bare print(), consistent with
    # the logging everywhere else in this function (and its sibling variants).
    logger.info("rearranged output shape for mask {0}".format(m_out_f(mask_in.astype('float32')).shape))
    def pairwise_collector(yprev,ycurrent,Wp):
        # Bilinear transition score yprev^T * Wp * ycurrent; Wp is the
        # per-position matrix supplied as a scanned sequence.
        yip = yprev.dimshuffle(0,1,'x')*Wp
        yip = yip*ycurrent.dimshuffle(0,'x',1)
        yip = T.sum(yip,axis=[1,2])
        return yip
    # taps=[-1,0] pairs consecutive tag vectors; pairwise potentials scanned in lockstep.
    pairwise_score,_=theano.scan(fn=pairwise_collector,sequences=[dict(input=unary_sequence,taps=[-1,0]),dict(input=pairwise_potential,taps=[0])],non_sequences=None)
    # Compiled solely for the debug shape log below.
    pairwise_score_result = theano.function([t_out,l_in.input_var,l_mask.input_var],pairwise_score)
    logger.debug('Scan pairwise score is calculated.{0}'.format(pairwise_score_result(y_in.astype('float32'),x_in.astype('int32'),mask_in.astype('float32')).shape))
    # Mask the transition scores (no transition into padded steps), sum over time.
    pairwise_score=T.sum(pairwise_score*mask_out[1:,:],axis=[0])
    # Debug-only compiled functions to sanity-check each loss term.
    total_score=theano.function([t_out,l_in.input_var,l_mask.input_var],pairwise_score+unary_score)
    zee_ = theano.function([l_in.input_var,l_mask.input_var],theano_logsumexp(unnormalized_log,axis=2)[:,0])
    cost_ = theano.function([t_out,l_in.input_var,l_mask.input_var],theano_logsumexp(unnormalized_log,axis=2)[:,0]-(pairwise_score+unary_score))
    logger.debug('Sentence score is calculated.The vector size {0} should be the same as the batch size.'.format(total_score(y_in.astype('float32'),x_in.astype('int32'),mask_in.astype('float32')).shape))
    # Fixed: log the shape, as the message claims, instead of the raw vector
    # (matches the sibling variants, which all format .shape here).
    logger.debug('Zee score is calculated.The vector size {0} should be the same as the batch size.'.format(zee_(x_in.astype('int32'),mask_in.astype('float32')).shape))
    logger.debug('Cost score is calculated.The vector size {0} should be the same as the batch size.'.format(cost_(y_in.astype('float32'),x_in.astype('int32'),mask_in.astype('float32')).shape))
    # Loss expression handed back to the training graph.
    return theano_logsumexp(unnormalized_log,axis=2)[:,0]-(pairwise_score+unary_score)
def get_internal_results(deterministic):
    """Assemble per-position CRF log scores from the precomputed message layers.

    Reads the closure variables ``unary``, ``forward1``, ``backward1`` and
    ``normalization``; pads the directional messages with zeros at the chain
    ends, sums them with the unaries, and returns either normalized
    log-marginals or the raw log scores depending on ``normalization``.
    """
    logger.info('Initializing Approx output with normalization as {0} and deterministic as {1}'.format(normalization,deterministic))
    unaries = lasagne.layers.get_output(unary, deterministic=deterministic)
    fwd = lasagne.layers.get_output(forward1, deterministic=deterministic)
    bwd = lasagne.layers.get_output(backward1, deterministic=deterministic)
    # Pad: no backward message at the last position, none forward at the first.
    bwd = T.concatenate([bwd, T.zeros_like(bwd[:, :1, :])], axis=1)
    fwd = T.concatenate([T.zeros_like(fwd[:, :1, :]), fwd], axis=1)
    log_scores = fwd + unaries + bwd
    log_norm = theano_logsumexp(log_scores, axis=2)
    if normalization:
        return log_scores - log_norm.dimshuffle(0, 1, 'x')
    return log_scores
def get_output_for(self,inputs,normalization=False,**kwargs):
    """Masked forward-backward message passing for a chain CRF with
    per-position transition matrices.

    inputs: list of three symbolic tensors
        [unary_potential, pairwise_potential, mask].
    normalization: if True return per-position log-marginals (normalized
        over tags); otherwise the unnormalized log scores. Either way the
        result is batch-major (dimshuffled back with (1, 0, 2)).
    kwargs['unary']: if True, return the unary input unchanged.
    """
    # Validate before touching the elements (previously the assert ran only
    # after inputs[1] had already been indexed).
    assert len(inputs)==3,'the CRF layer should have 3 inputs: unary_potential,pairwise_potential and mask'
    net_input = inputs[0]
    if 'unary' in kwargs and kwargs['unary']==True:
        # Fixed: return the full batched unary tensor, not its first sample.
        # The sibling CRF-layer variants return net_input here, and the loss
        # code consumes it as a whole (batch, seq, tags) tensor.
        return net_input
    # Per-position transitions: time-major, reshaped to
    # (seq, batch, num_labels, num_labels).
    pairwise_sequence=inputs[1].dimshuffle(1,0,2)
    mask=inputs[2]
    pair_shape=pairwise_sequence.shape
    pairwise_sequence=T.reshape(pairwise_sequence,(pair_shape[0],pair_shape[1],self.num_labels,self.num_labels))
    unary_sequence = net_input.dimshuffle(1,0,2) #Reshuffling the batched unary potential shape so that it can be used for word level iterations in theano.scan
    mask = mask.dimshuffle(1,0)  # time-major mask, scanned alongside the potentials
    def forward_scan1(unary_sequence,Wp,mask,forward_sm):
        # Masked forward recursion: padded steps contribute neither unaries
        # nor transitions.
        forward_sm=forward_sm+unary_sequence*mask.dimshuffle(0,'x')
        forward_sm=theano_logsumexp(forward_sm.dimshuffle(0,1,'x')+mask.dimshuffle(0,'x','x')*Wp,1)
        return forward_sm
    def backward_scan1(unary_sequence,Wp,mask,forward_sm):
        # Same recursion with each per-position transition matrix transposed.
        forward_sm=forward_sm+unary_sequence*mask.dimshuffle(0,'x')
        forward_sm=theano_logsumexp(forward_sm.dimshuffle(0,1,'x')+mask.dimshuffle(0,'x','x')*Wp.dimshuffle(0,2,1),1)
        return forward_sm
    # n_steps = seq_len - 1: messages only exist between adjacent positions.
    forward_results,_=theano.scan(fn=forward_scan1,sequences=[unary_sequence,pairwise_sequence,mask],outputs_info=T.zeros_like(unary_sequence[0]),non_sequences=None,n_steps=unary_sequence.shape[0]-1)
    backward_results,_=theano.scan(fn=backward_scan1,sequences=[unary_sequence[::-1],pairwise_sequence[::-1],mask[::-1]],outputs_info=T.zeros_like(unary_sequence[0]),non_sequences=None,n_steps=unary_sequence.shape[0]-1)
    # Pad so position t combines the forward message from t-1 and the
    # backward message from t+1 (zeros at the chain ends).
    backward_results=T.concatenate([backward_results[::-1],T.zeros_like(backward_results[:1])],axis=0)
    forward_results=T.concatenate([T.zeros_like(forward_results[:1]),forward_results],axis=0)
    unnormalized_prob = forward_results+unary_sequence+backward_results
    marginal_results = theano_logsumexp(unnormalized_prob,axis=2)  # per-position log-normalizer
    normalized_prob = unnormalized_prob - marginal_results.dimshuffle(0,1,'x')
    if normalization:
        return normalized_prob.dimshuffle(1,0,2)
    else:
        return unnormalized_prob.dimshuffle(1,0,2)
def get_crf_training_loss(Inference_Layer, t_out, numTags, params, x_in, u_in, y_in, mask_in, l_in, l_u_in, l_mask):
    """Symbolic CRF structured loss for one batch (constant transition matrix).

    Returns the vector ``logZ - (pairwise_score + unary_score)``, one entry
    per batch element; ``logZ`` is column 0 of the log-sum-exp (over tags)
    of the inference layer's unnormalized output.

    numTags and params are unused in this variant. The concrete numpy
    arrays (x_in, u_in, y_in, mask_in) only feed the debug-time compiled
    functions; the returned expression is fully symbolic.
    """
    unary_potential = lasagne.layers.get_output(Inference_Layer, deterministic=False, unary=True)
    unnormalized_log = lasagne.layers.get_output(Inference_Layer, deterministic=False, unary=False)
    Wp = Inference_Layer.get_CRF_params()  # shared CRF transition matrix
    logger.info('Initializing CRF structured loss')
    # Gold-sequence unary score per batch element.
    # NOTE(review): unmasked, unlike the masked variants -- confirm padded
    # positions are meant to contribute.
    unary_score = T.sum(unary_potential * t_out, axis=[1, 2])
    unary_sequence = t_out.dimshuffle(
        1, 0, 2
    )  #Reshuffling the batched unary potential shape so that it can be used for word level iterations in theano.scan

    def pairwise_collector(yprev, ycurrent, Wp):
        # Bilinear transition score yprev^T * Wp * ycurrent per batch element.
        yip = yprev.dimshuffle(0, 1, 'x') * Wp.dimshuffle('x', 0, 1)
        yip = yip * ycurrent.dimshuffle(0, 'x', 1)
        yip = T.sum(yip, axis=[1, 2])
        return yip

    # taps=[-1,0] hands each consecutive (previous, current) tag pair to the collector.
    pairwise_score, _ = theano.scan(
        fn=pairwise_collector,
        sequences=[dict(input=unary_sequence, taps=[-1, 0])],
        non_sequences=[Wp])
    # Compiled solely to log the scan output shape below.
    pairwise_score_result = theano.function([t_out], pairwise_score)
    logger.debug('Scan pairwise score is calculated.{0}'.format(
        pairwise_score_result(y_in.astype('float32')).shape))
    pairwise_score = T.sum(pairwise_score, axis=[0])  # total transition score over time
    # Debug-only compiled functions to sanity-check each loss term.
    total_score = theano.function(
        [t_out, l_in.input_var, l_u_in.input_var, l_mask.input_var],
        pairwise_score + unary_score)
    zee_ = theano.function(
        [l_in.input_var, l_u_in.input_var, l_mask.input_var],
        theano_logsumexp(unnormalized_log, axis=2))
    cost_ = theano.function(
        [t_out, l_in.input_var, l_u_in.input_var, l_mask.input_var],
        theano_logsumexp(unnormalized_log, axis=2)[:, 0] -
        (pairwise_score + unary_score))
    logger.debug(
        'Sentence score is calculated.The vector size {0} should be the same as the batch size.'
        .format(
            total_score(y_in.astype('float32'), x_in.astype('int32'),
                        u_in.astype('float32'),
                        mask_in.astype('float32')).shape))
    logger.debug(
        'Zee score is calculated.The vector size {0} should be the same as the batch size.'
        .format(
            zee_(x_in.astype('int32'), u_in.astype('float32'),
                 mask_in.astype('float32')).shape))
    logger.debug(
        'Zee is calculated for sample 0. This should be same for all label instances.{0}'
        .format(
            zee_(x_in.astype('int32'), u_in.astype('float32'),
                 mask_in.astype('float32'))[0, :][mask_in[0] != 0]))
    logger.debug(
        'Zee is calculated for sample 10. This should be same for all label instances.{0}'
        .format(
            zee_(x_in.astype('int32'), u_in.astype('float32'),
                 mask_in.astype('float32'))[10, :][mask_in[10] != 0]))
    logger.debug(
        'Zee score across the sample batch. This should not be same for all batch instances unless there is no embedding initialization.{0}'
        .format(
            zee_(x_in.astype('int32'), u_in.astype('float32'),
                 mask_in.astype('float32'))[:, 0]))
    logger.debug(
        'Cost score is calculated.The vector size {0} should be the same as the batch size.'
        .format(
            cost_(y_in.astype('float32'), x_in.astype('int32'),
                  u_in.astype('float32'),
                  mask_in.astype('float32')).shape))
    # Loss expression handed back to the training graph.
    return theano_logsumexp(unnormalized_log,
                            axis=2)[:, 0] - (pairwise_score + unary_score)
def backward_scan1(unary_sequence, forward_sm, Wp):
    """One backward step of the CRF message recursion: absorb this step's
    unaries into the running message, then log-sum-exp over the source tag
    using the transposed transition matrix."""
    combined = forward_sm + unary_sequence
    return theano_logsumexp(combined.dimshuffle(0, 1, 'x') + Wp.T, 1)
def backward_scan1(unary_sequence, forward_sm, Wp):
    # Backward message step: add the current unaries, then marginalize the
    # source tag (log-sum-exp) through the transposed transitions.
    state = forward_sm + unary_sequence
    state = state.dimshuffle(0, 1, 'x') + Wp.T
    return theano_logsumexp(state, 1)
def backward_scan1(unary_sequence, Wp, mask, forward_sm):
    # Masked backward message step: padded positions (mask == 0) contribute
    # neither unaries nor transitions; Wp is the per-position transition
    # matrix, transposed via dimshuffle(0, 2, 1) for the reverse direction.
    accumulated = forward_sm + unary_sequence * mask.dimshuffle(0, 'x')
    gated_trans = mask.dimshuffle(0, 'x', 'x') * Wp.dimshuffle(0, 2, 1)
    return theano_logsumexp(accumulated.dimshuffle(0, 1, 'x') + gated_trans, 1)