def expectedEmissionStats(self, t, ys, alphas, betas, conditionOnY=True):
    """Return E[ y_t * y_t^T ], E[ y_t * x_t^T ] and E[ x_t * x_t^T ] at time t.

    The J and h entries of ``alphas[t]`` and ``betas[t]`` are added to get the
    information-form parameters used for x_t.

    Args:
        t: Time index.
        ys: Observations, read as ``ys[:, t]`` and only when ``conditionOnY``
            is true (presumably laid out as (measurement, time, D_obs) --
            TODO confirm against callers).
        alphas: Sequence of ``(J, h, log_Z)`` triples, one per time step.
        betas: Sequence of ``(J, h, log_Z)`` triples, one per time step.
        conditionOnY: If true, plug the given ``ys`` in directly; otherwise
            take the expectation under a joint Gaussian over ( y_t, x_t ).

    Returns:
        The tuple ``(Eyt_yt, Eyt_xt, Ext_xt)``.
    """
    # Add the entries one at a time. np.add on the whole triples would first
    # have to pack a matrix, a vector and a scalar into a single array, which
    # NumPy >= 1.24 rejects as an inhomogeneous shape.
    (J_a, h_a, _), (J_b, h_b, _) = alphas[t], betas[t]
    J_x = J_a + J_b
    h_x = h_a + h_b

    if not conditionOnY:
        # Find the expected sufficient statistic for N( y_t, x_t | Y )
        J11 = self.J1Emiss
        J12 = -self._hy
        J22 = self.Jy + J_x
        D = J11.shape[0]
        J = np.block([[J11, J12], [J12.T, J22]])
        h = np.hstack((np.zeros(D), h_x))

        # This is a block matrix with block E[ y_t * y_t^T ], E[ y_t * x_t^T ]
        # and E[ x_t * x_t^T ]
        E, _ = Normal.expectedSufficientStats(nat_params=(-0.5 * J, h))
        Eyt_yt, Eyt_xt, Ext_xt = toBlocks(E, D)
    else:
        Ext_xt, E_xt = Normal.expectedSufficientStats(nat_params=(-0.5 * J_x, h_x))
        # The leading axis of ys[ :, t ] is summed out by both contractions
        y_t = ys[:, t]
        Eyt_yt = np.einsum('mi,mj->ij', y_t, y_t)
        Eyt_xt = np.einsum('mi,j->ij', y_t, E_xt)

    return Eyt_yt, Eyt_xt, Ext_xt
def emissionLikelihood(self, x, ys):
    """Compute log P( y | x, ϴ ) through ``Normal.log_likelihood``.

    Args:
        x: Latent state(s). A 2-d array is treated as one state per row
            (time on axis 0); a 1-d array as a single state.
        ys: Observations. For 2-d ``x`` this is either 2-d with time on
            axis 0, or 3-d with time on axis 1 (several measurements per
            latent state). For 1-d ``x`` it must be 1-d or 2-d.

    Returns:
        For 2-d ``x``, the per-time-step results of ``Normal.log_likelihood``
        accumulated with ``+=``. For 1-d ``x``, the result of a single
        ``Normal.log_likelihood`` call.

    Raises:
        AssertionError: If the dimensions of ``x`` and ``ys`` do not line up.
    """
    if x.ndim == 2:
        # Multiple time steps
        if ys.ndim == 2:
            assert x.shape[0] == ys.shape[0]
        else:
            # There are multiple measurements per latent state
            assert ys.ndim == 3
            assert x.shape[0] == ys.shape[1]

            # Put the time index in front
            ys = np.swapaxes(ys, 0, 1)

        assert x.shape[0] == ys.shape[0]

        ans = 0.0
        for _x, _ys in zip(x, ys):
            ans += Normal.log_likelihood(
                _ys, nat_params=(-0.5 * self.J1Emiss, self._hy.dot(_x)))
        return ans

    # Only 1 example
    assert x.ndim == 1
    assert ys.ndim in (1, 2)

    # This branch used to read the loop variables `_ys` / `_x`, which are
    # never bound here, so it always raised NameError.
    return Normal.log_likelihood(
        ys, nat_params=(-0.5 * self.J1Emiss, self._hy.dot(x)))
def integrate(self, integrand, forward=True):
    """Marginalize one variable out of ``integrand``.

    Args:
        integrand: Arguments unpacked positionally into the ``Normal``
            marginalization routine.
        forward: If true, integrate out x_t-1 (``Normal.marginalizeX2``);
            otherwise integrate out x_t+1 (``Normal.marginalizeX1``).

    Returns:
        The tuple ``(J, h, log_Z)`` produced by the marginalization.
    """
    marginalize = Normal.marginalizeX2 if forward else Normal.marginalizeX1
    J, h, log_Z = marginalize(*integrand, computeMarginal=self.computeMarginal)
    return J, h, log_Z
def preprocessData(self, ys, u=None, computeMarginal=True):
    """Cache the per-time-step emission terms for the observations ``ys``.

    Sets ``self._T``, ``self.hy``, ``self.computeMarginal`` and
    ``self.log_Zy``; also sets ``self.u`` when ``u`` is given.

    Args:
        ys: Observations, 2-d (a single measurement, promoted to 3-d with a
            leading axis of length 1) or 3-d. Axis 1 is the time axis and
            the last axis must match ``self.J1Emiss.shape[0]``.
        u: Optional array of shape ``(self.T, self.D_latent)``. NaN entries
            are recorded in a mask stored next to it.
        computeMarginal: If true, evaluate the emission log partition at
            every time step; otherwise store zeros.

    Raises:
        AssertionError: If ``ys`` is None or the shapes are inconsistent.
    """
    # This check used to be a bare expression (`ys is not None`) that did nothing
    assert ys is not None, 'ys must not be None'

    ys = np.array(ys)
    if ys.ndim == 2:
        ys = ys[None]
    else:
        assert ys.ndim == 3

    assert self.J1Emiss.shape[0] == ys.shape[2]

    self._T = ys.shape[1]

    # This is A.T @ sigInv @ y for each y, summed over each measurement
    self.hy = ys.dot(self._hy).sum(axis=0)

    # P( y | x ) ~ N( -0.5 * Jy, hy )
    self.computeMarginal = computeMarginal
    if computeMarginal:
        # The signature lets Jy (a matrix) broadcast against each row of hy
        partition = np.vectorize(
            lambda J, h: Normal.log_partition(nat_params=(-0.5 * J, h)),
            signature='(n,n),(n)->()')
        self.log_Zy = partition(self.Jy, self.hy)
    else:
        self.log_Zy = np.zeros(self.hy.shape[0])

    if u is not None:
        assert u.shape == (self.T, self.D_latent)
        uMask = np.isnan(u)
        self.u = (u, uMask, None)
def updateParams(self, z, As, sigmas, C, R, mu0, sigma0, u=None, ys=None, computeMarginal=True):
    """Convert standard parameters to natural ones and install them.

    The arguments are first validated by ``self.parameterCheck``. Each
    ``(A, sigma)`` pair from ``As`` / ``sigmas`` and the pair ``(C, R)`` go
    through ``Regression.standardToNat``; ``(mu0, sigma0)`` goes through
    ``Normal.standardToNat``. Everything is then handed to
    ``self.updateNatParams`` together with ``z``, ``u``, ``ys`` and
    ``computeMarginal``.
    """
    self.parameterCheck(z, As, sigmas, C, R, mu0, sigma0, u=u, ys=ys)

    # One natural-parameter triple per transition, regrouped by component
    transitionNats = [Regression.standardToNat(A, sigma)
                      for A, sigma in zip(As, sigmas)]
    n1Trans, n2Trans, n3Trans = zip(*transitionNats)

    n1Emiss, n2Emiss, n3Emiss = Regression.standardToNat(C, R)
    n1Init, n2Init = Normal.standardToNat(mu0, sigma0)

    self.updateNatParams(
        z,
        n1Trans, n2Trans, n3Trans,
        n1Emiss, n2Emiss, n3Emiss,
        n1Init, n2Init,
        u=u, ys=ys, computeMarginal=computeMarginal)
def updateParams(self, initialDist, transDist, mus, sigmas, ys=None, computeMarginal=True):
    """Convert standard parameters to natural ones and install them.

    ``initialDist`` goes through ``Categorical.standardToNat``,
    ``transDist`` through ``Transition.standardToNat`` and every
    ``(mu, sigma)`` pair through ``Normal.standardToNat``. The results are
    passed to ``self.updateNatParams`` along with ``ys``.

    NOTE(review): ``computeMarginal`` is accepted but not forwarded.
    """
    self.parameterCheck(initialDist, transDist, mus, sigmas, ys)

    (nInit,) = Categorical.standardToNat(initialDist)
    (nTrans,) = Transition.standardToNat(transDist)

    # One natural-parameter pair per emission, regrouped by component
    emissionNats = [Normal.standardToNat(mu, sigma)
                    for mu, sigma in zip(mus, sigmas)]
    n1Emiss, n2Emiss = zip(*emissionNats)

    self.updateNatParams(nInit, nTrans, n1Emiss, n2Emiss, ys=ys)
def updateParams(self, initialDist, transDist, mu0, sigma0, u, As, sigmas, xs=None, computeMarginal=True):
    """Convert standard parameters to natural ones and install them.

    ``initialDist`` goes through ``Categorical.standardToNat``,
    ``transDist`` through ``Transition.standardToNat``, ``(mu0, sigma0)``
    through ``Normal.standardToNat`` and every ``(A, sigma)`` pair through
    ``Regression.standardToNat``. The results are passed to
    ``self.updateNatParams`` along with ``u`` and ``xs``.

    NOTE(review): ``computeMarginal`` is accepted but not forwarded.
    """
    self.parameterCheck(initialDist, transDist, mu0, sigma0, u, As, sigmas, xs=xs)

    (nInit,) = Categorical.standardToNat(initialDist)
    (nTrans,) = Transition.standardToNat(transDist)
    nat1_0, nat2_0 = Normal.standardToNat(mu0, sigma0)

    # One natural-parameter triple per transition, regrouped by component
    transitionNats = [Regression.standardToNat(A, sigma)
                      for A, sigma in zip(As, sigmas)]
    nat1Trans, nat2Trans, nat3Trans = zip(*transitionNats)

    self.updateNatParams(
        nInit, nTrans,
        nat1_0, nat2_0,
        nat1Trans, nat2Trans, nat3Trans,
        u=u, xs=xs)
def log_partition(cls, x=None, params=None, nat_params=None, split=False):
    """Compute A( Ѳ ) - log( h( x ) ) from its transition, emission and initial parts.

    Exactly one of ``params`` and ``nat_params`` must be given; natural
    parameters are converted with ``cls.natToStandard`` first. ``x`` is not
    used here.

    Returns:
        The seven individual terms as a tuple when ``split`` equals True,
        otherwise their sum.
    """
    assert (params is None) != (nat_params is None)

    # Need to multiply each partition by the length of each sequence!!!!
    if params is None:
        params = cls.natToStandard(*nat_params)
    A, sigma, C, R, mu0, sigma0 = params

    A1, A2 = Regression.log_partition(params=(A, sigma), split=True)
    A3, A4 = Regression.log_partition(params=(C, R), split=True)
    A5, A6, A7 = Normal.log_partition(params=(mu0, sigma0), split=True)

    # Kept as an equality test so that only True (or 1) selects the split form
    if split == True:
        return A1, A2, A3, A4, A5, A6, A7
    return A1 + A2 + A3 + A4 + A5 + A6 + A7
def preprocessData(self, ys, computeMarginal=True):
    """Precompute the emission log likelihood table ``self.L``.

    ``self._T`` is taken from axis 1 of ``ys``. Column ``k`` of the
    ``(self.T, self.K)`` table holds ``Normal.log_likelihood`` of ``ys``
    under the k-th emission parameters, summed over axis 0.
    ``computeMarginal`` is not used here.
    """
    ys = np.array(ys)
    self._T = ys.shape[1]

    # Compute all of the emission probs here. This just makes the code cleaner
    table = np.zeros((self.T, self.K))
    for k in range(self.K):
        natParams = (self.n1Emiss[k], self.n2Emiss[k])
        table[:, k] = Normal.log_likelihood(ys, nat_params=natParams).sum(axis=0)
    self.L = table
def emissionProb(self, t, forward=False, ys=None):
    """Return the emission log likelihood of every state at time ``t``.

    Args:
        t: Time index.
        forward: If true, return the length-``K`` vector itself; otherwise
            return it broadcast to shape ``(K, K)``.
        ys: Optional observations, read as ``ys[:, t]``. When omitted, the
            cached row ``self.L[t]`` is used.

    Returns:
        A length-``K`` array, or a read-only ``(K, K)`` broadcast view of it.
    """
    if ys is None:
        emiss = self.L[t]
    else:
        emiss = np.zeros(self.K)
        for k in range(self.K):
            n1 = self.n1Emiss[k]
            n2 = self.n2Emiss[k]
            # Entry k belongs to state k alone. Accumulating with `emiss += ...`
            # gave every state the sum over all states.
            emiss[k] = Normal.log_likelihood(ys[:, t], nat_params=(n1, n2)).sum(axis=0)

    if forward:
        return emiss
    return np.broadcast_to(emiss, (self.K, self.K))
def updateNatParams(self, z, n1Trans, n2Trans, n3Trans, n1Emiss, n2Emiss, n3Emiss, n1Init, n2Init, u=None, ys=None, computeMarginal=True):
    """Store the model in J / h form, one transition block per entry of ``n1Trans``.

    This doesn't exactly use natural parameters, but uses J = -2 * n1 and
    h = n2.

    Args:
        z: Stored unchanged as ``self.z``.
        n1Trans, n2Trans, n3Trans: Sequences of transition natural parameters.
        n1Emiss, n2Emiss, n3Emiss: Emission natural parameters.
        n1Init, n2Init: Natural parameters of the initial distribution.
        u: Optional array of shape ``(self.T, self.D_latent)``; NaN entries
            are recorded in a mask stored next to it.
        ys: Optional observations, forwarded to ``self.preprocessData``.
        computeMarginal: If false, every log normalizer is set to 0.
    """
    self._D_latent = n2Init.shape[0]
    self._D_obs = n1Emiss.shape[0]

    self.z = z

    self.J11s = [-2 * n for n in n1Trans]
    self.J12s = [-n.T for n in n3Trans]
    self.J22s = [-2 * n for n in n2Trans]
    if computeMarginal:
        self.log_Zs = [0.5 * np.linalg.slogdet(np.linalg.inv(J11))[1]
                       for J11 in self.J11s]
    else:
        self.log_Zs = [0 for _ in self.J11s]

    self.J1Emiss = -2 * n1Emiss
    self.Jy = -2 * n2Emiss
    self._hy = n3Emiss.T

    self.J0 = -2 * n1Init
    self.h0 = n2Init
    # J0 = -2 * n1Init, so the natural parameter is -0.5 * J0. The previous
    # `-2 * self.J0` evaluated the partition at 4 * n1Init instead.
    if computeMarginal:
        self.log_Z0 = Normal.log_partition(nat_params=(-0.5 * self.J0, self.h0))
    else:
        self.log_Z0 = 0

    if ys is not None:
        self.preprocessData(ys, computeMarginal=computeMarginal)
    else:
        self._T = None

    if u is not None:
        assert u.shape == (self.T, self.D_latent)
        uMask = np.isnan(u)
        self.u = (u, uMask, None)
    else:
        uMask = np.zeros(self.T, dtype=bool)
        self.u = (None, uMask, self.D_latent)
def updateNatParams(self, n1Trans, n2Trans, n3Trans, n1Emiss, n2Emiss, n3Emiss, n1Init, n2Init, u=None, ys=None, computeMarginal=True):
    """Store the model in J / h form and mark it as set from natural parameters.

    This doesn't exactly use natural parameters, but uses J = -2 * n1 and
    h = n2.

    Args:
        n1Trans, n2Trans, n3Trans: Transition natural parameters.
        n1Emiss, n2Emiss, n3Emiss: Emission natural parameters.
        n1Init, n2Init: Natural parameters of the initial distribution.
        u: Optional array of shape ``(self.T, self.D_latent)``; NaN entries
            are recorded in a mask stored next to it.
        ys: Optional observations, forwarded to ``self.preprocessData``.
        computeMarginal: If false, every log normalizer is set to 0.
    """
    self._D_latent = n1Trans.shape[0]
    self._D_obs = n1Emiss.shape[0]

    self.J11 = -2 * n1Trans
    self.J12 = -n3Trans.T
    self.J22 = -2 * n2Trans
    if computeMarginal:
        self.log_Z = 0.5 * np.linalg.slogdet(np.linalg.inv(self.J11))[1]
    else:
        self.log_Z = 0

    self.J1Emiss = -2 * n1Emiss
    self.Jy = -2 * n2Emiss
    self._hy = n3Emiss.T

    self.J0 = -2 * n1Init
    self.h0 = n2Init
    # J0 = -2 * n1Init, so the natural parameter is -0.5 * J0. The previous
    # `-2 * self.J0` evaluated the partition at 4 * n1Init instead.
    if computeMarginal:
        self.log_Z0 = Normal.log_partition(nat_params=(-0.5 * self.J0, self.h0))
    else:
        self.log_Z0 = 0

    if ys is not None:
        self.preprocessData(ys, computeMarginal=computeMarginal)
    else:
        self._T = None

    if u is not None:
        assert u.shape == (self.T, self.D_latent)
        uMask = np.isnan(u)
        self.u = (u, uMask, None)
    else:
        uMask = np.zeros(self.T, dtype=bool)
        self.u = (None, uMask, self.D_latent)

    self.fromNatural = True
def emissionProb(self, t, forward=False, ys=None):
    """Return P( y_t | x_t ) as a function of x_t, in (J, h, log_Z) form.

    Args:
        t: Time index.
        forward: If true, return ``(J, h, log_Z)`` directly; otherwise pass
            the three values through ``self.alignOnUpper``.
        ys: Optional observations, read as ``ys[:, t]``. When omitted, the
            cached ``self.hy[t]`` and ``self.log_Zy[t]`` are used.
    """
    J = self.Jy

    if ys is not None:
        # A.T @ sigInv @ y
        h = np.einsum('ji,mj->i', self._hy, ys[:, t])
        log_Z = Normal.log_partition(nat_params=(-0.5 * self.Jy, h))
    else:
        h = self.hy[t]
        log_Z = self.log_Zy[t]

    if not forward:
        # Because this is before the integration step
        return self.alignOnUpper(J, h, log_Z)
    return J, h, np.array(log_Z)
def expectedTransitionStatsBlock(self, t, alphas, betas, ys=None, u=None):
    """Return E[ x_t+1 * x_t+1^T ], E[ x_t+1 * x_t^T ] and E[ x_t * x_t^T ].

    The natural parameters for P( x_t+1, x_t | Y ) come from
    ``self.childParentJoint``. They are assembled into one joint Gaussian
    whose first expected sufficient statistic is then cut into blocks by
    ``toBlocks``.
    """
    J11, J12, J22, h1, h2, _ = self.childParentJoint(t, alphas, betas, ys=ys, u=u)

    D = h1.shape[0]
    jointJ = np.block([[J11, J12],
                       [J12.T, J22]])
    jointH = np.hstack((h1, h2))

    # The first expected sufficient statistic for N( x_t+1, x_t | Y ) will
    # be a block matrix with blocks E[ x_t+1 * x_t+1^T ], E[ x_t+1 * x_t^T ]
    # and E[ x_t * x_t^T ]
    secondMoment, _ = Normal.expectedSufficientStats(nat_params=(-0.5 * jointJ, jointH))

    Ext1_xt1, Ext1_xt, Ext_xt = toBlocks(secondMoment, D)
    return Ext1_xt1, Ext1_xt, Ext_xt
def sufficientStats(cls, x, constParams=None):
    """Compute T( x ), treating this class as P( x, y | Ѳ ).

    Args:
        x: A pair ``(x, ys)``. When ``cls.dataN( x )`` is greater than 1 the
            pair is zipped into individual ``(x, ys)`` pairs whose statistics
            are summed.
        constParams: Used as ``u``, which is subtracted from the shifted
            states, and also forwarded to the underlying ``sufficientStats``
            calls.

    Returns:
        Eight statistics: three from the transition regression, three from
        the emission regression and two from the initial state.
    """
    if cls.dataN(x) > 1:
        totals = [0] * 8
        for _x, _ys in zip(*x):
            single = cls.sufficientStats((_x, _ys), constParams=constParams)
            for i in range(8):
                totals[i] += single[i]
        return tuple(totals)

    x, ys = x
    u = constParams

    # Regress each state (with its input removed) on the state before it
    previous = x[:-1]
    shifted = x[1:] - u[:-1]

    t1, t2, t3 = Regression.sufficientStats(x=(previous, shifted), constParams=constParams)
    t4, t5, t6 = Regression.sufficientStats(x=(x, ys), constParams=constParams)
    t7, t8 = Normal.sufficientStats(x=x[0], constParams=constParams)
    return t1, t2, t3, t4, t5, t6, t7, t8
def preprocessData(self, xs, u=None, computeMarginal=True):
    """Precompute the initial and transition log likelihoods of ``xs``.

    ``xs`` must be 2-d with time on axis 0. ``self.L0`` holds the log
    likelihood of the first row under the initial parameters, and
    ``self.L[:, i]`` holds P( x_t | x_t-1, z ) for the i-th set of
    transition parameters over every consecutive pair of rows.
    ``u`` and ``computeMarginal`` are not used here.
    """
    xs = np.array(xs)

    # Not going to use multiple measurements here
    assert xs.ndim == 2
    self._T = xs.shape[0]

    self.L0 = Normal.log_likelihood(xs[0], nat_params=(self.n1_0, self.n2_0))
    self.L = np.empty((self.T - 1, self.K))

    # Each row is [ x_t-1, x_t ] glued together so it can be scored row by row
    pairs = np.hstack((xs[:-1], xs[1:]))

    for i, nats in enumerate(zip(self.n1Trans, self.n2Trans, self.n3Trans)):

        def pairLogLikelihood(row, nats=nats):
            before, after = np.split(row, 2)
            return Regression.log_likelihood((before, after), nat_params=nats)

        self.L[:, i] = np.apply_along_axis(pairLogLikelihood, -1, pairs)
def updateParams(self, A, sigma, C, R, mu0, sigma0, u=None, ys=None, computeMarginal=True):
    """Install the model from standard parameters and keep a copy of them.

    The arguments are validated by ``self.parameterCheck``, converted with
    ``Normal.standardToNat`` / ``Regression.standardToNat`` and handed to
    ``self.updateNatParams``. Afterwards ``self.fromNatural`` is set to
    False and the standard parameters are stored on the instance.
    """
    self.parameterCheck(A, sigma, C, R, mu0, sigma0, u=u, ys=ys)

    initNats = Normal.standardToNat(mu0, sigma0)
    emissNats = Regression.standardToNat(C, R)
    transNats = Regression.standardToNat(A, sigma)

    n1Init, n2Init = initNats
    n1Emiss, n2Emiss, n3Emiss = emissNats
    n1Trans, n2Trans, n3Trans = transNats

    self.updateNatParams(
        n1Trans, n2Trans, n3Trans,
        n1Emiss, n2Emiss, n3Emiss,
        n1Init, n2Init,
        u=u, ys=ys, computeMarginal=computeMarginal)

    # Assigned after updateNatParams, so this is the value that sticks
    self.fromNatural = False

    self._A, self._sigma = A, sigma
    self._C, self._R = C, R
    self._mu0, self._sigma0 = mu0, sigma0
def log_marginalFromAlphaBeta(cls, alpha, beta):
    """Combine one ``(J, h, log_Z)`` alpha and beta into a log marginal.

    Returns the log partition of the Gaussian with the summed J and h,
    minus the two accumulated log normalizers.
    """
    Ja, ha, log_Za = alpha
    Jb, hb, log_Zb = beta

    J = Ja + Jb
    h = ha + hb
    log_Z = Normal.log_partition(nat_params=(-0.5 * J, h))
    return log_Z - (log_Za + log_Zb)
def sampleSingleEmission(self, x, measurements=1):
    """Draw ``measurements`` emission samples for the single latent state ``x``.

    ``x`` must squeeze down to a 1-d vector. The samples come from
    ``Normal.sample`` with natural parameters built from ``self.J1Emiss``
    and ``self._hy``.
    """
    assert x.size == x.squeeze().shape[0]

    state = x.squeeze()
    natParams = (-0.5 * self.J1Emiss, self._hy.dot(state))
    return Normal.sample(nat_params=natParams, size=measurements)
def natToStandard(cls, n1, n2, n3, n4, n5, n6, n7, n8):
    """Convert the eight natural parameters to ``(A, sigma, C, R, mu0, sigma0)``.

    ``n1..n3`` and ``n4..n6`` each go through ``Regression.natToStandard``;
    ``n7, n8`` go through ``Normal.natToStandard``.
    """
    transition = Regression.natToStandard(n1, n2, n3)
    emission = Regression.natToStandard(n4, n5, n6)
    initial = Normal.natToStandard(n7, n8)

    A, sigma = transition
    C, R = emission
    mu0, sigma0 = initial
    return A, sigma, C, R, mu0, sigma0
def standardToNat(cls, A, sigma, C, R, mu0, sigma0):
    """Convert ``(A, sigma, C, R, mu0, sigma0)`` to eight natural parameters.

    ``(A, sigma)`` and ``(C, R)`` each go through
    ``Regression.standardToNat``; ``(mu0, sigma0)`` goes through
    ``Normal.standardToNat``.
    """
    transition = Regression.standardToNat(A, sigma)
    emission = Regression.standardToNat(C, R)
    initial = Normal.standardToNat(mu0, sigma0)

    n1, n2, n3 = transition
    n4, n5, n6 = emission
    n7, n8 = initial
    return n1, n2, n3, n4, n5, n6, n7, n8
def initialStats(cls, x, constParams=None):
    """Return ``Normal.sufficientStats`` of a single initial state.

    Raises:
        AssertionError: If ``x`` is not 1-d, i.e. more than a single element
            was passed in.
    """
    # Assumes that only a single element is passed in
    assert x.ndim == 1

    stats = Normal.sufficientStats(x=x, constParams=constParams)
    return stats
def conditionedExpectedSufficientStats(self, ys, u, alphas, betas, forMStep=False):
    """Accumulate expected sufficient statistics over every sequence in ``ys``.

    Args:
        ys: Sequences of observations; each one unpacks as ``M, T, _ = shape``.
        u: Either None (replaced by zeros), a 2-d array shared by every
            sequence, or a 3-d array with one entry per sequence.
        alphas: Per-sequence lists of ``(J, h, log_Z)`` triples.
        betas: Per-sequence lists of ``(J, h, log_Z)`` triples.
        forMStep: If true, also accumulate the statistics involving ``u``
            and the two counters.

    Returns:
        ``(Ext1_xt1, Ext1_xt, Ext_xt, Eyt_yt, Eyt_xt, Ext_xt_y, Ex0_x0, Ex0)``,
        followed by ``(Eut_ut, Ext_ut, Ext1_ut, allT, allM)`` when
        ``forMStep`` is true.

    Raises:
        AssertionError: If ``u`` is neither None, 2-d nor 3-d, or if a 3-d
            ``u`` does not have one entry per sequence.
    """
    D_latent = self.D_latent

    # Transition statistics
    Ext1_xt1 = np.zeros((D_latent, D_latent))
    Ext1_xt = np.zeros((D_latent, D_latent))
    Ext_xt = np.zeros((D_latent, D_latent))

    if forMStep:
        Eut_ut = np.zeros((D_latent, D_latent))
        Ext_ut = np.zeros((D_latent, D_latent))
        Ext1_ut = np.zeros((D_latent, D_latent))
        allT = 0
        allM = 0

    # Emission statistics
    Eyt_yt = np.zeros((self.D_obs, self.D_obs))
    Eyt_xt = np.zeros((self.D_obs, D_latent))
    Ext_xt_y = np.zeros((D_latent, D_latent))

    # Initial state statistics
    Ex0_x0 = np.zeros((D_latent, D_latent))
    Ex0 = np.zeros(D_latent)

    if u is not None and u.ndim == 3:
        # Multiple u's
        assert len(ys) == len(u)
        it = zip(ys, alphas, betas, u)
    else:
        assert u is None or u.ndim == 2
        if u is None:
            J, _, _ = alphas[0][0]
            u = np.zeros((len(alphas[0]), J.shape[0]))
        it = zip(ys, alphas, betas, itertools.repeat(u, len(ys)))

    for _ys, _alphas, _betas, _u in it:

        uMask = np.isnan(_u)
        _u = MaskedData(_u, uMask, None)

        M, T, _ = _ys.shape

        if forMStep:
            allT += T - 1
            allM += T * M

        for t in range(1, T):
            _Ext1_xt1, _Ext1_xt, _Ext_xt = self.expectedTransitionStatsBlock(
                t - 1, _alphas, _betas, ys=_ys, u=_u)
            Ext1_xt1 += _Ext1_xt1
            Ext1_xt += _Ext1_xt
            Ext_xt += _Ext_xt

            if forMStep:
                # Add J and h entry by entry. np.add on the whole triples
                # would have to pack a matrix, a vector and a scalar into
                # one array, which NumPy >= 1.24 rejects.
                (J_a, h_a, _), (J_b, h_b, _) = _alphas[t - 1], _betas[t - 1]
                (J1_a, h1_a, _), (J1_b, h1_b, _) = _alphas[t], _betas[t]

                Ext = Normal.natToStandard(
                    J_a + J_b, h_a + h_b, fromPrecision=True)[0]
                Ex1t = Normal.natToStandard(
                    J1_a + J1_b, h1_a + h1_b, fromPrecision=True)[0]

                Eut_ut += np.outer(_u[t - 1], _u[t - 1])
                Ext_ut += np.outer(Ext, _u[t - 1])
                Ext1_ut += np.outer(Ex1t, _u[t - 1])

        for t in range(T):
            _Eyt_yt, _Eyt_xt, _Ext_xt = self.expectedEmissionStats(
                t, _ys, _alphas, _betas, conditionOnY=True)
            Eyt_yt += _Eyt_yt
            Eyt_xt += _Eyt_xt
            Ext_xt_y += _Ext_xt

        _Ex0_x0, _Ex0 = self.expectedInitialStats(_alphas, _betas)
        Ex0_x0 += _Ex0_x0
        Ex0 += _Ex0

    if forMStep:
        return Ext1_xt1, Ext1_xt, Ext_xt, Eyt_yt, Eyt_xt, Ext_xt_y, Ex0_x0, Ex0, Eut_ut, Ext_ut, Ext1_ut, allT, allM

    return Ext1_xt1, Ext1_xt, Ext_xt, Eyt_yt, Eyt_xt, Ext_xt_y, Ex0_x0, Ex0
def expectedInitialStats(self, alphas, betas):
    """Return E[ x_0 * x_0 ] and E[ x_0 ].

    The J and h entries of ``alphas[0]`` and ``betas[0]`` are added and
    passed to ``Normal.expectedSufficientStats`` as natural parameters.
    """
    # Add the entries one at a time. np.add on the whole (J, h, log_Z) triples
    # would have to pack a matrix, a vector and a scalar into a single array,
    # which NumPy >= 1.24 rejects as an inhomogeneous shape.
    (J_a, h_a, _), (J_b, h_b, _) = alphas[0], betas[0]
    J = J_a + J_b
    h = h_a + h_b
    return Normal.expectedSufficientStats(nat_params=(-0.5 * J, h))
def sampleStep(self, J, h):
    """Draw one sample from the Gaussian given in (J, h) form.

    ``J`` and ``h`` are converted with ``Normal.natToStandard`` using
    ``fromPrecision=True``; the draw is unwrapped by
    ``Normal.unpackSingleSample``.
    """
    standardParams = Normal.natToStandard(J, h, fromPrecision=True)
    draw = Normal.sample(params=standardParams)
    return Normal.unpackSingleSample(draw)
def likelihoodStep(self, x, J, h):
    """Return the log likelihood of ``x`` under the Gaussian given in (J, h) form.

    ``J`` and ``h`` are converted with ``Normal.natToStandard`` using
    ``fromPrecision=True`` before ``Normal.log_likelihood`` is called.
    """
    standardParams = Normal.natToStandard(J, h, fromPrecision=True)
    return Normal.log_likelihood(x, params=standardParams)