import numpy as np
from scipy.special import logsumexp  # ln-sum-exp used by the forward/backward passes below

def calcOutput(self, input):
    '''
    Run all data points through the neural network

    PARAMETERS
    ----------
    input {TxD}: input data, T points of dimensionality D

    RETURNS
    -------
    output {TxK}: output of the network, T points of dimensionality K
        (number of network outputs)
    '''
    input = unsqueeze(input, 2)
    T, D = input.shape

    if D != self.D:
        raise ValueError("NeuralNet: invalid input dimensions")

    # run the input through the network to obtain the output
    output = np.zeros([T, self.K])

    # feed each input point forward through every layer
    for t, i in enumerate(input):
        signal = i
        for l in self.layers:
            signal = l.calcOutput(signal)
        output[t, :] = signal

    return output
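# `unsqueeze` is used throughout this excerpt but not defined in it. A minimal
# sketch, assuming it promotes an array to the given number of dimensions by
# appending singleton axes (the actual helper may differ):
def unsqueeze(X, ndim):
    # e.g. a length-T vector becomes a {Tx1} matrix for ndim=2
    X = np.asarray(X)
    if X.ndim < ndim:
        X = X.reshape(X.shape + (1,) * (ndim - X.ndim))
    return X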
def derivOptCrit(self, O):
    '''
    Calculates the derivative of the optimization criterion with respect
    to each observation.

    PARAMETERS
    ----------
    O {TxD}: observation matrix with a sequence of T observations, each
        having dimension D

    RETURNS
    -------
    dC / dy {TxD}: derivative of the optimization criterion for each
        observation
    '''
    O = unsqueeze(O, 2)
    T, D = O.shape

    _, lnAlpha, lnC = self._forward(O, scale=True)
    lnBeta = self._backward(O, lnC)

    # calculate lnP for each observation for each state's emission distribution
    # lnP_obs {T, N}
    lnP_obs = np.zeros([T, self.N])
    for i in range(self.N):
        lnP_obs[:, i] = self._B[i].calcLnP(O)

    # calculate the derivative of lnP for each observation for each state's
    # emission distribution
    dlnP = np.zeros([T, self.N, D])
    for i in range(self.N):
        dlnP[:, i, :] = self._B[i].calcDerivLnP(O)

    return np.sum(np.exp(lnBeta + lnAlpha - lnP_obs)[:, :, np.newaxis] * dlnP, axis=1)
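# Written out, the return line above computes, for each observation O_t,
#
#   dC/dO_t = sum_{i=1..N} exp( lnBeta_t(i) + lnAlpha_t(i) - ln b_i(O_t) ) * d ln b_i(O_t) / dO_t
#
# i.e. each emission distribution's gradient is weighted by its (scaled)
# forward-backward score before summing over the N states. This restates the
# code for clarity; it adds no extra computation.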
def viterbi(self, O, labels=True):
    '''
    Calculates q*, the most probable state sequence given the observations
    O, using the Viterbi algorithm.

    PARAMETERS
    ----------
    O {TxD}: observation matrix with a sequence of T observations, each
        having dimension D
    labels: whether to return the state labels, or the state indices

    RETURNS
    -------
    pstar: ln probability of q*
    qstar {Tx1}: labels/indices of states in q* (normal python list of len T)
    '''
    O = unsqueeze(O, 2)
    T, D = O.shape

    # check that the dimensions of the provided observations agree with the
    # trained emission distributions
    dim = self._B[0].mu.shape[1]
    if D != dim:
        raise ValueError(
            'GHMM: observation dimension does not agree with the trained '
            'emission distributions for the model')

    # calculate lnP for each observation for each state's emission distribution
    # lnP_obs {T, N}
    lnP_obs = np.zeros([T, self.N])
    for i in range(self.N):
        lnP_obs[:, i] = self._B[i].calcLnP(O)

    # lnDelta {TxN}: best score along a single path, at time t, accounting
    # for the first t observations and ending in state Si
    lnDelta = np.zeros([T, self.N])
    # lnPsi {TxN}: argmax of the best scores for each t and state j
    lnPsi = np.zeros([T, self.N], dtype=int)

    # Step 1: initialization
    lnDelta[0, :] = np.log(self._pi) + lnP_obs[0, :]

    # Step 2: recursion
    for t in range(1, T):
        pTrans = lnDelta[[t - 1], :].T + np.log(self._A)
        lnDelta[t, :] = np.max(pTrans, axis=0) + lnP_obs[t, :]
        lnPsi[t, :] = np.argmax(pTrans, axis=0)

    # Step 3: termination and backtracking
    qstar = [np.argmax(lnDelta[T - 1, :])]
    pstar = lnDelta[T - 1, qstar[-1]]
    for t in reversed(range(T - 1)):
        qstar.append(lnPsi[t + 1, qstar[-1]])
    qstar.reverse()

    # return labels instead of indices if requested
    if labels:
        qstar = [self._labels[q] for q in qstar]

    return pstar, qstar
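# For reference, the recursion implemented above (Rabiner's notation, here
# carried out in log space):
#
#   delta_1(i) = pi_i * b_i(O_1)
#   delta_t(j) = max_i [ delta_{t-1}(i) * a_ij ] * b_j(O_t)
#   psi_t(j)   = argmax_i [ delta_{t-1}(i) * a_ij ]
#   P* = max_i delta_T(i),  q*_T = argmax_i delta_T(i)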
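# A hedged usage sketch (`model` stands for an already-trained GHMM instance
# and `dim` for its emission dimensionality; both are assumptions of this
# example, not part of the excerpt):
#
#   O = np.random.randn(100, dim)                 # T=100 observations
#   pstar, qstar = model.viterbi(O)               # ln P(q*) and state labels
#   pstar, qidx = model.viterbi(O, labels=False)  # state indices instead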
def _forward(self, O, scale=True):
    '''
    Calculates the forward variable, alpha: the probability of the partial
    observation sequence O1 O2 ... Ot (until time t) and state Si at time t.

    PARAMETERS
    ----------
    O {TxD}: observation matrix with a sequence of T observations, each
        having dimension D
    scale {Boolean}: default True

    RETURNS
    -------
    lnP {Float}: log probability of the observation sequence O
    lnAlpha {T,N}: log of the forward variable: the probability of the
        partial observation sequence O1 O2 ... Ot (until time t) and state
        Si at time t.
    lnC (T,): log of the scaling coefficients for each observation
    '''
    O = unsqueeze(O, 2)
    T, D = O.shape

    # check that the dimensions of the provided observations agree with the
    # trained emission distributions
    dim = self._B[0].mu.shape[1]
    if D != dim:
        raise ValueError(
            'GHMM: observation dimension does not agree with the trained '
            'emission distributions for the model')

    # calculate lnP for each observation for each state's emission distribution
    # lnP_obs {T, N}
    lnP_obs = np.zeros([T, self.N])
    for i in range(self.N):
        lnP_obs[:, i] = self._B[i].calcLnP(O)

    # forward variable, alpha {T,N}
    lnAlpha = np.zeros([T, self.N])
    # initialize vector of scaling coefficients
    lnC = np.zeros(T)

    # Step 1: Initialization
    lnAlpha[0, :] = np.log(self._pi) + lnP_obs[0, :]
    if scale:
        lnC[0] = -logsumexp(lnAlpha[0, :])
        lnAlpha[0, :] += lnC[0]

    # Step 2: Induction
    for t in range(1, T):
        lnAlpha[t, :] = logsumexp(lnAlpha[[t - 1], :].T + np.log(self._A), axis=0) + lnP_obs[t, :]
        if scale:
            # scale using the scores at the current time step t
            lnC[t] = -logsumexp(lnAlpha[t, :])
            lnAlpha[t, :] += lnC[t]

    # Step 3: Termination
    if scale:
        lnP = -np.sum(lnC)
    else:
        lnP = logsumexp(lnAlpha[T - 1, :])

    return lnP, lnAlpha, lnC
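# For reference, the scaled recursion implemented above, in probability space:
#
#   alpha_1(i) = pi_i * b_i(O_1)
#   alpha_t(j) = [ sum_i alpha_{t-1}(i) * a_ij ] * b_j(O_t)
#
# with each ln alpha_t shifted by lnC[t] = -ln sum_i alpha_t(i), so the total
# log likelihood recovers as ln P(O | model) = -sum_t lnC[t].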
def _backward(self, O, lnC):
    '''
    Calculates the backward variable, beta: the probability of the partial
    observation sequence Ot+1 Ot+2 ... OT (from time t+1 to the end) given
    state Si at time t.

    PARAMETERS
    ----------
    O {TxD}: observation matrix with a sequence of T observations, each
        having dimension D
    lnC (T,): log of the scaling coefficients for each observation,
        calculated from the forward pass

    RETURNS
    -------
    lnBeta {T,N}: log of the backward variable
    '''
    O = unsqueeze(O, 2)
    T, D = O.shape

    # check that the dimensions of the provided observations agree with the
    # trained emission distributions
    dim = self._B[0].mu.shape[1]
    if D != dim:
        raise ValueError(
            'GHMM: observation dimension does not agree with the trained '
            'emission distributions for the model')

    # calculate lnP for each observation for each state's emission distribution
    # lnP_obs {T, N}
    lnP_obs = np.zeros([T, self.N])
    for i in range(self.N):
        lnP_obs[:, i] = self._B[i].calcLnP(O)

    # backward variable, beta {T,N}
    # Step 1: Initialization (beta_T(i) = 1, and ln(1) = 0)
    lnBeta = np.zeros([T, self.N]) + lnC[T - 1]

    # Step 2: Induction
    for t in reversed(range(T - 1)):
        lnBeta[t, :] = logsumexp(np.log(self._A) + lnP_obs[t + 1, :] + lnBeta[t + 1, :], axis=1) + lnC[t]

    return lnBeta
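# For reference, the recursion implemented above, in probability space:
#
#   beta_T(i) = 1
#   beta_t(i) = sum_j a_ij * b_j(O_{t+1}) * beta_{t+1}(j)
#
# with each row shifted by the same lnC[t] computed in the forward pass, so
# lnBeta stays on the same scale as lnAlpha.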
def setData(self, Xtrain, Ytrain):
    '''Store the training inputs and targets, promoting 1-D arrays to 2-D.'''
    self.train = unsqueeze(Xtrain, 2)
    self.target = unsqueeze(Ytrain, 2)
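# A hedged usage sketch (`net` stands for a NeuralNet instance, an assumption
# of this example): 1-D arrays are promoted to {Tx1} column form, so
# scalar-target problems can pass plain vectors.
#
#   net.setData(np.arange(10.0), np.arange(10.0) ** 2)
#   net.train.shape, net.target.shape  # -> (10, 1), (10, 1)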