class Hmm1(Hmm):
    """
    Hidden Markov model whose transition matrix is indexed by a single
    (from-state, to-state) pair, i.e. each transition is counted between two
    consecutive states of an observation.

    The class keeps a prior vector (``__pi``) and relies on attributes that
    are read here but set elsewhere (presumably by the ``Hmm`` base class --
    confirm): ``stateCount``, ``stateIndexes``, ``states``,
    ``transitionProbabilities`` and the helper ``safeLog``.
    """

    # Prior (initial-state) probabilities; filled in by calculatePi.
    __pi: Vector

    def __init__(self, states: set, observations: list, emittedSymbols: list):
        """
        Construct the model by delegating all initialisation to the base-class
        constructor.

        PARAMETERS
        ----------
        states : set
            A Set of states, consisting of all possible states for this problem.
        observations : list
            An array of instances, where each instance consists of an array of states.
        emittedSymbols : list
            An array of instances, where each instance consists of an array of symbols.
        """
        super().__init__(states, observations, emittedSymbols)

    def calculatePi(self, observations: list):
        """
        Calculate the prior probability vector from a set of observations.

        The first state of every observation is counted, and the resulting
        count vector is normalised with ``Vector.l1Normalize``.

        PARAMETERS
        ----------
        observations : list
            A set of observations used to calculate the prior probabilities.
            Every observation must contain at least one state, because its
            first element is read unconditionally.
        """
        self.__pi = Vector()
        self.__pi.initAllSame(self.stateCount, 0.0)
        for observation in observations:
            index = self.stateIndexes[observation[0]]
            self.__pi.addValue(index, 1.0)
        self.__pi.l1Normalize()

    def calculateTransitionProbabilities(self, observations: list):
        """
        Calculate the state-to-state transition matrix from a set of
        observations.

        Every pair of consecutive states in every observation increments the
        matching (from, to) cell; the counts are then normalised by
        ``Matrix.columnWiseNormalize``.

        PARAMETERS
        ----------
        observations : list
            A set of observations used to calculate the transition probabilities.
        """
        self.transitionProbabilities = Matrix(self.stateCount, self.stateCount)
        for current in observations:
            # Walk over consecutive (state, next state) pairs of the sequence.
            for fromState, toState in zip(current, current[1:]):
                fromIndex = self.stateIndexes[fromState]
                toIndex = self.stateIndexes[toState]
                self.transitionProbabilities.increment(fromIndex, toIndex)
        self.transitionProbabilities.columnWiseNormalize()

    def __logOfColumn(self, column: int) -> Vector:
        """
        Calculate the logarithm of each value in one column of the transition
        probability matrix.

        PARAMETERS
        ----------
        column : int
            Column index of the transition probability matrix.

        RETURNS
        -------
        Vector
            A new vector holding ``safeLog`` of every value in that column,
            ordered by row (from-state) index.
        """
        result = Vector()
        for i in range(self.stateCount):
            result.add(self.safeLog(self.transitionProbabilities.getValue(i, column)))
        return result

    def viterbi(self, s: list) -> list:
        """
        Calculate the most probable state sequence for a sequence of observed
        symbols, working in log space.

        PARAMETERS
        ----------
        s : list
            A sequence of observed symbols.

        RETURNS
        -------
        list
            The most probable state sequence, one state per observed symbol.
            An empty input yields an empty list.
        """
        sequenceLength = len(s)
        # Nothing to decode; without this guard s[0] below raises IndexError.
        if sequenceLength == 0:
            return []
        # gamma[t][j]: best log score of any path ending in state j at time t.
        # phi[t][j]: index of the predecessor state on that best path.
        gamma = Matrix(sequenceLength, self.stateCount)
        phi = Matrix(sequenceLength, self.stateCount)
        # Initialisation: prior of each state times the emission of the first symbol.
        emission = s[0]
        for i in range(self.stateCount):
            observationLikelihood = self.states[i].getEmitProb(emission)
            gamma.setValue(0, i, self.safeLog(self.__pi.getValue(i)) + self.safeLog(observationLikelihood))
        # Recursion over the remaining symbols.
        for t in range(1, sequenceLength):
            emission = s[t]
            # Row t - 1 is not written while row t is being filled, so fetch it once.
            previousRow = gamma.getRowVector(t - 1)
            for j in range(self.stateCount):
                tempArray = self.__logOfColumn(j)
                tempArray.addVector(previousRow)
                maxIndex = tempArray.maxIndex()
                observationLikelihood = self.states[j].getEmitProb(emission)
                gamma.setValue(t, j, tempArray.getValue(maxIndex) + self.safeLog(observationLikelihood))
                phi.setValue(t, j, maxIndex)
        # Back-trace: start from the best final state and follow phi backwards.
        # Indices are collected last-to-first and reversed once, which avoids
        # the quadratic cost of inserting at the front of a list.
        best = gamma.getRowVector(sequenceLength - 1).maxIndex()
        path = [best]
        for t in range(sequenceLength - 1, 0, -1):
            # phi holds the index as a matrix value, so cast it back to int.
            best = int(phi.getValue(t, best))
            path.append(best)
        path.reverse()
        return [self.states[int(index)].getState() for index in path]
class MatrixTest(unittest.TestCase):
    """
    Unit tests for the Matrix class, exercised together with Vector.

    Most tests compare ``sumOfElements()`` (or a row/column sum, trace,
    determinant, ...) of a fixture before and after an operation.
    """

    def setUp(self):
        """
        Build fresh fixtures before every test.

        small, medium and large are 3x3, 100x100 and 1000x1000 matrices whose
        cells are all set to 1.0; v, vr and V are vectors of matching length
        built with the value 1.0.
        """
        self.small = Matrix(3, 3)
        for i in range(3):
            for j in range(3):
                self.small.setValue(i, j, 1.0)
        self.v = Vector(3, 1.0)
        self.large = Matrix(1000, 1000)
        for i in range(1000):
            for j in range(1000):
                self.large.setValue(i, j, 1.0)
        self.medium = Matrix(100, 100)
        for i in range(100):
            for j in range(100):
                self.medium.setValue(i, j, 1.0)
        self.V = Vector(1000, 1.0)
        self.vr = Vector(100, 1.0)
        # NOTE(review): presumably a 100x100 matrix of random values in
        # [1, 10] with seed 1 -- confirm against the Matrix constructor.
        self.random = Matrix(100, 100, 1, 10, 1)
        self.originalSum = self.random.sumOfElements()
        # NOTE(review): presumably the 100x100 identity matrix; the tests
        # below expect its element sum and its trace to both be 100.
        self.identity = Matrix(100)

    def test_ColumnWiseNormalize(self):
        # After normalisation the element sum is expected to equal the
        # matrix dimension (3, 1000 and 100 respectively).
        mClone = self.small.clone()
        mClone.columnWiseNormalize()
        self.assertEqual(3, mClone.sumOfElements())
        MClone = self.large.clone()
        MClone.columnWiseNormalize()
        self.assertAlmostEqual(1000, MClone.sumOfElements(), 3)
        self.identity.columnWiseNormalize()
        self.assertEqual(100, self.identity.sumOfElements())

    def test_MultiplyWithConstant(self):
        # Each fixture is scaled, checked, then scaled back.
        self.small.multiplyWithConstant(4)
        self.assertEqual(36, self.small.sumOfElements())
        self.small.divideByConstant(4)
        self.large.multiplyWithConstant(1.001)
        self.assertAlmostEqual(1001000, self.large.sumOfElements(), 3)
        self.large.divideByConstant(1.001)
        self.random.multiplyWithConstant(3.6)
        self.assertAlmostEqual(self.originalSum * 3.6, self.random.sumOfElements(), 4)
        self.random.divideByConstant(3.6)

    def test_DivideByConstant(self):
        # Each fixture is divided, checked, then multiplied back.
        self.small.divideByConstant(4)
        self.assertEqual(2.25, self.small.sumOfElements())
        self.small.multiplyWithConstant(4)
        self.large.divideByConstant(10)
        self.assertAlmostEqual(100000, self.large.sumOfElements(), 3)
        self.large.multiplyWithConstant(10)
        self.random.divideByConstant(3.6)
        self.assertAlmostEqual(self.originalSum / 3.6, self.random.sumOfElements(), 4)
        self.random.multiplyWithConstant(3.6)

    def test_Add(self):
        # Adding the identity fixture is expected to raise the sum by 100.
        self.random.add(self.identity)
        self.assertAlmostEqual(self.originalSum + 100, self.random.sumOfElements(), 4)
        self.random.subtract(self.identity)

    def test_AddVector(self):
        # Adding the 1000-element vector of ones to row 4 is expected to
        # raise the sum by 1000.
        self.large.addRowVector(4, self.V)
        # assertEqual takes (first, second, msg); the former third argument
        # 0.0 was a stray tolerance that was only ever used as the message.
        self.assertEqual(1001000, self.large.sumOfElements())
        # Undo the addition by adding the negated vector, then restore V.
        self.V.multiply(-1.0)
        self.large.addRowVector(4, self.V)
        self.V.multiply(-1.0)

    def test_Subtract(self):
        # Subtracting the identity fixture is expected to lower the sum by 100.
        self.random.subtract(self.identity)
        self.assertAlmostEqual(self.originalSum - 100, self.random.sumOfElements(), 4)
        self.random.add(self.identity)

    def test_MultiplyWithVectorFromLeft(self):
        # With an all-ones vector, the result's element sum is expected to
        # equal the matrix element sum.
        result = self.small.multiplyWithVectorFromLeft(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromLeft(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromLeft(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_MultiplyWithVectorFromRight(self):
        # Same expectation as the left-multiplication test.
        result = self.small.multiplyWithVectorFromRight(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromRight(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromRight(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_ColumnSum(self):
        # Every column of a fixture has the same sum, so a random column is probed.
        self.assertEqual(3, self.small.columnSum(randrange(3)))
        self.assertEqual(1000, self.large.columnSum(randrange(1000)))
        self.assertEqual(1, self.identity.columnSum(randrange(100)))

    def test_SumOfRows(self):
        # The element sum of sumOfRows() is expected to equal the matrix element sum.
        self.assertEqual(9, self.small.sumOfRows().sumOfElements())
        self.assertEqual(1000000, self.large.sumOfRows().sumOfElements())
        self.assertEqual(100, self.identity.sumOfRows().sumOfElements())
        self.assertAlmostEqual(self.originalSum, self.random.sumOfRows().sumOfElements(), 3)

    def test_RowSum(self):
        # Every row of a fixture has the same sum, so a random row is probed.
        self.assertEqual(3, self.small.rowSum(randrange(3)))
        self.assertEqual(1000, self.large.rowSum(randrange(1000)))
        self.assertEqual(1, self.identity.rowSum(randrange(100)))

    def test_Multiply(self):
        # An n x n all-ones matrix squared has n * n cells of value n;
        # multiplying by the identity fixture must keep the element sum.
        result = self.small.multiply(self.small)
        self.assertEqual(27, result.sumOfElements())
        result = self.medium.multiply(self.medium)
        self.assertEqual(1000000.0, result.sumOfElements())
        result = self.random.multiply(self.identity)
        self.assertEqual(self.originalSum, result.sumOfElements())
        result = self.identity.multiply(self.random)
        self.assertEqual(self.originalSum, result.sumOfElements())

    def test_ElementProduct(self):
        # The element-wise product with the identity fixture is expected to
        # keep only the diagonal, so its element sum must equal its trace.
        result = self.small.elementProduct(self.small)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.elementProduct(self.large)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.elementProduct(self.identity)
        self.assertEqual(result.trace(), result.sumOfElements())

    def test_SumOfElements(self):
        self.assertEqual(9, self.small.sumOfElements())
        self.assertEqual(1000000, self.large.sumOfElements())
        self.assertEqual(100, self.identity.sumOfElements())
        self.assertEqual(self.originalSum, self.random.sumOfElements())

    def test_Trace(self):
        self.assertEqual(3, self.small.trace())
        self.assertEqual(1000, self.large.trace())
        self.assertEqual(100, self.identity.trace())

    def test_Transpose(self):
        # Transposing must not change the element sum.
        self.assertEqual(9, self.small.transpose().sumOfElements())
        self.assertEqual(1000000, self.large.transpose().sumOfElements())
        self.assertEqual(100, self.identity.transpose().sumOfElements())
        self.assertAlmostEqual(self.originalSum, self.random.transpose().sumOfElements(), 3)

    def test_IsSymmetric(self):
        self.assertTrue(self.small.isSymmetric())
        self.assertTrue(self.large.isSymmetric())
        self.assertTrue(self.identity.isSymmetric())
        self.assertFalse(self.random.isSymmetric())

    def test_Determinant(self):
        # All-ones matrices are singular; the identity fixture is expected
        # to have determinant 1.
        self.assertEqual(0, self.small.determinant())
        self.assertEqual(0, self.large.determinant())
        self.assertEqual(1, self.identity.determinant())

    def test_Inverse(self):
        # Inverting the random fixture twice is expected to restore its sum.
        self.identity.inverse()
        self.assertEqual(100, self.identity.sumOfElements())
        self.random.inverse()
        self.random.inverse()
        self.assertAlmostEqual(self.originalSum, self.random.sumOfElements(), 5)

    def test_Characteristics(self):
        # Only the number of items returned by characteristics() is checked.
        vectors = self.small.characteristics()
        self.assertEqual(2, len(vectors))
        vectors = self.identity.characteristics()
        self.assertEqual(100, len(vectors))
        vectors = self.medium.characteristics()
        self.assertEqual(46, len(vectors))