Exemple #1
0
    def calculateRMinusY(self, instance: Instance, inputVector: Vector,
                         weights: Matrix) -> Vector:
        """
        The calculateRMinusY method creates a new Vector with given Instance, then it multiplies given
        input Vector with given weights Matrix. After normalizing the output, it return the difference between the newly
        created Vector and normalized output.

        PARAMETERS
        ----------
        instance : Instance
            Instance is used to get class labels.
        inputVector : Vector
            Vector to multiply weights.
        weights : Matrix
            Matrix of weights

        RETURNS
        -------
        Vector
            Difference between newly created Vector and normalized output.
        """
        r = Vector()
        r.initAllZerosExceptOne(
            self.K, self.classLabels.index(instance.getClassLabel()), 1.0)
        o = weights.multiplyWithVectorFromRight(inputVector)
        y = self.normalizeOutput(o)
        return r.difference(y)
Exemple #2
0
    def train(self, trainSet: InstanceList, parameters: Parameter):
        """
        Training algorithm for the linear discriminant analysis classifier (Introduction to Machine Learning, Alpaydin,
        2015).

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data given to the algorithm.
        parameters : Parameter
            Parameter of the Lda algorithm.
        """
        w0 = {}
        w = {}
        priorDistribution = trainSet.classDistribution()
        classLists = Partition(trainSet)
        covariance = Matrix(trainSet.get(0).continuousAttributeSize(), trainSet.get(0).continuousAttributeSize())
        for i in range(classLists.size()):
            averageVector = Vector(classLists.get(i).continuousAverage())
            classCovariance = classLists.get(i).covariance(averageVector)
            classCovariance.multiplyWithConstant(classLists.get(i).size() - 1)
            covariance.add(classCovariance)
        covariance.divideByConstant(trainSet.size() - classLists.size())
        covariance.inverse()
        for i in range(classLists.size()):
            Ci = classLists.get(i).getClassLabel()
            averageVector = Vector(classLists.get(i).continuousAverage())
            wi = covariance.multiplyWithVectorFromRight(averageVector)
            w[Ci] = wi
            w0i = -0.5 * wi.dotProduct(averageVector) + math.log(priorDistribution.getProbability(Ci))
            w0[Ci] = w0i
        self.model = LdaModel(priorDistribution, w, w0)
Exemple #3
0
    def __linearRegressionOnCountsOfCounts(self, countsOfCounts: list) -> list:
        """
        Given counts of counts, this function will calculate the estimated counts of counts c$^*$ with
        Good-Turing smoothing. First, the algorithm filters the non-zero counts from counts of counts array and constructs
        c and r arrays. Then it constructs Z_n array with Z_n = (2C_n / (r_{n+1} - r_{n-1})). The algorithm then uses
        simple linear regression on Z_n values to estimate w_1 and w_0, where log(N[i]) = w_1log(i) + w_0

        PARAMETERS
        ----------
        countsOfCounts : list
            Counts of counts. countsOfCounts[1] is the number of words occurred once in the corpus. countsOfCounts[i] is
            the number of words occurred i times in the corpus.

        RETURNS
        ------
        list
            Estimated counts of counts array. N[1] is the estimated count for out of vocabulary words.
        """
        N = [0.0] * len(countsOfCounts)
        r = []
        c = []
        for i in range(1, len(countsOfCounts)):
            if countsOfCounts[i] != 0:
                r.append(i)
                c.append(countsOfCounts[i])
        A = Matrix(2, 2)
        y = Vector(2, 0)
        for i in range(len(r)):
            xt = math.log(r[i])
            if i == 0:
                rt = math.log(c[i])
            else:
                if i == len(r) - 1:
                    rt = math.log((1.0 * c[i]) / (r[i] - r[i - 1]))
                else:
                    rt = math.log((2.0 * c[i]) / (r[i + 1] - r[i - 1]))
            A.addValue(0, 0, 1.0)
            A.addValue(0, 1, xt)
            A.addValue(1, 0, xt)
            A.addValue(1, 1, xt * xt)
            y.addValue(0, rt)
            y.addValue(1, rt * xt)
        A.inverse()
        w = A.multiplyWithVectorFromRight(y)
        w0 = w.getValue(0)
        w1 = w.getValue(1)
        for i in range(1, len(countsOfCounts)):
            N[i] = math.exp(math.log(i) * w1 + w0)
        return N
Exemple #4
0
    def calculateForwardSingleHiddenLayer(self, W: Matrix, V: Matrix):
        """
        The calculateForwardSingleHiddenLayer method takes two matrices W and V. First it multiplies W with x, then
        multiplies V with the result of the previous multiplication.

        PARAMETERS
        ----------
        W : Matrix
            Matrix to multiply with x.
        V : Matrix
            Matrix to multiply.
        """
        hidden = self.calculateHidden(self.x, W)
        hiddenBiased = hidden.biased()
        self.y = V.multiplyWithVectorFromRight(hiddenBiased)
Exemple #5
0
    def calculateHidden(self, input: Vector, weights: Matrix) -> Vector:
        """
        The calculateHidden method takes a {@link Vector} input and {@link Matrix} weights, It multiplies the weights
        Matrix with given input Vector than applies the sigmoid function and returns the result.

        PARAMETERS
        ----------
        input : Vector
            Vector to multiply weights.
        weights : Matrix
            Matrix is multiplied with input Vector.

        RETURNS
        -------
        Vector
            Result of sigmoid function.
        """
        z = weights.multiplyWithVectorFromRight(input)
        z.sigmoid()
        return z
Exemple #6
0
class MatrixTest(unittest.TestCase):
    def setUp(self):
        self.small = Matrix(3, 3)
        for i in range(3):
            for j in range(3):
                self.small.setValue(i, j, 1.0)
        self.v = Vector(3, 1.0)
        self.large = Matrix(1000, 1000)
        for i in range(1000):
            for j in range(1000):
                self.large.setValue(i, j, 1.0)
        self.medium = Matrix(100, 100)
        for i in range(100):
            for j in range(100):
                self.medium.setValue(i, j, 1.0)
        self.V = Vector(1000, 1.0)
        self.vr = Vector(100, 1.0)
        self.random = Matrix(100, 100, 1, 10, 1)
        self.originalSum = self.random.sumOfElements()
        self.identity = Matrix(100)

    def test_ColumnWiseNormalize(self):
        mClone = self.small.clone()
        mClone.columnWiseNormalize()
        self.assertEqual(3, mClone.sumOfElements())
        MClone = self.large.clone()
        MClone.columnWiseNormalize()
        self.assertAlmostEqual(1000, MClone.sumOfElements(), 3)
        self.identity.columnWiseNormalize()
        self.assertEqual(100, self.identity.sumOfElements())

    def test_MultiplyWithConstant(self):
        self.small.multiplyWithConstant(4)
        self.assertEqual(36, self.small.sumOfElements())
        self.small.divideByConstant(4)
        self.large.multiplyWithConstant(1.001)
        self.assertAlmostEqual(1001000, self.large.sumOfElements(), 3)
        self.large.divideByConstant(1.001)
        self.random.multiplyWithConstant(3.6)
        self.assertAlmostEqual(self.originalSum * 3.6,
                               self.random.sumOfElements(), 4)
        self.random.divideByConstant(3.6)

    def test_DivideByConstant(self):
        self.small.divideByConstant(4)
        self.assertEqual(2.25, self.small.sumOfElements())
        self.small.multiplyWithConstant(4)
        self.large.divideByConstant(10)
        self.assertAlmostEqual(100000, self.large.sumOfElements(), 3)
        self.large.multiplyWithConstant(10)
        self.random.divideByConstant(3.6)
        self.assertAlmostEqual(self.originalSum / 3.6,
                               self.random.sumOfElements(), 4)
        self.random.multiplyWithConstant(3.6)

    def test_Add(self):
        self.random.add(self.identity)
        self.assertAlmostEqual(self.originalSum + 100,
                               self.random.sumOfElements(), 4)
        self.random.subtract(self.identity)

    def test_AddVector(self):
        self.large.addRowVector(4, self.V)
        self.assertEqual(1001000, self.large.sumOfElements(), 0.0)
        self.V.multiply(-1.0)
        self.large.addRowVector(4, self.V)
        self.V.multiply(-1.0)

    def test_Subtract(self):
        self.random.subtract(self.identity)
        self.assertAlmostEqual(self.originalSum - 100,
                               self.random.sumOfElements(), 4)
        self.random.add(self.identity)

    def test_MultiplyWithVectorFromLeft(self):
        result = self.small.multiplyWithVectorFromLeft(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromLeft(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromLeft(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_MultiplyWithVectorFromRight(self):
        result = self.small.multiplyWithVectorFromRight(self.v)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.multiplyWithVectorFromRight(self.V)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.multiplyWithVectorFromRight(self.vr)
        self.assertAlmostEqual(self.originalSum, result.sumOfElements(), 4)

    def test_ColumnSum(self):
        self.assertEqual(3, self.small.columnSum(randrange(3)))
        self.assertEqual(1000, self.large.columnSum(randrange(1000)))
        self.assertEqual(1, self.identity.columnSum(randrange(100)))

    def test_SumOfRows(self):
        self.assertEqual(9, self.small.sumOfRows().sumOfElements())
        self.assertEqual(1000000, self.large.sumOfRows().sumOfElements())
        self.assertEqual(100, self.identity.sumOfRows().sumOfElements())
        self.assertAlmostEqual(self.originalSum,
                               self.random.sumOfRows().sumOfElements(), 3)

    def test_RowSum(self):
        self.assertEqual(3, self.small.rowSum(randrange(3)))
        self.assertEqual(1000, self.large.rowSum(randrange(1000)))
        self.assertEqual(1, self.identity.rowSum(randrange(100)))

    def test_Multiply(self):
        result = self.small.multiply(self.small)
        self.assertEqual(27, result.sumOfElements())
        result = self.medium.multiply(self.medium)
        self.assertEqual(1000000.0, result.sumOfElements())
        result = self.random.multiply(self.identity)
        self.assertEqual(self.originalSum, result.sumOfElements())
        result = self.identity.multiply(self.random)
        self.assertEqual(self.originalSum, result.sumOfElements())

    def test_ElementProduct(self):
        result = self.small.elementProduct(self.small)
        self.assertEqual(9, result.sumOfElements())
        result = self.large.elementProduct(self.large)
        self.assertEqual(1000000, result.sumOfElements())
        result = self.random.elementProduct(self.identity)
        self.assertEqual(result.trace(), result.sumOfElements())

    def test_SumOfElements(self):
        self.assertEqual(9, self.small.sumOfElements())
        self.assertEqual(1000000, self.large.sumOfElements())
        self.assertEqual(100, self.identity.sumOfElements())
        self.assertEqual(self.originalSum, self.random.sumOfElements())

    def test_Trace(self):
        self.assertEqual(3, self.small.trace())
        self.assertEqual(1000, self.large.trace())
        self.assertEqual(100, self.identity.trace())

    def test_Transpose(self):
        self.assertEqual(9, self.small.transpose().sumOfElements())
        self.assertEqual(1000000, self.large.transpose().sumOfElements())
        self.assertEqual(100, self.identity.transpose().sumOfElements())
        self.assertAlmostEqual(self.originalSum,
                               self.random.transpose().sumOfElements(), 3)

    def test_IsSymmetric(self):
        self.assertTrue(self.small.isSymmetric())
        self.assertTrue(self.large.isSymmetric())
        self.assertTrue(self.identity.isSymmetric())
        self.assertFalse(self.random.isSymmetric())

    def test_Determinant(self):
        self.assertEqual(0, self.small.determinant())
        self.assertEqual(0, self.large.determinant())
        self.assertEqual(1, self.identity.determinant())

    def test_Inverse(self):
        self.identity.inverse()
        self.assertEqual(100, self.identity.sumOfElements())
        self.random.inverse()
        self.random.inverse()
        self.assertAlmostEqual(self.originalSum, self.random.sumOfElements(),
                               5)

    def test_Characteristics(self):
        vectors = self.small.characteristics()
        self.assertEqual(2, len(vectors))
        vectors = self.identity.characteristics()
        self.assertEqual(100, len(vectors))
        vectors = self.medium.characteristics()
        self.assertEqual(46, len(vectors))