Ejemplo n.º 1
0
    def test_corr_dist(self):
        """
        Check the distance computed between an empirical and a TIC correlation matrix.

        The empirical matrix is the correlation of the first five columns of
        ``self.returns_data``; the TIC matrix is hard-coded and carries the
        same labels on both axes.
        """

        estimator = TIC()

        # Empirical correlation matrix of the first five columns
        empirical_corr = self.returns_data.iloc[:, :5].corr()
        labels = empirical_corr.index

        # Hard-coded TIC correlation matrix, labelled like the empirical one
        tic_values = [
            [1, 0.72177129, -0.30629381, 0.7144813, 0.72177129],
            [0.72177129, 1, -0.30729469, 0.716816, 0.72412981],
            [-0.30629381, -0.30729469, 1, -0.30419096, -0.30729469],
            [0.7144813, 0.716816, -0.30419096, 1, 0.716816],
            [0.72177129, 0.72412981, -0.30729469, 0.716816, 1],
        ]
        theory_corr = pd.DataFrame(tic_values, index=labels, columns=labels)

        # Reference distance between the two matrices
        dist_expected = 0.0130404424083

        # Distance reported by the estimator
        dist_actual = estimator.corr_dist(empirical_corr, theory_corr)

        # NOTE(review): decimal=2 allows a deviation of up to 1.5e-2, which is
        # larger than the expected value itself - consider a tighter tolerance.
        np.testing.assert_almost_equal(dist_actual, dist_expected, decimal=2)
Ejemplo n.º 2
0
    def test_link2corr(self):
        """
        Check that a correlation matrix is correctly derived from a linkage object.

        A hand-written structured linkage array and the labels of the
        empirical correlation matrix (first five columns of
        ``self.returns_data``) are passed to ``TIC._link2corr``.
        """

        estimator = TIC()

        # Labels are taken from the empirical correlation matrix
        labels = self.returns_data.iloc[:, :5].corr().index

        # Sample linkage object as a structured array
        link_dtype = [('i0', int), ('i1', int), ('dist', float), ('num', int)]
        link_rows = [
            (1, 4, 0.10526126, 2),
            (5, 3, 0.23105119, 3),
            (0, 6, 0.40104189, 4),
            (7, 2, 0.59567056, 5),
        ]
        linkage = np.array(link_rows, dtype=link_dtype)

        # Correlation matrix the linkage is expected to produce
        expected_values = [
            [1, 0.678331, 0.290353, 0.678331, 0.678331],
            [0.678331, 1, 0.290353, 0.893231, 0.977840],
            [0.290353, 0.290353, 1, 0.290353, 0.290353],
            [0.678331, 0.893231, 0.290353, 1, 0.893231],
            [0.678331, 0.977840, 0.290353, 0.893231, 1],
        ]
        corr_expected = pd.DataFrame(expected_values, index=labels, columns=labels)

        # Matrix actually derived from the linkage object
        corr_actual = estimator._link2corr(linkage, labels)

        # Compare the raw values to two decimal places
        actual_values = np.array(corr_actual)
        reference_values = np.array(corr_expected)
        np.testing.assert_almost_equal(actual_values, reference_values, decimal=2)
Ejemplo n.º 3
0
    @staticmethod
    def test_update_dist():
        """
        Test the update of the general distance matrix to take the new clusters into account.

        ``TIC._update_dist`` is called twice on the same inputs: once with
        ``criterion=None`` and once with ``pd.DataFrame.mean`` as an
        alternative criterion. Both results are compared with the same
        expected one-element distance matrix.

        Declared as a static method because the test reads no instance state;
        without the decorator, invoking it through an instance would pass an
        unexpected positional argument and raise ``TypeError``.
        """

        tic = TIC()

        # Creating a sample input
        dist0 = pd.DataFrame([[0.0, 0.595671], [0.595671, 0.0]],
                             columns=[20, 10],
                             index=[20, 10])
        lnk0 = np.array([[1, 4, 0.10526126, 2], [5, 3, 0.23105119, 3],
                         [0, 6, 0.40104189, 4], [7, 2, 0.59567056, 5]])
        lnk_ = np.array([[7, 2, 0.59567056, 5]])
        items0 = [1010, 1020, 20, 1040, 1030, 5, 6, 10, 8]

        # Alternative criterion - simple average
        alt_criterion = pd.DataFrame.mean

        # Expected distance matrix
        dist_expected = pd.DataFrame([0], columns=[8], index=[8])

        # Calculating distance matrix
        dist_new = tic._update_dist(dist0, lnk0, lnk_, items0, criterion=None)

        # Calculating the distance matrix with an alternative criterion
        dist_new_alt = tic._update_dist(dist0,
                                        lnk0,
                                        lnk_,
                                        items0,
                                        criterion=alt_criterion)

        # Testing that the obtained distance matrix is right
        np.testing.assert_almost_equal(np.array(dist_new),
                                       np.array(dist_expected),
                                       decimal=2)

        # Testing that the alternative criterion yields the same distance matrix
        np.testing.assert_almost_equal(np.array(dist_new_alt),
                                       np.array(dist_expected),
                                       decimal=2)
Ejemplo n.º 4
0
    def test_get_linkage_corr(self):
        """
        Check the linkage object built from an empirical correlation matrix and a tree graph.

        The first five columns of ``self.returns_data`` and the first five
        rows of ``self.classification_tree`` are used. The function is also
        run on a copy of the tree with an extra ``'All'`` column set to 0,
        and both runs are expected to give the same dendrogram.
        """

        estimator = TIC()

        # Restricting the inputs to the first 5 ETFs
        returns_subset = self.returns_data.iloc[:, :5]
        tree = self.classification_tree.iloc[:5]

        # Reference dendrogram
        dend_expected = np.array([
            (1, 4, 0.16853455, 2),
            (5, 0, 0.22227236, 3),
            (3, 6, 0.26530006, 4),
            (7, 2, 0.76268129, 5),
        ])

        # Simple correlation matrix used as the TIC algorithm input
        empirical_corr = returns_subset.corr()

        # Copy of the tree with one extra level holding a single value
        tree_with_root = tree.copy()
        tree_with_root['All'] = 0

        # Linkage from the original tree, then from the extended tree
        dendrogram = estimator._get_linkage_corr(tree, empirical_corr)
        dendrogram_alt = estimator._get_linkage_corr(tree_with_root, empirical_corr)

        # Going through DataFrames so that all compared arrays share a type
        actual = np.array(pd.DataFrame(dendrogram))
        reference = np.array(pd.DataFrame(dend_expected))
        actual_alt = np.array(pd.DataFrame(dendrogram_alt))

        # The dendrogram must match the reference values
        np.testing.assert_almost_equal(actual, reference, decimal=2)

        # The tree with the extra level must give the same dendrogram
        np.testing.assert_almost_equal(actual, actual_alt, decimal=2)
Ejemplo n.º 5
0
    @staticmethod
    def test_get_atoms():
        """
        Test the obtaining of the atoms included in an element from a linkage object.

        A hand-written structured linkage array and the item ``5`` are passed
        to ``TIC._get_atoms``; the result is expected to be ``[1, 4]``.

        Declared as a static method because the test reads no instance state;
        without the decorator, invoking it through an instance would pass an
        unexpected positional argument and raise ``TypeError``.
        """

        tic = TIC()

        # Creating a sample input
        lnk = np.array([(1, 4, 0.10526126, 2), (5, 3, 0.23105119, 3),
                        (0, 6, 0.40104189, 4), (7, 2, 0.59567056, 5)],
                       dtype=[('i0', int), ('i1', int), ('dist', float),
                              ('num', int)])
        item = 5

        # Expected list of atoms
        atoms_expected = [1, 4]

        # Getting the atoms list
        atoms = tic._get_atoms(lnk, item)

        # Testing that the obtained atoms are right
        np.testing.assert_almost_equal(atoms, atoms_expected, decimal=2)
Ejemplo n.º 6
0
    @staticmethod
    def test_link_clusters():
        """
        Test the transformation of a linkage object from local linkage to global linkage.

        ``TIC._link_clusters`` is called with an empty global linkage, a
        three-row local linkage and the two item lists; the result is
        compared with a hand-written expected link array.

        Declared as a static method because the test reads no instance state;
        without the decorator, invoking it through an instance would pass an
        unexpected positional argument and raise ``TypeError``.
        """

        tic = TIC()

        # Creating a sample input
        lnk0 = np.array([])
        lnk1 = np.array([[1, 3, 0.10526126, 2], [4, 2, 0.23105119, 3],
                         [0, 5, 0.40104189, 4]])
        items0 = [1010, 1020, 2030, 1040, 1030]
        items1 = [1010, 1020, 1040, 1030]

        # Expected link array
        link_expected = np.array([[1, 4, 0.10526126, 2], [5, 3, 0.23105119, 3],
                                  [0, 6, 0.40104189, 4]])

        # Calculating new link array
        link_new = tic._link_clusters(lnk0, lnk1, items0, items1)

        # Testing that the obtained new link is right
        np.testing.assert_almost_equal(link_new, link_expected, decimal=2)
Ejemplo n.º 7
0
    def test_tic_correlation(self):
        """
        Check the calculation of the Theory-Implied Correlation (TIC) matrix.

        The first five columns of ``self.returns_data`` and the first five
        rows of ``self.classification_tree`` are the inputs; the result is
        compared with a hard-coded matrix to two decimal places.
        """

        estimator = TIC()

        # Restricting the inputs to the first 5 ETFs
        returns_subset = self.returns_data.iloc[:, :5]
        tree = self.classification_tree.iloc[:5]

        # Simple correlation matrix used as the TIC algorithm input
        empirical_corr = returns_subset.corr()
        labels = empirical_corr.index

        # Ratio of the number of observations to the number of elements
        n_observations, n_elements = returns_subset.shape[0], returns_subset.shape[1]
        tn_relation = n_observations / n_elements

        # Reference TIC correlation matrix
        expected_values = [
            [1, 0.72177129, -0.30629381, 0.7144813, 0.72177129],
            [0.72177129, 1, -0.30729469, 0.716816, 0.72412981],
            [-0.30629381, -0.30729469, 1, -0.30419096, -0.30729469],
            [0.7144813, 0.716816, -0.30419096, 1, 0.716816],
            [0.72177129, 0.72412981, -0.30729469, 0.716816, 1],
        ]
        corr_expected = pd.DataFrame(expected_values, index=labels, columns=labels)

        # Matrix produced by the estimator
        corr_actual = estimator.tic_correlation(
            tree,
            empirical_corr,
            tn_relation=tn_relation,
            kde_bwidth=0.25,
        )

        # Compare the raw values to two decimal places
        actual_values = np.array(corr_actual)
        reference_values = np.array(corr_expected)
        np.testing.assert_almost_equal(actual_values, reference_values, decimal=2)