def test_discrete_input():
    """Test bivariate MI estimation from discrete data."""
    # Generate Gaussian test data
    covariance = 0.4
    data = _get_discrete_gauss_data(covariance=covariance, n=10000, delay=1,
                                    normalise=False, seed=SEED)
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2}
    nw = BivariateMI()
    res = nw.analyse_single_target(settings=settings, data=data, target=1)
    assert np.isclose(
        res._single_target[1].omnibus_mi, expected_mi, atol=0.05), (
        'Estimated MI for discrete variables is not correct. Expected: '
        '{0}, Actual results: {1}.'.format(
            expected_mi, res._single_target[1].omnibus_mi))
def _get_gauss_data(n=10000, covariance=0.4, expand=True):
    """Generate correlated and uncorrelated Gaussian variables.

    Generate two sets of random normal data, where one set has a given
    covariance and the second is uncorrelated.
    """
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    src_corr = [rn.normalvariate(0, 1) for r in range(n)]  # correlated src
    src_uncorr = [rn.normalvariate(0, 1) for r in range(n)]  # uncorrelated src
    target = [sum(pair) for pair in zip(
        [covariance * y for y in src_corr[0:n]],
        [(1 - covariance) * y for y in
            [rn.normalvariate(0, 1) for r in range(n)]])]
    # Make everything numpy arrays so jpype understands it. Add an additional
    # axis if requested (MI/CMI estimators accept 2D arrays, TE/AIS only 1D).
    if expand:
        src_corr = np.expand_dims(np.array(src_corr), axis=1)
        src_uncorr = np.expand_dims(np.array(src_uncorr), axis=1)
        target = np.expand_dims(np.array(target), axis=1)
    else:
        src_corr = np.array(src_corr)
        src_uncorr = np.array(src_uncorr)
        target = np.array(target)
    return expected_mi, src_corr, src_uncorr, target
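Throughout these tests the expected value comes from the closed form for jointly Gaussian variables. A minimal self-contained sketch, assuming `calculate_mi` implements I = -0.5 * log2(1 - r**2) in bits (the `calculate_mi_sketch` helper below is ours, not IDTxl's), which also checks the `corr_expected` formula against an empirical correlation:

```python
import random as rn
import numpy as np


def calculate_mi_sketch(corr):
    """Closed-form MI in bits of two jointly Gaussian variables with
    correlation corr (assumed behaviour of idtxl.idtxl_utils.calculate_mi)."""
    return -0.5 * np.log2(1 - corr**2)


cov = 0.4
n = 200000
# For target = cov * src + (1 - cov) * noise with src, noise ~ N(0, 1), the
# correlation between src and target is cov / sqrt(cov**2 + (1 - cov)**2).
corr_expected = cov / np.sqrt(cov**2 + (1 - cov)**2)
src = np.array([rn.normalvariate(0, 1) for _ in range(n)])
noise = np.array([rn.normalvariate(0, 1) for _ in range(n)])
target = cov * src + (1 - cov) * noise
corr_empirical = np.corrcoef(src, target)[0, 1]
print(corr_expected)                       # ~0.5547
print(corr_empirical)                      # close to corr_expected
print(calculate_mi_sketch(corr_expected))  # ~0.2653 bits
```

This is why the tests compare an estimated TE/MI against `calculate_mi(corr_expected)` with a small tolerance.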
def test_discrete_input():
    """Test bivariate TE estimation from discrete data."""
    # Generate Gaussian test data
    covariance = 0.4
    n = 10000
    delay = 1
    source = np.random.normal(0, 1, size=n)
    target = (covariance * source +
              (1 - covariance) * np.random.normal(0, 1, size=n))
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    source = source[delay:]
    target = target[:-delay]

    # Discretise data
    settings = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    est = JidtDiscreteCMI(settings)
    source_dis, target_dis = est._discretise_vars(var1=source, var2=target)
    data = Data(np.vstack((source_dis, target_dis)),
                dim_order='ps', normalise=False)
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2,
        'max_lag_target': 1}
    nw = BivariateTE()
    res = nw.analyse_single_target(settings=settings, data=data, target=1)
    assert np.isclose(
        res._single_target[1].omnibus_te, expected_mi, atol=0.05), (
        'Estimated TE for discrete variables is not correct. Expected: '
        '{0}, Actual results: {1}.'.format(
            expected_mi, res._single_target[1].omnibus_te))
def test_calculate_cmi_all_links():
    """Test if the CMI is estimated correctly."""
    data = Data()
    n = 1000
    cov = 0.4
    source = [rn.normalvariate(0, 1) for r in range(n)]  # correlated src
    target = [0] + [sum(pair) for pair in zip(
        [cov * y for y in source[0:n - 1]],
        [(1 - cov) * y for y in
            [rn.normalvariate(0, 1) for r in range(n - 1)]])]
    data.set_data(np.vstack((source, target)), 'ps')
    res_0 = np.load(os.path.join(os.path.dirname(__file__),
                                 'data/mute_results_0.p'))
    comp_settings = {
        'cmi_estimator': 'JidtKraskovCMI',
        'n_perm_max_stat': 50,
        'n_perm_min_stat': 50,
        'n_perm_omnibus': 200,
        'n_perm_max_seq': 50,
        'tail': 'two',
        'n_perm_comp': 6,
        'alpha_comp': 0.2,
        'stats_type': 'dependent'}
    comp = NetworkComparison()
    comp._initialise(comp_settings)
    comp._create_union(res_0)
    comp.union._single_target[1]['selected_vars_sources'] = [(0, 4)]
    cmi = comp._calculate_cmi_all_links(data)
    corr_expected = cov / (1 * np.sqrt(cov**2 + (1 - cov)**2))
    expected_cmi = calculate_mi(corr_expected)
    print('correlated Gaussians: TE result {0:.4f} bits; expected to be '
          '{1:0.4f} bit for the copy'.format(cmi[1][0], expected_cmi))
    np.testing.assert_almost_equal(
        cmi[1][0], expected_cmi, decimal=1,
        err_msg='when calculating cmi for correlated Gaussians.')
def test_multivariate_te_corr_gaussian(estimator=None):
    """Test multivariate TE estimation on correlated Gaussians.

    Run the multivariate TE algorithm on two sets of random Gaussian data
    with a given covariance. The second data set is shifted by one sample,
    creating a source-target delay of one sample. This example is modelled
    after JIDT demo 4 for transfer entropy. The resulting TE can be compared
    to the analytical result (but expect some error in the estimate).

    The simulated delay is 1 sample, i.e., the algorithm should find
    significant TE from sample (0, 1), a sample in process 0 with lag/delay 1.
    The final target sample should always be (1, 1), the mandatory sample at
    lag 1, because there is no memory in the process.

    Note:
        This test runs considerably faster than other system tests.
        This produces strange small values for non-coupled sources.  TODO
    """
    if estimator is None:
        estimator = 'JidtKraskovCMI'
    n = 1000
    cov = 0.4
    source = [rn.normalvariate(0, 1) for r in range(n)]
    target = [sum(pair) for pair in zip(
        [cov * y for y in source],
        [(1 - cov) * y for y in [rn.normalvariate(0, 1) for r in range(n)]])]
    # Cast everything to numpy so the idtxl estimator understands it.
    source = np.expand_dims(np.array(source), axis=1)
    target = np.expand_dims(np.array(target), axis=1)
    data = Data(normalise=True)
    data.set_data(np.vstack((source[1:].T, target[:-1].T)), 'ps')
    settings = {
        'cmi_estimator': estimator,
        'max_lag_sources': 5,
        'min_lag_sources': 1,
        'max_lag_target': 5,
        'n_perm_max_stat': 21,
        'n_perm_min_stat': 21,
        'n_perm_omnibus': 21,
        'n_perm_max_seq': 21}
    random_analysis = MultivariateTE()
    results = random_analysis.analyse_single_target(settings, data, 1)

    # Assert that there are significant conditionals from the source for
    # target 1. For 500 repetitions I got mean errors of 0.02097686 and
    # 0.01454073 for examples 1 and 2 respectively. The maximum errors were
    # 0.093841 and 0.05833172 respectively. This inspired the following error
    # boundaries.
    corr_expected = cov / (1 * np.sqrt(cov**2 + (1 - cov)**2))
    expected_res = calculate_mi(corr_expected)
    estimated_res = results.get_single_target(1, fdr=False).omnibus_te
    diff = np.abs(estimated_res - expected_res)
    print('Expected source sample: (0, 1)\nExpected target sample: (1, 1)')
    print(('Estimated TE: {0:5.4f}, analytical result: {1:5.4f}, error: '
           '{2:2.2f} %').format(
               estimated_res, expected_res, 100 * diff / expected_res))
    assert (diff < 0.1), ('Multivariate TE calculation for correlated '
                          'Gaussians failed (error larger 0.1: {0}, '
                          'expected: {1}, actual: {2}).'.format(
                              diff, expected_res, estimated_res))
def test_results_network_inference():
    """Test results class for multivariate TE network inference."""
    covariance = 0.4
    n = 10000
    delay = 1
    normalisation = False
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    target_2 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    source = source[delay:]
    target_1 = target_1[:-delay]
    target_2 = target_2[:-delay]

    # Discretise data for speed
    settings_dis = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    est = JidtDiscreteCMI(settings_dis)
    source_dis, target_1_dis = est._discretise_vars(var1=source, var2=target_1)
    source_dis, target_2_dis = est._discretise_vars(var1=source, var2=target_2)
    data = Data(np.vstack((source_dis, target_1_dis, target_2_dis)),
                dim_order='ps', normalise=normalisation)
    # Analysis settings for the already discretised data, as in the other
    # discrete-data tests (settings was left undefined in the original).
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': 2,
        'max_lag_target': 1}

    nw = MultivariateTE()
    # TE - single target
    res_single_multi_te = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # TE - whole network
    res_network_multi_te = nw.analyse_network(settings=settings, data=data)

    nw = BivariateTE()
    # TE - single target
    res_single_biv_te = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # TE - whole network
    res_network_biv_te = nw.analyse_network(settings=settings, data=data)

    nw = MultivariateMI()
    # MI - single target
    res_single_multi_mi = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # MI - whole network
    res_network_multi_mi = nw.analyse_network(settings=settings, data=data)

    nw = BivariateMI()
    # MI - single target
    res_single_biv_mi = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    # MI - whole network
    res_network_biv_mi = nw.analyse_network(settings=settings, data=data)

    res_te = [res_single_multi_te, res_network_multi_te, res_single_biv_te,
              res_network_biv_te]
    res_mi = [res_single_multi_mi, res_network_multi_mi, res_single_biv_mi,
              res_network_biv_mi]
    res_all = res_te + res_mi

    # Check estimated values
    for res in res_te:
        est_te = res._single_target[1].omnibus_te
        assert np.isclose(est_te, expected_mi, atol=0.05), (
            'Estimated TE for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(expected_mi, est_te))
    for res in res_mi:
        est_mi = res._single_target[1].omnibus_mi
        assert np.isclose(est_mi, expected_mi, atol=0.05), (
            'Estimated MI for discrete variables is not correct. Expected: '
            '{0}, Actual results: {1}.'.format(expected_mi, est_mi))
    est_te = res_network_multi_te._single_target[2].omnibus_te
    assert np.isclose(est_te, expected_mi, atol=0.05), (
        'Estimated TE for discrete variables is not correct. Expected: {0}, '
        'Actual results: {1}.'.format(expected_mi, est_te))
    est_mi = res_network_multi_mi._single_target[2].omnibus_mi
    assert np.isclose(est_mi, expected_mi, atol=0.05), (
        'Estimated MI for discrete variables is not correct. Expected: {0}, '
        'Actual results: {1}.'.format(expected_mi, est_mi))

    # Check data parameters in results objects
    n_nodes = 3
    n_realisations = n - delay - max(settings['max_lag_sources'],
                                     settings['max_lag_target'])
    for res in res_all:
        assert res.data_properties.n_nodes == n_nodes, 'Incorrect no. nodes.'
        assert res.data_properties.n_realisations == n_realisations, (
            'Incorrect no. realisations.')
        assert res.data_properties.normalised == normalisation, (
            'Incorrect value for data normalisation.')
        adj_matrix = res.get_adjacency_matrix('binary', fdr=False)
        assert adj_matrix.shape[0] == n_nodes, (
            'Incorrect number of rows in adjacency matrix.')
        assert adj_matrix.shape[1] == n_nodes, (
            'Incorrect number of columns in adjacency matrix.')
def test_delay_reconstruction():
    """Test the reconstruction of information transfer delays from results."""
    covariance = 0.4
    corr_expected = covariance / (
        1 * np.sqrt(covariance**2 + (1 - covariance)**2))
    expected_mi = calculate_mi(corr_expected)
    n = 10000
    delay_1 = 1
    delay_2 = 3
    delay_3 = 5
    normalisation = False
    source = np.random.normal(0, 1, size=n)
    target_1 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    target_2 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    target_3 = (covariance * source +
                (1 - covariance) * np.random.normal(0, 1, size=n))
    source = source[delay_3:]
    target_1 = target_1[(delay_3 - delay_1):-delay_1]
    target_2 = target_2[(delay_3 - delay_2):-delay_2]
    target_3 = target_3[:-delay_3]

    # Discretise data for speed
    settings_dis = {'discretise_method': 'equal', 'n_discrete_bins': 5}
    est = JidtDiscreteCMI(settings_dis)
    source_dis, target_1_dis = est._discretise_vars(var1=source, var2=target_1)
    source_dis, target_2_dis = est._discretise_vars(var1=source, var2=target_2)
    source_dis, target_3_dis = est._discretise_vars(var1=source, var2=target_3)
    data = Data(
        np.vstack((source_dis, target_1_dis, target_2_dis, target_3_dis)),
        dim_order='ps', normalise=normalisation)

    nw = MultivariateTE()
    settings = {
        'cmi_estimator': 'JidtDiscreteCMI',
        'discretise_method': 'none',
        'n_discrete_bins': 5,  # alphabet size of the variables analysed
        'n_perm_max_stat': 21,
        'n_perm_omnibus': 30,
        'n_perm_max_seq': 30,
        'min_lag_sources': 1,
        'max_lag_sources': delay_3 + 1,
        'max_lag_target': 1}

    res_network = nw.analyse_single_target(
        settings=settings, data=data, target=1)
    res_network.combine_results(nw.analyse_single_target(
        settings=settings, data=data, target=2))
    res_network.combine_results(nw.analyse_single_target(
        settings=settings, data=data, target=3))
    adj_mat = res_network.get_adjacency_matrix('max_te_lag', fdr=False)
    print(adj_mat)
    assert adj_mat[0, 1] == delay_1, 'Estimate for delay 1 is not correct.'
    assert adj_mat[0, 2] == delay_2, 'Estimate for delay 2 is not correct.'
    assert adj_mat[0, 3] == delay_3, 'Estimate for delay 3 is not correct.'
    for target in range(1, 4):
        est_mi = res_network._single_target[target].omnibus_te
        assert np.isclose(est_mi, expected_mi, atol=0.05), (
            'Estimated TE for target {0} is not correct. Expected: {1}, '
            'Actual results: {2}.'.format(target, expected_mi, est_mi))
import numpy as np

from idtxl.estimators_jidt import (JidtDiscreteAIS, JidtDiscreteCMI,
                                   JidtDiscreteMI, JidtDiscreteTE,
                                   JidtKraskovAIS, JidtKraskovCMI,
                                   JidtKraskovMI, JidtKraskovTE,
                                   JidtGaussianAIS, JidtGaussianCMI,
                                   JidtGaussianMI, JidtGaussianTE)
from idtxl.estimators_opencl import OpenCLKraskovMI, OpenCLKraskovCMI
from idtxl.estimators_pid import SydneyPID, TartuPID
from idtxl.idtxl_utils import calculate_mi

# Generate Gaussian test data
n = 10000
covariance = 0.4
corr_expected = covariance / (1 * np.sqrt(covariance**2 + (1 - covariance)**2))
expected_mi = calculate_mi(corr_expected)
source_cor = np.random.normal(0, 1, size=n)  # correlated src
source_uncor = np.random.normal(0, 1, size=n)  # uncorrelated src
target = (covariance * source_cor +
          (1 - covariance) * np.random.normal(0, 1, size=n))

# JIDT Discrete estimators
settings = {'discretise_method': 'equal', 'alph1': 5, 'alph2': 5}
est = JidtDiscreteCMI(settings)
cmi = est.estimate(source_cor, target, source_uncor)
print('Estimated CMI: {0:.5f}, expected CMI: {1:.5f}'.format(
    cmi, expected_mi))
est = JidtDiscreteMI(settings)
mi = est.estimate(source_cor, target)
print('Estimated MI: {0:.5f}, expected MI: {1:.5f}'.format(mi, expected_mi))
settings['history_target'] = 1
est = JidtDiscreteTE(settings)
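The JIDT discrete estimators above work on binned data; conceptually they compute a plug-in (histogram) estimate over the joint alphabet. A numpy-only sketch of that idea (the `plugin_mi` helper is ours, not part of IDTxl or JIDT), checked on a deterministic pair where the MI must equal the entropy log2(5):

```python
import numpy as np


def plugin_mi(x, y):
    """Plug-in MI estimate in bits for two integer-valued sequences."""
    # Joint histogram over the full alphabet of each variable.
    joint, _, _ = np.histogram2d(x, y, bins=[np.arange(x.max() + 2),
                                             np.arange(y.max() + 2)])
    p_xy = joint / joint.sum()
    p_x = p_xy.sum(axis=1, keepdims=True)
    p_y = p_xy.sum(axis=0, keepdims=True)
    nz = p_xy > 0  # skip empty cells to avoid log(0)
    return np.sum(p_xy[nz] * np.log2(p_xy[nz] / (p_x @ p_y)[nz]))


# A fully dependent pair: MI(X; X) = H(X) = log2(5) for X uniform over
# five symbols.
x = np.tile(np.arange(5), 2000)
mi = plugin_mi(x, x)
print(mi)  # ~2.3219 bits
```

Unlike this naive sketch, the JIDT estimators also offer bias correction and surrogate-based significance testing.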
"""Estimation from discrete data using IDTxl.""" import numpy as np from idtxl.estimators_jidt import JidtDiscreteCMI, JidtDiscreteTE from idtxl.idtxl_utils import calculate_mi from idtxl.data import Data from idtxl.multivariate_te import MultivariateTE # 1 Use core esimtators with data discretisation. Generate Gaussian test data # and call JIDT Discrete estimators using the build-in discretization. n = 1000 covariance = 0.4 corr_expected = covariance / (1 * np.sqrt(covariance**2 + (1 - covariance)**2)) expected_mi = calculate_mi(corr_expected) source_cor = np.random.normal(0, 1, size=n) # correlated src source_uncor = np.random.normal(0, 1, size=n) # uncorrelated src target = (covariance * source_cor + (1 - covariance) * np.random.normal(0, 1, size=n)) settings = {'discretise_method': 'equal', 'n_discrete_bins': 5} est = JidtDiscreteCMI(settings) cmi = est.estimate(source_cor, target, source_uncor) print('Estimated CMI: {0:.5f}, expected CMI: {1:.5f}'.format(cmi, expected_mi)) settings['history_target'] = 1 est = JidtDiscreteTE(settings) te = est.estimate(source_cor[1:n], target[0:n - 1]) print('Estimated TE: {0:.5f}, expected TE: {1:.5f}'.format(te, expected_mi)) # 2 Use network inference algorithms on discrete data. n_procs = 5 alphabet_size = 5 data = Data(np.random.randint(0, alphabet_size, size=(n, n_procs)),