Ejemplo n.º 1
0
def fill_params_dict_list_sample_sizes(base_path, do_fast_mgc=False):
    '''
    Build the list of parameter dictionaries, one per (simulation, test) pair.

    Every value of the module-level ``simulations`` mapping is combined with
    each of the seven independence test objects created here; the same test
    objects are shared by all simulations.

    :param base_path: stored unchanged under the ``'base_path'`` key of every dictionary

    :param do_fast_mgc: if truthy, each simulation gets one more dictionary after its
        regular ones, carrying a new ``MGC()`` object and ``{"is_fast": True}``
        as ``'additional_params'``
    :type do_fast_mgc: bool

    :return: the parameter dictionaries, grouped by simulation in iteration order
    :rtype: list of dict
    '''
    independence_tests = [
        DCorr(which_test='unbiased'),
        DCorr(which_test='biased'),
        DCorr(which_test='mantel'),
        MGC(),
        HHG(),
        RVCorr(which_test='pearson'),
        MDMR(),
    ]

    params_dict_list = []
    for _sim_name, sim_func in simulations.items():
        # only the second element of each simulations value is recorded
        simulation_type = sim_func[1]
        params_dict_list.extend(
            {
                'independence_test': test,
                'simulation_type': simulation_type,
                'base_path': base_path,
                'additional_params': {}
            }
            for test in independence_tests
        )
        if not do_fast_mgc:
            continue
        # the fast variant gets its own MGC object for every simulation
        params_dict_list.append({
            'independence_test': MGC(),
            'simulation_type': simulation_type,
            'base_path': base_path,
            'additional_params': {"is_fast": True}
        })
    return params_dict_list
Ejemplo n.º 2
0
def fill_params_dict_list_dimensions(do_fast_mgc=False):
    '''
    Build the list of parameter dictionaries for the entries of ``simulations``.

    The list of active tests is currently empty, so regular entries are only
    produced once tests are added to ``independence_tests``.

    :param do_fast_mgc: if truthy, one dictionary per simulation is appended, carrying
        a new ``MGC()`` object and ``{"is_fast": True}`` as ``'additional_params'``
    :type do_fast_mgc: bool

    :return: the parameter dictionaries, grouped by simulation in iteration order
    :rtype: list of dict
    '''
    # candidate tests; list the ones to run in `independence_tests` below
    mcorr = DCorr(which_test='unbiased')
    dcorr = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    mgc = MGC()
    rv_corr = RVCorr(which_test='rv')
    hhg = HHG()
    cca = RVCorr(which_test='cca')
    mdmr = MDMR()
    independence_tests = []  # [mgc, mcorr, dcorr, mantel, rv_corr, cca]

    def make_entry(test, sim_name, sim_func, additional_params):
        # one parameter dictionary; the dimension is looked up from the simulation name
        return {
            'independence_test': test,
            'simulation_type': sim_func[1],
            'dim': find_dim(sim_name),
            'additional_params': additional_params
        }

    params_dict_list = []
    for sim_name, sim_func in simulations.items():
        params_dict_list += [
            make_entry(test, sim_name, sim_func, {})
            for test in independence_tests
        ]
        if do_fast_mgc:
            params_dict_list.append(
                make_entry(MGC(), sim_name, sim_func, {"is_fast": True}))

    return params_dict_list
Ejemplo n.º 3
0
def paired_two_sample_test_dcorr(x,
                                 y,
                                 which_test="biased",
                                 compute_distance_matrix=None,
                                 is_fast=False):
    '''
    Run the paired two sample DCorr test on ``x`` and ``y``.

    A ``DCorr`` object is created with ``is_paired=True`` and the result of
    its ``p_value`` method is returned unchanged.

    :param x: is interpreted as either:

        - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for n samples OR
        - a ``[n*p]`` data matrix, a matrix with n samples in p dimensions
    :type x: 2D numpy.array

    :param y: is interpreted as either:

        - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for n samples OR
        - a ``[n*p]`` data matrix, a matrix with n samples in p dimensions
    :type y: 2D numpy.array

    :param which_test: forwarded to ``DCorr`` as ``which_test``
    :type which_test: string

    :param compute_distance_matrix: forwarded to ``DCorr`` as ``compute_distance_matrix``

    :param is_fast: forwarded to ``DCorr.p_value`` as ``is_fast``
    :type is_fast: bool

    :return: whatever ``DCorr.p_value`` returns for ``x`` and ``y``
        (presumably the p-value together with its metadata -- confirm against ``DCorr.p_value``)

    :raises AssertionError: if ``x`` and ``y`` do not have the same shape
    '''
    shapes_match = x.shape == y.shape
    assert shapes_match, "Matrices X and Y need to be of same dimensions [n, p]"

    paired_dcorr = DCorr(
        is_paired=True,
        which_test=which_test,
        compute_distance_matrix=compute_distance_matrix,
    )
    result = paired_dcorr.p_value(x, y, is_fast=is_fast)
    return result
Ejemplo n.º 4
0
def fill_params_dict_list_epsilons(base_path, do_fast_mgc=False):
    '''
    Build the list of parameter dictionaries for simulation types 1, 2 and 3.

    Each simulation type is combined with the Manova, unbiased DCorr and MGC
    test objects (in that order); the same three objects are reused for every
    simulation type.

    :param base_path: stored unchanged under the ``'base_path'`` key of every dictionary

    :param do_fast_mgc: if truthy, each simulation type gets one more dictionary after
        its regular ones, carrying a new ``MGC()`` object and ``{"is_fast": True}``
        as ``'additional_params'``
    :type do_fast_mgc: bool

    :return: the parameter dictionaries, grouped by simulation type
    :rtype: list of dict
    '''
    unbiased_dcorr = DCorr(which_test='unbiased')
    regular_mgc = MGC()
    manova_test = Manova()
    ordered_tests = (manova_test, unbiased_dcorr, regular_mgc)

    collected = []
    for sim_type in (1, 2, 3):
        collected += [
            {
                'independence_test': current_test,
                'simulation_type': sim_type,
                'base_path': base_path,
                'additional_params': {}
            }
            for current_test in ordered_tests
        ]
        if do_fast_mgc:
            # the fast variant gets its own MGC object for every simulation type
            collected.append({
                'independence_test': MGC(),
                'simulation_type': sim_type,
                'base_path': base_path,
                'additional_params': {"is_fast": True}
            })
    return collected
Ejemplo n.º 5
0
def fill_params_dict_list_sample_sizes():
    '''
    Build one parameter dictionary per (simulation, test) pair.

    The list of active tests is currently empty, so the result is empty until
    tests are added to ``independence_tests``.

    :return: dictionaries with the keys ``'independence_test'`` and ``'simulation_type'``
    :rtype: list of dict
    '''
    # candidate tests; list the ones to run in `independence_tests` below
    mcorr = DCorr(which_test='unbiased')
    dcorr = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    mgc = MGC()
    hhg = HHG()
    pearson = RVCorr(which_test='pearson')
    independence_tests = []  # [mgc, mcorr, dcorr, mantel, pearson]

    return [
        {
            'independence_test': test,
            'simulation_type': sim_func[1]
        }
        for _sim_name, sim_func in simulations.items()
        for test in independence_tests
    ]
Ejemplo n.º 6
0
def fill_params_dict_list_dimensions():
    '''
    Build one parameter dictionary per (simulation, test) pair.

    The list of active tests is currently empty, so the result is empty until
    tests are added to ``independence_tests``.

    :return: dictionaries with the keys ``'independence_test'``, ``'simulation_type'``
        and ``'dim'`` (the latter obtained from ``find_dim`` for the simulation name)
    :rtype: list of dict
    '''
    # candidate tests, created up front
    mcorr = DCorr(which_test='unbiased')
    dcorr = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    mgc = MGC()
    rv_corr = RVCorr(which_test='rv')
    hhg = HHG()
    cca = RVCorr(which_test='cca')
    mdmr = MDMR()
    # to run an additional test, initialize it above and
    # add it to the `independence_tests` list
    independence_tests = []  # [mgc, mcorr, dcorr, mantel, rv_corr, cca]

    return [
        {
            'independence_test': test,
            'simulation_type': sim_func[1],
            'dim': find_dim(sim_name)
        }
        for sim_name, sim_func in simulations.items()
        for test in independence_tests
    ]
Ejemplo n.º 7
0
    def __init__(self,
                 compute_distance_matrix=None,
                 which_test='unbiased',
                 max_lag=0):
        '''
        Set up the test with an internal ``DCorr`` object.

        :param compute_distance_matrix: a function to compute the pairwise distance matrix, given a data matrix
        :type compute_distance_matrix: FunctionType or callable()

        :param which_test: the type of distance covariance estimate to use, either 'unbiased' or 'biased'
        :type which_test: string

        :param max_lag: Maximum lead/lag to check for dependence between X_t and Y_t+j (M parameter)
        :type max_lag: int

        :raises ValueError: if ``which_test`` is neither 'unbiased' nor 'biased'
        '''
        # NOTE(review): compute_distance_matrix is accepted but not used anywhere
        # in this constructor -- confirm whether it should be forwarded.
        IndependenceTest.__init__(self)

        allowed_estimates = ('unbiased', 'biased')
        if which_test not in allowed_estimates:
            raise ValueError('which_test must be unbiased or biased.')

        self.which_test = which_test
        # the internal DCorr object uses the same estimate type
        self.dcorr = DCorr(which_test=self.which_test)
        self.max_lag = max_lag
Ejemplo n.º 8
0
def test_power():
    '''
    Compare the estimated power of unbiased DCorr with stored MATLAB results.

    The power is estimated with ``power`` on ``joint_sim`` in one dimension for
    the sample sizes 5, 10, ..., 100 and must agree with the first row of the
    ``powerM`` entry of the MATLAB result file within an absolute tolerance of 0.2.
    '''
    test = DCorr(which_test='unbiased')
    simulation_type = 4
    sim = joint_sim
    sample_sizes = list(range(5, 101, 5))

    # the path is relative, so the test has to be run from the repository root
    matlab_file_name = './mgcpy/benchmarks/matlab_power_results/sample_size/CorrIndTestType{}N100Dim1.mat'.format(
        simulation_type)
    with h5py.File(matlab_file_name, 'r') as f:
        # every entry is copied out (transposed) while the file is still open
        matlab_results = {
            key: np.transpose(np.array(value)) for key, value in f.items()
        }
    matlab_power = matlab_results['powerM'][0, :]

    estimated_power = np.zeros(len(sample_sizes))
    for position, num_samples in enumerate(sample_sizes):
        estimated_power[position] = power(test,
                                          sim,
                                          num_samples=num_samples,
                                          num_dimensions=1)
    assert np.allclose(estimated_power, matlab_power, atol=0.2)
Ejemplo n.º 9
0
def test_dcorr_p_value():
    '''
    Check the DCorr p-values against stored reference values.

    Notes on the reference values:
    analytical p value for unbiased dcorr is compared with R package energy
    other p values are compared with the permutation tests in mgc-paper
    the value is the mean and atol is set to 4 times standard deviation
    '''
    dir_name = './mgcpy/independence_tests/unit_tests/dcorr/data/'
    unbiased = DCorr(which_test='unbiased')
    biased = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    X = np.genfromtxt(dir_name + 'pvalue_X_mtx.csv', delimiter=',')
    # Y is read as a flat vector and turned into a single-column matrix
    Y = np.genfromtxt(dir_name + 'pvalue_Y_mtx.csv', delimiter=',')[:, np.newaxis]

    tolerance = 0.03

    # p value
    regular_cases = ((unbiased, 0.0640), (biased, 0.0510), (mantel, 0.1020))
    for dcorr_test, expected in regular_cases:
        assert np.allclose(dcorr_test.p_value(X, Y)[0], expected, atol=tolerance)

    # p value (faster versions)
    fast_cases = ((unbiased, 0.7429), (biased, 1 / 1000), (mantel, 1 / 1000))
    for dcorr_test, expected in fast_cases:
        assert np.allclose(dcorr_test.p_value(X, Y, is_fast=True)[0],
                           expected,
                           atol=tolerance)
Ejemplo n.º 10
0
def test_dcorr_stat():
    '''
    Check ``get_name`` and the DCorr test statistics against stored reference values.

    Covers the zero-variance special case, the three estimate types
    ('unbiased', 'biased', 'mantel') in regular and fast mode, and an
    additional data set for the mantel variant.
    '''
    # special case: one of the data sets has zero variance
    constant_column = np.array([1, 1, 1])[:, np.newaxis]
    increasing_column = np.array([1, 2, 3])[:, np.newaxis]
    unbiased = DCorr(which_test='unbiased')
    assert np.allclose(
        unbiased.test_statistic(constant_column, increasing_column)[0], 0)

    dir_name = './mgcpy/independence_tests/unit_tests/dcorr/data/'
    X = np.genfromtxt(dir_name + 'test_stat_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'test_stat_Y_mtx.csv', delimiter=',')[:, np.newaxis]
    unbiased = DCorr(which_test='unbiased')
    biased = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    all_tests = (unbiased, biased, mantel)
    tolerance = 1e-4

    # test get_name
    for dcorr_test, expected_name in zip(all_tests,
                                         ('unbiased', 'biased', 'mantel')):
        assert dcorr_test.get_name() == expected_name

    # test statistic
    for dcorr_test, expected in zip(all_tests, (0.1174, 0.1548, 0.2421)):
        assert np.allclose(dcorr_test.test_statistic(X, Y)[0],
                           expected,
                           atol=tolerance)

    # test statistic (fast versions)
    for dcorr_test, expected in zip(all_tests, (0.1562, 0.3974, 0.3392)):
        assert np.allclose(dcorr_test.test_statistic(X, Y, is_fast=True)[0],
                           expected,
                           atol=tolerance)

    # additional test for mantel; both files are read as flat vectors
    # and turned into single-column matrices
    X = np.genfromtxt(dir_name + 'mantel_test_stat_X_mtx.csv',
                      delimiter=',')[:, np.newaxis]
    Y = np.genfromtxt(dir_name + 'mantel_test_stat_Y_mtx.csv',
                      delimiter=',')[:, np.newaxis]
    assert np.allclose(mantel.test_statistic(X, Y)[0], 0.7115, atol=tolerance)
    # faster version
    assert np.allclose(mantel.test_statistic(X, Y, is_fast=True)[0],
                       0.4575,
                       atol=tolerance)
Ejemplo n.º 11
0
# Script: run the unbiased, non-fast DCorr test repeatedly on pairs of samples
# drawn from the same uniform distribution and plot the resulting p-values.
sns.set_palette("deep")

# Experiment settings.
n_sims = 100  # number of repetitions (one p-value each)
n_samples = 100  # rows per sample
n_components = 2  # columns per sample
n_permutations = 1000  # passed to p_value as replication_factor
size = (n_samples, n_components)

#%% mgcpy package
p_vals = np.zeros(n_sims)
for i in tqdm(range(n_sims)):
    # both samples come from the same uniform(0.2, 0.7) distribution
    sample1 = np.random.uniform(0.2, 0.7, size=size)
    sample2 = np.random.uniform(0.2, 0.7, size=size)

    # presumably stacks the two samples and builds a group indicator so the
    # two-sample problem can be run as an independence test -- confirm against
    # k_sample_transform
    sample, indicator = k_sample_transform(sample1, sample2)
    test = DCorr(which_test="unbiased")
    # only the p-value is kept; p_meta is not used afterwards
    p, p_meta = test.p_value(
        sample, indicator, replication_factor=n_permutations, is_fast=False
    )
    p_vals[i] = p

# Plot the distribution of the collected p-values and save it to disk.
plt.figure()
# NOTE(review): sns.distplot is deprecated in recent seaborn releases --
# confirm the installed version still provides it.
sns.distplot(p_vals)
plt.title("MGCPy DCorr, 2-sample under null, unbiased, not fast")
plt.xlabel("p-value")
# the target directory is not created here, so it has to exist already
plt.savefig("graspy-misc/profile_dcorr/mgcpy_dcorr.png", facecolor="w")

#%% mgcpy with is_fast=True
# p_vals = np.zeros(n_sims)
# for i in tqdm(range(n_sims)):
#     sample1 = np.random.uniform(0.2, 0.7, size=size)
Ejemplo n.º 12
0
def test_dcorr():
    '''
    Check ``get_name``, the test statistics and the p-values of DCorr for the
    'unbiased', 'biased' and 'mantel' estimate types against stored reference
    values, in regular and fast mode.
    '''
    # test the special case when one of the dataset has zero variance
    X = np.array([1, 1, 1])[:, np.newaxis]
    Y = np.array([1, 2, 3])[:, np.newaxis]
    unbiased = DCorr(which_test='unbiased')
    assert np.allclose(unbiased.test_statistic(X, Y)[0], 0)

    # reference data; the path is relative, so the working directory matters
    dir_name = './mgcpy/independence_tests/unit_tests/dcorr/data/'
    X = np.genfromtxt(dir_name + 'test_stat_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'test_stat_Y_mtx.csv', delimiter=',')
    # turn the flat Y vector into a single-column matrix
    Y = Y[:, np.newaxis]
    unbiased = DCorr(which_test='unbiased')
    biased = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')

    # test get_name
    assert unbiased.get_name() == 'unbiased'
    assert biased.get_name() == 'biased'
    assert mantel.get_name() == 'mantel'

    # test statistic
    assert np.allclose(unbiased.test_statistic(X, Y)[0], 0.1174, atol=1e-4)
    assert np.allclose(biased.test_statistic(X, Y)[0], 0.1548, atol=1e-4)
    assert np.allclose(mantel.test_statistic(X, Y)[0], 0.2421, atol=1e-4)

    # test statistic (fast versions)
    assert np.allclose(unbiased.test_statistic(X, Y, is_fast=True)[0],
                       0.1562,
                       atol=1e-4)
    assert np.allclose(biased.test_statistic(X, Y, is_fast=True)[0],
                       0.3974,
                       atol=1e-4)
    assert np.allclose(mantel.test_statistic(X, Y, is_fast=True)[0],
                       0.3392,
                       atol=1e-4)

    # additional test for mantel
    X = np.genfromtxt(dir_name + 'mantel_test_stat_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'mantel_test_stat_Y_mtx.csv', delimiter=',')
    # both files are read as flat vectors and turned into single-column matrices
    X = X[:, np.newaxis]
    Y = Y[:, np.newaxis]
    assert np.allclose(mantel.test_statistic(X, Y)[0], 0.7115, atol=1e-4)
    assert np.allclose(mantel.test_statistic(X, Y, is_fast=True)[0],
                       0.7552,
                       atol=1e-4)  # faster version
    # the string below is a bare expression statement used as a block comment
    # for the p-value checks that follow; it is not a docstring
    '''
    test p value
    analytical p value for unbiased dcorr is compared with R package energy
    other p values are compared with the permutation tests in mgc-paper
    the value is the mean and atol is set to 4 times standard deviation
    '''
    X = np.genfromtxt(dir_name + 'pvalue_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'pvalue_Y_mtx.csv', delimiter=',')
    # turn the flat Y vector into a single-column matrix
    Y = Y[:, np.newaxis]

    # p value
    assert np.allclose(unbiased.p_value(X, Y)[0], 0.0640, atol=0.03)
    assert np.allclose(biased.p_value(X, Y)[0], 0.0510, atol=0.03)
    assert np.allclose(mantel.p_value(X, Y)[0], 0.1020, atol=0.03)

    # p value (faster versions)
    assert np.allclose(unbiased.p_value(X, Y, is_fast=True)[0],
                       0.7429,
                       atol=0.03)
    assert np.allclose(biased.p_value(X, Y, is_fast=True)[0], 0, atol=0.03)
    assert np.allclose(mantel.p_value(X, Y, is_fast=True)[0], 0, atol=0.03)