def fill_params_dict_list_sample_sizes(base_path, do_fast_mgc=False):
    '''
    Assemble one parameter dictionary per (simulation, independence test) pair.

    Every entry of the module-level ``simulations`` mapping is combined with
    each independence test instantiated below. When ``do_fast_mgc`` is true,
    one extra dictionary per simulation is appended that pairs a fresh ``MGC``
    instance with ``{"is_fast": True}`` as its additional parameters.

    :param base_path: stored unchanged under the ``'base_path'`` key of every dictionary
    :param do_fast_mgc: whether to append the extra fast-MGC dictionary for each simulation
    :type do_fast_mgc: boolean

    :return: dictionaries with the keys ``'independence_test'``, ``'simulation_type'``,
             ``'base_path'`` and ``'additional_params'``
    :rtype: list
    '''
    # The list order is also the order in which the tests appear per simulation.
    independence_tests = [
        DCorr(which_test='unbiased'),
        DCorr(which_test='biased'),
        DCorr(which_test='mantel'),
        MGC(),
        HHG(),
        RVCorr(which_test='pearson'),
        MDMR(),
    ]

    params_dict_list = []
    for _, sim_func in simulations.items():
        # The second element of each ``simulations`` value is used as the simulation type.
        params_dict_list.extend(
            {
                'independence_test': test,
                'simulation_type': sim_func[1],
                'base_path': base_path,
                'additional_params': {},
            }
            for test in independence_tests
        )
        if do_fast_mgc:
            params_dict_list.append({
                'independence_test': MGC(),
                'simulation_type': sim_func[1],
                'base_path': base_path,
                'additional_params': {"is_fast": True},
            })
    return params_dict_list
def fill_params_dict_list_dimensions(do_fast_mgc=False):
    '''
    Assemble the parameter dictionaries for the per-dimension runs.

    Each entry of the module-level ``simulations`` mapping is combined with
    every test in ``independence_tests``. That list is empty as shipped, so
    only the optional fast-MGC dictionaries (one per simulation, enabled by
    ``do_fast_mgc``) are produced unless tests are added to it.

    :param do_fast_mgc: whether to append a dictionary pairing a fresh ``MGC``
                        instance with ``{"is_fast": True}`` for each simulation
    :type do_fast_mgc: boolean

    :return: dictionaries with the keys ``'independence_test'``, ``'simulation_type'``,
             ``'dim'`` and ``'additional_params'``
    :rtype: list
    '''
    # Candidate tests: instantiated up front so they can be dropped into
    # ``independence_tests`` below.
    mcorr = DCorr(which_test='unbiased')
    dcorr = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    mgc = MGC()
    rv_corr = RVCorr(which_test='rv')
    hhg = HHG()
    cca = RVCorr(which_test='cca')
    mdmr = MDMR()

    independence_tests = []  # [mgc, mcorr, dcorr, mantel, rv_corr, cca]

    params_dict_list = []
    for sim_name, sim_func in simulations.items():
        params_dict_list.extend(
            {
                'independence_test': test,
                'simulation_type': sim_func[1],
                'dim': find_dim(sim_name),
                'additional_params': {},
            }
            for test in independence_tests
        )
        if do_fast_mgc:
            params_dict_list.append({
                'independence_test': MGC(),
                'simulation_type': sim_func[1],
                'dim': find_dim(sim_name),
                'additional_params': {"is_fast": True},
            })
    return params_dict_list
def paired_two_sample_test_dcorr(x, y, which_test="biased", compute_distance_matrix=None, is_fast=False):
    '''
    Run the paired two sample DCorr test on ``x`` and ``y``.

    A ``DCorr`` instance is created with ``is_paired=True`` and its
    ``p_value`` method is called on the two inputs.

    :param x: is interpreted as either:

        - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for n samples OR
        - a ``[n*p]`` data matrix, a matrix with n samples in p dimensions
    :type x: 2D numpy.array

    :param y: is interpreted as either:

        - a ``[n*n]`` distance matrix, a square matrix with zeros on diagonal for n samples OR
        - a ``[n*p]`` data matrix, a matrix with n samples in p dimensions
    :type y: 2D numpy.array

    :param which_test: forwarded to the ``DCorr`` constructor
    :type which_test: string

    :param compute_distance_matrix: forwarded to the ``DCorr`` constructor
    :type compute_distance_matrix: FunctionType or callable()

    :param is_fast: forwarded to ``DCorr.p_value``
    :type is_fast: boolean

    :return: whatever ``DCorr.p_value`` returns for the paired test (not the bare
             test statistic); callers elsewhere in this code base unpack that
             result as ``(p_value, metadata)``
    '''
    assert x.shape == y.shape, "Matrices X and Y need to be of same dimensions [n, p]"

    paired_test = DCorr(
        compute_distance_matrix=compute_distance_matrix,
        which_test=which_test,
        is_paired=True,
    )
    result = paired_test.p_value(x, y, is_fast=is_fast)
    return result
def fill_params_dict_list_epsilons(base_path, do_fast_mgc=False):
    '''
    Assemble one parameter dictionary per (simulation type, test) pair.

    The simulation types are the integers 1, 2 and 3, and each is combined
    with Manova, unbiased DCorr and MGC, in that order. When ``do_fast_mgc``
    is true, one extra dictionary per simulation type is appended that pairs a
    fresh ``MGC`` instance with ``{"is_fast": True}``.

    :param base_path: stored unchanged under the ``'base_path'`` key of every dictionary
    :param do_fast_mgc: whether to append the extra fast-MGC dictionary for each simulation type
    :type do_fast_mgc: boolean

    :return: dictionaries with the keys ``'independence_test'``, ``'simulation_type'``,
             ``'base_path'`` and ``'additional_params'``
    :rtype: list
    '''
    unbiased_dcorr = DCorr(which_test='unbiased')
    mgc_test = MGC()
    manova_test = Manova()
    independence_tests = [manova_test, unbiased_dcorr, mgc_test]

    params_dict_list = []
    for sim_type in (1, 2, 3):
        params_dict_list.extend(
            {
                'independence_test': test,
                'simulation_type': sim_type,
                'base_path': base_path,
                'additional_params': {},
            }
            for test in independence_tests
        )
        if do_fast_mgc:
            params_dict_list.append({
                'independence_test': MGC(),
                'simulation_type': sim_type,
                'base_path': base_path,
                'additional_params': {"is_fast": True},
            })
    return params_dict_list
def fill_params_dict_list_sample_sizes():
    '''
    Assemble one parameter dictionary per (simulation, independence test) pair.

    ``independence_tests`` is empty as shipped, so the returned list is empty
    until tests are added to it.

    :return: dictionaries with the keys ``'independence_test'`` and ``'simulation_type'``
    :rtype: list
    '''
    # Candidate tests: instantiated up front so they can be dropped into
    # ``independence_tests`` below.
    mcorr = DCorr(which_test='unbiased')
    dcorr = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    mgc = MGC()
    hhg = HHG()
    pearson = RVCorr(which_test='pearson')

    independence_tests = []  # [mgc, mcorr, dcorr, mantel, pearson]

    # The second element of each ``simulations`` value is used as the simulation type.
    return [
        {'independence_test': test, 'simulation_type': sim_func[1]}
        for _, sim_func in simulations.items()
        for test in independence_tests
    ]
def fill_params_dict_list_dimensions():
    '''
    Assemble one parameter dictionary per (simulation, independence test) pair,
    including the dimension looked up for the simulation via ``find_dim``.

    ``independence_tests`` is empty as shipped, so the returned list is empty
    until tests are added to it.

    :return: dictionaries with the keys ``'independence_test'``, ``'simulation_type'``
             and ``'dim'``
    :rtype: list
    '''
    mcorr = DCorr(which_test='unbiased')
    dcorr = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')
    mgc = MGC()
    rv_corr = RVCorr(which_test='rv')
    hhg = HHG()
    cca = RVCorr(which_test='cca')
    mdmr = MDMR()
    # Instantiate any additional test above, then list the tests that
    # should run in `independence_tests`.
    independence_tests = []  # [mgc, mcorr, dcorr, mantel, rv_corr, cca]

    return [
        {
            'independence_test': test,
            'simulation_type': sim_func[1],
            'dim': find_dim(sim_name),
        }
        for sim_name, sim_func in simulations.items()
        for test in independence_tests
    ]
def __init__(self, compute_distance_matrix=None, which_test='unbiased', max_lag=0):
    '''
    :param compute_distance_matrix: a function to compute the pairwise distance matrix, given a data matrix;
                                    it is handed to the wrapped ``DCorr`` instance
    :type compute_distance_matrix: FunctionType or callable()

    :param which_test: the type of distance covariance estimate to use, can be 'unbiased' or 'biased'
    :type which_test: string

    :param max_lag: Maximum lead/lag to check for dependence between X_t and Y_t+j (M parameter)
    :type max_lag: int

    :raises ValueError: if ``which_test`` is neither 'unbiased' nor 'biased'
    '''
    # NOTE(review): the base initializer is still called without the distance
    # function, as before; confirm whether it should receive it as well.
    IndependenceTest.__init__(self)
    if which_test not in ('unbiased', 'biased'):
        raise ValueError('which_test must be unbiased or biased.')
    self.which_test = which_test
    # Forward the caller's distance function instead of silently dropping it,
    # so a custom metric is honoured by the underlying DCorr computation.
    self.dcorr = DCorr(which_test=self.which_test,
                       compute_distance_matrix=compute_distance_matrix)
    self.max_lag = max_lag
def test_power():
    '''
    Compare the power estimated for unbiased DCorr with stored MATLAB results.

    Power is estimated with ``power`` on ``joint_sim`` for sample sizes
    5, 10, ..., 100 in one dimension and must match the first row of the
    ``powerM`` dataset in the reference file within ``atol=0.2``.
    '''
    independence_test = DCorr(which_test='unbiased')
    simulation_type = 4
    sample_sizes = list(range(5, 101, 5))

    matlab_file_name = './mgcpy/benchmarks/matlab_power_results/sample_size/CorrIndTestType{}N100Dim1.mat'.format(
        simulation_type)
    with h5py.File(matlab_file_name, 'r') as reference_file:
        # Each dataset is copied into memory and transposed before the file closes.
        matlab_results = {
            key: np.transpose(np.array(dataset))
            for key, dataset in reference_file.items()
        }
    matlab_power = matlab_results['powerM'][0, :]

    estimated_power = np.zeros(len(sample_sizes))
    for idx, num_samples in enumerate(sample_sizes):
        estimated_power[idx] = power(independence_test, joint_sim,
                                     num_samples=num_samples, num_dimensions=1)

    assert np.allclose(estimated_power, matlab_power, atol=0.2)
def test_dcorr_p_value():
    '''
    Check DCorr p values against stored reference values.

    The analytical p value for unbiased dcorr is compared with the R package energy;
    the other p values are compared with the permutation tests in mgc-paper.
    Each expected value is the mean, and atol is set to 4 times the standard deviation.
    '''
    dir_name = './mgcpy/independence_tests/unit_tests/dcorr/data/'
    unbiased = DCorr(which_test='unbiased')
    biased = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')

    X = np.genfromtxt(dir_name + 'pvalue_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'pvalue_Y_mtx.csv', delimiter=',')[:, np.newaxis]

    # p value
    for estimator, expected in ((unbiased, 0.0640), (biased, 0.0510), (mantel, 0.1020)):
        assert np.allclose(estimator.p_value(X, Y)[0], expected, atol=0.03)

    # p value (faster versions)
    for estimator, expected in ((unbiased, 0.7429), (biased, 1 / 1000), (mantel, 1 / 1000)):
        assert np.allclose(estimator.p_value(X, Y, is_fast=True)[0], expected, atol=0.03)
def test_dcorr_stat():
    '''
    Check DCorr names and test statistics against stored reference values.
    '''
    # special case: one of the datasets has zero variance, the statistic must be 0
    constant_X = np.array([1, 1, 1])[:, np.newaxis]
    varying_Y = np.array([1, 2, 3])[:, np.newaxis]
    degenerate_test = DCorr(which_test='unbiased')
    assert np.allclose(degenerate_test.test_statistic(constant_X, varying_Y)[0], 0)

    dir_name = './mgcpy/independence_tests/unit_tests/dcorr/data/'
    X = np.genfromtxt(dir_name + 'test_stat_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'test_stat_Y_mtx.csv', delimiter=',')[:, np.newaxis]

    unbiased = DCorr(which_test='unbiased')
    biased = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')

    # get_name reports the configured variant
    for estimator, label in ((unbiased, 'unbiased'), (biased, 'biased'), (mantel, 'mantel')):
        assert estimator.get_name() == label

    # test statistic
    for estimator, expected in ((unbiased, 0.1174), (biased, 0.1548), (mantel, 0.2421)):
        assert np.allclose(estimator.test_statistic(X, Y)[0], expected, atol=1e-4)

    # test statistic (fast versions)
    for estimator, expected in ((unbiased, 0.1562), (biased, 0.3974), (mantel, 0.3392)):
        assert np.allclose(estimator.test_statistic(X, Y, is_fast=True)[0], expected, atol=1e-4)

    # additional test for mantel
    X = np.genfromtxt(dir_name + 'mantel_test_stat_X_mtx.csv', delimiter=',')[:, np.newaxis]
    Y = np.genfromtxt(dir_name + 'mantel_test_stat_Y_mtx.csv', delimiter=',')[:, np.newaxis]
    assert np.allclose(mantel.test_statistic(X, Y)[0], 0.7115, atol=1e-4)
    # faster version
    assert np.allclose(mantel.test_statistic(X, Y, is_fast=True)[0], 0.4575, atol=1e-4)
# Profiling cell: distribution of mgcpy DCorr p-values for a two-sample
# problem in which both samples come from the same distribution.
sns.set_palette("deep")

n_sims = 100  # number of independent repetitions (one p-value each)
n_samples = 100  # rows in each sample
n_components = 2  # columns in each sample
n_permutations = 1000  # passed to p_value as replication_factor
size = (n_samples, n_components)

#%% mgcpy package
p_vals = np.zeros(n_sims)
for i in tqdm(range(n_sims)):
    # Both samples are drawn from the same Uniform(0.2, 0.7), i.e. the null case.
    sample1 = np.random.uniform(0.2, 0.7, size=size)
    sample2 = np.random.uniform(0.2, 0.7, size=size)

    # NOTE(review): presumably stacks the two samples and builds a group-label
    # array so the two-sample problem can be run as an independence test;
    # confirm against k_sample_transform.
    sample, indicator = k_sample_transform(sample1, sample2)

    test = DCorr(which_test="unbiased")
    # p_meta is returned alongside the p-value but is not used in this cell.
    p, p_meta = test.p_value(
        sample, indicator, replication_factor=n_permutations, is_fast=False
    )
    p_vals[i] = p

# Plot the collected p-values and save the figure to disk.
plt.figure()
sns.distplot(p_vals)
plt.title("MGCPy DCorr, 2-sample under null, unbiased, not fast")
plt.xlabel("p-value")
plt.savefig("graspy-misc/profile_dcorr/mgcpy_dcorr.png", facecolor="w")

#%% mgcpy with is_fast=True
# p_vals = np.zeros(n_sims)
# for i in tqdm(range(n_sims)):
#     sample1 = np.random.uniform(0.2, 0.7, size=size)
def test_dcorr():
    '''
    Check DCorr names, test statistics and p values against stored reference values.
    '''
    # special case: one of the datasets has zero variance, the statistic must be 0
    constant_X = np.array([1, 1, 1])[:, np.newaxis]
    varying_Y = np.array([1, 2, 3])[:, np.newaxis]
    degenerate_test = DCorr(which_test='unbiased')
    assert np.allclose(degenerate_test.test_statistic(constant_X, varying_Y)[0], 0)

    dir_name = './mgcpy/independence_tests/unit_tests/dcorr/data/'
    X = np.genfromtxt(dir_name + 'test_stat_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'test_stat_Y_mtx.csv', delimiter=',')[:, np.newaxis]

    unbiased = DCorr(which_test='unbiased')
    biased = DCorr(which_test='biased')
    mantel = DCorr(which_test='mantel')

    # get_name reports the configured variant
    for estimator, label in ((unbiased, 'unbiased'), (biased, 'biased'), (mantel, 'mantel')):
        assert estimator.get_name() == label

    # test statistic
    for estimator, expected in ((unbiased, 0.1174), (biased, 0.1548), (mantel, 0.2421)):
        assert np.allclose(estimator.test_statistic(X, Y)[0], expected, atol=1e-4)

    # test statistic (fast versions)
    for estimator, expected in ((unbiased, 0.1562), (biased, 0.3974), (mantel, 0.3392)):
        assert np.allclose(estimator.test_statistic(X, Y, is_fast=True)[0], expected, atol=1e-4)

    # additional test for mantel
    X = np.genfromtxt(dir_name + 'mantel_test_stat_X_mtx.csv', delimiter=',')[:, np.newaxis]
    Y = np.genfromtxt(dir_name + 'mantel_test_stat_Y_mtx.csv', delimiter=',')[:, np.newaxis]
    assert np.allclose(mantel.test_statistic(X, Y)[0], 0.7115, atol=1e-4)
    # faster version
    assert np.allclose(mantel.test_statistic(X, Y, is_fast=True)[0], 0.7552, atol=1e-4)

    # test p value
    # The analytical p value for unbiased dcorr is compared with the R package energy;
    # the other p values are compared with the permutation tests in mgc-paper.
    # Each expected value is the mean, and atol is set to 4 times the standard deviation.
    X = np.genfromtxt(dir_name + 'pvalue_X_mtx.csv', delimiter=',')
    Y = np.genfromtxt(dir_name + 'pvalue_Y_mtx.csv', delimiter=',')[:, np.newaxis]

    # p value
    for estimator, expected in ((unbiased, 0.0640), (biased, 0.0510), (mantel, 0.1020)):
        assert np.allclose(estimator.p_value(X, Y)[0], expected, atol=0.03)

    # p value (faster versions)
    for estimator, expected in ((unbiased, 0.7429), (biased, 0), (mantel, 0)):
        assert np.allclose(estimator.p_value(X, Y, is_fast=True)[0], expected, atol=0.03)