def test_t_two_sample_switch(self):
    """t_two_sample should call t_one_observation if 1 item in sample."""
    single = array([3.02])
    multiple = array([4.02, 3.88, 3.34, 3.87, 3.18])
    expected = (-1.5637254, 0.1929248)
    # The same result is expected whichever argument holds the lone value.
    self.assertFloatEqual(t_two_sample(single, multiple), expected)
    self.assertFloatEqual(t_two_sample(multiple, single), expected)
    # The test cannot be done when both samples have a single item.
    self.assertEqual(t_two_sample(single, single), (None, None))
def test_t_two_sample_switch(self):
    """t_two_sample should call t_one_observation if 1 item in sample."""
    lone_value = array([3.02])
    group = array([4.02, 3.88, 3.34, 3.87, 3.18])
    # Check both argument orders against the same expected (t, p) pair.
    for first, second in ((lone_value, group), (group, lone_value)):
        self.assertFloatEqual(t_two_sample(first, second),
                              (-1.5637254, 0.1929248))
    # With a single item on both sides no test is possible.
    self.assertEqual(t_two_sample(lone_value, lone_value), (None, None))
def oneTrial(n, f=np.random.normal):
    """Return the fraction of 500 paired t-tests that report p < 0.05.

    All values are drawn with one call ``f(loc=50, scale=3, size=...)`` and
    cut into 500 consecutive, non-overlapping pairs of samples holding ``n``
    values each. Every pair is passed to ``stats.t_two_sample``.

    n - number of values in each of the two samples of a pair
    f - callable accepting ``loc``, ``scale`` and ``size`` keyword arguments
        and returning a sliceable sequence of ``size`` values
    """
    num_tests = 500
    pair_width = 2 * n
    total = pair_width * num_tests          # need this many numbers
    values = f(loc=50, scale=3, size=total)

    # Lazily cut the draw into (first sample, second sample) slices.
    sample_pairs = ((values[start:start + n],
                     values[start + n:start + pair_width])
                    for start in range(0, total, pair_width))

    hits = 0
    for first, second in sample_pairs:
        _, prob = stats.t_two_sample(first, second)
        if prob < 0.05:
            hits += 1
    return float(hits) / num_tests
def compare_alpha_diversities(rarefaction_lines, mapping_lines,
                              category, depth):
    """Run t_two_sample on alpha diversities for each pair of category values.

    inputs:
        rarefaction_lines - lines of a rarefaction file, handed to
            parse_rarefaction
        mapping_lines - lines of a mapping file, handed to
            parse_mapping_file_to_dict
        category - the mapping category to be compared, a string
        depth - the rarefaction depth to use, an integer

    outputs:
        results - a nested dictionary: ``category`` is the only top level
            key; its value maps a tuple of two stringified category values
            to whatever t_two_sample returned for that pair
    """
    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]

    value_pairs = make_value_pairs_from_category(mapping_data, category)
    ids_by_value = make_category_values_Id_dict(mapping_data, category)
    id_pairs = map_category_value_pairs_to_Ids(value_pairs, ids_by_value)

    id_to_col = make_SampleIds_rarefaction_columns_dict(rarefaction_data)
    scores_at_depth = extract_rarefaction_scores_at_depth(depth,
                                                          rarefaction_data)

    pair_results = {}
    for idx, id_pair in enumerate(id_pairs):
        first = convert_SampleIds_to_rarefaction_mtx(
            id_pair[0], scores_at_depth, id_to_col)
        second = convert_SampleIds_to_rarefaction_mtx(
            id_pair[1], scores_at_depth, id_to_col)
        outcome = t_two_sample(first, second)
        # value_pairs is index-aligned with id_pairs (the latter is built
        # from the former above).
        labels = (str(value_pairs[idx][0]), str(value_pairs[idx][1]))
        pair_results[labels] = outcome
    return {category: pair_results}
def t_test(nums1, nums2, col_num):
    """Run a two-sample t-test on one column of two equally long tables.

    nums1, nums2 - sequences of rows; every row is indexed with ``col_num``
    col_num - index of the column to extract from each row

    Prints 'results:' followed by the statistic and probability, then returns
    them as ``(t, prob)`` exactly as ``stats.t_two_sample`` produced them.
    When the two tables differ in length, prints 'fail' and returns None
    without running the test.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    if len(nums1) != len(nums2):
        print('fail')
        return
    w1 = [row[col_num] for row in nums1]
    w2 = [row[col_num] for row in nums2]
    t, prob = stats.t_two_sample(w1, w2)
    print('results:')
    # Same output as the old "print t, prob": both values joined by a space.
    print('%s %s' % (t, prob))
    return t, prob
def assertSimilarMeans(self, observed, expected, pvalue=0.01, msg=None):
    """Fail if observed p is lower than pvalue

    observed, expected - samples (anything asarray accepts) whose means are
        compared with t_two_sample
    pvalue - threshold the t-test p must exceed; overridden by
        self._suite_pvalue whenever that attribute is truthy
    msg - optional message used in place of the default failure text

    Raises self.failureException when p is a finite number not greater than
    pvalue.
    """
    if self._suite_pvalue:
        pvalue = self._suite_pvalue

    observed, expected = asarray(observed), asarray(expected)
    t, p = t_two_sample(observed, expected)

    # Guard the ordering comparison: None > float is False on Python 2 but
    # raises TypeError on Python 3.
    if p is not None and p > pvalue:
        return
    elif p is None or not isfinite(p):
        # handle case where all elements were the same
        if not observed.shape:
            observed = observed.reshape((1,))
        if not expected.shape:
            expected = expected.reshape((1,))
        if observed[0] == expected[0]:
            return
        # NOTE(review): when the first elements differ, control falls off the
        # end here and the assertion passes silently -- confirm this is
        # intended rather than a missing failure.
    else:
        raise self.failureException(
            msg or 'p-value %s, t-test p %s' % (repr(pvalue), repr(p)))
def assertSimilarMeans(self, observed, expected, pvalue=0.01, msg=None):
    """Fail if observed p is lower than pvalue

    observed, expected - samples (anything asarray accepts) whose means are
        compared with t_two_sample
    pvalue - threshold the t-test p must exceed; overridden by
        self._suite_pvalue whenever that attribute is truthy
    msg - optional message used in place of the default failure text

    Raises self.failureException when p is a finite number not greater than
    pvalue.
    """
    if self._suite_pvalue:
        pvalue = self._suite_pvalue

    observed, expected = asarray(observed), asarray(expected)
    t, p = t_two_sample(observed, expected)

    # Guard the ordering comparison: None > float is False on Python 2 but
    # raises TypeError on Python 3.
    if p is not None and p > pvalue:
        return
    elif p is None or not isfinite(p):
        # handle case where all elements were the same
        if not observed.shape:
            observed = observed.reshape((1,))
        if not expected.shape:
            expected = expected.reshape((1,))
        if observed[0] == expected[0]:
            return
        # NOTE(review): when the first elements differ, control falls off the
        # end here and the assertion passes silently -- confirm this is
        # intended rather than a missing failure.
    else:
        # repr() and the call form of raise replace the backtick and
        # "raise X, y" spellings, which only Python 2 accepts.
        raise self.failureException(
            msg or 'p-value %s, t-test p %s' % (repr(pvalue), repr(p)))
fout.write(pstr + "\n") varcount += 1 fout.write("pdf(\"plot.bl_distribution.pdf" + "\", width=8, height=4)\n") fout.write("plot(lengths0, proportions0, type='l',xlab=\"BLs, binned\", ylab=\"proportion\", col=\"" + colors[0].__str__() + "\", lwd='2', pch=" + pch[0].__str__() + ", main=\"BL Distribution\");\n") for i in range(1, varcount): fout.write("points(lengths" + i.__str__() + ", proportions" + i.__str__() + ", type='l', col=\"" + colors[i].__str__() + "\", lwd='2', pch=" + pch[i].__str__() + ")\n") fout.write("dev.off()\n") fout.close() os.system("r --no-save < " + scriptpath) """ # # barplot # dataseries = {} for binid in range(0, get_bin_count()): dataseries[binid] = {} for path in paths: if bins[path].__contains__( binid ): dataseries[binid][path] = bins[path][binid] else: dataseries[binid][path] = 0.0 barplot1(dataseries, "BL bins", "proportion", "bl_distribution" + id.__str__()) # print stats print "\n.\n. Stats about these ML branch length distributions:\n." for path in paths: stats_about_bls( path, filepath_bls[path]) [t, p] = stats.t_two_sample(filepath_bls[ paths[0] ], filepath_bls[ paths[1] ]) print "T = ", t, "P=", p
def test_t_two_sample_no_variance(self):
    """t_two_sample should return None if lists are invariant"""
    ones = array([1, 1, 1])
    zeros = array([0, 0, 0])
    no_result = (None, None)
    # Identical constant samples, then constant samples with different values.
    self.assertEqual(t_two_sample(ones, ones), no_result)
    self.assertEqual(t_two_sample(ones, zeros), no_result)
def test_t_two_sample(self):
    """t_two_sample should match example on p.225 of Sokal and Rohlf"""
    first_group = array([7.2, 7.1, 9.1, 7.2, 7.3, 7.2, 7.5])
    second_group = array([8.8, 7.5, 7.7, 7.6, 7.4, 6.7, 7.2])
    # Expected (t, p); the probability is written as twice 0.45385.
    expected = (-0.1184, 0.45385 * 2)
    observed = t_two_sample(first_group, second_group)
    self.assertFloatEqual(observed, expected, 0.001)
def monte_carlo_group_distances_within_between(
        single_field, paired_field, dmat, dir_prefix='',
        subdir_prefix='monte_carlo_group_distances', num_iters=10):
    """Calculate Monte Carlo stats within and between fields.

    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    single_field - passed to within_category_distances_grouped and
        between_category_distances_grouped
    paired_field - passed to within_and_between_fields
    dmat - passed to get_random_dists together with the real distances
    dir_prefix, subdir_prefix - joined to form the output directory, which is
        created when it does not exist
    num_iters - number of randomized distance sets compared per pair

    Writes the tab-delimited table 'group_distances_within_and_between.xls'
    into the output directory; nothing is returned.
    """
    path_prefix = path.join(dir_prefix, subdir_prefix)
    # make the directory if it doesn't exist
    if not path.isdir(path_prefix):
        mkdir(path_prefix)

    real_dists = []
    within_category_distances = \
        within_category_distances_grouped(single_field, label_suffix='')
    real_dists.extend([['Within', field, distances] for field, distances
                       in within_category_distances.items()])

    between_category_distances = \
        between_category_distances_grouped(single_field, label_suffix='')
    real_dists.extend([['Between', field, distances] for field, distances
                       in between_category_distances.items()])

    within_and_between = within_and_between_fields(paired_field)
    # keys are split once on '_' into the comparison and category labels
    real_dists.extend([[field.split('_', 1)[0], field.split('_', 1)[1],
                        distances]
                       for field, distances in within_and_between.items()])

    out_fp = path.join(path_prefix, 'group_distances_within_and_between.xls')
    # The with block closes the output file even when a helper raises;
    # previously the handle was never closed.
    with open(out_fp, 'w') as outfile:
        outfile.write('\t'.join(['Comparison', 'Category_1', 'Avg',
                                 'Comparison', 'Category_2', 'Avg', 't', 'p',
                                 'p_greater', 'p_less', 'Iterations\n']))

        rand_distances = get_random_dists(real_dists, dmat, num_iters)

        # iterate over the groups
        for i, (first_g1, second_g1, distances_g1) in \
                enumerate(real_dists[:-1]):
            real_dist_1 = average(distances_g1)
            rand_dists_1 = [rand_distances[n][i][-1]
                            for n in range(num_iters)]

            # then for each other pair (not including same group)
            for j in range(i + 1, len(real_dists)):
                first_g2, second_g2, distances_g2 = real_dists[j]
                real_dist_2 = average(distances_g2)
                rand_dists_2 = [rand_distances[n][j][-1]
                                for n in range(num_iters)]

                # t statistic of every randomized pair, as one array
                ttests = array([t_two_sample(rand_dists_1[n],
                                             rand_dists_2[n])[0]
                                for n in range(num_iters)])
                real_ttest = t_two_sample(distances_g1, distances_g2)

                curr_line = [first_g1, second_g1, real_dist_1,
                             first_g2, second_g2, real_dist_2,
                             real_ttest[0], real_ttest[1],
                             # share of random t values above / below real t
                             (ttests > real_ttest[0]).sum() / float(num_iters),
                             (ttests < real_ttest[0]).sum() / float(num_iters),
                             num_iters]
                outfile.write('\t'.join(map(str, curr_line)))
                outfile.write('\n')
def monte_carlo_group_distances(mapping_file, dmatrix_file, prefs,
                                dir_prefix='',
                                subdir_prefix='monte_carlo_group_distances',
                                default_iters=10, fields=None):
    """Calculate Monte Carlo stats for specified group distances.

    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    mapping_file, dmatrix_file - paths of the mapping file and the distance
        matrix file; both are opened and parsed here
    prefs - dict or None; when it lacks 'MONTE_CARLO_GROUP_DISTANCES' it is
        replaced by build_monte_carlo_prefs(fields, default_iters). The value
        under that key maps a field name (several fields joined by '&&') to
        its number of iterations
    dir_prefix, subdir_prefix - joined to form the output directory, which is
        created when it does not exist
    default_iters - iteration count handed to build_monte_carlo_prefs
    fields - fields to analyse; defaults to the first mapping header entry

    Writes one tab-delimited 'group_distances_<field>.xls' file per field;
    nothing is returned.
    """
    # Input handles are closed as soon as parsing is done (they used to be
    # left open). The 'U' mode is kept for Python 2; Python 3.11+ rejects it.
    with open(mapping_file, 'U') as mapping_f:
        mapping, header, comments = parse_mapping_file(mapping_f)
        # header row first, then the data rows; assembled while the file is
        # still open in case the parser yields rows lazily
        mapping = [header] + list(mapping)

    with open(dmatrix_file, 'U') as dmatrix_f:
        distance_header, distance_matrix = parse_distmat(dmatrix_f)

    path_prefix = path.join(dir_prefix, subdir_prefix)
    # make the directory if it doesn't exist
    if not path.isdir(path_prefix):
        mkdir(path_prefix)

    if fields is None:
        fields = [mapping[0][0]]

    if prefs is None:
        prefs = {}

    if 'MONTE_CARLO_GROUP_DISTANCES' not in prefs:
        prefs = build_monte_carlo_prefs(fields, default_iters)

    for field, num_iters in prefs['MONTE_CARLO_GROUP_DISTANCES'].items():
        if '&&' in field:
            groups = group_by_fields(mapping, field.split('&&'))
        else:
            groups = group_by_field(mapping, field)

        out_fp = path.join(path_prefix, 'group_distances_' + field + '.xls')
        # one output table per field, closed even if a helper raises
        with open(out_fp, 'w') as outfile:
            outfile.write('\t'.join(['Category_1a', 'Category_1b', 'Avg',
                                     'Category_2a', 'Category_2b', 'Avg',
                                     't', 'p', 'p_greater', 'p_less',
                                     'Iterations\n']))
            real_dists = distances_by_groups(distance_header,
                                             distance_matrix, groups)

            # iterate over the groups
            for i, (first_g1, second_g1, distances_g1) in \
                    enumerate(real_dists[:-1]):
                real_dist_1 = average(distances_g1)

                # then for each other pair (not including same group)
                for j in range(i + 1, len(real_dists)):
                    first_g2, second_g2, distances_g2 = real_dists[j]
                    real_dist_2 = average(distances_g2)

                    # permute distances just within these groups!
                    rand_dists_1, rand_dists_2 = permute_between_groups(
                        distances_g1, distances_g2, num_iters)

                    # t statistic of every permutation, as one array
                    ttests = array([t_two_sample(rand_dists_1[n].flatten(),
                                                 rand_dists_2[n].flatten())[0]
                                    for n in range(num_iters)])
                    real_ttest = t_two_sample(distances_g1.flatten(),
                                              distances_g2.flatten())

                    curr_line = [
                        first_g1, second_g1, real_dist_1,
                        first_g2, second_g2, real_dist_2,
                        real_ttest[0], real_ttest[1],
                        # share of permuted t values above / below real t
                        (ttests > real_ttest[0]).sum() / float(num_iters),
                        (ttests < real_ttest[0]).sum() / float(num_iters),
                        num_iters]
                    outfile.write('\t'.join(map(str, curr_line)))
                    outfile.write('\n')
def t_test(nums1, nums2):
    """Run stats.t_two_sample on two samples and print the outcome.

    nums1, nums2 - the two samples, passed through unchanged

    Prints the statistic and the probability on separate lines; returns None.
    """
    t, prob = stats.t_two_sample(nums1, nums2)
    # print(...) with a single argument behaves the same on Python 2 and 3;
    # the former print statements were a SyntaxError on Python 3.
    print(t)
    print(prob)
def monte_carlo_group_distances(mapping_file, dmatrix_file, prefs,
                                dir_prefix='',
                                subdir_prefix='monte_carlo_group_distances',
                                default_iters=10, fields=None):
    """Calculate Monte Carlo stats for specified group distances.

    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    mapping_file, dmatrix_file - paths of the mapping file and the distance
        matrix file; both are opened and parsed here
    prefs - dict or None; when it lacks 'MONTE_CARLO_GROUP_DISTANCES' it is
        replaced by build_monte_carlo_prefs(fields, default_iters). The value
        under that key maps a field name (several fields joined by '&&') to
        its number of iterations
    dir_prefix, subdir_prefix - joined to form the output directory, which is
        created when it does not exist
    default_iters - iteration count handed to build_monte_carlo_prefs
    fields - fields to analyse; defaults to the first mapping header entry

    Writes one tab-delimited 'group_distances_<field>.txt' file per field;
    nothing is returned.
    """
    # Input handles are closed as soon as parsing is done (they used to be
    # left open). The 'U' mode is kept for Python 2; Python 3.11+ rejects it.
    with open(mapping_file, 'U') as mapping_f:
        mapping, header, comments = parse_mapping_file(mapping_f)
        # header row first, then the data rows; assembled while the file is
        # still open in case the parser yields rows lazily
        mapping = [header] + list(mapping)

    with open(dmatrix_file, 'U') as dmatrix_f:
        distance_header, distance_matrix = parse_distmat(dmatrix_f)

    path_prefix = path.join(dir_prefix, subdir_prefix)
    # make the directory if it doesn't exist
    if not path.isdir(path_prefix):
        mkdir(path_prefix)

    if fields is None:
        fields = [mapping[0][0]]

    if prefs is None:
        prefs = {}

    if 'MONTE_CARLO_GROUP_DISTANCES' not in prefs:
        prefs = build_monte_carlo_prefs(fields, default_iters)

    for field, num_iters in prefs['MONTE_CARLO_GROUP_DISTANCES'].items():
        if '&&' in field:
            groups = group_by_fields(mapping, field.split('&&'))
        else:
            groups = group_by_field(mapping, field)

        out_fp = path.join(path_prefix, 'group_distances_' + field + '.txt')
        # one output table per field, closed even if a helper raises
        with open(out_fp, 'w') as outfile:
            outfile.write('\t'.join(['Category_1a', 'Category_1b', 'Avg',
                                     'Category_2a', 'Category_2b', 'Avg',
                                     't', 'p', 'p_greater', 'p_less',
                                     'Iterations\n']))
            real_dists = distances_by_groups(distance_header,
                                             distance_matrix, groups)

            # iterate over the groups
            for i, (first_g1, second_g1, distances_g1) in \
                    enumerate(real_dists[:-1]):
                real_dist_1 = average(distances_g1)

                # then for each other pair (not including same group)
                for j in range(i + 1, len(real_dists)):
                    first_g2, second_g2, distances_g2 = real_dists[j]
                    real_dist_2 = average(distances_g2)

                    # permute distances just within these groups!
                    rand_dists_1, rand_dists_2 = permute_between_groups(
                        distances_g1, distances_g2, num_iters)

                    # t statistic of every permutation, as one array
                    ttests = array([t_two_sample(rand_dists_1[n].flatten(),
                                                 rand_dists_2[n].flatten())[0]
                                    for n in range(num_iters)])
                    real_ttest = t_two_sample(distances_g1.flatten(),
                                              distances_g2.flatten())

                    curr_line = [
                        first_g1, second_g1, real_dist_1,
                        first_g2, second_g2, real_dist_2,
                        real_ttest[0], real_ttest[1],
                        # share of permuted t values above / below real t
                        (ttests > real_ttest[0]).sum() / float(num_iters),
                        (ttests < real_ttest[0]).sum() / float(num_iters),
                        num_iters]
                    outfile.write('\t'.join(map(str, curr_line)))
                    outfile.write('\n')
def compare_alpha_diversities(rarefaction_lines, mapping_lines, category,
                              depth, test_type='nonparametric',
                              num_permutations=999):
    """Compares alpha diversity values for differences per category treatment.

    Notes:
     Returns a dict keyed by 'treatment1,treatment2' strings. Each value is
     an (obs_t, p_val) tuple for that comparison, or (None, None) when the
     comparison could not be made (one sample per treatment, NaN scores, or
     no p-value from the nonparametric test).
    Inputs:
     rarefaction_lines - list of lines, result of multiple rarefactions.
     mapping_lines - list of lines, mapping file lines.
     category - str, the category to be compared, eg 'Treatment' or 'Age'.
     depth - int, depth of the rarefaction file to use.
     test_type - str, the type of t-test to perform. Must be either
     'parametric' or 'nonparametric'.
     num_permutations - int, the number of Monte Carlo permutations to use if
     test_type is 'nonparametric'.
    Raises:
     ValueError - for fewer than one permutation with the nonparametric
     test, for a depth with no rarefaction rows, or for an unknown test_type
     (the latter only once a comparison actually reaches the test).
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]
    # samid_pairs, treatment_pairs are in the same order
    samid_pairs, treatment_pairs = sampleId_pairs(mapping_data,
                                                  rarefaction_data, category)

    # extract only rows of the rarefaction data that are at the given depth
    rare_mat = array([row for row in rarefaction_data[3] if row[0] == depth])
    if rare_mat.shape[0] == 0:
        # Without this check the averaging below collapses to a scalar and
        # fails with an unhelpful IndexError.
        raise ValueError("No rarefaction data found at depth %s." % depth)

    # Average each col of the rarefaction mtx. Computing t test on averages
    # over all iterations. Avoids more comparisons, which kills significance.
    rare_mat = (rare_mat.sum(0) / rare_mat.shape[0])[2:]  # drop depth, iter
    sids = rarefaction_data[0][3:]  # 0-2 are header strings

    results = {}
    for sid_pair, treatment_pair in zip(samid_pairs, treatment_pairs):
        t_key = '%s,%s' % (treatment_pair[0], treatment_pair[1])

        # With only one sample in each treatment the Monte Carlo method would
        # error (e.g. mc_t_two_sample([1], [1])), so record no result.
        if len(sid_pair[0]) == 1 and len(sid_pair[1]) == 1:
            results[t_key] = (None, None)
            continue

        pair0_indices = [sids.index(i) for i in sid_pair[0]]
        pair1_indices = [sids.index(i) for i in sid_pair[1]]
        i = rare_mat.take(pair0_indices)
        j = rare_mat.take(pair1_indices)

        # The minimum of an array is NaN whenever any element is NaN, which
        # makes for a quick NaN check:
        # http://stackoverflow.com/questions/6736590/fast-check-for-nan-in-numpy
        if isnan(np_min(i)) or isnan(np_min(j)):
            results[t_key] = (None, None)
            continue

        if test_type == 'parametric':
            obs_t, p_val = t_two_sample(i, j)
        elif test_type == 'nonparametric':
            obs_t, _, _, p_val = mc_t_two_sample(
                i, j, permutations=num_permutations)
            if p_val is not None:
                p_val = float(format_p_value_for_num_iters(
                    p_val, num_iters=num_permutations))
            else:
                # None would error in format_p_value_for_num_iters
                obs_t, p_val = None, None
        else:
            raise ValueError("Invalid test type '%s'." % test_type)
        results[t_key] = (obs_t, p_val)
    return results
def monte_carlo_group_distances(mapping_file, dmatrix_file, prefs,
                                dir_prefix='',
                                subdir_prefix='monte_carlo_group_distances',
                                default_iters=10, fields=None):
    """Calculate Monte Carlo stats for specified group distances.

    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    mapping_file, dmatrix_file - paths of the mapping file and the distance
        matrix file; both are opened and parsed here
    prefs - dict or None; when it lacks 'MONTE_CARLO_GROUP_DISTANCES' it is
        replaced by build_monte_carlo_prefs(fields, default_iters). The value
        under that key maps a field name (several fields joined by '&&') to
        its number of iterations
    dir_prefix, subdir_prefix - combined by _make_path into the output
        location, which is created when it is not an existing directory
    default_iters - iteration count handed to build_monte_carlo_prefs
    fields - fields to analyse; defaults to the first mapping header entry

    Writes one tab-delimited 'group_distances_<field>.xls' file per field;
    nothing is returned.
    """
    # Input handles are closed as soon as parsing is done (they used to be
    # left open). The 'U' mode is kept for Python 2; Python 3.11+ rejects it.
    with open(mapping_file, 'U') as mapping_f:
        mapping, header, comments = parse_mapping_file(mapping_f)
        # header row first, then the data rows; assembled while the file is
        # still open in case the parser yields rows lazily
        mapping = [header] + list(mapping)

    with open(dmatrix_file, 'U') as dmatrix_f:
        distance_header, distance_matrix = parse_distmat(dmatrix_f)

    path_prefix = _make_path([dir_prefix, subdir_prefix])
    # make the directory if it doesn't exist
    if not path.isdir(path_prefix):
        mkdir(path_prefix)

    if fields is None:
        fields = [mapping[0][0]]

    if prefs is None:
        prefs = {}

    if 'MONTE_CARLO_GROUP_DISTANCES' not in prefs:
        prefs = build_monte_carlo_prefs(fields, default_iters)

    for field, num_iters in prefs['MONTE_CARLO_GROUP_DISTANCES'].items():
        if '&&' in field:
            groups = group_by_fields(mapping, field.split('&&'))
        else:
            groups = group_by_field(mapping, field)

        # path_prefix is used as a plain string prefix here, as before
        out_fp = path_prefix + 'group_distances_' + field + '.xls'
        # one output table per field, closed even if a helper raises
        with open(out_fp, 'w') as outfile:
            outfile.write('\t'.join(['Category_1a', 'Category_1b', 'Avg',
                                     'Category_2a', 'Category_2b', 'Avg',
                                     't', 'p', 'p_greater', 'p_less',
                                     'Iterations\n']))
            real_dists = distances_by_groups(distance_header,
                                             distance_matrix, groups)
            # group distances recomputed on num_iters permuted matrices
            rand_distances = [
                distances_by_groups(distance_header,
                                    permute_for_monte_carlo(distance_matrix),
                                    groups)
                for _ in range(num_iters)]

            # iterate over the groups
            for i, (first_g1, second_g1, distances_g1) in \
                    enumerate(real_dists[:-1]):
                real_dist_1 = average(distances_g1)
                rand_dists_1 = [rand_distances[n][i][-1]
                                for n in range(num_iters)]

                # then for each other pair (not including same group)
                for j in range(i + 1, len(real_dists)):
                    first_g2, second_g2, distances_g2 = real_dists[j]
                    real_dist_2 = average(distances_g2)
                    rand_dists_2 = [rand_distances[n][j][-1]
                                    for n in range(num_iters)]

                    # t statistic of every randomized pair, as one array
                    ttests = array([t_two_sample(rand_dists_1[n],
                                                 rand_dists_2[n])[0]
                                    for n in range(num_iters)])
                    real_ttest = t_two_sample(distances_g1, distances_g2)

                    curr_line = [
                        first_g1, second_g1, real_dist_1,
                        first_g2, second_g2, real_dist_2,
                        real_ttest[0], real_ttest[1],
                        # share of random t values above / below real t
                        (ttests > real_ttest[0]).sum() / float(num_iters),
                        (ttests < real_ttest[0]).sum() / float(num_iters),
                        num_iters]
                    outfile.write('\t'.join(map(str, curr_line)))
                    outfile.write('\n')
def test_t_two_sample_no_variance(self):
    """t_two_sample should return None if lists are invariant"""
    constant_ones = array([1, 1, 1])
    constant_zeros = array([0, 0, 0])
    # First against itself, then against a different constant sample.
    for other in (constant_ones, constant_zeros):
        self.assertEqual(t_two_sample(constant_ones, other), (None, None))
def monte_carlo_group_distances_within_between(
        single_field, paired_field, dmat, dir_prefix='',
        subdir_prefix='monte_carlo_group_distances', num_iters=10):
    """Calculate Monte Carlo stats within and between fields.

    Specifically:
    - find the groups for each specified col (or combination of cols)
    - do t test between each pair of groups
    - randomize matrix n times and find empirical value of t for each pair
    - compare the actual value of t to the randomized values

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    single_field - passed to within_category_distances_grouped and
        between_category_distances_grouped
    paired_field - passed to within_and_between_fields
    dmat - passed to get_random_dists together with the real distances
    dir_prefix, subdir_prefix - joined to form the output directory, which is
        created when it does not exist
    num_iters - number of randomized distance sets compared per pair

    Writes the tab-delimited table 'group_distances_within_and_between.txt'
    into the output directory; nothing is returned.
    """
    path_prefix = path.join(dir_prefix, subdir_prefix)
    # make the directory if it doesn't exist
    if not path.isdir(path_prefix):
        mkdir(path_prefix)

    real_dists = []
    within_category_distances = \
        within_category_distances_grouped(single_field, label_suffix='')
    real_dists.extend([['Within', field, distances] for field, distances
                       in within_category_distances.items()])

    between_category_distances = \
        between_category_distances_grouped(single_field, label_suffix='')
    real_dists.extend([['Between', field, distances] for field, distances
                       in between_category_distances.items()])

    within_and_between = within_and_between_fields(paired_field)
    # keys are split once on '_' into the comparison and category labels
    real_dists.extend([[field.split('_', 1)[0], field.split('_', 1)[1],
                        distances]
                       for field, distances in within_and_between.items()])

    out_fp = path.join(path_prefix, 'group_distances_within_and_between.txt')
    # The with block closes the output file even when a helper raises;
    # previously the handle was never closed.
    with open(out_fp, 'w') as outfile:
        outfile.write('\t'.join(['Comparison', 'Category_1', 'Avg',
                                 'Comparison', 'Category_2', 'Avg', 't', 'p',
                                 'p_greater', 'p_less', 'Iterations\n']))

        rand_distances = get_random_dists(real_dists, dmat, num_iters)

        # iterate over the groups
        for i, (first_g1, second_g1, distances_g1) in \
                enumerate(real_dists[:-1]):
            real_dist_1 = average(distances_g1)
            rand_dists_1 = [rand_distances[n][i][-1]
                            for n in range(num_iters)]

            # then for each other pair (not including same group)
            for j in range(i + 1, len(real_dists)):
                first_g2, second_g2, distances_g2 = real_dists[j]
                real_dist_2 = average(distances_g2)
                rand_dists_2 = [rand_distances[n][j][-1]
                                for n in range(num_iters)]

                # t statistic of every randomized pair, as one array
                ttests = array([t_two_sample(rand_dists_1[n],
                                             rand_dists_2[n])[0]
                                for n in range(num_iters)])
                real_ttest = t_two_sample(distances_g1, distances_g2)

                curr_line = [first_g1, second_g1, real_dist_1,
                             first_g2, second_g2, real_dist_2,
                             real_ttest[0], real_ttest[1],
                             # share of random t values above / below real t
                             (ttests > real_ttest[0]).sum() / float(num_iters),
                             (ttests < real_ttest[0]).sum() / float(num_iters),
                             num_iters]
                outfile.write('\t'.join(map(str, curr_line)))
                outfile.write('\n')
def compare_alpha_diversities(rarefaction_lines, mapping_lines, category,
                              depth, test_type='nonparametric',
                              num_permutations=999):
    """Run a t-test on alpha diversities for each pair of category values.

    inputs:
        rarefaction_lines - lines of a rarefaction file, handed to
            parse_rarefaction
        mapping_lines - lines of a mapping file, handed to
            parse_mapping_file_to_dict
        category - the mapping category to be compared, a string
        depth - the rarefaction depth to use, an integer
        test_type - the type of t-test to perform, a string. Must be either
            'parametric' or 'nonparametric'
        num_permutations - the number of Monte Carlo permutations to use if
            test_type is 'nonparametric', an integer

    outputs:
        results - a nested dictionary: ``category`` is the only top level
            key; its value maps a tuple of two stringified category values
            to the (obs_t, p_val) tuple computed for that pair
    """
    if test_type == 'nonparametric' and num_permutations < 1:
        raise ValueError("Invalid number of permutations: %d. Must be greater "
                         "than zero." % num_permutations)

    rarefaction_data = parse_rarefaction(rarefaction_lines)
    mapping_data = parse_mapping_file_to_dict(mapping_lines)[0]

    value_pairs = make_value_pairs_from_category(mapping_data, category)
    ids_by_value = make_category_values_Id_dict(mapping_data, category)
    id_pairs = map_category_value_pairs_to_Ids(value_pairs, ids_by_value)

    id_to_col = make_SampleIds_rarefaction_columns_dict(rarefaction_data)
    scores_at_depth = extract_rarefaction_scores_at_depth(depth,
                                                          rarefaction_data)

    def flat_scores(sample_ids):
        # Must flatten the matrix because t_two_sample only operates on
        # non-nested sequences (otherwise we'll get the wrong degrees of
        # freedom).
        return convert_SampleIds_to_rarefaction_mtx(
            sample_ids, scores_at_depth, id_to_col).flatten()

    pair_results = {}
    for idx, id_pair in enumerate(id_pairs):
        first = flat_scores(id_pair[0])
        second = flat_scores(id_pair[1])

        if test_type == 'parametric':
            obs_t, p_val = t_two_sample(first, second)
        elif test_type == 'nonparametric':
            obs_t, _, _, p_val = mc_t_two_sample(
                first, second, permutations=num_permutations)
            p_val = format_p_value_for_num_iters(p_val, num_permutations)
        else:
            raise ValueError("Invalid test type '%s'." % test_type)

        # value_pairs is index-aligned with id_pairs (the latter is built
        # from the former above).
        labels = (str(value_pairs[idx][0]), str(value_pairs[idx][1]))
        pair_results[labels] = (obs_t, p_val)
    return {category: pair_results}