def single_object_beta(self, otu_table, metric, tree_string,
                       missing_sams=None):
    """Check that row-wise single_object_beta runs agree with the full run.

    For every known metric (non-phylogenetic first, then phylogenetic) the
    full distance matrix is computed once with ``rowids=None`` and
    ``full_tree=False``.  The matrix is then recomputed one sample row at a
    time and each entry of each row is compared with the matching entry of
    the full matrix.  Unless the metric name contains ``'full_tree'``, the
    same row-wise check is repeated with ``full_tree=True`` and, finally,
    the complete ``full_tree=True`` matrix is compared with the first one.

    Parameters
    ----------
    otu_table
        Passed through unchanged to the module-level ``single_object_beta``.
    metric
        Ignored: the loop below rebinds this name to each known metric in
        turn.  Kept so the signature stays compatible with callers.
    tree_string
        Passed through unchanged to the module-level ``single_object_beta``.
    missing_sams : list, optional
        Sample ids for which the row-wise checks are skipped.
    """
    if missing_sams is None:
        missing_sams = []

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    # NOTE: the backslash-newline inside each literal continues the string;
    # the single leading space on the following line is part of the message.
    warnings.filterwarnings(
        'ignore', 'dissimilarity binary_dist_chisq is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_chisq is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_gower is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_hellinger is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'unifrac had no information for\
 sample M*')

    def assert_rows_match_full(current_metric, sams, dmtx, full_tree):
        """Recompute one row per sample and compare it with ``dmtx``."""
        n_cols = len(sams)
        for rows in sams:
            if rows in missing_sams:
                continue
            # rowids must name the row under test; with rowids=None the
            # whole matrix comes back and the shape assertion cannot hold.
            r_out = single_object_beta(otu_table, current_metric,
                                       tree_string, rowids=rows,
                                       full_tree=full_tree)
            col_sams, row_sams, row_dmtx = parse_matrix(r_out)

            n_rows = len(rows.split(','))
            self.assertEqual(row_dmtx.shape, (n_rows, n_cols))

            # make sure rows same as full
            for j in range(n_rows):
                full_row = sams.index(row_sams[j])
                for k in range(n_cols):
                    row_v1 = row_dmtx[j, k]
                    full_v1 = dmtx[full_row, sams.index(col_sams[k])]
                    npt.assert_almost_equal(row_v1, full_v1)

    for metric in metrics:
        # reference result: the whole matrix in one call
        beta_out = single_object_beta(otu_table, metric, tree_string,
                                      rowids=None, full_tree=False)
        sams, dmtx = parse_distmat(beta_out)

        # do it by rows
        assert_rows_match_full(metric, sams, dmtx, full_tree=False)

        # metrics that already use the full tree need no full_tree rerun
        if 'full_tree' in str(metric).lower():
            continue

        # do it by rows with full tree
        assert_rows_match_full(metric, sams, dmtx, full_tree=True)

        # do it with full tree
        r_out = single_object_beta(otu_table, metric, tree_string,
                                   rowids=None, full_tree=True)
        sams_ft, dmtx_ft = parse_distmat(r_out)
        self.assertEqual(sams_ft, sams)
        npt.assert_almost_equal(dmtx_ft, dmtx)
def single_object_beta(self, otu_table, metric, tree_string,
                       missing_sams=None):
    """Check that row-wise single_object_beta runs agree with the full run.

    For every known metric (non-phylogenetic first, then phylogenetic) the
    full distance matrix is computed once with ``rowids=None`` and
    ``full_tree=False``.  It is then recomputed one sample row at a time
    and every entry is compared with the matching entry of the full
    matrix.  Unless the metric name contains ``'full_tree'``, the row-wise
    check is repeated with ``full_tree=True`` and the complete
    ``full_tree=True`` matrix is compared with the first one.

    Parameters
    ----------
    otu_table
        Passed through unchanged to the module-level ``single_object_beta``.
    metric
        Ignored: the loop below rebinds this name to each known metric in
        turn.  Kept so the signature stays compatible with callers.
    tree_string
        Passed through unchanged to the module-level ``single_object_beta``.
    missing_sams : list, optional
        Sample ids for which the row-wise checks are skipped.
    """
    # identity test: "== None" would be evaluated element-wise (and then
    # fail in the if) should a caller ever pass an array-like
    if missing_sams is None:
        missing_sams = []

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    # NOTE: the backslash-newline inside each literal continues the string;
    # the single leading space on the following line is part of the message.
    warnings.filterwarnings('ignore', 'dissimilarity binary_dist_chisq is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'dissimilarity dist_chisq is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'dissimilarity dist_gower is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'dissimilarity dist_hellinger is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'unifrac had no information for\
 sample M*')

    def compare_rows_with_full(current_metric, sams, dmtx, full_tree):
        """Recompute one row per sample and compare it with ``dmtx``."""
        n_cols = len(sams)
        for rows in sams:
            if rows in missing_sams:
                continue
            # rowids must name the row under test; with rowids=None the
            # whole matrix comes back and the shape assertion cannot hold.
            r_out = single_object_beta(otu_table, current_metric,
                                       tree_string, rowids=rows,
                                       full_tree=full_tree)
            col_sams, row_sams, row_dmtx = parse_matrix(r_out)

            n_rows = len(rows.split(','))
            self.assertEqual(row_dmtx.shape, (n_rows, n_cols))

            # make sure rows same as full
            for j in range(n_rows):
                full_row = sams.index(row_sams[j])
                for k in range(n_cols):
                    row_v1 = row_dmtx[j, k]
                    full_v1 = dmtx[full_row, sams.index(col_sams[k])]
                    self.assertFloatEqual(row_v1, full_v1)

    for metric in metrics:
        # reference result: the whole matrix in one call
        beta_out = single_object_beta(otu_table, metric, tree_string,
                                      rowids=None, full_tree=False)
        sams, dmtx = parse_distmat(beta_out)

        # do it by rows
        compare_rows_with_full(metric, sams, dmtx, full_tree=False)

        # metrics that already use the full tree need no full_tree rerun
        if 'full_tree' in str(metric).lower():
            continue

        # do it by rows with full tree
        compare_rows_with_full(metric, sams, dmtx, full_tree=True)

        # do it with full tree
        r_out = single_object_beta(otu_table, metric, tree_string,
                                   rowids=None, full_tree=True)
        sams_ft, dmtx_ft = parse_distmat(r_out)
        self.assertEqual(sams_ft, sams)
        self.assertFloatEqual(dmtx_ft, dmtx)
def single_file_beta(self, otu_table_string, tree_string,
                     missing_sams=None, use_metric_list=False):
    """Check that row-wise single_file_beta runs agree with the full run.

    The OTU table and tree text are written to temporary files, and a
    temporary output directory (with an ``ft/`` subdirectory for the
    ``full_tree=True`` runs) is created.  For every known metric the full
    distance matrix is produced once, then recomputed one sample row at a
    time, and every row entry is compared with the matching entry of the
    full matrix.  Unless the metric name contains ``'full_tree'``, the
    row-wise check is repeated with ``full_tree=True`` and the complete
    ``full_tree=True`` matrix is compared with the first one.

    Parameters
    ----------
    otu_table_string : str
        Text written verbatim to the temporary ``.txt`` input file.
    tree_string : str
        Text written verbatim to the temporary ``.tre`` tree file.
    missing_sams : list, optional
        Sample ids for which the row-wise checks are skipped.
    use_metric_list : bool, optional
        If true, each metric is handed to ``single_file_beta`` wrapped in a
        one-element list instead of as a bare value.

    The temporary files and the output directory are appended to
    ``self.files_to_remove`` / ``self.folders_to_remove``.
    """
    if missing_sams is None:
        missing_sams = []

    # setup -- every temporary path is registered for cleanup as soon as it
    # exists, so a failure further down cannot leave it behind
    fd, input_path = mkstemp(suffix='.txt')
    os.close(fd)
    self.files_to_remove.append(input_path)
    in_fname = os.path.split(input_path)[1]
    with open(input_path, 'w') as f:
        f.write(otu_table_string)

    fd, tree_path = mkstemp(suffix='.tre')
    os.close(fd)
    self.files_to_remove.append(tree_path)
    with open(tree_path, 'w') as f:
        f.write(tree_string)

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())

    output_dir = mkdtemp()
    self.folders_to_remove.append(output_dir)
    ft_dir = output_dir + '/ft/'
    os.mkdir(ft_dir)

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    # NOTE: the backslash-newline inside each literal continues the string;
    # the single leading space on the following line is part of the message.
    warnings.filterwarnings(
        'ignore', 'dissimilarity binary_dist_chisq is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_chisq is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_gower is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings(
        'ignore', 'dissimilarity dist_hellinger is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'unifrac had no information for\
 sample M*')

    def assert_rows_match_full(metric_arg, out_dir, row_outname, sams,
                               dmtx, **beta_kwargs):
        """Recompute one row per sample and compare it with ``dmtx``."""
        n_cols = len(sams)
        for rows in sams:
            if rows in missing_sams:
                continue
            single_file_beta(input_path, metric_arg, tree_path, out_dir,
                             rowids=rows, **beta_kwargs)
            # close the result file as soon as it has been parsed
            with open(row_outname) as row_file:
                col_sams, row_sams, row_dmtx = parse_matrix(row_file)

            n_rows = len(rows.split(','))
            self.assertEqual(row_dmtx.shape, (n_rows, n_cols))

            # make sure rows same as full
            for j in range(n_rows):
                full_row = sams.index(row_sams[j])
                for k in range(n_cols):
                    row_v1 = row_dmtx[j, k]
                    full_v1 = dmtx[full_row, sams.index(col_sams[k])]
                    npt.assert_almost_equal(row_v1, full_v1)

    for metric in metrics:
        # the function under test accepts a bare metric or a list of them
        metric_arg = [metric] if use_metric_list else metric
        full_outname = output_dir + '/' + metric + '_' + in_fname
        ft_outname = output_dir + '/ft/' + metric + '_' + in_fname

        # do it
        single_file_beta(input_path, metric_arg, tree_path, output_dir,
                         rowids=None)
        with open(full_outname) as full_file:
            sams, dmtx = parse_distmat(full_file)

        # do it by rows (each run rewrites the same output file)
        assert_rows_match_full(metric_arg, output_dir, full_outname,
                               sams, dmtx)

        # metrics that already use the full tree need no full_tree rerun
        if 'full_tree' in str(metric).lower():
            continue

        # do it by rows with full tree
        assert_rows_match_full(metric_arg, ft_dir, ft_outname, sams, dmtx,
                               full_tree=True)

        # do it with full tree
        single_file_beta(input_path, metric_arg, tree_path, ft_dir,
                         rowids=None, full_tree=True)
        with open(ft_outname) as ft_file:
            sams_ft, dmtx_ft = parse_distmat(ft_file)
        self.assertEqual(sams_ft, sams)
        npt.assert_almost_equal(dmtx_ft, dmtx)
def single_file_beta(self, otu_table_string, tree_string,
                     missing_sams=None, use_metric_list=False):
    """Check that row-wise single_file_beta runs agree with the full run.

    The OTU table and tree text are written to temporary files, and a
    temporary output directory (with an ``ft/`` subdirectory for the
    ``full_tree=True`` runs) is created.  For every known metric the full
    distance matrix is produced once, then recomputed one sample row at a
    time, and every row entry is compared with the matching entry of the
    full matrix.  Unless the metric name contains ``'full_tree'``, the
    row-wise check is repeated with ``full_tree=True`` and the complete
    ``full_tree=True`` matrix is compared with the first one.

    Parameters
    ----------
    otu_table_string : str
        Text written verbatim to the temporary ``.txt`` input file.
    tree_string : str
        Text written verbatim to the temporary ``.tre`` tree file.
    missing_sams : list, optional
        Sample ids for which the row-wise checks are skipped.
    use_metric_list : bool, optional
        If true, each metric is handed to ``single_file_beta`` wrapped in a
        one-element list instead of as a bare value.

    The temporary files and the output directory are appended to
    ``self.files_to_remove`` / ``self.folders_to_remove``.
    """
    if missing_sams is None:
        missing_sams = []

    # setup -- every temporary path is registered for cleanup as soon as it
    # exists, so a failure further down cannot leave it behind
    fd, input_path = mkstemp(suffix='.txt')
    close(fd)
    self.files_to_remove.append(input_path)
    in_fname = os.path.split(input_path)[1]
    with open(input_path, 'w') as table_file:
        table_file.write(otu_table_string)

    fd, tree_path = mkstemp(suffix='.tre')
    close(fd)
    self.files_to_remove.append(tree_path)
    with open(tree_path, 'w') as tree_file:
        tree_file.write(tree_string)

    metrics = list_known_nonphylogenetic_metrics()
    metrics.extend(list_known_phylogenetic_metrics())

    output_dir = mkdtemp()
    self.folders_to_remove.append(output_dir)
    ft_dir = output_dir + '/ft/'
    os.mkdir(ft_dir)

    # new metrics that don't trivially parallelize must be dealt with
    # carefully
    # NOTE: the backslash-newline inside each literal continues the string;
    # the single leading space on the following line is part of the message.
    warnings.filterwarnings('ignore', 'dissimilarity binary_dist_chisq is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'dissimilarity dist_chisq is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'dissimilarity dist_gower is not\
 parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'dissimilarity dist_hellinger is\
 not parallelized, calculating the whole matrix...')
    warnings.filterwarnings('ignore', 'unifrac had no information for\
 sample M*')

    def compare_rows_with_full(metric_arg, out_dir, row_outname, sams,
                               dmtx, **beta_kwargs):
        """Recompute one row per sample and compare it with ``dmtx``."""
        n_cols = len(sams)
        for rows in sams:
            if rows in missing_sams:
                continue
            single_file_beta(input_path, metric_arg, tree_path, out_dir,
                             rowids=rows, **beta_kwargs)
            # close the result file as soon as it has been parsed
            with open(row_outname) as row_file:
                col_sams, row_sams, row_dmtx = parse_matrix(row_file)

            n_rows = len(rows.split(','))
            self.assertEqual(row_dmtx.shape, (n_rows, n_cols))

            # make sure rows same as full
            for j in range(n_rows):
                full_row = sams.index(row_sams[j])
                for k in range(n_cols):
                    row_v1 = row_dmtx[j, k]
                    full_v1 = dmtx[full_row, sams.index(col_sams[k])]
                    assert_almost_equal(row_v1, full_v1)

    for metric in metrics:
        # the function under test accepts a bare metric or a list of them
        metric_arg = [metric] if use_metric_list else metric
        full_outname = output_dir + '/' + metric + '_' + in_fname
        ft_outname = output_dir + '/ft/' + metric + '_' + in_fname

        # do it
        single_file_beta(input_path, metric_arg, tree_path, output_dir,
                         rowids=None)
        with open(full_outname) as full_file:
            sams, dmtx = parse_distmat(full_file)

        # do it by rows (each run rewrites the same output file)
        compare_rows_with_full(metric_arg, output_dir, full_outname,
                               sams, dmtx)

        # metrics that already use the full tree need no full_tree rerun
        if 'full_tree' in str(metric).lower():
            continue

        # do it by rows with full tree
        compare_rows_with_full(metric_arg, ft_dir, ft_outname, sams, dmtx,
                               full_tree=True)

        # do it with full tree
        single_file_beta(input_path, metric_arg, tree_path, ft_dir,
                         rowids=None, full_tree=True)
        with open(ft_outname) as ft_file:
            sams_ft, dmtx_ft = parse_distmat(ft_file)
        self.assertEqual(sams_ft, sams)
        assert_almost_equal(dmtx_ft, dmtx)