def setUp(self):
    self.methods = ('pearson', 'spearman')
    self.alternatives = ('two-sided', 'greater', 'less')

    # Small dataset of minimal size (3x3). Mix of floats and ints in a
    # native Python nested list structure.
    self.minx = [[0, 1, 2], [1, 0, 3], [2, 3, 0]]
    self.miny = [[0, 2, 7], [2, 0, 6], [7, 6, 0]]
    self.minz = [[0, 0.5, 0.25], [0.5, 0, 0.1], [0.25, 0.1, 0]]

    # No variation in distances. Taken from Figure 10.20(b), pg. 603 in L&L
    # 3rd edition. Their example is 4x4 but using 3x3 here for easy
    # comparison to the minimal dataset above.
    self.no_variation = [[0, 0.667, 0.667],
                         [0.667, 0, 0.667],
                         [0.667, 0.667, 0]]

    # This second dataset is derived from vegan::mantel's example dataset.
    # The "veg" distance matrix contains Bray-Curtis distances derived from
    # the varespec data (named "veg.dist" in the example). The "env"
    # distance matrix contains Euclidean distances derived from scaled
    # varechem data (named "env.dist" in the example).
    self.veg_dm_vegan = np.loadtxt(
        get_data_path('mantel_veg_dm_vegan.txt'))
    self.env_dm_vegan = np.loadtxt(
        get_data_path('mantel_env_dm_vegan.txt'))

    # Expected test statistic when comparing x and y with method='pearson'.
    self.exp_x_vs_y = 0.7559289

    # Expected test statistic when comparing x and z with method='pearson'.
    self.exp_x_vs_z = -0.9897433
def setUp(self):
    self.multi_fp = get_data_path('ecoli_multi.sam')
    self.single_fp = get_data_path('ecoli_single.sam')
    self.single_exp = \
        ('MANLSGYNFAYLDEQTKRMIRRAILKAVAIPGYQVPFGGREMP'
         'MPYGWGTGGIQLTASVIGESDVLKVIDQGADDTTNAVSIRNFF'
         'KRVTGVNTTERTDDATLIQTRHRIPETPLTEDQIIIFQVPIPE'
         'PLRFIEPRETETRTMHALEEYGVMQVKLYEDIARFGHIATTYA'
         'YPVKVNGRYVMDPSPIPKFDNPKMDMMPALQLFGAGREKRIYA'
         'VPPFTHVESLDFDDHPFTVQQWDEPCAICGSTHSYLDEVVLDD'
         'AGNRMFVCSDTDYCRQQNEAKSQ',
         {'@HD': 'VN:1.5\tSO:query',
          '@PG': 'PN:DIAMOND',
          '@mm': 'BlastX',
          'QNAME': 'WP_000002278.1',
          'FLAG': 0,
          'RNAME': 'UniRef100_P16688',
          'POS': 1,
          'MAPQ': 255,
          'CIGAR': '281M',
          'RNEXT': '*',
          'PNEXT': 0,
          'TLEN': 0,
          'QUAL': '*',
          'AS': 573,
          'NM': 3,
          'ZR': 1477,
          'ZE': 5.9e-164,
          'ZI': 98,
          'ZL': 281,
          'ZF': 1,
          'ZS': 1,
          'MD': '102V117R54S5'})
    self.header_fp = get_data_path('header.sam')
def setUp(self):
    # Crawford dataset for unweighted UniFrac
    fp = get_data_path('PCoA_sample_data_3')
    self.ordination = pcoa(DistanceMatrix.read(fp))

    fp = get_data_path('PCoA_biplot_descriptors')
    self.descriptors = pd.read_table(fp, index_col='Taxon').T
def test_default_valid_multi_line(self):
    fp = get_data_path('blast7_default_multi_line')
    df = _blast7_to_data_frame(fp)
    exp = pd.DataFrame([['query1', 'subject2', 70.00, 5.0, 0.0, 0.0, 7.0,
                         60.0, 3.0, 100.0, 9e-05, 10.5],
                        ['query1', 'subject2', 30.00, 8.0, 0.0, 0.0, 6.0,
                         15.0, 1.0, 100.0, 0.053, 12.0],
                        ['query1', 'subject2', 90.00, 2.0, 0.0, 0.0, 9.0,
                         35.0, 2.0, 100.0, 0.002, 8.3]],
                       columns=['qseqid', 'sseqid', 'pident', 'length',
                                'mismatch', 'gapopen', 'qstart', 'qend',
                                'sstart', 'send', 'evalue', 'bitscore'])
    assert_data_frame_almost_equal(df, exp)

    fp = get_data_path('legacy9_multi_line')
    df = _blast7_to_data_frame(fp)
    exp = pd.DataFrame([['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0,
                         8.0, 4.0, 10.0, 1e-05, 15.5],
                        ['query1', 'subject1', 70.00, 8.0, 0.0, 1.0, 0.0,
                         9.0, 5.0, 7.0, 0.231, 7.8],
                        ['query1', 'subject1', 90.00, 5.0, 1.0, 1.0, 0.0,
                         0.0, 2.0, 10.0, 0.022, 13.0]],
                       columns=['qseqid', 'sseqid', 'pident', 'length',
                                'mismatch', 'gapopen', 'qstart', 'qend',
                                'sstart', 'send', 'evalue', 'bitscore'])
    assert_data_frame_almost_equal(df, exp)
def test_any_sequences_to_fasta(self):
    # test writing with default parameters
    fh = io.StringIO()
    _tabular_msa_to_fasta(self.msa, fh)
    obs = fh.getvalue()
    fh.close()

    with io.open(get_data_path('fasta_3_seqs_defaults')) as fh:
        exp = fh.read()

    self.assertEqual(obs, exp)

    # test writing with non-defaults
    fasta_fh = io.StringIO()
    qual_fh = io.StringIO()
    _tabular_msa_to_fasta(self.msa, fasta_fh,
                          id_whitespace_replacement='*',
                          description_newline_replacement='+',
                          max_width=3, qual=qual_fh)
    obs_fasta = fasta_fh.getvalue()
    obs_qual = qual_fh.getvalue()
    fasta_fh.close()
    qual_fh.close()

    with io.open(get_data_path('fasta_3_seqs_non_defaults')) as fh:
        exp_fasta = fh.read()
    with io.open(get_data_path('qual_3_seqs_non_defaults')) as fh:
        exp_qual = fh.read()

    self.assertEqual(obs_fasta, exp_fasta)
    self.assertEqual(obs_qual, exp_qual)
def setUp(self): """varespec and varechem from Väre etal. 1995 DOI: 10.2307/3236351""" self.Y = pd.read_csv(get_data_path('varespec.csv'), index_col=0) self.X = pd.read_csv(get_data_path('varechem.csv'), index_col=0) self.Y.index.name = None self.X.index.name = None
def test_confirm_betadispr_results(self):
    mp_dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv'))
    mp_mf = pd.read_csv(get_data_path('moving_pictures_mf.tsv'), sep='\t')
    mp_mf.set_index('#SampleID', inplace=True)

    obs_med_mp = permdisp(mp_dm, mp_mf, column='BodySite')
    obs_cen_mp = permdisp(mp_dm, mp_mf, column='BodySite',
                          test='centroid')

    exp_data_m = ['PERMDISP', 'F-value', 33, 4, 10.1956, 0.001, 999]
    exp_data_c = ['PERMDISP', 'F-value', 33, 4, 17.4242, 0.001, 999]
    exp_ind = ['method name', 'test statistic name', 'sample size',
               'number of groups', 'test statistic', 'p-value',
               'number of permutations']

    exp_med_mp = pd.Series(data=exp_data_m, index=exp_ind, dtype='object',
                           name='PERMDISP results')
    exp_cen_mp = pd.Series(data=exp_data_c, index=exp_ind, dtype='object',
                           name='PERMDISP results')

    self.assert_series_equal(exp_med_mp, obs_med_mp)
    self.assert_series_equal(exp_cen_mp, obs_cen_mp)
def setUp(self):
    self.table1 = np.array([[1, 3, 0, 1, 0],
                            [0, 2, 0, 4, 4],
                            [0, 0, 6, 2, 1],
                            [0, 0, 1, 1, 1],
                            [5, 3, 5, 0, 0],
                            [0, 0, 0, 3, 5]])
    self.sids1 = list('ABCDEF')
    self.oids1 = ['OTU%d' % i for i in range(1, 6)]
    self.t1 = TreeNode.read(
        StringIO(u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
                 u'0.75,OTU5:0.75):1.25):0.0)root;'))
    self.t1_w_extra_tips = TreeNode.read(
        StringIO(u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
                 u'0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0'
                 u')root;'))
    self.t2 = TreeNode.read(
        StringIO(u'((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
                 u'root;'))
    self.oids2 = ['OTU%d' % i for i in range(1, 5)]

    # the following table and tree are derived from the QIIME 1.9.1
    # "tiny-test" data
    tt_table_fp = get_data_path(
        os.path.join('qiime-191-tt', 'otu-table.tsv'), 'data')
    tt_tree_fp = get_data_path(
        os.path.join('qiime-191-tt', 'tree.nwk'), 'data')

    self.q_table = pd.read_csv(tt_table_fp, sep='\t', skiprows=1,
                               index_col=0)
    self.q_tree = TreeNode.read(tt_tree_fp)
def setUp(self):
    self.positives = [get_data_path(e) for e in [
        'phylip_dna_3_seqs',
        'phylip_single_seq_long',
        'phylip_single_seq_short',
        'phylip_two_chunks',
        'phylip_variable_length_ids',
        'phylip_varied_whitespace_in_seqs',
        'phylip_whitespace_in_header_1',
        'phylip_whitespace_in_header_2',
        'phylip_whitespace_in_header_3',
    ]]

    # negative tests for sniffer don't include
    # phylip_invalid_empty_line_between_seqs, phylip_invalid_too_few_seqs,
    # phylip_invalid_too_many_seqs - because sniffer only reads first seq
    self.negatives = [get_data_path(e) for e in [
        'empty',
        'whitespace_only',
        'phylip_invalid_empty_line_after_header',
        'phylip_invalid_empty_line_before_header',
        'phylip_invalid_header_too_long',
        'phylip_invalid_header_too_short',
        'phylip_invalid_no_header',
        'phylip_invalid_seq_too_long',
        'phylip_invalid_seq_too_short',
        'phylip_invalid_zero_seq_len',
        'phylip_invalid_zero_seqs',
    ]]
def setUp(self):
    super(MantelTests, self).setUp()

    self.methods = ('pearson', 'spearman')
    self.alternatives = ('two-sided', 'greater', 'less')

    # No variation in distances. Taken from Figure 10.20(b), pg. 603 in L&L
    # 3rd edition. Their example is 4x4 but using 3x3 here for easy
    # comparison to the minimal dataset above.
    self.no_variation = [[0, 0.667, 0.667],
                         [0.667, 0, 0.667],
                         [0.667, 0.667, 0]]

    # This second dataset is derived from vegan::mantel's example dataset.
    # The "veg" distance matrix contains Bray-Curtis distances derived from
    # the varespec data (named "veg.dist" in the example). The "env"
    # distance matrix contains Euclidean distances derived from scaled
    # varechem data (named "env.dist" in the example).
    self.veg_dm_vegan = np.loadtxt(
        get_data_path('mantel_veg_dm_vegan.txt'))
    self.env_dm_vegan = np.loadtxt(
        get_data_path('mantel_env_dm_vegan.txt'))

    # Expected test statistic when comparing x and y with method='pearson'.
    self.exp_x_vs_y = 0.7559289

    # Expected test statistic when comparing x and z with method='pearson'.
    self.exp_x_vs_z = -0.9897433
def test_any_sequences_to_fasta(self):
    for fn, obj in ((_sequence_collection_to_fasta, self.seq_coll),
                    (_alignment_to_fasta, self.align)):
        # test writing with default parameters
        fh = StringIO()
        fn(obj, fh)
        obs = fh.getvalue()
        fh.close()

        with open(get_data_path('fasta_3_seqs_defaults'), 'U') as fh:
            exp = fh.read()

        self.assertEqual(obs, exp)

        # test writing with non-defaults
        fasta_fh = StringIO()
        qual_fh = StringIO()
        fn(obj, fasta_fh, id_whitespace_replacement='*',
           description_newline_replacement='+', max_width=3, qual=qual_fh)
        obs_fasta = fasta_fh.getvalue()
        obs_qual = qual_fh.getvalue()
        fasta_fh.close()
        qual_fh.close()

        with open(get_data_path('fasta_3_seqs_non_defaults'), 'U') as fh:
            exp_fasta = fh.read()
        with open(get_data_path('qual_3_seqs_non_defaults'), 'U') as fh:
            exp_qual = fh.read()

        self.assertEqual(obs_fasta, exp_fasta)
        self.assertEqual(obs_qual, exp_qual)
def setUp(self):
    self.jbe_con = get_data_path('test_contacts/1jbeA.psicov')
    self.jbe_pdb = get_data_path('test_contacts/1jbeA_clean.pdb')
    self.qjp_con = get_data_path('test_contacts/1qjpA.psicov')
    self.qjp_pdb = get_data_path('test_contacts/1qjpA_clean.pdb')
    self.real_n_con_qjp = {'lr': 6441, 'sr': 2337, 'all': 8778}
    self.real_n_con_jbe = {'lr': 4742, 'sr': 2025, 'all': 6767}
    self.positive_params = [{'-t': 'all', '-l': 1},
                            {'-t': 'all', '-l': 2},
                            {'-t': 'all', '-l': 5},
                            {'-t': 'all', '-l': 10},
                            {'-t': 'lr', '-l': 1},
                            {'-t': 'lr', '-l': 2},
                            {'-t': 'lr', '-l': 10},
                            {'-t': 'sr', '-l': 2},
                            {'-t': 'sr', '-l': 10}]
def test_any_sequence_to_fasta(self):
    # store writer function, sequence object to write, expected
    # fasta filepath for default parameters, expected fasta filepath for
    # non-defaults, and expected qual filepath for non-defaults
    id_ = 'f o o'
    desc = 'b\na\nr'
    test_data = (
        (_biological_sequence_to_fasta,
         Sequence('ACGT', id=id_, description=desc, quality=range(1, 5)),
         ('fasta_single_bio_seq_defaults',
          'fasta_single_bio_seq_non_defaults',
          'qual_single_bio_seq_non_defaults')),
        (_dna_sequence_to_fasta,
         DNA('TACG', id=id_, description=desc, quality=range(4)),
         ('fasta_single_dna_seq_defaults',
          'fasta_single_dna_seq_non_defaults',
          'qual_single_dna_seq_non_defaults')),
        (_rna_sequence_to_fasta,
         RNA('UACG', id=id_, description=desc, quality=range(2, 6)),
         ('fasta_single_rna_seq_defaults',
          'fasta_single_rna_seq_non_defaults',
          'qual_single_rna_seq_non_defaults')),
        (_protein_sequence_to_fasta,
         Protein('PQQ', id=id_, description=desc, quality=[42, 41, 40]),
         ('fasta_single_prot_seq_defaults',
          'fasta_single_prot_seq_non_defaults',
          'qual_single_prot_seq_non_defaults')))

    for fn, obj, fps in test_data:
        defaults_fp, non_defaults_fasta_fp, non_defaults_qual_fp = fps

        # test writing with default parameters
        fh = StringIO()
        fn(obj, fh)
        obs = fh.getvalue()
        fh.close()

        with open(get_data_path(defaults_fp), 'U') as fh:
            exp = fh.read()

        self.assertEqual(obs, exp)

        # test writing with non-defaults
        fasta_fh = StringIO()
        qual_fh = StringIO()
        fn(obj, fasta_fh, id_whitespace_replacement='-',
           description_newline_replacement='_', max_width=1, qual=qual_fh)
        obs_fasta = fasta_fh.getvalue()
        obs_qual = qual_fh.getvalue()
        fasta_fh.close()
        qual_fh.close()

        with open(get_data_path(non_defaults_fasta_fp), 'U') as fh:
            exp_fasta = fh.read()
        with open(get_data_path(non_defaults_qual_fp), 'U') as fh:
            exp_qual = fh.read()

        self.assertEqual(obs_fasta, exp_fasta)
        self.assertEqual(obs_qual, exp_qual)
def test_simple(self):
    eigvals = [0.51236726, 0.30071909, 0.26791207, 0.20898868,
               0.19169895, 0.16054235, 0.15017696, 0.12245775, 0.0]
    proportion_explained = [0.2675738328, 0.157044696, 0.1399118638,
                            0.1091402725, 0.1001110485, 0.0838401162,
                            0.0784269939, 0.0639511764, 0.0]
    sample_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                  'PC.593', 'PC.355', 'PC.607', 'PC.634']
    axis_labels = ['PC%d' % i for i in range(1, 10)]

    expected_results = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=pd.Series(eigvals, index=axis_labels),
        samples=pd.DataFrame(
            np.loadtxt(get_data_path('exp_PCoAEigenResults_site')),
            index=sample_ids, columns=axis_labels),
        proportion_explained=pd.Series(proportion_explained,
                                       index=axis_labels))

    dm = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
    results = pcoa(dm)

    assert_ordination_results_equal(results, expected_results,
                                    ignore_directionality=True)
def setUp(self):
    self.positives = [get_data_path(e) for e in [
        'fastq_multi_seq_sanger',
        'fastq_single_seq_illumina1.3',
        'fastq_wrapping_as_illumina_no_description',
        'fastq_wrapping_as_sanger_no_description',
        'fastq_wrapping_original_sanger_no_description',
        'fastq_writer_illumina1.3_defaults',
        'fastq_writer_sanger_defaults',
        'fastq_writer_sanger_non_defaults',
        'illumina_full_range_as_illumina.fastq',
        'illumina_full_range_as_sanger.fastq',
        'illumina_full_range_original_illumina.fastq',
        'longreads_as_illumina.fastq',
        'longreads_as_sanger.fastq',
        'longreads_original_sanger.fastq',
        'misc_dna_as_illumina.fastq',
        'misc_dna_as_sanger.fastq',
        'misc_dna_original_sanger.fastq',
        'misc_rna_as_illumina.fastq',
        'misc_rna_as_sanger.fastq',
        'misc_rna_original_sanger.fastq',
        'sanger_full_range_as_illumina.fastq',
        'sanger_full_range_as_sanger.fastq',
        'sanger_full_range_original_sanger.fastq',
        'solexa_full_range_original_solexa.fastq',
        'wrapping_as_illumina.fastq',
        'wrapping_as_sanger.fastq',
        'wrapping_original_sanger.fastq'
    ]]

    self.negatives = [get_data_path(e) for e in [
        'empty',
        'whitespace_only',
        'fastq_invalid_missing_header',
        'fastq_invalid_missing_seq_data',
        'error_diff_ids.fastq',
        'error_double_qual.fastq',
        'error_double_seq.fastq',
        'error_long_qual.fastq',
        'error_no_qual.fastq',
        'error_qual_del.fastq',
        'error_qual_escape.fastq',
        'error_qual_null.fastq',
        'error_qual_space.fastq',
        'error_qual_tab.fastq',
        'error_qual_unit_sep.fastq',
        'error_qual_vtab.fastq',
        'error_short_qual.fastq',
        'error_spaces.fastq',
        'error_tabs.fastq',
        'error_trunc_at_seq.fastq',
        'error_trunc_at_plus.fastq',
        'error_trunc_at_qual.fastq',
        'error_trunc_in_title.fastq',
        'error_trunc_in_seq.fastq',
        'error_trunc_in_plus.fastq',
        'error_trunc_in_qual.fastq',
    ]]
def setUp(self):
    self.cfg_fps = list()
    self.misc_fp = get_data_path("misc.cfg")
    self.misc_fp_local = get_data_path("misc_local.cfg")
    self.param_fp = get_data_path("param.cfg")
    self.param_fp_local = get_data_path("param_local.cfg")
    self.patcher = mock.patch("click.get_app_dir",
                              return_value=dirname(self.misc_fp))
    self.patcher.start()
def test_filepaths_as_input(self):
    dms = [
        get_data_path('dm.txt'),
        get_data_path('dm2.txt'),
    ]
    np.random.seed(0)

    obs = pwmantel(dms)

    assert_data_frame_almost_equal(obs, self.exp_results_dm_dm2)
def test_phylogenetic_basis_large1(self):
    fname = get_data_path('large_tree.nwk', subfolder='data/phylogeny')
    t = TreeNode.read(fname)
    exp_basis = np.loadtxt(
        get_data_path('large_tree_basis.txt', subfolder='data/phylogeny'))
    res_basis, res_keys = phylogenetic_basis(t)
    npt.assert_allclose(exp_basis, res_basis)
def test_wrong_amount_of_columns_error(self):
    fp = get_data_path("blast7_invalid_too_many_columns")
    with assertRaisesRegex(self, BLAST7FormatError,
                           r"Number of fields.*\(2\)"):
        _blast7_to_data_frame(fp)

    fp = get_data_path("legacy9_invalid_too_many_columns")
    with assertRaisesRegex(self, BLAST7FormatError,
                           r"Number of fields.*\(12\)"):
        _blast7_to_data_frame(fp)
def setUp(self):
    self.file_hhsearch1 = get_data_path(
        'test_split_search/GRAMNEG_T1D_5168.out')
    self.file_fasta1 = get_data_path(
        'test_split_search/GRAMNEG_T1D_5168.fasta')
    self.file_hhsearch2 = get_data_path(
        'test_split_search/GRAMNEG_T1D_3144_1-275.out')
    self.file_fasta2 = get_data_path(
        'test_split_search/GRAMNEG_T1D_3144_1-275.fasta')
def test_scaling2(self):
    scores = self.ordination.scores(2)

    # Load data as computed with vegan 2.0-8
    vegan_species = np.loadtxt(get_data_path(
        'example2_species_scaling2_from_vegan'))
    npt.assert_almost_equal(scores.species, vegan_species, decimal=6)

    vegan_site = np.loadtxt(get_data_path(
        'example2_site_scaling2_from_vegan'))
    npt.assert_almost_equal(scores.site, vegan_site, decimal=6)
def setup(self): """Data from table 11.3 in Legendre & Legendre 1998.""" Y = np.loadtxt(get_data_path('example2_Y')) X = np.loadtxt(get_data_path('example2_X')) self.ordination = RDA(Y, X, ['Site0', 'Site1', 'Site2', 'Site3', 'Site4', 'Site5', 'Site6', 'Site7', 'Site8', 'Site9'], ['Species0', 'Species1', 'Species2', 'Species3', 'Species4', 'Species5'])
def setUp(self):
    self.positives = [get_data_path(e) for e in [
        'stockholm_extensive',
        'stockholm_minimal',
        'stockholm_rna',
        'stockholm_runon_gf_with_whitespace',
        'stockholm_runon_gf_no_whitespace',
        'stockholm_duplicate_sequence_names',
        'stockholm_duplicate_gr',
        'stockholm_duplicate_gc',
        'stockholm_invalid_nonexistent_gr',
        'stockholm_invalid_nonexistent_gs',
        'stockholm_no_data',
        'stockholm_blank_lines',
        'stockholm_differing_gc_data_length',
        'stockholm_differing_gr_data_length',
        'stockholm_differing_seq_lengths',
        'stockholm_duplicate_sequence_names',
        'stockholm_duplicate_tree_ids',
        'stockholm_extensive_mixed',
        'stockholm_invalid_data_type',
        'stockholm_malformed_gf_line',
        'stockholm_malformed_gs_line',
        'stockholm_malformed_gr_line',
        'stockholm_malformed_gc_line',
        'stockholm_malformed_data_line',
        'stockholm_metadata_only',
        'stockholm_multiple_msa',
        'stockholm_multiple_trees',
        'stockholm_runon_gs_with_whitespace',
        'stockholm_runon_gs_no_whitespace',
        'stockholm_single_tree_with_id',
        'stockholm_single_tree_without_id',
        'stockholm_whitespace_only_lines',
        'stockholm_all_data_types',
        'stockholm_two_of_each_metadata',
        'stockholm_data_only',
        'stockholm_nonstring_labels',
        'stockholm_missing_reference_items',
        'stockholm_multiple_references',
        'stockholm_runon_references',
        'stockholm_runon_references_mixed',
        'stockholm_single_reference',
        'stockholm_missing_reference_items',
        'stockholm_missing_rn_tag',
        'stockholm_different_padding',
        'stockholm_multi_line_tree_no_id',
        'stockholm_multi_line_tree_with_id',
        'stockholm_multiple_multi_line_trees'
    ]]

    self.negatives = [get_data_path(e) for e in [
        'stockholm_missing_header',
        'empty',
        'whitespace_only'
    ]]
def setup(self): """Data from table 11.3 in Legendre & Legendre 1998.""" Y = np.loadtxt(get_data_path("example2_Y")) X = np.loadtxt(get_data_path("example2_X")) self.ordination = RDA( Y, X, ["Site0", "Site1", "Site2", "Site3", "Site4", "Site5", "Site6", "Site7", "Site8", "Site9"], ["Species0", "Species1", "Species2", "Species3", "Species4", "Species5"], )
def test_stockholm_runon_gs(self):
    fp = get_data_path('stockholm_runon_gs_no_whitespace')
    msa = _stockholm_to_tabular_msa(fp, constructor=DNA)
    exp = TabularMSA([DNA('ATCGTTCAGTG',
                          metadata={'LN': 'This is a runon GS line.'})],
                     index=['seq1'])
    self.assertEqual(msa, exp)

    fp = get_data_path('stockholm_runon_gs_with_whitespace')
    msa = _stockholm_to_tabular_msa(fp, constructor=DNA)
    self.assertEqual(msa, exp)
def test_from_file_error(self):
    for test_path in self.fferror_test_paths:
        with open(get_data_path(test_path), 'U') as f:
            with npt.assert_raises(FileFormatError):
                OrdinationResults.from_file(f)

    for test_path in self.verror_test_paths:
        with open(get_data_path(test_path), 'U') as f:
            with npt.assert_raises(ValueError):
                OrdinationResults.from_file(f)
def test_balance_basis_large1(self):
    fname = get_data_path('large_tree.nwk', subfolder='data')
    t = TreeNode.read(fname)
    # note that the basis is in reverse level order
    exp_basis = np.loadtxt(
        get_data_path('large_tree_basis.txt', subfolder='data'))
    res_basis, res_keys = balance_basis(t)
    npt.assert_allclose(exp_basis[:, ::-1], res_basis)
def test_create_config_overwrite(self):
    cfg_obs = _create_config()
    cfg_obs.read(get_data_path('default.cfg'))
    # overwrite "db_path"
    cfg_obs.read(get_data_path('param.cfg'))

    cfg_exp = ConfigParser()
    cfg_exp['DEFAULT']['db_path'] = 'db'
    cfg_exp.add_section('prodigal')
    cfg_exp['prodigal']['-t'] = '1'

    self.assertEqual(cfg_obs, cfg_exp)
def test_scaling1(self):
    scores = rda(self.Y, self.X, scaling=1)
    sample_constraints = pd.DataFrame(np.loadtxt(
        get_data_path('example2_sample_constraints_scaling1')))

    # Load data as computed with vegan 2.0-8
    vegan_features = pd.DataFrame(
        np.loadtxt(get_data_path(
            'example2_species_scaling1_from_vegan')),
        index=self.feature_ids,
        columns=self.pc_ids)

    vegan_samples = pd.DataFrame(
        np.loadtxt(get_data_path(
            'example2_site_scaling1_from_vegan')),
        index=self.sample_ids,
        columns=self.pc_ids)

    sample_constraints = pd.DataFrame(
        np.loadtxt(get_data_path(
            'example2_sample_constraints_scaling1')),
        index=self.sample_ids,
        columns=self.pc_ids)

    mat = np.loadtxt(get_data_path('example2_biplot_scaling1'))
    cropped_pc_ids = self.pc_ids[:mat.shape[1]]
    biplot_scores = pd.DataFrame(mat,
                                 index=self.env_ids,
                                 columns=cropped_pc_ids)

    proportion_explained = pd.Series([0.44275783, 0.25614586,
                                      0.15280354, 0.10497021,
                                      0.02873375, 0.00987052,
                                      0.00471828],
                                     index=self.pc_ids)

    eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                         1.680705, 0.577350, 0.275984],
                        index=self.pc_ids)

    exp = OrdinationResults(
        'RDA', 'Redundancy Analysis',
        samples=vegan_samples,
        features=vegan_features,
        sample_constraints=sample_constraints,
        biplot_scores=biplot_scores,
        proportion_explained=proportion_explained,
        eigvals=eigvals)

    assert_ordination_results_equal(scores, exp,
                                    ignore_directionality=True,
                                    decimal=6)
def setup(self): """Data from table 11.3 in Legendre & Legendre 1998 (p. 590). Loaded results as computed with vegan 2.0-8 and compared with table 11.5 if also there.""" Y = np.loadtxt(get_data_path('example3_Y')) X = np.loadtxt(get_data_path('example3_X')) self.ordination = CCA(Y, X[:, :-1], ['Site0', 'Site1', 'Site2', 'Site3', 'Site4', 'Site5', 'Site6', 'Site7', 'Site8', 'Site9'], ['Species0', 'Species1', 'Species2', 'Species3', 'Species4', 'Species5', 'Species6', 'Species7', 'Species8'])
def test_stockholm_mixed_runon_references(self):
    fp = get_data_path('stockholm_runon_references_mixed')
    msa = _stockholm_to_tabular_msa(fp, constructor=DNA)
    exp = TabularMSA(
        [],
        metadata={
            'RN': [OrderedDict([('RC', 'A Runon Comment'),
                                ('RM', '123456789'),
                                ('RT', 'A Runon Title'),
                                ('RA', 'The Author'),
                                ('RL', 'A Location')])]
        })
    self.assertEqual(msa, exp)
def setUp(self):
    self.jbe_con = get_data_path('1jbeA.psicov')
    self.jbe_pdb = get_data_path('1jbeA_clean.pdb')
    self.qjp_con = get_data_path('1qjpA.psicov')
    self.qjp_pdb = get_data_path('1qjpA_clean.pdb')
    self.real_n_con_qjp = {'lr': 6441, 'sr': 2337, 'all': 8778}
    self.real_n_con_jbe = {'lr': 4742, 'sr': 2025, 'all': 6767}
    self.positive_params = [{'-t': 'all', '-l': 1},
                            {'-t': 'all', '-l': 2},
                            {'-t': 'all', '-l': 5},
                            {'-t': 'all', '-l': 10},
                            {'-t': 'lr', '-l': 1},
                            {'-t': 'lr', '-l': 2},
                            {'-t': 'lr', '-l': 10},
                            {'-t': 'sr', '-l': 2},
                            {'-t': 'sr', '-l': 10}]
def test_from_seralized_results(self):
    # the current implementation of ordination results loses some
    # information, test that pcoa_biplot works fine regardless
    results = OrdinationResults.read(get_data_path('PCoA_skbio'))

    serialized = pcoa_biplot(results, self.descriptors)
    in_memory = pcoa_biplot(self.ordination, self.descriptors)

    assert_ordination_results_equal(serialized, in_memory,
                                    ignore_directionality=True,
                                    ignore_axis_labels=True,
                                    ignore_method_names=True)
def setUp(self):
    self.positives = [get_data_path(e) for e in [
        'blast7_default_single_line',
        'blast7_default_multi_line',
        'blast7_custom_minimal',
        'blast7_custom_single_line',
        'blast7_custom_multi_line',
        'blast7_custom_mixed_nans',
        'blast7_invalid_differing_fields',
        'blast7_invalid_no_data',
        'blast7_invalid_too_many_columns',
        'legacy9_and_blast7_default',
        'legacy9_invalid_too_many_columns',
        'legacy9_mixed_nans',
        'legacy9_multi_line',
        'legacy9_single_line']]

    self.negatives = [get_data_path(e) for e in [
        'blast7_invalid_gibberish',
        'blast7_invalid_for_sniffer',
        'blast7_invalid_for_sniffer_2',
        'empty']]
def test_custom_valid_multi_line(self):
    fp = get_data_path("blast7_custom_multi_line")
    df = _blast7_to_data_frame(fp)
    exp = pd.DataFrame([[1.0, 8.0, 3.0, 10.0, 8.0, 0.0, 1.0, 'query1',
                         'subject2'],
                        [2.0, 5.0, 2.0, 15.0, 8.0, 0.0, 2.0, 'query1',
                         'subject2'],
                        [1.0, 6.0, 2.0, 12.0, 8.0, 0.0, 1.0, 'query1',
                         'subject2']],
                       columns=['qstart', 'qend', 'sstart', 'send',
                                'nident', 'mismatch', 'sframe',
                                'qaccver', 'saccver'])
    assert_data_frame_almost_equal(df, exp)
def test__qiime2_rclr(self):
    """Tests q2-rclr matches standalone rclr."""
    # make mock table to write
    samps_ids = ['s%i' % i for i in range(self.cdata.shape[0])]
    feats_ids = ['f%i' % i for i in range(self.cdata.shape[1])]
    table_test = Table(self.cdata.T, feats_ids, samps_ids)
    # write table
    in_ = get_data_path('test.biom', subfolder='data')
    out_path = os_path_sep.join(in_.split(os_path_sep)[:-1])
    test_path = os.path.join(out_path, 'rclr-test.biom')
    with biom_open(test_path, 'w') as wf:
        table_test.to_hdf5(wf, "test")
    # run standalone
    runner = CliRunner()
    result = runner.invoke(sdc.commands['rclr'],
                           ['--in-biom', test_path,
                            '--output-dir', out_path])
    out_table = get_data_path('rclr-table.biom', subfolder='data')
    res_table = load_table(out_table)
    standalone_mat = res_table.matrix_data.toarray().T
    # check that exit code was 0 (indicating success)
    try:
        self.assertEqual(0, result.exit_code)
    except AssertionError:
        ex = result.exception
        error = Exception('Command failed with non-zero exit code')
        raise error.with_traceback(ex.__traceback__)
    # run QIIME2
    q2_table_test = Artifact.import_data("FeatureTable[Frequency]",
                                         table_test)
    q2_res = rclr_transformation(q2_table_test).rclr_table.view(Table)
    q2_res_mat = q2_res.matrix_data.toarray().T
    # check same and check both correct
    npt.assert_allclose(standalone_mat, q2_res_mat)
    npt.assert_allclose(standalone_mat, self.true)
    npt.assert_allclose(q2_res_mat, self.true)
def test_stockholm_to_msa_different_padding(self):
    fp = get_data_path('stockholm_different_padding')
    msa = _stockholm_to_tabular_msa(fp, constructor=DNA)
    exp = TabularMSA(
        [],
        metadata={
            'RN': [OrderedDict([('RC',
                                 'A Runon Comment Without Whitespace')]),
                   OrderedDict([('RC',
                                 'A Runon Comment With Whitespace')])]
        })
    self.assertEqual(msa, exp)
def test_proportional_artifact(self):
    from qiime2.plugins.gneiss.methods import correlation_clustering
    table_f = get_data_path("feature-table.qza")
    in_table = qiime2.Artifact.load(table_f)
    res = correlation_clustering(in_table, pseudocount=0.1)
    res_clust = res.clustering._view(TreeNode)

    exp_str = ('((F4:0.228723591874,(F5:0.074748541601,'
               '(F1:0.00010428164962,F2:0.00010428164962)'
               'y4:0.0746442599513)y3:0.153975050273)'
               'y1:0.70266138894,(F3:0.266841737789,F6:0.266841737789)'
               'y2:0.664543243026)y0;\n')
    exp_tree = TreeNode.read([exp_str])
    self.assert_tree_almost_equals(exp_tree, res_clust)
def test_assign_ids_intersect(self):
    from qiime2.plugins.gneiss.methods import assign_ids
    tree_f = get_data_path("tree_extra.qza")
    table_f = get_data_path("polytomy_table.qza")
    tree = qiime2.Artifact.load(tree_f)
    table = qiime2.Artifact.load(table_f)
    output = assign_ids(input_tree=tree, input_table=table)
    res_tree = output.output_tree._view(TreeNode)
    res_table = output.output_table._view(pd.DataFrame)

    for n in res_tree.levelorder(include_self=True):
        self.assertTrue(n.name is not None)

    exp = list('abde')
    res = [n.name for n in res_tree.tips()]
    self.assertEqual(exp, res)

    exp = pd.DataFrame({'s1': [1.0, 2.0, 4.0, 5.0],
                        's2': [1.0, 5.0, 6.0, 0.0]},
                       index=['a', 'b', 'd', 'e']).T
    pdt.assert_frame_equal(exp, res_table)
def test_valid_nan_handling(self):
    fp = get_data_path('blast6_custom_mixed_nans')
    df = _blast6_to_data_frame(fp, columns=['qacc', 'qseq', 'btop',
                                            'sframe', 'ppos',
                                            'positive', 'gaps'])
    exp = pd.DataFrame([[np.nan, 'PAAWWWWW', 8.0, 1.0, 100.00, np.nan,
                         0.0],
                        ['query1', np.nan, 8.0, 1.0, np.nan, 8.0, 0.0]],
                       columns=['qacc', 'qseq', 'btop', 'sframe',
                                'ppos', 'positive', 'gaps'])
    assert_data_frame_almost_equal(df, exp)
def test_getoptS_small(self): """Test singular values from U and V.""" data = loadmat(get_data_path('small_test.mat')) M_E = np.array(data['M_E'].todense()) E = data['E'] x = data['x'] y = data['y'] res = singular_values(x, y, M_E, E) exp = np.array([[0.93639499, 0.07644197, -0.02828782], [-0.03960841, 0.60787383, 0.00521257], [0.00729038, 0.00785834, 0.67853083]]) npt.assert_allclose(res, exp, atol=1e-5)
def test_getoptS_small(self):
    # warning: this test must ALWAYS pass
    data = loadmat(get_data_path('small_test.mat'))

    M_E = np.array(data['M_E'].todense())
    E = data['E']

    x = data['x']
    y = data['y']
    res = getoptS(x, y, M_E, E)
    exp = np.array([[0.93639499, 0.07644197, -0.02828782],
                    [-0.03960841, 0.60787383, 0.00521257],
                    [0.00729038, 0.00785834, 0.67853083]])
    npt.assert_allclose(res, exp, atol=1e-5)
def setUp(self):
    self.valid_files = [
        ([('f o o', 'bar\n\nbaz', 'AACCGG', [16, 17, 18, 19, 20, 21]),
          ('bar', 'baz foo', 'TTGGCC', [23, 22, 21, 20, 19, 18]),
          ('ba\n\t\tz', 'foo bar', 'GATTTC', [20, 21, 22, 23, 24, 18])],
         [({'variant': 'sanger'},
           get_data_path('fastq_writer_sanger_defaults')),
          ({'phred_offset': 33},
           get_data_path('fastq_writer_sanger_defaults')),
          ({'variant': 'illumina1.8'},
           get_data_path('fastq_writer_sanger_defaults')),
          ({'variant': 'illumina1.3'},
           get_data_path('fastq_writer_illumina1.3_defaults')),
          ({'variant': 'sanger',
            'id_whitespace_replacement': '%',
            'description_newline_replacement': '^'},
           get_data_path('fastq_writer_sanger_non_defaults'))]),
    ]
def test_no_collapsed_nodes(self):
    st = TreeNode.read(get_data_path(self.newick))
    tr, ts = diamondtree(st, breadth_scaling=6, depth_scaling=30,
                         cladecolors={'y5': '#FF0000',
                                      'y18': '#0000FF'},
                         bgcolors={'y29': '#00FF00'})
    tr.render(file_name=self.fname, tree_style=ts)
    self.assertTrue(os.path.exists(self.fname))
    self.assertTrue(os.path.getsize(self.fname) > 0)
def test_standalone_rclr(self):
    """Test the standalone rclr."""
    # make mock table to write
    samps_ids = ['s%i' % i for i in range(self.cdata.shape[0])]
    feats_ids = ['f%i' % i for i in range(self.cdata.shape[1])]
    table_test = Table(self.cdata.T, feats_ids, samps_ids)
    # write table
    in_ = get_data_path('test.biom', subfolder='rpca_data')
    out_path = os_path_sep.join(in_.split(os_path_sep)[:-1])
    test_path = os.path.join(out_path, 'rclr-test.biom')
    with biom_open(test_path, 'w') as wf:
        table_test.to_hdf5(wf, "test")
    # run standalone
    runner = CliRunner()
    result = runner.invoke(sdc.commands['rclr'],
                           ['--in-biom', test_path,
                            '--output-dir', out_path])
    out_table = get_data_path('rclr-table.biom', subfolder='rpca_data')
    res_table = load_table(out_table)
    test_cmat = res_table.matrix_data.toarray().T
    npt.assert_allclose(test_cmat, self.true)
    # Lastly, check that exit code was 0 (indicating success)
    CliTestCase().assertExitCode(0, result)
def test_fastq_to_generator_invalid_files_illumina(self):
    # files that should be invalid for illumina1.3 and illumina1.8 variants
    fps = [get_data_path(fp) for fp in
           ['sanger_full_range_original_sanger.fastq',
            'solexa_full_range_original_solexa.fastq']]

    for fp in fps:
        with self.assertRaisesRegexp(ValueError,
                                     r'out of range \[0, 62\]'):
            list(_fastq_to_generator(fp, variant='illumina1.3'))
        with self.assertRaisesRegexp(ValueError,
                                     r'out of range \[0, 62\]'):
            list(_fastq_to_generator(fp, variant='illumina1.8'))
def setUp(self): """Data from table 11.3 in Legendre & Legendre 1998 (p. 590). Loaded results as computed with vegan 2.0-8 and compared with table 11.5 if also there.""" self.feature_ids = [ 'Feature0', 'Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5', 'Feature6', 'Feature7', 'Feature8' ] self.sample_ids = [ 'Sample0', 'Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', 'Sample6', 'Sample7', 'Sample8', 'Sample9' ] self.env_ids = ['Constraint0', 'Constraint1', 'Constraint2'] self.pc_ids = [ 'CCA1', 'CCA2', 'CCA3', 'CCA4', 'CCA5', 'CCA6', 'CCA7', 'CCA8', 'CCA9' ] self.Y = pd.DataFrame(np.loadtxt(get_data_path('example3_Y')), columns=self.feature_ids, index=self.sample_ids) self.X = pd.DataFrame(np.loadtxt(get_data_path('example3_X'))[:, :-1], columns=self.env_ids, index=self.sample_ids)
def test_standalone_rpca(self): """Checks the output produced by gemelli's RPCA standalone script. This is more of an "integration test" than a unit test -- the details of the algorithm used by the standalone RPCA script are checked in more detail in gemelli/tests/test_optspace.py, etc. """ in_ = get_data_path('test.biom', subfolder='rpca_data') out_ = os_path_sep.join(in_.split(os_path_sep)[:-1]) runner = CliRunner() result = runner.invoke(sdc.commands['rpca'], ['--in-biom', in_, '--output-dir', out_]) # Read the results dist_res = pd.read_csv(get_data_path('distance-matrix.tsv', subfolder='rpca_data'), sep='\t', index_col=0) ord_res = OrdinationResults.read( get_data_path('ordination.txt', subfolder='rpca_data')) # Read the expected results dist_exp = pd.read_csv(get_data_path('expected-distance-matrix.tsv', subfolder='rpca_data'), sep='\t', index_col=0) ord_exp = OrdinationResults.read( get_data_path('expected-ordination.txt', subfolder='rpca_data')) # Check that the distance matrix matches our expectations assert_array_almost_equal(dist_res.values, dist_exp.values) # Check that the ordination results match our expectations -- checking # each value for both features and samples assert_ordinationresults_equal(ord_res, ord_exp) # Lastly, check that gemelli's exit code was 0 (indicating success) CliTestCase().assertExitCode(0, result)
def test_scaling1(self):
    scores = rda(self.Y, self.X, scaling=1)
    biplot_scores = pd.DataFrame(np.loadtxt(
        get_data_path('example2_biplot_scaling1')))

    sample_constraints = pd.DataFrame(np.loadtxt(
        get_data_path('example2_sample_constraints_scaling1')))

    # Load data as computed with vegan 2.0-8
    vegan_features = pd.DataFrame(
        np.loadtxt(get_data_path(
            'example2_species_scaling1_from_vegan')),
        index=self.feature_ids,
        columns=self.pc_ids)

    vegan_samples = pd.DataFrame(
        np.loadtxt(get_data_path(
            'example2_site_scaling1_from_vegan')),
        index=self.sample_ids,
        columns=self.pc_ids)

    sample_constraints = pd.DataFrame(
        np.loadtxt(get_data_path(
            'example2_sample_constraints_scaling1')),
        index=self.sample_ids,
        columns=self.pc_ids)

    biplot_scores = pd.DataFrame(
        np.loadtxt(get_data_path('example2_biplot_scaling1')))

    # These are wrong. See issue #1002
    proportion_explained = pd.Series([0.44275783, 0.25614586,
                                      0.15280354, 0.10497021,
                                      0.02873375, 0.00987052,
                                      0.00471828],
                                     index=self.pc_ids)

    # These are wrong. See issue #1002
    eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                         1.680705, 0.577350, 0.275984],
                        index=self.pc_ids)

    exp = OrdinationResults(
        'RDA', 'Redundancy Analysis',
        samples=vegan_samples,
        features=vegan_features,
        sample_constraints=sample_constraints,
        biplot_scores=biplot_scores,
        proportion_explained=proportion_explained,
        eigvals=eigvals)

    assert_ordination_results_equal(scores, exp,
                                    ignore_directionality=True,
                                    ignore_biplot_scores_labels=True,
                                    decimal=6)
def test_standalone_rpca_rank_est(self): """Checks the standalone rank estimate is used instead of a explicit rank setting. """ in_ = get_data_path('test.biom') out_ = os_path_sep.join(in_.split(os_path_sep)[:-1]) runner = CliRunner() result = runner.invoke(sdc.commands['auto-rpca'], ['--in-biom', in_, '--output-dir', out_]) # Read the results dist_res = pd.read_csv(get_data_path('distance-matrix.tsv'), sep='\t', index_col=0) ord_res = OrdinationResults.read(get_data_path('ordination.txt')) # Read the expected results file_ = 'expected-est-distance-matrix.tsv' dist_exp = pd.read_csv(get_data_path(file_), sep='\t', index_col=0) ord_exp = OrdinationResults.read(get_data_path( 'expected-est-ordination.txt')) # Check that the distance matrix matches our expectations assert_array_almost_equal(dist_res.values, dist_exp.values) # Check that the ordination results match our expectations -- checking # each value for both features and samples assert_deicode_ordinationresults_equal(ord_res, ord_exp) # Lastly, check that DEICODE's exit code was 0 (indicating success) try: self.assertEqual(0, result.exit_code) except AssertionError: ex = result.exception error = Exception('Command failed with non-zero exit code') raise error.with_traceback(ex.__traceback__)
def test_msa_to_stockholm_data_only(self):
    fp = get_data_path('stockholm_data_only')
    msa = TabularMSA([RNA('ACUCCGACAUGCUCC'),
                      RNA('UAGUGCCGAACGCUG'),
                      RNA('GUGUGGGCGUGAUUC')],
                     index=['seq1', 'seq2', 'seq3'])
    fh = io.StringIO()
    _tabular_msa_to_stockholm(msa, fh)
    obs = fh.getvalue()
    fh.close()
    with io.open(fp) as fh:
        exp = fh.read()
    self.assertEqual(obs, exp)
def test_lme_artifact(self):
    from qiime2.plugins.gneiss.visualizers import lme_regression
    table_f = get_data_path("lme_balances.qza")
    tree_f = get_data_path("lme_tree.qza")
    metadata_f = get_data_path("test_lme_metadata.txt")

    in_table = qiime2.Artifact.load(table_f)
    in_tree = qiime2.Artifact.load(tree_f)
    in_metadata = qiime2.Metadata(pd.read_table(metadata_f, index_col=0))

    viz = lme_regression(in_table, in_tree, in_metadata,
                         'ph', 'host_subject_id')
    os.mkdir('regression_summary_dir')
    viz.visualization.export_data('regression_summary_dir')
    res_coef = pd.read_csv(os.path.join('regression_summary_dir',
                                        'coefficients.csv'),
                           index_col=0)
    self.assertAlmostEqual(res_coef.loc['y0', 'groups RE'],
                           1.105630e+00, places=5)
    shutil.rmtree('regression_summary_dir')
def test_msa_to_stockholm_multiple_trees(self):
    fp = get_data_path('stockholm_multiple_trees')
    msa = TabularMSA([],
                     metadata=OrderedDict(
                         [('NH', OrderedDict([('tree1', 'ABCD'),
                                              ('tree2', 'EFGH'),
                                              ('tree3', 'IJKL')]))]))
    fh = io.StringIO()
    _tabular_msa_to_stockholm(msa, fh)
    obs = fh.getvalue()
    fh.close()
    with io.open(fp) as fh:
        exp = fh.read()
    self.assertEqual(obs, exp)
def test_parse_easel_output(self):
    obs = parse_easel_output(self.fp_infernal)
    self.assertEqual('INFERNAL', obs['software'].iloc[0])
    self.assertEqual('1.1.2', obs['software version'].iloc[0])
    self.assertEqual('Markergenes/FSSC/allFSSC.cm',
                     obs['fp_query'].iloc[0])
    self.assertEqual('Sequences_Fusarium/taxid_99000016/FW16.genome.fna',
                     obs['fp_target'].iloc[0])
    with open(get_data_path('easel2sam/exp_parse_easle_output.txt')) as f:
        exp = ''.join(f.readlines())
    assert_frame_equal(_str2pd(exp), obs)

    obs = parse_easel_output(self.fp_nohit)
    self.assertEqual(obs.shape[0], 0)
def test_standalone_rpca_rank_est(self): """Checks the standalone RPCA rank estimate is used instead of a explicit rank setting. """ in_ = get_data_path('test.biom', subfolder='rpca_data') out_ = os_path_sep.join(in_.split(os_path_sep)[:-1]) runner = CliRunner() result = runner.invoke(sdc.commands['auto-rpca'], ['--in-biom', in_, '--output-dir', out_]) # Read the results dist_res = pd.read_csv(get_data_path('distance-matrix.tsv', subfolder='rpca_data'), sep='\t', index_col=0) ord_res = OrdinationResults.read( get_data_path('ordination.txt', subfolder='rpca_data')) # Read the expected results file_ = 'expected-est-distance-matrix.tsv' dist_exp = pd.read_csv(get_data_path(file_, subfolder='rpca_data'), sep='\t', index_col=0) ord_exp = OrdinationResults.read( get_data_path('expected-est-ordination.txt', subfolder='rpca_data')) # Check that the distance matrix matches our expectations assert_array_almost_equal(dist_res.values, dist_exp.values) # Check that the ordination results match our expectations -- checking # each value for both features and samples assert_ordinationresults_equal(ord_res, ord_exp) # Lastly, check that gemelli's exit code was 0 (indicating success) CliTestCase().assertExitCode(0, result)
def test_stockholm_maintains_order(self):
    fp = get_data_path('stockholm_two_of_each_metadata')
    msa = _stockholm_to_tabular_msa(fp, constructor=DNA)

    msa_order = list(msa.metadata.items())
    exp_order = [('NM', 'Kestrel Gorlick'),
                 ('DT', 'February 5th, 2016')]
    self.assertEqual(msa_order, exp_order)

    msa_order = list(msa[0].metadata.items())
    exp_order = [('AL', 'ABCD'), ('NS', '1234')]
    self.assertEqual(msa_order, exp_order)

    msa_order = list(msa.positional_metadata.columns)
    exp_order = ['SS_cons', 'AS_cons']
    self.assertEqual(msa_order, exp_order)

    msa_order = list(msa[0].positional_metadata.columns)
    exp_order = ['SS', 'AS']
    self.assertEqual(msa_order, exp_order)
def test_pcoa_biplot_from_ape(self):
    """Test against a reference implementation from R's ape package

    The test data was generated with the R script below and using a
    modified version of pcoa.biplot that returns the U matrix.

    library(ape)
    # files can be found in the test data folder of the ordination module
    y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
    dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)
    h = pcoa(dm)

    # biplot.pcoa will only calculate the biplot for two axes at a time
    acc = NULL
    for (axes in c(1, 3, 5, 7)) {
        new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                          rn = rep('.', length(colnames(dm))))
        if(is.null(acc)) {
            acc = new
        }
        else {
            b = acc
            acc <- cbind(acc, new)
        }
    }
    write.csv(acc, file='PCoA_biplot_projected_descriptors')
    """
    obs = pcoa_biplot(self.ordination, self.descriptors)

    # we'll build a dummy ordination results object based on the expected;
    # the main thing we'll compare and modify is the features dataframe
    exp = deepcopy(obs)

    fp = get_data_path('PCoA_biplot_projected_descriptors')
    # R won't calculate the last dimension, so pad with zeros to make the
    # arrays comparable
    exp.features = pd.read_table(fp, sep=',', index_col=0)
    exp.features['Axis.9'] = np.zeros_like(exp.features['Axis.8'])

    # make the order comparable
    exp.features = exp.features.reindex(obs.features.index)

    assert_ordination_results_equal(obs, exp, ignore_directionality=True,
                                    ignore_axis_labels=True)
def test_parse(self):
    imd1 = IntervalMetadata(None)
    imd1.add(bounds=[(3588441, 3588818)],
             metadata={'ncRNA_class': 'RNaseP_bact_a',
                       'type': 'ncRNA',
                       'strand': '-',
                       'db_xref': 'RF00010',
                       'source': 'Rfam'})
    imd1.add(bounds=[(3355449, 3355633)],
             metadata={'ncRNA_class': '5S_rRNA',
                       'type': 'rRNA',
                       'strand': '+',
                       'product': '5s_rRNA',
                       'db_xref': 'RF00001',
                       'source': 'Rfam'})

    imd2 = IntervalMetadata(None)
    imd2.add(bounds=[(85215, 85384)],
             metadata={'ncRNA_class': 'LSU_rRNA_bacteria',
                       'type': 'rRNA',
                       'strand': '+',
                       'product': '23s_rRNA',
                       'db_xref': 'RF02541',
                       'source': 'Rfam'})

    imd3 = IntervalMetadata(None)
    imd3.add(bounds=[(8739, 8777)],
             metadata={'ncRNA_class': 'SSU_rRNA_bacteria',
                       'type': 'rRNA',
                       'strand': '+',
                       'product': '16s_rRNA',
                       'db_xref': 'RF00177',
                       'source': 'Rfam'})

    exp = (('NC_016822.1', imd1),
           ('NC_016833.1', imd2),
           ('NC_016834.1', imd3))

    fp = get_data_path('cmscan.txt')
    gen = _generator(fp)

    for (exp_id, exp_imd), (obs_id, obs_imd) in zip(exp, gen):
        self.assertEqual(exp_id, obs_id)
        self.assertEqual(exp_imd, obs_imd)
def test_standalone_rpca(self): """Checks the output produced by gemelli's standalone script. This is more of an "integration test" than a unit test -- the details of the algorithm used by the standalone CTF script are checked in more detail in gemelli/tests/test_factorization.py. """ in_table = get_data_path('test-small.biom') in_meta = get_data_path('test-small.tsv') out_ = os_path_sep.join(in_table.split(os_path_sep)[:-1]) runner = CliRunner() result = runner.invoke(standalone_ctf, ['--in-biom', in_table, '--sample-metadata-file', in_meta, '--individual-id-column', 'host_subject_id', '--state-column-1', 'context', '--output-dir', out_]) # check exit code was 0 (indicating success) CliTestCase().assertExitCode(0, result) # Read the results samp_res = pd.read_csv( get_data_path('context-subject-ordination.tsv'), sep='\t', index_col=0) feat_res = pd.read_csv( get_data_path('context-features-ordination.tsv'), sep='\t', index_col=0) # Read the expected results samp_exp = pd.read_csv( get_data_path('expected-context-subject-ordination.tsv'), sep='\t', index_col=0) feat_exp = pd.read_csv( get_data_path('expected-context-features-ordination.tsv'), sep='\t', index_col=0) # Check that the distance matrix matches our expectations comp_col = ['PC1', 'PC2', 'PC3'] cent_ = samp_res[comp_col].mean().values.max() self.assertAlmostEqual(cent_, 0) cent_ = feat_res[comp_col].mean().values.max() self.assertAlmostEqual(cent_, 0) # check matched assert_allclose(absolute_sort(samp_res[comp_col].values), absolute_sort(samp_exp[comp_col].values), atol=.5) assert_allclose(absolute_sort(feat_res[comp_col].values), absolute_sort(feat_exp[comp_col].values), atol=.5)
def test_msa_to_stockholm_nonstring_values(self):
    fp = get_data_path('stockholm_nonstring_labels')
    msa = TabularMSA(
        [DNA('ACTG',
             metadata=OrderedDict([(8, 123)]),
             positional_metadata=OrderedDict([(1.0, [1, 2, 3, 4])]))],
        metadata=OrderedDict([(1.3, 2857)]),
        positional_metadata=OrderedDict([(25, [4, 3, 2, 1])]),
        index=[11214])
    fh = io.StringIO()
    _tabular_msa_to_stockholm(msa, fh)
    obs = fh.getvalue()
    fh.close()
    with io.open(fp) as fh:
        exp = fh.read()
    self.assertEqual(obs, exp)