Esempio n. 1
0
    def setUp(self):
        """Create the distance matrices and expected statistics used by tests.

        Stores, as instance attributes: the method and alternative names,
        three minimal 3x3 nested-list matrices, a constant-distance matrix,
        two matrices loaded from text files, and two expected Pearson
        statistics.
        """
        self.methods = 'pearson', 'spearman'
        self.alternatives = 'two-sided', 'greater', 'less'

        # Minimal-size (3x3) dataset kept as native nested Python lists,
        # mixing ints and floats.
        self.minx = [[0, 1, 2],
                     [1, 0, 3],
                     [2, 3, 0]]
        self.miny = [[0, 2, 7],
                     [2, 0, 6],
                     [7, 6, 0]]
        self.minz = [[0, 0.5, 0.25],
                     [0.5, 0, 0.1],
                     [0.25, 0.1, 0]]

        # Every off-diagonal distance is identical. Taken from Figure
        # 10.20(b), pg. 603 in L&L 3rd edition; their example is 4x4 but 3x3
        # is used here so it lines up with the minimal dataset above.
        same = 0.667
        self.no_variation = [[0, same, same],
                             [same, 0, same],
                             [same, same, 0]]

        # Dataset derived from vegan::mantel's example. "veg" holds
        # Bray-Curtis distances from the varespec data ("veg.dist" in the
        # example); "env" holds Euclidean distances from scaled varechem data
        # ("env.dist" in the example).
        veg_fp = get_data_path('mantel_veg_dm_vegan.txt')
        self.veg_dm_vegan = np.loadtxt(veg_fp)
        env_fp = get_data_path('mantel_env_dm_vegan.txt')
        self.env_dm_vegan = np.loadtxt(env_fp)

        # Expected test statistics with method='pearson': x vs. y, then
        # x vs. z.
        self.exp_x_vs_y = 0.7559289
        self.exp_x_vs_z = -0.9897433
Esempio n. 2
0
 def setUp(self):
     """Resolve the SAM fixture paths and build the single-record value.

     ``single_exp`` is a 2-tuple: a protein sequence string followed by a
     dict of header tags, mandatory SAM columns and optional tags.
     """
     self.multi_fp = get_data_path('ecoli_multi.sam')
     self.single_fp = get_data_path('ecoli_single.sam')

     # Sequence is split over several literals purely for line length.
     sequence = ('MANLSGYNFAYLDEQTKRMIRRAILKAVAIPGYQVPFGGREMP'
                 'MPYGWGTGGIQLTASVIGESDVLKVIDQGADDTTNAVSIRNFF'
                 'KRVTGVNTTERTDDATLIQTRHRIPETPLTEDQIIIFQVPIPE'
                 'PLRFIEPRETETRTMHALEEYGVMQVKLYEDIARFGHIATTYA'
                 'YPVKVNGRYVMDPSPIPKFDNPKMDMMPALQLFGAGREKRIYA'
                 'VPPFTHVESLDFDDHPFTVQQWDEPCAICGSTHSYLDEVVLDD'
                 'AGNRMFVCSDTDYCRQQNEAKSQ')
     record = {
         '@HD': 'VN:1.5\tSO:query',
         '@PG': 'PN:DIAMOND',
         '@mm': 'BlastX',
         'QNAME': 'WP_000002278.1',
         'FLAG': 0,
         'RNAME': 'UniRef100_P16688',
         'POS': 1,
         'MAPQ': 255,
         'CIGAR': '281M',
         'RNEXT': '*',
         'PNEXT': 0,
         'TLEN': 0,
         'QUAL': '*',
         'AS': 573,
         'NM': 3,
         'ZR': 1477,
         'ZE': 5.9e-164,
         'ZI': 98,
         'ZL': 281,
         'ZF': 1,
         'ZS': 1,
         'MD': '102V117R54S5',
     }
     self.single_exp = (sequence, record)

     self.header_fp = get_data_path('header.sam')
    def setUp(self):
        """Compute the ordination and load the descriptor table for tests."""
        # Crawford dataset for unweighted UniFrac
        dm_fp = get_data_path('PCoA_sample_data_3')
        distance_matrix = DistanceMatrix.read(dm_fp)
        self.ordination = pcoa(distance_matrix)

        # The table is indexed by its 'Taxon' column and then transposed.
        descriptors_fp = get_data_path('PCoA_biplot_descriptors')
        by_taxon = pd.read_table(descriptors_fp, index_col='Taxon')
        self.descriptors = by_taxon.T
Esempio n. 4
0
    def test_default_valid_multi_line(self):
        """Parse two multi-line files and compare with the expected frames.

        Both the 'blast7_default_multi_line' and 'legacy9_multi_line' files
        are expected to yield the same twelve columns.
        """
        column_names = ('qseqid', 'sseqid', 'pident', 'length',
                        'mismatch', 'gapopen', 'qstart', 'qend',
                        'sstart', 'send', 'evalue', 'bitscore')

        # --- blast7 file ---
        obs = _blast7_to_data_frame(
            get_data_path('blast7_default_multi_line'))
        blast7_rows = [
            ['query1', 'subject2', 70.00, 5.0, 0.0, 0.0, 7.0,
             60.0, 3.0, 100.0, 9e-05, 10.5],
            ['query1', 'subject2', 30.00, 8.0, 0.0, 0.0, 6.0,
             15.0, 1.0, 100.0, 0.053, 12.0],
            ['query1', 'subject2', 90.00, 2.0, 0.0, 0.0, 9.0,
             35.0, 2.0, 100.0, 0.002, 8.3],
        ]
        expected = pd.DataFrame(blast7_rows, columns=list(column_names))
        assert_data_frame_almost_equal(obs, expected)

        # --- legacy9 file ---
        obs = _blast7_to_data_frame(get_data_path('legacy9_multi_line'))
        legacy9_rows = [
            ['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0,
             8.0, 4.0, 10.0, 1e-05, 15.5],
            ['query1', 'subject1', 70.00, 8.0, 0.0, 1.0, 0.0,
             9.0, 5.0, 7.0, 0.231, 7.8],
            ['query1', 'subject1', 90.00, 5.0, 1.0, 1.0, 0.0,
             0.0, 2.0, 10.0, 0.022, 13.0],
        ]
        expected = pd.DataFrame(legacy9_rows, columns=list(column_names))
        assert_data_frame_almost_equal(obs, expected)
Esempio n. 5
0
    def test_any_sequences_to_fasta(self):
        """Write ``self.msa`` as FASTA (and QUAL) and compare with fixtures.

        Covers one call with default parameters and one call with
        non-default replacement characters, line width and a qual handle.
        """
        def read_fixture(name):
            # Return the full text of the named data file.
            with io.open(get_data_path(name)) as handle:
                return handle.read()

        # test writing with default parameters
        with io.StringIO() as out:
            _tabular_msa_to_fasta(self.msa, out)
            obs = out.getvalue()

        exp = read_fixture('fasta_3_seqs_defaults')
        self.assertEqual(obs, exp)

        # test writing with non-defaults
        with io.StringIO() as fasta_out, io.StringIO() as qual_out:
            _tabular_msa_to_fasta(self.msa, fasta_out,
                                  id_whitespace_replacement='*',
                                  description_newline_replacement='+',
                                  max_width=3,
                                  qual=qual_out)
            obs_fasta = fasta_out.getvalue()
            obs_qual = qual_out.getvalue()

        exp_fasta = read_fixture('fasta_3_seqs_non_defaults')
        exp_qual = read_fixture('qual_3_seqs_non_defaults')

        self.assertEqual(obs_fasta, exp_fasta)
        self.assertEqual(obs_qual, exp_qual)
    def setUp(self):
        """Load the varespec (``Y``) and varechem (``X``) tables.

        Data from Väre et al. 1995, DOI: 10.2307/3236351. The first CSV
        column becomes the index, and the index name is cleared on both.
        """
        species = pd.read_csv(get_data_path('varespec.csv'), index_col=0)
        chemistry = pd.read_csv(get_data_path('varechem.csv'), index_col=0)

        for frame in (species, chemistry):
            frame.index.name = None

        self.Y = species
        self.X = chemistry
Esempio n. 7
0
    def test_confirm_betadispr_results(self):
        """Compare permdisp output on the moving pictures data with stored values.

        Runs permdisp grouped by 'BodySite' once with its default test and
        once with ``test='centroid'``, checking each result series.
        """
        dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv'))
        metadata = pd.read_csv(get_data_path('moving_pictures_mf.tsv'),
                               sep='\t')
        metadata = metadata.set_index('#SampleID')

        # Default test first, then centroid; the call order is kept as-is.
        observed_default = permdisp(dm, metadata, column='BodySite')
        observed_centroid = permdisp(dm, metadata, column='BodySite',
                                     test='centroid')

        labels = ['method name', 'test statistic name', 'sample size',
                  'number of groups', 'test statistic', 'p-value',
                  'number of permutations']

        def expected_series(f_value):
            # Only the F-value differs between the two expected results.
            values = ['PERMDISP', 'F-value', 33, 4, f_value, 0.001, 999]
            return pd.Series(data=values, index=labels, dtype='object',
                             name='PERMDISP results')

        expected_default = expected_series(10.1956)
        expected_centroid = expected_series(17.4242)

        self.assert_series_equal(expected_default, observed_default)
        self.assert_series_equal(expected_centroid, observed_centroid)
Esempio n. 8
0
    def setUp(self):
        """Build the count table, ID lists and trees shared by the tests.

        Also loads a table and tree from the 'qiime-191-tt' data directory.
        """
        # 6 rows x 5 columns of counts, with matching row and column IDs.
        counts = [[1, 3, 0, 1, 0],
                  [0, 2, 0, 4, 4],
                  [0, 0, 6, 2, 1],
                  [0, 0, 1, 1, 1],
                  [5, 3, 5, 0, 0],
                  [0, 0, 0, 3, 5]]
        self.table1 = np.array(counts)
        self.sids1 = ['A', 'B', 'C', 'D', 'E', 'F']
        self.oids1 = ['OTU%d' % n for n in range(1, 6)]

        # Tree over OTU1..OTU5.
        newick_t1 = (u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
                     u'0.75,OTU5:0.75):1.25):0.0)root;')
        self.t1 = TreeNode.read(StringIO(newick_t1))

        # Same topology with OTU6 and OTU7 added as extra tips.
        newick_t1_extra = (
            u'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:'
            u'0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0'
            u')root;')
        self.t1_w_extra_tips = TreeNode.read(StringIO(newick_t1_extra))

        newick_t2 = (u'((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)'
                     u'root;')
        self.t2 = TreeNode.read(StringIO(newick_t2))
        self.oids2 = ['OTU%d' % n for n in range(1, 5)]

        # the following table and tree are derived from the QIIME 1.9.1
        # "tiny-test" data
        table_rel = os.path.join('qiime-191-tt', 'otu-table.tsv')
        tt_table_fp = get_data_path(table_rel, 'data')
        tree_rel = os.path.join('qiime-191-tt', 'tree.nwk')
        tt_tree_fp = get_data_path(tree_rel, 'data')

        self.q_table = pd.read_csv(tt_table_fp, sep='\t', skiprows=1,
                                   index_col=0)
        self.q_tree = TreeNode.read(tt_tree_fp)
Esempio n. 9
0
    def setUp(self):
        """Resolve the paths of the positive and negative PHYLIP fixtures."""
        positive_names = (
            'phylip_dna_3_seqs',
            'phylip_single_seq_long',
            'phylip_single_seq_short',
            'phylip_two_chunks',
            'phylip_variable_length_ids',
            'phylip_varied_whitespace_in_seqs',
            'phylip_whitespace_in_header_1',
            'phylip_whitespace_in_header_2',
            'phylip_whitespace_in_header_3',
        )
        self.positives = [get_data_path(name) for name in positive_names]

        # negative tests for sniffer don't include
        # phylip_invalid_empty_line_between_seqs, phylip_invalid_too_few_seqs,
        # phylip_invalid_too_many_seqs - because sniffer only reads first seq
        negative_names = (
            'empty',
            'whitespace_only',
            'phylip_invalid_empty_line_after_header',
            'phylip_invalid_empty_line_before_header',
            'phylip_invalid_header_too_long',
            'phylip_invalid_header_too_short',
            'phylip_invalid_no_header',
            'phylip_invalid_seq_too_long',
            'phylip_invalid_seq_too_short',
            'phylip_invalid_zero_seq_len',
            'phylip_invalid_zero_seqs',
        )
        self.negatives = [get_data_path(name) for name in negative_names]
Esempio n. 10
0
    def setUp(self):
        """Extend the parent fixture with Mantel-specific data.

        Runs the parent ``setUp`` first, then stores the method and
        alternative names, a constant-distance matrix, two matrices loaded
        from text files and two expected Pearson statistics.
        """
        super(MantelTests, self).setUp()

        self.methods = 'pearson', 'spearman'
        self.alternatives = 'two-sided', 'greater', 'less'

        # Every off-diagonal distance is identical. Taken from Figure
        # 10.20(b), pg. 603 in L&L 3rd edition. Their example is 4x4 but 3x3
        # is used here for easy comparison to the minimal dataset
        # (presumably created by the parent setUp -- not visible here).
        same = 0.667
        self.no_variation = [[0, same, same],
                             [same, 0, same],
                             [same, same, 0]]

        # Dataset derived from vegan::mantel's example. "veg" holds
        # Bray-Curtis distances from the varespec data ("veg.dist" in the
        # example); "env" holds Euclidean distances from scaled varechem data
        # ("env.dist" in the example).
        veg_fp = get_data_path('mantel_veg_dm_vegan.txt')
        self.veg_dm_vegan = np.loadtxt(veg_fp)
        env_fp = get_data_path('mantel_env_dm_vegan.txt')
        self.env_dm_vegan = np.loadtxt(env_fp)

        # Expected test statistics with method='pearson': x vs. y, then
        # x vs. z.
        self.exp_x_vs_y = 0.7559289
        self.exp_x_vs_z = -0.9897433
Esempio n. 11
0
    def test_any_sequences_to_fasta(self):
        """Write a sequence collection and an alignment; compare to fixtures.

        For each (writer, object) pair, one call uses default parameters and
        one uses non-default replacement characters, line width and a qual
        handle. Output is compared with the stored expected files.
        """
        for fn, obj in ((_sequence_collection_to_fasta, self.seq_coll),
                        (_alignment_to_fasta, self.align)):
            # test writing with default parameters
            fh = StringIO()
            fn(obj, fh)
            obs = fh.getvalue()
            fh.close()

            # The 'U' mode flag was removed in Python 3.11 and now raises
            # ValueError; default text mode already translates newlines.
            with open(get_data_path('fasta_3_seqs_defaults')) as fh:
                exp = fh.read()

            self.assertEqual(obs, exp)

            # test writing with non-defaults
            fasta_fh = StringIO()
            qual_fh = StringIO()
            fn(obj, fasta_fh, id_whitespace_replacement='*',
               description_newline_replacement='+', max_width=3, qual=qual_fh)
            obs_fasta = fasta_fh.getvalue()
            obs_qual = qual_fh.getvalue()
            fasta_fh.close()
            qual_fh.close()

            with open(get_data_path('fasta_3_seqs_non_defaults')) as fh:
                exp_fasta = fh.read()
            with open(get_data_path('qual_3_seqs_non_defaults')) as fh:
                exp_qual = fh.read()

            self.assertEqual(obs_fasta, exp_fasta)
            self.assertEqual(obs_qual, exp_qual)
Esempio n. 12
0
    def setUp(self):
        """Resolve the contact/PDB fixture paths and store expected counts.

        ``positive_params`` is a list of option dicts, each with a '-t' and
        a '-l' entry.
        """
        self.jbe_con = get_data_path('test_contacts/1jbeA.psicov')
        self.jbe_pdb = get_data_path('test_contacts/1jbeA_clean.pdb')

        self.qjp_con = get_data_path('test_contacts/1qjpA.psicov')
        self.qjp_pdb = get_data_path('test_contacts/1qjpA_clean.pdb')

        self.real_n_con_qjp = {'lr': 6441, 'sr': 2337, 'all': 8778}
        self.real_n_con_jbe = {'lr': 4742, 'sr': 2025, 'all': 6767}

        # ('-t' value, '-l' value) pairs, in the order they must appear.
        option_pairs = (
            ('all', 1), ('all', 2), ('all', 5), ('all', 10),
            ('lr', 1), ('lr', 2), ('lr', 10),
            ('sr', 2), ('sr', 10),
        )
        self.positive_params = [{'-t': kind, '-l': value}
                                for kind, value in option_pairs]
Esempio n. 13
0
    def test_any_sequence_to_fasta(self):
        """Write single sequences of each type and compare with fixtures.

        For each writer/sequence pair, one call uses default parameters and
        one uses non-default replacement characters, ``max_width=1`` and a
        qual handle. Output is compared with the stored expected files.
        """
        # store writer function, sequence object to write, expected
        # fasta filepath for default parameters, expected fasta filepath for
        # non-defaults, and expected qual filepath for non-defaults
        id_ = 'f o o'
        desc = 'b\na\nr'
        test_data = (
            (_biological_sequence_to_fasta,
             Sequence('ACGT', id=id_, description=desc,
                      quality=range(1, 5)),
             ('fasta_single_bio_seq_defaults',
              'fasta_single_bio_seq_non_defaults',
              'qual_single_bio_seq_non_defaults')),
            (_dna_sequence_to_fasta,
             DNA('TACG', id=id_, description=desc, quality=range(4)),
             ('fasta_single_dna_seq_defaults',
              'fasta_single_dna_seq_non_defaults',
              'qual_single_dna_seq_non_defaults')),
            (_rna_sequence_to_fasta,
             RNA('UACG', id=id_, description=desc, quality=range(2, 6)),
             ('fasta_single_rna_seq_defaults',
              'fasta_single_rna_seq_non_defaults',
              'qual_single_rna_seq_non_defaults')),
            (_protein_sequence_to_fasta,
             Protein('PQQ', id=id_, description=desc, quality=[42, 41, 40]),
             ('fasta_single_prot_seq_defaults',
              'fasta_single_prot_seq_non_defaults',
              'qual_single_prot_seq_non_defaults')))

        for fn, obj, fps in test_data:
            defaults_fp, non_defaults_fasta_fp, non_defaults_qual_fp = fps

            # test writing with default parameters
            fh = StringIO()
            fn(obj, fh)
            obs = fh.getvalue()
            fh.close()

            # The 'U' mode flag was removed in Python 3.11 and now raises
            # ValueError; default text mode already translates newlines.
            with open(get_data_path(defaults_fp)) as fh:
                exp = fh.read()

            self.assertEqual(obs, exp)

            # test writing with non-defaults
            fasta_fh = StringIO()
            qual_fh = StringIO()
            fn(obj, fasta_fh, id_whitespace_replacement='-',
               description_newline_replacement='_', max_width=1, qual=qual_fh)
            obs_fasta = fasta_fh.getvalue()
            obs_qual = qual_fh.getvalue()
            fasta_fh.close()
            qual_fh.close()

            with open(get_data_path(non_defaults_fasta_fp)) as fh:
                exp_fasta = fh.read()
            with open(get_data_path(non_defaults_qual_fp)) as fh:
                exp_qual = fh.read()

            self.assertEqual(obs_fasta, exp_fasta)
            self.assertEqual(obs_qual, exp_qual)
    def test_simple(self):
        """Run pcoa on 'PCoA_sample_data_3' and compare with stored results.

        The comparison ignores axis directionality.
        """
        axes = ['PC%d' % n for n in range(1, 10)]
        samples = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                   'PC.593', 'PC.355', 'PC.607', 'PC.634']

        expected_eigvals = pd.Series(
            [0.51236726, 0.30071909, 0.26791207, 0.20898868,
             0.19169895, 0.16054235, 0.15017696, 0.12245775,
             0.0],
            index=axes)
        expected_samples = pd.DataFrame(
            np.loadtxt(get_data_path('exp_PCoAEigenResults_site')),
            index=samples, columns=axes)
        expected_proportions = pd.Series(
            [0.2675738328, 0.157044696, 0.1399118638,
             0.1091402725, 0.1001110485,
             0.0838401162, 0.0784269939,
             0.0639511764, 0.0],
            index=axes)

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=expected_eigvals,
            samples=expected_samples,
            proportion_explained=expected_proportions)

        distance_matrix = DistanceMatrix.read(
            get_data_path('PCoA_sample_data_3'))
        results = pcoa(distance_matrix)

        assert_ordination_results_equal(results, expected_results,
                                        ignore_directionality=True)
Esempio n. 15
0
    def setUp(self):
        """Resolve the paths of the positive and negative FASTQ fixtures."""
        positive_names = (
            'fastq_multi_seq_sanger',
            'fastq_single_seq_illumina1.3',
            'fastq_wrapping_as_illumina_no_description',
            'fastq_wrapping_as_sanger_no_description',
            'fastq_wrapping_original_sanger_no_description',
            'fastq_writer_illumina1.3_defaults',
            'fastq_writer_sanger_defaults',
            'fastq_writer_sanger_non_defaults',
            'illumina_full_range_as_illumina.fastq',
            'illumina_full_range_as_sanger.fastq',
            'illumina_full_range_original_illumina.fastq',
            'longreads_as_illumina.fastq',
            'longreads_as_sanger.fastq',
            'longreads_original_sanger.fastq',
            'misc_dna_as_illumina.fastq',
            'misc_dna_as_sanger.fastq',
            'misc_dna_original_sanger.fastq',
            'misc_rna_as_illumina.fastq',
            'misc_rna_as_sanger.fastq',
            'misc_rna_original_sanger.fastq',
            'sanger_full_range_as_illumina.fastq',
            'sanger_full_range_as_sanger.fastq',
            'sanger_full_range_original_sanger.fastq',
            'solexa_full_range_original_solexa.fastq',
            'wrapping_as_illumina.fastq',
            'wrapping_as_sanger.fastq',
            'wrapping_original_sanger.fastq',
        )
        self.positives = [get_data_path(name) for name in positive_names]

        negative_names = (
            'empty',
            'whitespace_only',
            'fastq_invalid_missing_header',
            'fastq_invalid_missing_seq_data',
            'error_diff_ids.fastq',
            'error_double_qual.fastq',
            'error_double_seq.fastq',
            'error_long_qual.fastq',
            'error_no_qual.fastq',
            'error_qual_del.fastq',
            'error_qual_escape.fastq',
            'error_qual_null.fastq',
            'error_qual_space.fastq',
            'error_qual_tab.fastq',
            'error_qual_unit_sep.fastq',
            'error_qual_vtab.fastq',
            'error_short_qual.fastq',
            'error_spaces.fastq',
            'error_tabs.fastq',
            'error_trunc_at_seq.fastq',
            'error_trunc_at_plus.fastq',
            'error_trunc_at_qual.fastq',
            'error_trunc_in_title.fastq',
            'error_trunc_in_seq.fastq',
            'error_trunc_in_plus.fastq',
            'error_trunc_in_qual.fastq',
        )
        self.negatives = [get_data_path(name) for name in negative_names]
Esempio n. 16
0
 def setUp(self):
     """Resolve the config fixture paths and patch ``click.get_app_dir``.

     The patch makes ``click.get_app_dir`` return the dirname of the
     'misc.cfg' fixture path. It is started here.
     NOTE(review): no matching ``stop()`` is visible in this block --
     presumably tearDown handles it; confirm.
     """
     self.cfg_fps = []

     self.misc_fp = get_data_path("misc.cfg")
     self.misc_fp_local = get_data_path("misc_local.cfg")
     self.param_fp = get_data_path("param.cfg")
     self.param_fp_local = get_data_path("param_local.cfg")

     fixture_dir = dirname(self.misc_fp)
     self.patcher = mock.patch("click.get_app_dir",
                               return_value=fixture_dir)
     self.patcher.start()
Esempio n. 17
0
    def test_filepaths_as_input(self):
        """Call pwmantel with file paths and compare with stored results."""
        paths = [get_data_path(name) for name in ('dm.txt', 'dm2.txt')]

        # Seed NumPy's global RNG right before the call under test.
        np.random.seed(0)
        observed = pwmantel(paths)

        assert_data_frame_almost_equal(observed, self.exp_results_dm_dm2)
Esempio n. 18
0
 def test_phylogenetic_basis_large1(self):
     """Compare phylogenetic_basis on the large tree with the stored basis."""
     subfolder = 'data/phylogeny'

     tree_fp = get_data_path('large_tree.nwk', subfolder=subfolder)
     tree = TreeNode.read(tree_fp)

     basis_fp = get_data_path('large_tree_basis.txt', subfolder=subfolder)
     expected = np.loadtxt(basis_fp)

     # Only the basis is checked; the returned keys are not inspected.
     observed, _keys = phylogenetic_basis(tree)
     npt.assert_allclose(expected, observed)
Esempio n. 19
0
 def test_wrong_amount_of_columns_error(self):
     """Files with too many columns must raise BLAST7FormatError.

     Checks both the blast7 and legacy9 fixtures and matches the error
     message against a "Number of fields" pattern.
     """
     # Raw strings keep the regex escapes `\(` / `\)` intact without
     # relying on Python passing unknown escape sequences through, which
     # emits a DeprecationWarning/SyntaxWarning on newer interpreters.
     fp = get_data_path("blast7_invalid_too_many_columns")
     with assertRaisesRegex(self, BLAST7FormatError,
                            r"Number of fields.*\(2\)"):
         _blast7_to_data_frame(fp)
     fp = get_data_path("legacy9_invalid_too_many_columns")
     with assertRaisesRegex(self, BLAST7FormatError,
                            r"Number of fields.*\(12\)"):
         _blast7_to_data_frame(fp)
 def setUp(self):
     """Resolve the paths of two search-output / FASTA fixture pairs."""
     # First pair: GRAMNEG_T1D_5168
     out_1 = 'test_split_search/GRAMNEG_T1D_5168.out'
     self.file_hhsearch1 = get_data_path(out_1)
     fasta_1 = 'test_split_search/GRAMNEG_T1D_5168.fasta'
     self.file_fasta1 = get_data_path(fasta_1)

     # Second pair: GRAMNEG_T1D_3144_1-275
     out_2 = 'test_split_search/GRAMNEG_T1D_3144_1-275.out'
     self.file_hhsearch2 = get_data_path(out_2)
     fasta_2 = 'test_split_search/GRAMNEG_T1D_3144_1-275.fasta'
     self.file_fasta2 = get_data_path(fasta_2)
Esempio n. 21
0
    def test_scaling2(self):
        """Compare scaling-2 species and site scores with the vegan values."""
        scores = self.ordination.scores(2)

        # Load data as computed with vegan 2.0-8
        species_fp = get_data_path("example2_species_scaling2_from_vegan")
        expected_species = np.loadtxt(species_fp)
        npt.assert_almost_equal(scores.species, expected_species, decimal=6)

        site_fp = get_data_path("example2_site_scaling2_from_vegan")
        expected_site = np.loadtxt(site_fp)
        npt.assert_almost_equal(scores.site, expected_site, decimal=6)
Esempio n. 22
0
 def setup(self):
     """Build the RDA ordination from the example2 data.

     Data from table 11.3 in Legendre & Legendre 1998.
     """
     response = np.loadtxt(get_data_path('example2_Y'))
     explanatory = np.loadtxt(get_data_path('example2_X'))

     site_ids = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4',
                 'Site5', 'Site6', 'Site7', 'Site8', 'Site9']
     species_ids = ['Species0', 'Species1', 'Species2', 'Species3',
                    'Species4', 'Species5']

     self.ordination = RDA(response, explanatory, site_ids, species_ids)
Esempio n. 23
0
    def setUp(self):
        """Resolve the paths of the positive and negative Stockholm fixtures.

        Each fixture name appears once in ``positives``; the original list
        repeated 'stockholm_duplicate_sequence_names' and
        'stockholm_missing_reference_items', which only made any loop over
        the list process those files twice.
        """
        self.positives = [get_data_path(e) for e in [
            'stockholm_extensive',
            'stockholm_minimal',
            'stockholm_rna',
            'stockholm_runon_gf_with_whitespace',
            'stockholm_runon_gf_no_whitespace',
            'stockholm_duplicate_sequence_names',
            'stockholm_duplicate_gr',
            'stockholm_duplicate_gc',
            'stockholm_invalid_nonexistent_gr',
            'stockholm_invalid_nonexistent_gs',
            'stockholm_no_data',
            'stockholm_blank_lines',
            'stockholm_differing_gc_data_length',
            'stockholm_differing_gr_data_length',
            'stockholm_differing_seq_lengths',
            'stockholm_duplicate_tree_ids',
            'stockholm_extensive_mixed',
            'stockholm_invalid_data_type',
            'stockholm_malformed_gf_line',
            'stockholm_malformed_gs_line',
            'stockholm_malformed_gr_line',
            'stockholm_malformed_gc_line',
            'stockholm_malformed_data_line',
            'stockholm_metadata_only',
            'stockholm_multiple_msa',
            'stockholm_multiple_trees',
            'stockholm_runon_gs_with_whitespace',
            'stockholm_runon_gs_no_whitespace',
            'stockholm_single_tree_with_id',
            'stockholm_single_tree_without_id',
            'stockholm_whitespace_only_lines',
            'stockholm_all_data_types',
            'stockholm_two_of_each_metadata',
            'stockholm_data_only',
            'stockholm_nonstring_labels',
            'stockholm_missing_reference_items',
            'stockholm_multiple_references',
            'stockholm_runon_references',
            'stockholm_runon_references_mixed',
            'stockholm_single_reference',
            'stockholm_missing_rn_tag',
            'stockholm_different_padding',
            'stockholm_multi_line_tree_no_id',
            'stockholm_multi_line_tree_with_id',
            'stockholm_multiple_multi_line_trees'
            ]]

        self.negatives = [get_data_path(e) for e in [
            'stockholm_missing_header',
            'empty',
            'whitespace_only'
            ]]
Esempio n. 24
0
 def setup(self):
     """Build the RDA ordination from the example2 data.

     Data from table 11.3 in Legendre & Legendre 1998.
     """
     response = np.loadtxt(get_data_path("example2_Y"))
     explanatory = np.loadtxt(get_data_path("example2_X"))

     site_ids = [
         "Site0", "Site1", "Site2", "Site3", "Site4",
         "Site5", "Site6", "Site7", "Site8", "Site9",
     ]
     species_ids = [
         "Species0", "Species1", "Species2",
         "Species3", "Species4", "Species5",
     ]

     self.ordination = RDA(response, explanatory, site_ids, species_ids)
Esempio n. 25
0
 def test_stockholm_runon_gs(self):
     """Both run-on GS fixtures must parse to the same single-sequence MSA."""
     expected = TabularMSA(
         [DNA('ATCGTTCAGTG',
              metadata={'LN': 'This is a runon GS line.'})],
         index=['seq1'])

     # Same expectation with and without whitespace in the run-on lines.
     fixture_names = ('stockholm_runon_gs_no_whitespace',
                      'stockholm_runon_gs_with_whitespace')
     for name in fixture_names:
         observed = _stockholm_to_tabular_msa(get_data_path(name),
                                              constructor=DNA)
         self.assertEqual(observed, expected)
Esempio n. 26
0
    def test_from_file_error(self):
        """Malformed files must make ``OrdinationResults.from_file`` raise.

        Paths in ``self.fferror_test_paths`` must raise FileFormatError and
        paths in ``self.verror_test_paths`` must raise ValueError.
        """
        # The 'U' mode flag was removed in Python 3.11 and now raises
        # ValueError itself, which would make the second loop pass for the
        # wrong reason; default text mode already translates newlines.
        for test_path in self.fferror_test_paths:
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(FileFormatError):
                    OrdinationResults.from_file(f)

        for test_path in self.verror_test_paths:
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(ValueError):
                    OrdinationResults.from_file(f)
Esempio n. 27
0
 def test_balance_basis_large1(self):
     """Compare balance_basis on the large tree with the stored basis."""
     tree_fp = get_data_path('large_tree.nwk', subfolder='data')
     tree = TreeNode.read(tree_fp)

     basis_fp = get_data_path('large_tree_basis.txt', subfolder='data')
     stored_basis = np.loadtxt(basis_fp)

     observed, _keys = balance_basis(tree)

     # note that the basis is in reverse level order, so the stored
     # columns are flipped before comparing
     npt.assert_allclose(stored_basis[:, ::-1], observed)
Esempio n. 28
0
    def test_create_config_overwrite(self):
        """Reading 'param.cfg' after 'default.cfg' yields the expected config."""
        observed = _create_config()
        # 'param.cfg' is read second so that it overwrites "db_path"
        for cfg_name in ('default.cfg', 'param.cfg'):
            observed.read(get_data_path(cfg_name))

        expected = ConfigParser()
        defaults = expected['DEFAULT']
        defaults['db_path'] = 'db'
        expected.add_section('prodigal')
        prodigal = expected['prodigal']
        prodigal['-t'] = '1'

        self.assertEqual(observed, expected)
    def test_scaling1(self):
        """Compare ``rda(..., scaling=1)`` with the stored expected results.

        The comparison ignores axis directionality and uses 6 decimals.
        """
        scores = rda(self.Y, self.X, scaling=1)

        # Load data as computed with vegan 2.0-8
        vegan_features = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_species_scaling1_from_vegan')),
            index=self.feature_ids,
            columns=self.pc_ids)

        vegan_samples = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_site_scaling1_from_vegan')),
            index=self.sample_ids,
            columns=self.pc_ids)

        # Loaded once, with labels. An earlier unlabelled load of the same
        # file was overwritten before use and has been dropped.
        sample_constraints = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_sample_constraints_scaling1')),
            index=self.sample_ids,
            columns=self.pc_ids)

        # The biplot matrix may have fewer columns than there are PC ids,
        # so the labels are cropped to the matrix width.
        mat = np.loadtxt(get_data_path(
            'example2_biplot_scaling1'))
        cropped_pc_ids = self.pc_ids[:mat.shape[1]]
        biplot_scores = pd.DataFrame(mat,
                                     index=self.env_ids,
                                     columns=cropped_pc_ids)

        proportion_explained = pd.Series([0.44275783, 0.25614586,
                                          0.15280354, 0.10497021,
                                          0.02873375, 0.00987052,
                                          0.00471828],
                                         index=self.pc_ids)

        eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                             1.680705, 0.577350, 0.275984],
                            index=self.pc_ids)

        exp = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=sample_constraints,
            biplot_scores=biplot_scores,
            proportion_explained=proportion_explained,
            eigvals=eigvals)

        assert_ordination_results_equal(scores, exp,
                                        ignore_directionality=True,
                                        decimal=6)
Esempio n. 30
0
 def setup(self):
     """Build the CCA ordination from the example3 data.

     Data from table 11.3 in Legendre & Legendre 1998 (p. 590). Loaded
     results as computed with vegan 2.0-8 and compared with table 11.5 if
     also there.
     """
     response = np.loadtxt(get_data_path('example3_Y'))
     explanatory = np.loadtxt(get_data_path('example3_X'))

     site_ids = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4',
                 'Site5', 'Site6', 'Site7', 'Site8', 'Site9']
     species_ids = ['Species0', 'Species1', 'Species2', 'Species3',
                    'Species4', 'Species5', 'Species6', 'Species7',
                    'Species8']

     # The last column of the explanatory matrix is left out.
     self.ordination = CCA(response, explanatory[:, :-1],
                           site_ids, species_ids)
Esempio n. 31
0
 def test_stockholm_mixed_runon_references(self):
     """Parse the mixed run-on references fixture into one 'RN' entry."""
     observed = _stockholm_to_tabular_msa(
         get_data_path('stockholm_runon_references_mixed'),
         constructor=DNA)

     # Field order inside the reference matters, hence the OrderedDict.
     reference = OrderedDict([
         ('RC', 'A Runon Comment'),
         ('RM', '123456789'),
         ('RT', 'A Runon Title'),
         ('RA', 'The Author'),
         ('RL', 'A Location'),
     ])
     expected = TabularMSA([], metadata={'RN': [reference]})

     self.assertEqual(observed, expected)
Esempio n. 32
0
    def setUp(self):
        """Resolve the contact/PDB fixture paths and store expected counts.

        ``positive_params`` is a list of option dicts, each with a '-t' and
        a '-l' entry.
        """
        self.jbe_con = get_data_path('1jbeA.psicov')
        self.jbe_pdb = get_data_path('1jbeA_clean.pdb')

        self.qjp_con = get_data_path('1qjpA.psicov')
        self.qjp_pdb = get_data_path('1qjpA_clean.pdb')

        self.real_n_con_qjp = {'lr': 6441, 'sr': 2337, 'all': 8778}
        self.real_n_con_jbe = {'lr': 4742, 'sr': 2025, 'all': 6767}

        # ('-t' value, '-l' value) pairs, in the order they must appear.
        option_pairs = (
            ('all', 1), ('all', 2), ('all', 5), ('all', 10),
            ('lr', 1), ('lr', 2), ('lr', 10),
            ('sr', 2), ('sr', 10),
        )
        self.positive_params = [{'-t': kind, '-l': value}
                                for kind, value in option_pairs]
    def test_from_seralized_results(self):
        """pcoa_biplot gives matching output for read and in-memory results."""
        # the current implementation of ordination results loses some
        # information, test that pcoa_biplot works fine regardless
        loaded = OrdinationResults.read(get_data_path('PCoA_skbio'))

        from_file = pcoa_biplot(loaded, self.descriptors)
        from_memory = pcoa_biplot(self.ordination, self.descriptors)

        # Direction, axis labels and method names are not compared.
        comparison_options = dict(ignore_directionality=True,
                                  ignore_axis_labels=True,
                                  ignore_method_names=True)
        assert_ordination_results_equal(from_file, from_memory,
                                        **comparison_options)
Esempio n. 34
0
    def setUp(self):
        # Resolve the data paths of the positive and negative fixture sets.
        positive_names = (
            'blast7_default_single_line',
            'blast7_default_multi_line',
            'blast7_custom_minimal',
            'blast7_custom_single_line',
            'blast7_custom_multi_line',
            'blast7_custom_mixed_nans',
            'blast7_invalid_differing_fields',
            'blast7_invalid_no_data',
            'blast7_invalid_too_many_columns',
            'legacy9_and_blast7_default',
            'legacy9_invalid_too_many_columns',
            'legacy9_mixed_nans',
            'legacy9_multi_line',
            'legacy9_single_line',
        )
        negative_names = (
            'blast7_invalid_gibberish',
            'blast7_invalid_for_sniffer',
            'blast7_invalid_for_sniffer_2',
            'empty',
        )

        self.positives = list(map(get_data_path, positive_names))
        self.negatives = list(map(get_data_path, negative_names))
Esempio n. 35
0
 def test_custom_valid_multi_line(self):
     # Read the custom multi-line fixture and compare it with a
     # hand-written frame of three records.
     path = get_data_path("blast7_custom_multi_line")
     observed = _blast7_to_data_frame(path)

     column_names = ['qstart', 'qend', 'sstart', 'send', 'nident',
                     'mismatch', 'sframe', 'qaccver', 'saccver']
     records = [
         [1.0, 8.0, 3.0, 10.0, 8.0, 0.0, 1.0, 'query1', 'subject2'],
         [2.0, 5.0, 2.0, 15.0, 8.0, 0.0, 2.0, 'query1', 'subject2'],
         [1.0, 6.0, 2.0, 12.0, 8.0, 0.0, 1.0, 'query1', 'subject2'],
     ]
     expected = pd.DataFrame(records, columns=column_names)

     assert_data_frame_almost_equal(observed, expected)
Esempio n. 36
0
    def test__qiime2_rclr(self):
        """Tests q2-rclr matches standalone rclr.

        A mock table built from ``self.cdata`` is written next to the test
        data, transformed once by the standalone ``rclr`` command and once
        by ``rclr_transformation``; both results are compared with each
        other and with ``self.true``.
        """

        # make mock table to write
        samps_ids = ['s%i' % i for i in range(self.cdata.shape[0])]
        feats_ids = ['f%i' % i for i in range(self.cdata.shape[1])]
        table_test = Table(self.cdata.T, feats_ids, samps_ids)
        # write table
        in_ = get_data_path('test.biom', subfolder='data')
        out_path = os_path_sep.join(in_.split(os_path_sep)[:-1])
        test_path = os.path.join(out_path, 'rclr-test.biom')
        with biom_open(test_path, 'w') as wf:
            table_test.to_hdf5(wf, "test")
        # run standalone
        runner = CliRunner()
        result = runner.invoke(sdc.commands['rclr'],
                               ['--in-biom', test_path,
                                '--output-dir', out_path])
        # Check that the exit code was 0 (indicating success) BEFORE reading
        # the output table, so that a failed command is reported as such
        # instead of surfacing as a missing or stale output file.
        try:
            self.assertEqual(0, result.exit_code)
        except AssertionError:
            ex = result.exception
            error = Exception('Command failed with non-zero exit code')
            raise error.with_traceback(ex.__traceback__)
        out_table = get_data_path('rclr-table.biom',
                                  subfolder='data')
        res_table = load_table(out_table)
        standalone_mat = res_table.matrix_data.toarray().T
        # run QIIME2
        q2_table_test = Artifact.import_data("FeatureTable[Frequency]",
                                             table_test)
        q2_res = rclr_transformation(q2_table_test).rclr_table.view(Table)
        q2_res_mat = q2_res.matrix_data.toarray().T
        # check same and check both correct
        npt.assert_allclose(standalone_mat, q2_res_mat)
        npt.assert_allclose(standalone_mat, self.true)
        npt.assert_allclose(q2_res_mat, self.true)
Esempio n. 37
0
 def test_stockholm_to_msa_different_padding(self):
     # The fixture is expected to produce two 'RN' records, each holding
     # one run-on comment.
     comments = ('A Runon Comment Without Whitespace',
                 'A Runon Comment With Whitespace')
     references = [OrderedDict([('RC', text)]) for text in comments]
     expected = TabularMSA([], metadata={'RN': references})

     path = get_data_path('stockholm_different_padding')
     observed = _stockholm_to_tabular_msa(path, constructor=DNA)

     self.assertEqual(observed, expected)
Esempio n. 38
0
    def test_proportional_artifact(self):
        # Run correlation_clustering through the QIIME 2 plugin on the
        # feature-table artifact and compare the tree with a fixed newick.
        from qiime2.plugins.gneiss.methods import correlation_clustering

        artifact = qiime2.Artifact.load(get_data_path("feature-table.qza"))
        outcome = correlation_clustering(artifact, pseudocount=0.1)
        observed_tree = outcome.clustering._view(TreeNode)

        newick_pieces = [
            '((F4:0.228723591874,(F5:0.074748541601,',
            '(F1:0.00010428164962,F2:0.00010428164962)',
            'y4:0.0746442599513)y3:0.153975050273)',
            'y1:0.70266138894,(F3:0.266841737789,F6:0.266841737789)',
            'y2:0.664543243026)y0;\n',
        ]
        expected_tree = TreeNode.read([''.join(newick_pieces)])

        self.assert_tree_almost_equals(expected_tree, observed_tree)
Esempio n. 39
0
    def test_assign_ids_intersect(self):
        # Run assign_ids on the tree/table artifacts, then check that every
        # node is named, that the tips are a, b, d, e and that the returned
        # table matches the expected frame.
        from qiime2.plugins.gneiss.methods import assign_ids

        tree_path = get_data_path("tree_extra.qza")
        table_path = get_data_path("polytomy_table.qza")
        tree_artifact = qiime2.Artifact.load(tree_path)
        table_artifact = qiime2.Artifact.load(table_path)

        outcome = assign_ids(input_tree=tree_artifact,
                             input_table=table_artifact)
        named_tree = outcome.output_tree._view(TreeNode)
        matched_table = outcome.output_table._view(pd.DataFrame)

        for node in named_tree.levelorder(include_self=True):
            self.assertTrue(node.name is not None)

        tip_names = [tip.name for tip in named_tree.tips()]
        self.assertEqual(list('abde'), tip_names)

        counts_by_sample = {
            's1': [1.0, 2.0, 4.0, 5.0],
            's2': [1.0, 5.0, 6.0, 0.0],
        }
        expected_table = pd.DataFrame(counts_by_sample,
                                      index=['a', 'b', 'd', 'e']).T

        pdt.assert_frame_equal(expected_table, matched_table)
Esempio n. 40
0
 def test_valid_nan_handling(self):
     # Read the mixed-NaN fixture with an explicit column list and check
     # that the NaN entries land where expected.
     column_names = ['qacc', 'qseq', 'btop', 'sframe', 'ppos', 'positive',
                     'gaps']
     path = get_data_path('blast6_custom_mixed_nans')
     # Hand the reader its own copy of the column list.
     observed = _blast6_to_data_frame(path, columns=list(column_names))

     records = [
         [np.nan, 'PAAWWWWW', 8.0, 1.0, 100.00, np.nan, 0.0],
         ['query1', np.nan, 8.0, 1.0, np.nan, 8.0, 0.0],
     ]
     expected = pd.DataFrame(records, columns=column_names)

     assert_data_frame_almost_equal(observed, expected)
Esempio n. 41
0
    def test_getoptS_small(self):
        """Test singular values from U and V."""
        mat = loadmat(get_data_path('small_test.mat'))

        # 'M_E' needs .todense() before it can become a plain ndarray.
        dense_m_e = np.array(mat['M_E'].todense())
        mask = mat['E']
        observed = singular_values(mat['x'], mat['y'], dense_m_e, mask)

        expected = np.array([
            [0.93639499, 0.07644197, -0.02828782],
            [-0.03960841, 0.60787383, 0.00521257],
            [0.00729038, 0.00785834, 0.67853083],
        ])
        npt.assert_allclose(observed, expected, atol=1e-5)
Esempio n. 42
0
    def test_getoptS_small(self):
        # warning : this test must ALWAYS pass
        # Compare getoptS on the small .mat fixture with a fixed 3x3 result.
        mat = loadmat(get_data_path('small_test.mat'))

        # 'M_E' needs .todense() before it can become a plain ndarray.
        dense_m_e = np.array(mat['M_E'].todense())
        mask = mat['E']
        observed = getoptS(mat['x'], mat['y'], dense_m_e, mask)

        expected = np.array([
            [0.93639499, 0.07644197, -0.02828782],
            [-0.03960841, 0.60787383, 0.00521257],
            [0.00729038, 0.00785834, 0.67853083],
        ])
        npt.assert_allclose(observed, expected, atol=1e-5)
Esempio n. 43
0
 def setUp(self):
     # One entry: a list of 4-tuples of record components, paired with a
     # list of (keyword arguments, fixture path) combinations.
     # NOTE(review): presumably the tuples are (id, description, sequence,
     # quality scores) and the paths are expected writer output -- confirm
     # against the tests that consume self.valid_files.
     components = [
         ('f o  o', 'bar\n\nbaz', 'AACCGG', [16, 17, 18, 19, 20, 21]),
         ('bar', 'baz foo', 'TTGGCC', [23, 22, 21, 20, 19, 18]),
         ('ba\n\t\tz', 'foo bar', 'GATTTC', [20, 21, 22, 23, 24, 18]),
     ]
     kwargs_and_paths = [
         (dict(variant='sanger'),
          get_data_path('fastq_writer_sanger_defaults')),
         (dict(phred_offset=33),
          get_data_path('fastq_writer_sanger_defaults')),
         (dict(variant='illumina1.8'),
          get_data_path('fastq_writer_sanger_defaults')),
         (dict(variant='illumina1.3'),
          get_data_path('fastq_writer_illumina1.3_defaults')),
         (dict(variant='sanger',
               id_whitespace_replacement='%',
               description_newline_replacement='^'),
          get_data_path('fastq_writer_sanger_non_defaults')),
     ]
     self.valid_files = [(components, kwargs_and_paths)]
Esempio n. 44
0
    def test_no_collapsed_nodes(self):
        # Render the tree with clade and background colours and check that
        # a non-empty output file is produced.
        source_tree = TreeNode.read(get_data_path(self.newick))

        clade_palette = {'y5': '#FF0000', 'y18': '#0000FF'}
        background_palette = {'y29': '#00FF00'}
        drawable, style = diamondtree(source_tree,
                                      breadth_scaling=6,
                                      depth_scaling=30,
                                      cladecolors=clade_palette,
                                      bgcolors=background_palette)

        drawable.render(file_name=self.fname, tree_style=style)

        self.assertTrue(os.path.exists(self.fname))
        self.assertTrue(os.path.getsize(self.fname) > 0)
 def test_standalone_rclr(self):
     """Test the standalone rclr."""
     # Build a mock table from self.cdata with generated sample/feature ids.
     sample_labels = ['s%i' % i for i in range(self.cdata.shape[0])]
     feature_labels = ['f%i' % i for i in range(self.cdata.shape[1])]
     mock_table = Table(self.cdata.T, feature_labels, sample_labels)

     # Write it into the directory that holds the rpca_data fixtures.
     fixture = get_data_path('test.biom', subfolder='rpca_data')
     fixture_parts = fixture.split(os_path_sep)
     out_dir = os_path_sep.join(fixture_parts[:-1])
     mock_path = os.path.join(out_dir, 'rclr-test.biom')
     with biom_open(mock_path, 'w') as handle:
         mock_table.to_hdf5(handle, "test")

     # Run the standalone command on the mock table.
     outcome = CliRunner().invoke(
         sdc.commands['rclr'],
         ['--in-biom', mock_path, '--output-dir', out_dir])

     # Load the transformed table and compare it with the reference.
     transformed = load_table(
         get_data_path('rclr-table.biom', subfolder='rpca_data'))
     observed = transformed.matrix_data.toarray().T
     npt.assert_allclose(observed, self.true)

     # Lastly, check that exit code was 0 (indicating success)
     CliTestCase().assertExitCode(0, outcome)
Esempio n. 46
0
    def test_fastq_to_generator_invalid_files_illumina(self):
        # files that should be invalid for illumina1.3 and illumina1.8 variants
        fps = [
            get_data_path(fp) for fp in [
                'sanger_full_range_original_sanger.fastq',
                'solexa_full_range_original_solexa.fastq'
            ]
        ]

        # Raw string: '\[' in a normal literal is an invalid escape sequence.
        out_of_range = r'out of range \[0, 62\]'

        for fp in fps:
            for variant in ('illumina1.3', 'illumina1.8'):
                # assertRaisesRegex replaces the deprecated
                # assertRaisesRegexp alias (removed in Python 3.12).
                with self.assertRaisesRegex(ValueError, out_of_range):
                    list(_fastq_to_generator(fp, variant=variant))
Esempio n. 47
0
 def setUp(self):
     """Data from table 11.3 in Legendre & Legendre 1998
     (p. 590). Loaded results as computed with vegan 2.0-8 and
     compared with table 11.5 if also there."""
     # Feature0..Feature8, Sample0..Sample9, Constraint0..Constraint2 and
     # CCA1..CCA9.
     self.feature_ids = ['Feature%d' % i for i in range(9)]
     self.sample_ids = ['Sample%d' % i for i in range(10)]
     self.env_ids = ['Constraint%d' % i for i in range(3)]
     self.pc_ids = ['CCA%d' % i for i in range(1, 10)]

     response = np.loadtxt(get_data_path('example3_Y'))
     self.Y = pd.DataFrame(response,
                           columns=self.feature_ids,
                           index=self.sample_ids)

     # The last column of the X fixture is dropped.
     explanatory = np.loadtxt(get_data_path('example3_X'))[:, :-1]
     self.X = pd.DataFrame(explanatory,
                           columns=self.env_ids,
                           index=self.sample_ids)
Esempio n. 48
0
    def test_standalone_rpca(self):
        """Compare the standalone RPCA command's output with stored results.

           Integration-style check: the ``rpca`` command is run on the
           ``rpca_data`` test table, and the distance matrix and ordination
           read back from that folder are compared with the expected files.
        """
        def fixture(name):
            return get_data_path(name, subfolder='rpca_data')

        def read_matrix(name):
            return pd.read_csv(fixture(name), sep='\t', index_col=0)

        biom_path = fixture('test.biom')
        out_dir = os_path_sep.join(biom_path.split(os_path_sep)[:-1])
        outcome = CliRunner().invoke(
            sdc.commands['rpca'],
            ['--in-biom', biom_path, '--output-dir', out_dir])

        # Results read back from the output directory
        observed_dist = read_matrix('distance-matrix.tsv')
        observed_ord = OrdinationResults.read(fixture('ordination.txt'))

        # Stored expectations
        expected_dist = read_matrix('expected-distance-matrix.tsv')
        expected_ord = OrdinationResults.read(
            fixture('expected-ordination.txt'))

        # Distance matrices must agree
        assert_array_almost_equal(observed_dist.values, expected_dist.values)

        # Ordinations must agree
        assert_ordinationresults_equal(observed_ord, expected_ord)

        # Lastly, the command must have exited with code 0
        CliTestCase().assertExitCode(0, outcome)
Esempio n. 49
0
    def test_scaling1(self):
        # Run rda with scaling=1 and compare against reference values loaded
        # from the example2 fixtures. Each fixture is loaded exactly once:
        # an earlier, unlabelled load of the biplot scores and sample
        # constraints was overwritten before use and has been dropped.

        scores = rda(self.Y, self.X, scaling=1)

        # Load data as computed with vegan 2.0-8
        vegan_features = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_species_scaling1_from_vegan')),
            index=self.feature_ids,
            columns=self.pc_ids)

        vegan_samples = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_site_scaling1_from_vegan')),
            index=self.sample_ids,
            columns=self.pc_ids)

        sample_constraints = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_sample_constraints_scaling1')),
            index=self.sample_ids,
            columns=self.pc_ids)

        # No labels here; the comparison below ignores biplot score labels.
        biplot_scores = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_biplot_scaling1')))

        # These are wrong. See issue #1002
        proportion_explained = pd.Series([0.44275783, 0.25614586,
                                          0.15280354, 0.10497021,
                                          0.02873375, 0.00987052,
                                          0.00471828],
                                         index=self.pc_ids)
        # These are wrong. See issue #1002
        eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                             1.680705, 0.577350, 0.275984],
                            index=self.pc_ids)

        exp = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=sample_constraints,
            biplot_scores=biplot_scores,
            proportion_explained=proportion_explained,
            eigvals=eigvals)

        assert_ordination_results_equal(scores, exp,
                                        ignore_directionality=True,
                                        ignore_biplot_scores_labels=True,
                                        decimal=6)
Esempio n. 50
0
    def test_standalone_rpca_rank_est(self):
        """Checks the standalone rank estimate
           is used instead of a explicit rank
           setting.
        """
        def read_matrix(name):
            return pd.read_csv(get_data_path(name), sep='\t', index_col=0)

        biom_path = get_data_path('test.biom')
        out_dir = os_path_sep.join(biom_path.split(os_path_sep)[:-1])
        outcome = CliRunner().invoke(
            sdc.commands['auto-rpca'],
            ['--in-biom', biom_path, '--output-dir', out_dir])

        # Results read back from the output directory
        observed_dist = read_matrix('distance-matrix.tsv')
        observed_ord = OrdinationResults.read(get_data_path('ordination.txt'))

        # Stored expectations
        expected_dist = read_matrix('expected-est-distance-matrix.tsv')
        expected_ord = OrdinationResults.read(
            get_data_path('expected-est-ordination.txt'))

        # Distance matrices must agree
        assert_array_almost_equal(observed_dist.values, expected_dist.values)

        # Ordinations must agree
        assert_deicode_ordinationresults_equal(observed_ord, expected_ord)

        # Lastly, the command must have exited with code 0; on failure the
        # traceback of the command's own exception is attached.
        try:
            self.assertEqual(0, outcome.exit_code)
        except AssertionError:
            cause = outcome.exception
            failure = Exception('Command failed with non-zero exit code')
            raise failure.with_traceback(cause.__traceback__)
Esempio n. 51
0
 def test_msa_to_stockholm_data_only(self):
     # Write a three-sequence RNA alignment without metadata and compare
     # the text with the stored fixture.
     path = get_data_path('stockholm_data_only')
     sequences = [RNA('ACUCCGACAUGCUCC'),
                  RNA('UAGUGCCGAACGCUG'),
                  RNA('GUGUGGGCGUGAUUC')]
     msa = TabularMSA(sequences, index=['seq1', 'seq2', 'seq3'])

     with io.StringIO() as buffer:
         _tabular_msa_to_stockholm(msa, buffer)
         observed = buffer.getvalue()
     with io.open(path) as handle:
         expected = handle.read()

     self.assertEqual(observed, expected)
Esempio n. 52
0
    def test_lme_artifact(self):
        # Run lme_regression through the QIIME 2 plugin, export the
        # visualization and check one coefficient in coefficients.csv.
        from qiime2.plugins.gneiss.visualizers import lme_regression

        table_f = get_data_path("lme_balances.qza")
        tree_f = get_data_path("lme_tree.qza")
        metadata_f = get_data_path("test_lme_metadata.txt")

        in_table = qiime2.Artifact.load(table_f)
        in_tree = qiime2.Artifact.load(tree_f)
        in_metadata = qiime2.Metadata(pd.read_table(metadata_f, index_col=0))

        viz = lme_regression(in_table, in_tree, in_metadata, 'ph',
                             'host_subject_id')

        out_dir = 'regression_summary_dir'
        os.mkdir(out_dir)
        # Always remove the export directory, even when the export or the
        # assertion fails; otherwise the next run fails on os.mkdir.
        try:
            viz.visualization.export_data(out_dir)

            res_coef = pd.read_csv(os.path.join(out_dir,
                                                'coefficients.csv'),
                                   index_col=0)

            self.assertAlmostEqual(res_coef.loc['y0', 'groups RE'],
                                   1.105630e+00,
                                   places=5)
        finally:
            shutil.rmtree(out_dir)
Esempio n. 53
0
 def test_msa_to_stockholm_multiple_trees(self):
     # Write an empty alignment whose 'NH' metadata holds three trees and
     # compare the text with the stored fixture.
     path = get_data_path('stockholm_multiple_trees')
     trees = OrderedDict([
         ('tree1', 'ABCD'),
         ('tree2', 'EFGH'),
         ('tree3', 'IJKL'),
     ])
     msa = TabularMSA([], metadata=OrderedDict([('NH', trees)]))

     with io.StringIO() as buffer:
         _tabular_msa_to_stockholm(msa, buffer)
         observed = buffer.getvalue()
     with io.open(path) as handle:
         expected = handle.read()

     self.assertEqual(observed, expected)
Esempio n. 54
0
    def test_parse_easel_output(self):
        # Check the first-row header fields, then the whole parsed frame,
        # and finally that the no-hit file gives an empty frame.
        parsed = parse_easel_output(self.fp_infernal)

        first_row_expectations = (
            ('software', 'INFERNAL'),
            ('software version', '1.1.2'),
            ('fp_query', 'Markergenes/FSSC/allFSSC.cm'),
            ('fp_target',
             'Sequences_Fusarium/taxid_99000016/FW16.genome.fna'),
        )
        for column, expected_value in first_row_expectations:
            self.assertEqual(expected_value, parsed[column].iloc[0])

        with open(get_data_path('easel2sam/exp_parse_easle_output.txt')) as f:
            expected_text = f.read()
        assert_frame_equal(_str2pd(expected_text), parsed)

        no_hits = parse_easel_output(self.fp_nohit)
        self.assertEqual(no_hits.shape[0], 0)
Esempio n. 55
0
    def test_standalone_rpca_rank_est(self):
        """Checks the standalone RPCA rank estimate
           is used instead of a explicit rank
           setting.
        """
        def fixture(name):
            return get_data_path(name, subfolder='rpca_data')

        def read_matrix(name):
            return pd.read_csv(fixture(name), sep='\t', index_col=0)

        biom_path = fixture('test.biom')
        out_dir = os_path_sep.join(biom_path.split(os_path_sep)[:-1])
        outcome = CliRunner().invoke(
            sdc.commands['auto-rpca'],
            ['--in-biom', biom_path, '--output-dir', out_dir])

        # Results read back from the output directory
        observed_dist = read_matrix('distance-matrix.tsv')
        observed_ord = OrdinationResults.read(fixture('ordination.txt'))

        # Stored expectations
        expected_dist = read_matrix('expected-est-distance-matrix.tsv')
        expected_ord = OrdinationResults.read(
            fixture('expected-est-ordination.txt'))

        # Distance matrices must agree
        assert_array_almost_equal(observed_dist.values, expected_dist.values)

        # Ordinations must agree
        assert_ordinationresults_equal(observed_ord, expected_ord)

        # Lastly, the command must have exited with code 0
        CliTestCase().assertExitCode(0, outcome)
Esempio n. 56
0
 def test_stockholm_maintains_order(self):
     # Check key/column order at four levels: MSA metadata, first-sequence
     # metadata, MSA positional metadata, first-sequence positional
     # metadata.
     path = get_data_path('stockholm_two_of_each_metadata')
     msa = _stockholm_to_tabular_msa(path, constructor=DNA)

     self.assertEqual(
         list(msa.metadata.items()),
         [('NM', 'Kestrel Gorlick'), ('DT', 'February 5th, 2016')])

     self.assertEqual(
         list(msa[0].metadata.items()),
         [('AL', 'ABCD'), ('NS', '1234')])

     self.assertEqual(
         list(msa.positional_metadata.columns),
         ['SS_cons', 'AS_cons'])

     self.assertEqual(
         list(msa[0].positional_metadata.columns),
         ['SS', 'AS'])
    def test_pcoa_biplot_from_ape(self):
        """Test against a reference implementation from R's ape package

        The test data was generated with the R script below and using a
        modified version of pcoa.biplot that returns the U matrix.

        library(ape)
        # files can be found in the test data folder of the ordination module
        y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
        dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)

        h = pcoa(dm)

        # biplot.pcoa will only calculate the biplot for two axes at a time
        acc = NULL
        for (axes in c(1, 3, 5, 7)) {
            new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                              rn = rep('.', length(colnames(dm))) )

            if(is.null(acc)) {
                acc = new
            }
            else {
                b = acc
                acc <- cbind(acc, new)
            }
        }
        write.csv(acc, file='PCoA_biplot_projected_descriptors')
        """
        observed = pcoa_biplot(self.ordination, self.descriptors)

        # The expected object starts as a copy of the observed one; only
        # its features table is swapped for the R-generated values.
        expected = deepcopy(observed)

        reference_path = get_data_path('PCoA_biplot_projected_descriptors')
        projected = pd.read_table(reference_path, sep=',', index_col=0)

        # R does not compute the last dimension, so add a ninth axis of
        # zeros to make the shapes comparable.
        projected['Axis.9'] = np.zeros_like(projected['Axis.8'])

        # Align the row order with the observed features.
        expected.features = projected.reindex(observed.features.index)

        assert_ordination_results_equal(observed,
                                        expected,
                                        ignore_directionality=True,
                                        ignore_axis_labels=True)
Esempio n. 58
0
    def test_parse(self):
        # Build the expected (sequence id, IntervalMetadata) pairs by hand
        # and compare them, in order, with what _generator yields for the
        # cmscan.txt fixture.
        imd1 = IntervalMetadata(None)
        imd1.add(bounds=[(3588441, 3588818)],
                 metadata={
                     'ncRNA_class': 'RNaseP_bact_a',
                     'type': 'ncRNA',
                     'strand': '-',
                     'db_xref': 'RF00010',
                     'source': 'Rfam'
                 })
        imd1.add(bounds=[(3355449, 3355633)],
                 metadata={
                     'ncRNA_class': '5S_rRNA',
                     'type': 'rRNA',
                     'strand': '+',
                     'product': '5s_rRNA',
                     'db_xref': 'RF00001',
                     'source': 'Rfam'
                 })
        imd2 = IntervalMetadata(None)
        imd2.add(bounds=[(85215, 85384)],
                 metadata={
                     'ncRNA_class': 'LSU_rRNA_bacteria',
                     'type': 'rRNA',
                     'strand': '+',
                     'product': '23s_rRNA',
                     'db_xref': 'RF02541',
                     'source': 'Rfam'
                 })
        imd3 = IntervalMetadata(None)
        imd3.add(bounds=[(8739, 8777)],
                 metadata={
                     'ncRNA_class': 'SSU_rRNA_bacteria',
                     'type': 'rRNA',
                     'strand': '+',
                     'product': '16s_rRNA',
                     'db_xref': 'RF00177',
                     'source': 'Rfam'
                 })
        exp = (('NC_016822.1', imd1), ('NC_016833.1', imd2), ('NC_016834.1',
                                                              imd3))

        fp = get_data_path('cmscan.txt')
        gen = _generator(fp)

        compared = 0
        for (exp_id, exp_imd), (obs_id, obs_imd) in zip(exp, gen):
            self.assertEqual(exp_id, obs_id)
            self.assertEqual(exp_imd, obs_imd)
            compared += 1

        # zip() stops at the shorter input, so without this check the test
        # would pass even if the parser yielded fewer (or no) records.
        self.assertEqual(len(exp), compared)
Esempio n. 59
0
    def test_standalone_rpca(self):
        """Compare the standalone CTF command's output with stored results.

           Integration-style check: the command is run on the small test
           table and metadata, then the subject and feature ordinations
           read back from the output directory are checked for centring
           and compared with the expected files.
        """
        def read_ordination(name):
            return pd.read_csv(get_data_path(name), sep='\t', index_col=0)

        table_path = get_data_path('test-small.biom')
        metadata_path = get_data_path('test-small.tsv')
        out_dir = os_path_sep.join(table_path.split(os_path_sep)[:-1])

        cli_args = [
            '--in-biom', table_path,
            '--sample-metadata-file', metadata_path,
            '--individual-id-column', 'host_subject_id',
            '--state-column-1', 'context',
            '--output-dir', out_dir,
        ]
        outcome = CliRunner().invoke(standalone_ctf, cli_args)

        # The command must have exited with code 0 before outputs are read
        CliTestCase().assertExitCode(0, outcome)

        # Results read back from the output directory
        subject_obs = read_ordination('context-subject-ordination.tsv')
        feature_obs = read_ordination('context-features-ordination.tsv')

        # Stored expectations
        subject_exp = read_ordination(
            'expected-context-subject-ordination.tsv')
        feature_exp = read_ordination(
            'expected-context-features-ordination.tsv')

        axes = ['PC1', 'PC2', 'PC3']

        # The largest per-axis mean of each ordination should be ~0
        for frame in (subject_obs, feature_obs):
            largest_mean = frame[axes].mean().values.max()
            self.assertAlmostEqual(largest_mean, 0)

        # Observed and expected loadings must match within the tolerance
        for observed, expected in ((subject_obs, subject_exp),
                                   (feature_obs, feature_exp)):
            assert_allclose(absolute_sort(observed[axes].values),
                            absolute_sort(expected[axes].values),
                            atol=.5)
Esempio n. 60
0
 def test_msa_to_stockholm_nonstring_values(self):
     # Metadata keys and values, positional-metadata labels and the index
     # are all numeric here; the written text must match the fixture.
     path = get_data_path('stockholm_nonstring_labels')

     sequence = DNA(
         'ACTG',
         metadata=OrderedDict([(8, 123)]),
         positional_metadata=OrderedDict([(1.0, [1, 2, 3, 4])]))
     msa = TabularMSA(
         [sequence],
         metadata=OrderedDict([(1.3, 2857)]),
         positional_metadata=OrderedDict([(25, [4, 3, 2, 1])]),
         index=[11214])

     with io.StringIO() as buffer:
         _tabular_msa_to_stockholm(msa, buffer)
         observed = buffer.getvalue()
     with io.open(path) as handle:
         expected = handle.read()

     self.assertEqual(observed, expected)