コード例 #1
0
    def test_standalone_rpca(self):
        """Check the files written by DEICODE's standalone ``rpca`` command.

        This is more of an "integration test" than a unit test -- the
        details of the algorithm used by the standalone RPCA script are
        checked in more detail in deicode/tests/test_optspace.py, etc.
        """
        in_ = get_data_path('test.biom')
        # The command writes into the directory that holds the input table;
        # the outputs are looked up again through get_data_path() below.
        out_ = os_path_sep.join(in_.split(os_path_sep)[:-1])
        runner = CliRunner()
        result = runner.invoke(sdc.commands['rpca'],
                               ['--in-biom', in_,
                                '--output-dir', out_])

        # Check the exit code before touching any output file: if the command
        # failed, the files below may be missing or left over from an earlier
        # run, and the resulting error would hide the real cause.
        self.assertEqual(result.exit_code, 0, msg=result.output)

        # Read the results
        dist_res = pd.read_csv(get_data_path('distance-matrix.tsv'), sep='\t',
                               index_col=0)
        ord_res = OrdinationResults.read(get_data_path('ordination.txt'))

        # Read the expected results
        dist_exp = pd.read_csv(get_data_path('expected-distance-matrix.tsv'),
                               sep='\t', index_col=0)
        ord_exp = OrdinationResults.read(get_data_path(
                                         'expected-ordination.txt'))

        # Check that the distance matrix matches our expectations
        assert_array_almost_equal(dist_res.values, dist_exp.values)

        # Check that the ordination results match our expectations -- checking
        # each value for both features and samples
        assert_deicode_ordinationresults_equal(ord_res, ord_exp)
コード例 #2
0
    def test_from_seralized_results(self):
        """pcoa_biplot gives matching output for results read from a file
        and for the in-memory ordination.
        """
        # Ordination results lose some information when they are serialized;
        # pcoa_biplot is expected to cope with the object read back from disk.
        from_file = OrdinationResults.read(get_data_path('PCoA_skbio'))

        observed = pcoa_biplot(from_file, self.descriptors)
        expected = pcoa_biplot(self.ordination, self.descriptors)

        # Differences in axis direction, axis labels and method names are
        # deliberately ignored by the comparison.
        comparison_options = dict(ignore_directionality=True,
                                  ignore_axis_labels=True,
                                  ignore_method_names=True)
        assert_ordination_results_equal(observed, expected,
                                        **comparison_options)
コード例 #3
0
    def test_standalone_rpca_rank_est(self):
        """Check the output of the standalone ``auto-rpca`` command.

        No explicit rank is passed on the command line; the results are
        compared against the ``expected-est-*`` reference files.
        """
        in_ = get_data_path('test.biom')
        # The command writes into the directory that holds the input table.
        out_ = os_path_sep.join(in_.split(os_path_sep)[:-1])
        runner = CliRunner()
        result = runner.invoke(sdc.commands['auto-rpca'],
                               ['--in-biom', in_,
                                '--output-dir', out_])

        # Fail fast when the command did not succeed, before any output file
        # is read. Chaining with ``from`` keeps the exception captured by the
        # runner (message and traceback) visible in the report, and also works
        # when no exception was recorded (``result.exception`` is None).
        if result.exit_code != 0:
            error = Exception('Command failed with non-zero exit code')
            raise error from result.exception

        # Read the results
        dist_res = pd.read_csv(get_data_path('distance-matrix.tsv'), sep='\t',
                               index_col=0)
        ord_res = OrdinationResults.read(get_data_path('ordination.txt'))

        # Read the expected results
        file_ = 'expected-est-distance-matrix.tsv'
        dist_exp = pd.read_csv(get_data_path(file_),
                               sep='\t', index_col=0)
        ord_exp = OrdinationResults.read(get_data_path(
                                         'expected-est-ordination.txt'))

        # Check that the distance matrix matches our expectations
        assert_array_almost_equal(dist_res.values, dist_exp.values)

        # Check that the ordination results match our expectations -- checking
        # each value for both features and samples
        assert_deicode_ordinationresults_equal(ord_res, ord_exp)
コード例 #4
0
def _generate_ordination_results_summary(files, metadata, out_dir):
    """Write a standalone Emperor plot for an ordination-results artifact.

    Parameters
    ----------
    files : dict
        Maps a filepath type to a list of filepaths; only the first
        ``'plain_text'`` entry is read.
    metadata : dict
        Converted to a DataFrame with ``orient='index'`` (the dictionary keys
        become the row index) and handed to Emperor.
    out_dir : str
        Directory that receives ``index.html`` and the
        ``emperor_support_files`` sub-directory.

    Returns
    -------
    tuple of (str, str)
        Path of the HTML page and path of the support-files directory.
    """
    # Only one plain text file is expected, and it holds the ordination
    # results -- hence the hard-coded index 0.
    ordination_fp = files['plain_text'][0]
    ordination = OrdinationResults.read(ordination_fp)
    sample_md = pd.DataFrame.from_dict(metadata, orient='index')
    viewer = Emperor(ordination, sample_md, remote="emperor_support_files")

    index_fp = join(out_dir, 'index.html')
    support_dir = join(out_dir, 'emperor_support_files')
    makedirs(support_dir)

    with open(index_fp, 'w') as html:
        html.write(viewer.make_emperor(standalone=True))
        viewer.copy_support_files(support_dir)

    return index_fp, support_dir
コード例 #5
0
    def test_standalone_rpca_rank_est(self):
        """Check the output of the standalone ``auto-rpca`` command.

        No explicit rank is passed on the command line; the results are
        compared against the ``expected-est-*`` reference files in the
        ``rpca_data`` sub-folder.
        """
        in_ = get_data_path('test.biom', subfolder='rpca_data')
        # The command writes into the directory that holds the input table.
        out_ = os_path_sep.join(in_.split(os_path_sep)[:-1])
        runner = CliRunner()
        result = runner.invoke(sdc.commands['auto-rpca'],
                               ['--in-biom', in_, '--output-dir', out_])

        # Check the exit code (0 indicates success) before reading any output
        # file: after a failed run those files may be missing or stale, and
        # the follow-up error would hide the real cause.
        CliTestCase().assertExitCode(0, result)

        # Read the results
        dist_res = pd.read_csv(get_data_path('distance-matrix.tsv',
                                             subfolder='rpca_data'),
                               sep='\t',
                               index_col=0)
        ord_res = OrdinationResults.read(
            get_data_path('ordination.txt', subfolder='rpca_data'))

        # Read the expected results
        file_ = 'expected-est-distance-matrix.tsv'
        dist_exp = pd.read_csv(get_data_path(file_, subfolder='rpca_data'),
                               sep='\t',
                               index_col=0)
        ord_exp = OrdinationResults.read(
            get_data_path('expected-est-ordination.txt',
                          subfolder='rpca_data'))

        # Check that the distance matrix matches our expectations
        assert_array_almost_equal(dist_res.values, dist_exp.values)

        # Check that the ordination results match our expectations -- checking
        # each value for both features and samples
        assert_ordinationresults_equal(ord_res, ord_exp)
コード例 #6
0
 def test_standalone_rpca_n_components(self):
     """Tests the standalone script when n_components is 2.

     The ordination written by the command is expected to expose three
     axes in ``proportion_explained``.
     """
     in_ = get_data_path('test.biom')
     # The command writes into the directory that holds the input table.
     out_ = os_path_sep.join(in_.split(os_path_sep)[:-1])
     runner = CliRunner()
     # Run the same command but with rank == 2. Arguments are passed as
     # strings, exactly as they would arrive from a real command line.
     result = runner.invoke(standalone_rpca, [
         '--in-biom', in_, '--output-dir', out_, '--n_components', '2',
         '--max_iterations', '5'
     ])
     self.assertEqual(result.exit_code, 0)
     ord_res = OrdinationResults.read(get_data_path('ordination.txt'))
     # Check it contains three axes. This used to be an ``if ...: pass``
     # that could never fail.
     # NOTE(review): presumably the command pads a third axis when
     # n_components is 2 -- confirm against the RPCA implementation.
     self.assertEqual(len(ord_res.proportion_explained), 3)
コード例 #7
0
def _simulation_data(data, ids):
    """Build ordination results and a Euclidean distance table from
    simulated three-dimensional trajectories.

    Parameters
    ----------
    data : sequence
        One entry per identifier. ``row[0]``, ``row[1]`` and ``row[2]`` are
        indexed in parallel and give the three coordinates at each time
        point. The length of ``data[0][0]`` is used as the number of time
        points when sizing the ``Site`` section.
    ids : sequence
        ``ids[j]`` labels ``data[j]``.

    Returns
    -------
    tuple
        ``(ordination_results, distance_matrix)``: the ``OrdinationResults``
        parsed from the generated text, and a list of rows whose first row is
        a header (an empty string followed by the labels) and whose other
        rows hold a label followed by stringified Euclidean distances.

    Raises
    ------
    IndexError
        If ``data`` is empty or ``ids`` has fewer entries than ``data``.
    """
    # Local import: the scratch directory is only needed here.
    import tempfile

    n_sites = len(data) * len(data[0][0])
    coords = {}

    # The text is written to a private scratch directory rather than to a
    # fixed "ordination.txt" in the working directory, so an unrelated file
    # of that name can no longer be overwritten and deleted.
    with tempfile.TemporaryDirectory() as scratch_dir:
        ordination_fp = os.path.join(scratch_dir, "ordination.txt")
        with open(ordination_fp, "w", encoding='utf8') as ordination:
            ordination.write('Eigvals\t0' + '\n\n')
            ordination.write('Proportion explained\t0' + '\n\n')
            ordination.write('Species\t0\t0\n\n')
            ordination.write('Site\t' + str(n_sites) + '\t3\n')
            for j, row in enumerate(data):
                identifier = ids[j]
                for i in range(len(row[0])):
                    point = [row[0][i], row[1][i], row[2][i]]
                    ordination.write(
                        str(identifier) + "_t" + str(i) + "\t" +
                        "\t".join(str(value) for value in point) + "\n")
                    # NOTE(review): these labels use "." while the ordination
                    # rows and the metadata below use "_t" -- confirm that
                    # the mismatch is intended.
                    coords[str(identifier) + "." + str(i)] = point
            ordination.write("\n")
            ordination.write("Biplot\t0\t0\n\n")
            ordination.write("Site constraints\t0\t0\n")

        # Parse only after the writer has been closed, so that every
        # buffered line has reached the file.
        ordination_results = OrdinationResults.read(ordination_fp)

    # Distance matrix (euclidean): a header row, then one row per label.
    labels = list(coords)
    distance_matrix = [[""] + labels]
    for label in labels:
        distances = [str(distance.euclidean(coords[label], coords[other]))
                     for other in labels]
        distance_matrix.append([label] + distances)

    # Mapping file
    # TODO: return the mapping file as well; for now it is built but unused.
    md_0 = ["#SampleID", "Subject", "Treatment", "Timepoint"]
    md_1 = ["#q2:types", "categorical", "categorical", "numeric"]
    metadata = [md_0, md_1]
    for sample_id in ids:
        # Treatment: the identifier without digits and without its last
        # remaining character.
        treatment = ''.join(k for k in sample_id if not k.isdigit())[:-1]
        for i in range(len(data[0][0])):
            metadata.append([sample_id + "_t" + str(i), sample_id,
                             treatment, i])

    return ordination_results, distance_matrix
コード例 #8
0
def _validate_ordination_results(files, metadata, out_dir):
    """Validate that every ordination sample is described by the metadata.

    Parameters
    ----------
    files : dict
        Maps a filepath type to a list of filepaths; only the first
        ``'plain_text'`` entry is read.
    metadata : dict
        Its keys are compared with the sample ids of the ordination.
    out_dir : str
        Not used by this validator.

    Returns
    -------
    tuple
        ``(True, [ArtifactInfo], "")`` when all ordination sample ids are
        keys of ``metadata``; ``(False, None, message)`` otherwise.
    """
    # Only one plain text file is expected, and it holds the ordination
    # results -- hence the hard-coded index 0.
    ord_res_fp = files['plain_text'][0]
    sample_ids = set(OrdinationResults.read(ord_res_fp).samples.index)
    known_ids = set(metadata)

    if sample_ids <= known_ids:
        artifact = ArtifactInfo(None, 'ordination_results',
                                [(ord_res_fp, 'plain_text')])
        return True, [artifact], ""

    return (False, None, "The ordination results contain samples not "
            "present in the metadata")
コード例 #9
0
 def test_standalone_rpca_n_components(self):
     """Tests the standalone RPCA script when n_components is 2.

     The ordination written by the command is expected to expose three
     axes in ``proportion_explained``.
     """
     in_ = get_data_path('test.biom', subfolder='rpca_data')
     # The command writes into the directory that holds the input table.
     out_ = os_path_sep.join(in_.split(os_path_sep)[:-1])
     runner = CliRunner()
     # Run the same command but with rank == 2. Arguments are passed as
     # strings, exactly as they would arrive from a real command line.
     result = runner.invoke(sdc.commands['rpca'], [
         '--in-biom', in_, '--output-dir', out_, '--n_components', '2',
         '--max_iterations', '5'
     ])
     CliTestCase().assertExitCode(0, result)
     ord_res = OrdinationResults.read(
         get_data_path('ordination.txt', subfolder='rpca_data'))
     # Check it contains three axes. This used to be an ``if ...: pass``
     # that could never fail.
     # NOTE(review): presumably the command pads a third axis when
     # n_components is 2 -- confirm against the RPCA implementation.
     self.assertEqual(len(ord_res.proportion_explained), 3)
コード例 #10
0
    def setUp(self):
        """Load the PCoA fixture and build the sample metadata table.

        Sets ``self.ord_res`` (parsed from ``PCOA_STRING``) and ``self.mf``
        (a DataFrame indexed by ``SampleID``).
        """
        self.ord_res = OrdinationResults.read(StringIO(PCOA_STRING))

        headers = ['SampleID', 'Treatment', 'DOB', 'Description']
        rows = [
            ['PC.354', 'Control', '20061218', 'Ctrol_mouse_I.D._354'],
            ['PC.355', 'Control', '20061218', 'Control_mouse_I.D._355'],
            ['PC.356', 'Control', '20061126', 'Control_mouse_I.D._356'],
            ['PC.481', 'Control', '20070314', 'Control_mouse_I.D._481'],
            ['PC.593', 'Control', '20071210', 'Control_mouse_I.D._593'],
            ['PC.607', 'Fast', '20071112', 'Fasting_mouse_I.D._607'],
            ['PC.634', 'Fast', '20080116', 'Fasting_mouse_I.D._634'],
            ['PC.635', 'Fast', '20080116', 'Fasting_mouse_I.D._635'],
            ['PC.636', 'Fast', '20080116', 'Fasting_mouse_I.D._636'],
        ]
        # Index the metadata by sample id.
        table = pd.DataFrame(data=rows, columns=headers)
        self.mf = table.set_index('SampleID')
コード例 #11
0
    def test_qiime2_rpca(self):
        """Tests that the Q2 and standalone RPCA distance matrices match."""

        tstdir = "test_output"
        # Run DEICODE through QIIME 2 (specifically, the Artifact API)
        ordination_qza, distmatrix_qza = q2deicode.actions.rpca(self.q2table)
        # Get the underlying data from these artifacts. The ordination is
        # only converted, not compared, in this test.
        q2ordination = ordination_qza.view(OrdinationResults)
        q2distmatrix = distmatrix_qza.view(DistanceMatrix)

        # Next, run DEICODE outside of QIIME 2. We're going to check that
        # everything matches up.
        # ...First, though, we need to write the contents of self.q2table to a
        # BIOM file, so DEICODE can understand it.
        self.q2table.export_data(get_data_path("", tstdir))
        q2table_loc = get_data_path('feature-table.biom', tstdir)
        # Derived from a line in test_standalone_rpca()
        tstdir_absolute = os_path_sep.join(q2table_loc.split(os_path_sep)[:-1])

        # Run DEICODE outside of QIIME 2...
        result = CliRunner().invoke(standalone_rpca,
                                    ['--in-biom', q2table_loc,
                                     '--output-dir', tstdir_absolute])
        # ...and make sure it succeeded before reading its output: after a
        # failed run the files below may be missing or left over from an
        # earlier run, which would hide the real cause.
        np.testing.assert_equal(result.exit_code, 0, err_msg=result.output)

        # Read in the resulting output files. This code was derived from
        # test_standalone_rpca() elsewhere in DEICODE's codebase. Reading the
        # ordination checks that the file parses; it is not compared here.
        stordination = OrdinationResults.read(get_data_path('ordination.txt',
                                                            tstdir))
        stdistmatrix_values = read_csv(
            get_data_path(
                'distance-matrix.tsv',
                tstdir),
            sep='\t',
            index_col=0).values

        # Convert the DistanceMatrix object to a numpy array (which we can
        # compare with the other _values numpy arrays we've created from the
        # other distance matrices)
        q2distmatrix_values = q2distmatrix.to_data_frame().values

        # Finally: actually check the consistency of Q2 and standalone results!
        np.testing.assert_array_almost_equal(q2distmatrix_values,
                                             stdistmatrix_values)
コード例 #12
0
def create_emperor_visual(args, pcfile):
    """Render a standalone Emperor plot from an ordination (.pc) file.

    The metadata is loaded from ``args.map_fp``; the plot is written to
    ``<args.basedir>/<args.prefix>_pcoa3d/index.html`` and Emperor's support
    files are copied into the same directory.

    Sample .pc file (fields are tab-separated in the real file):

        Eigvals 4
        0.2705559825337763 0.07359266496720843 0.02997793703738496 0.0

        Proportion explained 4
        0.7231669539538659 0.19670525434062255 0.0801277917055116 0.0

        Species 0 0

        Site 4 4
        ICM_LCY_Bv6--LCY_0001_2003_05_11 -0.04067063044757823 -0.09380781760926289 0.13680474645584195 0.0
        ICM_LCY_Bv6--LCY_0003_2003_05_04 -0.11521436634022217 -0.15957409396683217 -0.10315005726535573 0.0
        ICM_LCY_Bv6--LCY_0005_2003_05_16 0.4268532792747924 0.06657577342833808 -0.02212569426459717 0.0
        ICM_LCY_Bv6--LCY_0007_2003_05_04 -0.2709682824869916 0.18680613814775715 -0.011528994925888972 0.0

        Biplot 0 0

        Site constraints 0 0
    """
    from emperor import Emperor
    from skbio import OrdinationResults

    metadata = load_mf(args.map_fp)
    # The ordination has to be read from a file (scikit-bio 0.5.1, see
    # http://scikit-bio.org/docs/0.5.1/generated/generated/skbio.stats.ordination.OrdinationResults.html)
    ordination = OrdinationResults.read(pcfile)
    viewer = Emperor(ordination, metadata)

    out_dir = os.path.join(args.basedir, args.prefix + '_pcoa3d')
    print('OUT?', out_dir, args.basedir)
    os.makedirs(out_dir, mode=0o777, exist_ok=True)

    index_fp = os.path.join(out_dir, 'index.html')
    with open(index_fp, 'w') as html:
        html.write(viewer.make_emperor(standalone=True))
        viewer.copy_support_files(out_dir)
コード例 #13
0
def create_emperor_visual(args, pcfile):
    """Render a standalone Emperor plot from an ordination (.pc) file.

    The metadata is loaded from ``args.map_fp``; the plot is written to
    ``<args.basedir>/views/tmp/<args.prefix>_pcoa3d/index.html`` and
    Emperor's support files are copied into the same directory.

    Sample .pc file (fields are tab-separated in the real file):

        Eigvals 4
        0.2705559825337763 0.07359266496720843 0.02997793703738496 0.0

        Proportion explained 4
        0.7231669539538659 0.19670525434062255 0.0801277917055116 0.0

        Species 0 0

        Site 4 4
        ICM_LCY_Bv6--LCY_0001_2003_05_11 -0.04067063044757823 -0.09380781760926289 0.13680474645584195 0.0
        ICM_LCY_Bv6--LCY_0003_2003_05_04 -0.11521436634022217 -0.15957409396683217 -0.10315005726535573 0.0
        ICM_LCY_Bv6--LCY_0005_2003_05_16 0.4268532792747924 0.06657577342833808 -0.02212569426459717 0.0
        ICM_LCY_Bv6--LCY_0007_2003_05_04 -0.2709682824869916 0.18680613814775715 -0.011528994925888972 0.0

        Biplot 0 0

        Site constraints 0 0
    """
    from emperor import Emperor
    from skbio import OrdinationResults

    metadata = load_mf(args.map_fp)
    # The ordination has to be read from a file (scikit-bio 0.5.1, see
    # http://scikit-bio.org/docs/0.5.1/generated/generated/skbio.stats.ordination.OrdinationResults.html)
    ordination = OrdinationResults.read(pcfile)
    viewer = Emperor(ordination, metadata)

    out_dir = os.path.join(args.basedir, 'views', 'tmp',
                           args.prefix + '_pcoa3d')
    print('OUT?', out_dir, args.basedir)
    os.makedirs(out_dir, exist_ok=True)

    index_fp = os.path.join(out_dir, 'index.html')
    with open(index_fp, 'w') as html:
        html.write(viewer.make_emperor(standalone=True))
        viewer.copy_support_files(out_dir)
コード例 #14
0
def _build_parser():
    """Build the parser with the betadiv, taxsum, pcoa and emperor
    subcommands."""
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=textwrap.dedent(
            '''additional information: output of taxsum, betadiv and pcoa are written in STDOUT
If you use microbiomeutils in your work, please cite:

    Tremblay, Julien
    microbiomeutils 0.9 : Microbiome utilities
    https://github.com/jtremblay/microbiomeutils
    
Thank you.'''))
    subparsers = parser.add_subparsers(title='subcommands',
                                       description='valid subcommands',
                                       help='additional help',
                                       dest="command")

    # Every input option below is dereferenced unconditionally by main(), so
    # it is declared required: leaving one out yields a usage error instead
    # of an AttributeError/TypeError deep inside the dispatch code.
    parser_bd = subparsers.add_parser('betadiv')
    parser_bd.add_argument('-i',
                           '--infile-feature-table',
                           help="Input file",
                           type=argparse.FileType('r'),
                           required=True)
    parser_bd.add_argument("-m",
                           "--metric",
                           help="Diversity metric (default: bray-curtis)",
                           choices=["bray-curtis", "weighted-unifrac"],
                           default="bray-curtis")
    # Only needed for weighted-unifrac; main() checks that combination.
    parser_bd.add_argument("-t",
                           "--infile-tree",
                           help="Tree file (for weighted uniFrac)",
                           type=argparse.FileType('r'))

    parser_ts = subparsers.add_parser('taxsum')
    parser_ts.add_argument('-i',
                           '--infile-feature-table',
                           help="Input file",
                           type=argparse.FileType('r'),
                           required=True)
    parser_ts.add_argument("-t",
                           "--sumtype",
                           help="Summary type (default: absolute)",
                           choices=["absolute", "relative"],
                           default="absolute")
    # NOTE(review): the help text says 1 to 7 but "8" is accepted as well --
    # confirm which of the two is intended.
    parser_ts.add_argument("-l",
                           "--level",
                           help="Level <int> 1 to 7",
                           choices=["1", "2", "3", "4", "5", "6", "7", "8"],
                           default="3")

    parser_pcoa = subparsers.add_parser('pcoa')
    parser_pcoa.add_argument('-i',
                             '--infile-distance-matrix',
                             help="Input file",
                             type=argparse.FileType('r'),
                             required=True)

    parser_emp = subparsers.add_parser('emperor')
    parser_emp.add_argument('-i',
                            '--infile-coords',
                            help="Input file",
                            type=argparse.FileType('r'),
                            required=True)
    parser_emp.add_argument('-m',
                            '--mapping-file',
                            help="Mapping file",
                            type=argparse.FileType('r'),
                            required=True)
    parser_emp.add_argument('-o',
                            '--outdir',
                            help="Output directory",
                            required=True)
    return parser


def main(arguments):
    """Parse ``arguments`` and run the selected subcommand.

    Parameters
    ----------
    arguments : list of str
        Command-line arguments without the program name.

    Raises
    ------
    ValueError
        If ``betadiv`` is run with ``weighted-unifrac`` and no tree file.
    SystemExit
        Raised by argparse on invalid or missing arguments.
    """
    parser = _build_parser()
    args = parser.parse_args(arguments)

    if args.command == 'betadiv':
        infile_feature_table = os.path.abspath(args.infile_feature_table.name)
        sys.stderr.write("[betadiv]\n")
        if args.infile_tree is None and args.metric == "weighted-unifrac":
            raise ValueError(
                'weighted-unifrac needs a tree supplied. --infile-tree needed')

        if args.metric == "bray-curtis":
            betadiv(infile_feature_table, args.metric)
        else:
            betadiv(infile_feature_table, args.metric, args.infile_tree.name)

    elif args.command == 'taxsum':
        infile_feature_table = os.path.abspath(args.infile_feature_table.name)
        sys.stderr.write("[taxsum]\n")
        taxsum(infile_feature_table, args.sumtype, args.level)

    elif args.command == 'pcoa':
        sys.stderr.write("[pcoa]\n")
        infile_distance_matrix = os.path.abspath(
            args.infile_distance_matrix.name)
        do_pcoa(infile_distance_matrix)

    elif args.command == 'emperor':
        sys.stderr.write("[emperor]\n")
        metadata = pd.read_csv(args.mapping_file,
                               sep='\t',
                               index_col='#SampleID',
                               dtype={'#SampleID': 'string'})

        ordination = OrdinationResults.read(args.infile_coords)

        # the remote argument refers to where the support files will be located
        # relative to the plot itself i.e. index.html.
        emp = Emperor(ordination, metadata, remote='.')
        output_folder = args.outdir  # new folder where data will be saved

        # create an output directory
        os.makedirs(output_folder, exist_ok=True)

        with open(os.path.join(output_folder, 'index.html'), 'w') as f:
            f.write(emp.make_emperor(standalone=True))
            emp.copy_support_files(output_folder)

    else:
        # No subcommand given: show the usage instead of silently doing
        # nothing.
        parser.print_help(sys.stderr)
コード例 #15
0
 def setUp(self):
     """Load the 'unweighted_unifrac_pc.txt' fixture into
     ``self.test_matrix``."""
     fixture_fp = get_data_path('unweighted_unifrac_pc.txt')
     self.test_matrix = OrdinationResults.read(fixture_fp)
コード例 #16
0
 def setUp(self):
     """Load the 'unweighted_unifrac_pc.txt' fixture into
     ``self.test_matrix``."""
     fixture_fp = get_data_path('unweighted_unifrac_pc.txt')
     self.test_matrix = OrdinationResults.read(fixture_fp)