Example #1
    def test_split_ints(self):
        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')

        # build output file names
        left_path = os.path.join(self.tempdir, 'left.qza')
        right_path = os.path.join(self.tempdir, 'right.qza')

        # TODO: currently must pass `--verbose` to commands invoked by Click's
        # test runner because redirecting stdout/stderr raises an
        # "io.UnsupportedOperation: fileno" error. Likely related to Click
        # mocking a filesystem in the test runner.
        result = self.runner.invoke(
            command, ['split-ints', '--i-ints', self.artifact1_path,
                      '--o-left', left_path, '--o-right', right_path,
                      '--verbose'])
        # command completes successfully and creates the correct
        # output files
        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(left_path))
        self.assertTrue(os.path.exists(right_path))
        # results are correct
        left = Artifact.load(left_path)
        right = Artifact.load(right_path)
        self.assertEqual(left.view(list), [0])
        self.assertEqual(right.view(list), [42, 43])
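
Note: several q2cli tests in this listing (this one and Examples #17, #24, #35, #38) rely on a shared fixture that is not shown. Below is a minimal sketch of what it presumably provides, with names inferred from usage rather than taken from the actual q2cli test base:

import os
import tempfile
import unittest

from click.testing import CliRunner
from qiime2 import Artifact


class CliTests(unittest.TestCase):
    def setUp(self):
        # Requires QIIME 2's dummy plugin to be registered for 'IntSequence1'.
        self.runner = CliRunner()
        self.tempdir = tempfile.mkdtemp(prefix='q2cli-test-')
        self.artifact1_path = os.path.join(self.tempdir, 'a1.qza')
        # The [0] / [42, 43] split expected above implies this payload.
        Artifact.import_data('IntSequence1', [0, 42, 43], list).save(
            self.artifact1_path)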
Example #2
    def setUp(self):
        super().setUp()
        self.preprocess = self.plugin.pipelines['preprocess']

        continuous_metadata = pd.DataFrame(
            {
                'target': ['1.0', '2.0', '3.0', '4.0'],
                'contain_nan': ['3.3', '3.5', None, '3.9']
            },
            index=pd.Index(['A', 'B', 'C', 'D'], name='id'))
        self.continuous_metadata = continuous_metadata

        discrete_metadata = pd.DataFrame(
            {
                'target': ['0', '1', '0', '1'],
                'target_int': [1, 0, 1, 0],
                'contain_nan': ['0', '1', None, '1'],
                'non_encoded': ['10', '2', '', 'b']
            },
            index=pd.Index(['A', 'B', 'C', 'D'], name='id'))
        self.discrete_metadata = discrete_metadata

        TEST_DIR = path.split(__file__)[0]
        md_path = path.join(TEST_DIR, 'data/sample-metadata-binary.tsv')
        table_path = path.join(TEST_DIR, 'data/table.qza')
        rooted_tree_path = path.join(TEST_DIR, 'data/rooted-tree.qza')
        unrooted_tree_path = path.join(TEST_DIR, 'data/unrooted-tree.qza')

        self.mp_sample_metadata = Metadata.load(md_path)
        self.mp_table = Artifact.load(table_path)
        self.mp_rooted_tree = Artifact.load(rooted_tree_path)
        self.mp_unrooted_tree = Artifact.load(unrooted_tree_path)
Example #4
 def setUp(self):
     self.base_dir = \
         os.path.join(os.path.dirname(os.path.realpath(__file__)),
                      'files/little_test')
     self.aligned_seqs = pd.Series({
         'seq01':
         DNA('-CTAGTCATGCGAAGCGGCTCAGGATGATGATGAAGAC---------------------------------'
             ),
         'seq02':
         DNA('ACTAGTCATGCGAAGCGGCTCAGGATGATGATGAAGAC---------------------------------'
             ),
         'seq03':
         DNA('CATAGTCATWTCCGCGTTGGAGTTATGATGATGAWACCACCTCGTCCCAGTTCCGCGCTTCTGACGTGCA-'
             ),
         'seq04':
         DNA('------------------GGAGTTATGATGA--AGACCACCTCGTCCCAGTTCCGCGCTTCTGACGTGCAC'
             ),
         'seq05':
         DNA('CATAGTCATCGTTTATGTATGCCCATGATGATGCGAGCACCTCGTATGGATGTAGAGCCACTGACGTGCGG'
             ),
     })
     kmer1 = Artifact.load(os.path.join(self.base_dir,
                                        'frag_r1_db_map.qza'))
     kmer2 = Artifact.load(os.path.join(self.base_dir,
                                        'frag_r2_db_map.qza'))
     self.kmer_map1 = kmer1.view(pd.DataFrame)
     self.kmer_map2 = kmer2.view(pd.DataFrame)
     np.random.seed(5)
Example #5
    def validate_analysis_input(feature_table, rep_seqs, taxonomy):
        """
		Precheck input files prior to running denoise step

		Input:
			- feature_table: Path to QIIME2 artifact of type FeatureTable[Frequency]
			- rep_seqs: Path to QIIME2 artifact of type FeatureData[Sequence]
		"""
        # Check Artifact type
        try:
            feature_table_artifact = Artifact.load(feature_table)
            rep_seqs_artifact = Artifact.load(rep_seqs)

            if str(feature_table_artifact.type) != "FeatureTable[Frequency]":
                msg = "Input Feature Table is not of type 'FeatureTable[Frequency]'!"
                raise ValueError(msg)

            if str(rep_seqs_artifact.type) != "FeatureData[Sequence]":
                msg = "Input Representative Sequences is not of type 'FeatureData[Sequence]'!"
                raise ValueError(msg)

        except ValueError as err:
            message = str(err)

            return 400, message

        return 200, "Imported data good!"
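
A minimal caller for this validator might look like the following sketch (assuming it is exposed as a module-level or static function; the file paths are hypothetical):

status, message = validate_analysis_input(
    "feature-table.qza", "rep-seqs.qza", "taxonomy.qza")
if status != 200:
    raise SystemExit(message)  # 400: an artifact had the wrong type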
Example #6
def load_mp_data():
    """Loads data from the QIIME 2 moving pictures tutorial for visualization.

    It's assumed that this data is already stored in docs/moving-pictures/, aka
    the PREFIX_DIR global variable set above, which should be located relative
    to where this function is being run from. If this directory or the data
    files within it cannot be accessed, this function will (probably) break.

    Returns
    -------
    (tree, table, md, fmd, pcoa)
        tree: Artifact with semantic type Phylogeny[Rooted]
            Phylogenetic tree.
        table: Artifact with semantic type FeatureTable[Frequency]
            Feature table.
        md: Metadata
            Sample metadata.
        fmd: Metadata
            Feature metadata. (Although this is stored in the repository as a
            FeatureData[Taxonomy] artifact, we transform it to Metadata.)
        pcoa: Artifact with semantic type PCoAResults
            Ordination.
    """
    tree = Artifact.load(os.path.join(PREFIX_DIR, "rooted-tree.qza"))
    table = Artifact.load(os.path.join(PREFIX_DIR, "table.qza"))
    pcoa = Artifact.load(
        os.path.join(PREFIX_DIR, "unweighted_unifrac_pcoa_results.qza")
    )
    md = Metadata.load(os.path.join(PREFIX_DIR, "sample_metadata.tsv"))
    # We have to transform the taxonomy QZA to Metadata ourselves
    taxonomy = Artifact.load(os.path.join(PREFIX_DIR, "taxonomy.qza"))
    fmd = taxonomy.view(Metadata)
    return tree, table, md, fmd, pcoa
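
Callers typically unpack the returned tuple directly; a small sketch (assuming PREFIX_DIR points at a checkout of docs/moving-pictures/):

tree, table, md, fmd, pcoa = load_mp_data()
print(table.type)               # FeatureTable[Frequency]
print(md.to_dataframe().shape)  # samples x metadata columns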
Example #7
 def setUp(self):
     self.exp_demux = Artifact.load(dir_path + "/data/mock-3/exp_demux.qza")
     self.exp_deblurred = Artifact.load(dir_path +
                                        "/data/mock-3/deblurred_150nt.qza")
     self.exp_deblur_biom = self.exp_deblurred.view(biom.Table)
     self.exp_deblurred_pt = Artifact.load(
         dir_path + "/data/mock-3/deblurred_100nt_pt.qza")
     self.num_parallel = NUM_CORES
Example #8
def analysis(input_fp, input_path_file, clps_df, output_fp, trim_incr,
             num_trims, trim_lengths):
    start = time.perf_counter()  # time.clock() was removed in Python 3.8

    if output_fp.endswith('/'):
        output_fp = output_fp[:-1]

    if input_fp is None and input_path_file is None:
        click.echo("No inputs supplied, see --help!")
        return
    elif input_fp is not None:
        if input_fp.endswith('/'):
            input_fp = input_fp[:-1]
        pres = dict()
        res = [f for f in os.listdir(input_fp)
               if re.match(r'deblurred_pre_\d+\.qza', f)]
        for f in res:
            # Extract <length> from e.g. "deblurred_pre_100.qza".
            fm = f.replace("_", ".").split(".")
            length = int(fm[-2])
            artifact = Artifact.load(input_fp + "/" + f)
            pres[length] = artifact
        pre_artifacts = [pres[x] for x in sorted(pres.keys())]
        pre_artifacts.reverse()

        posts = dict()
        res = [f for f in os.listdir(input_fp)
               if re.match(r'deblurred_pt_\d+\.qza', f)]
        for f in res:
            fm = f.replace("_", ".").split(".")
            length = int(fm[-2])
            artifact = Artifact.load(input_fp + "/" + f)
            posts[length] = artifact
        post_artifacts = [posts[x] for x in sorted(posts.keys())]
        post_artifacts.reverse()

        clps_df = pd.read_csv(input_fp + "/collapse.csv")
    else:
        if clps_df is None:
            click.echo("Supply collapse path!")
            return
        paths = pd.read_csv(input_path_file, header=None)
        pre_artifacts = [load_artifact(x) for x in paths.iloc[:,0]]
        post_artifacts = [load_artifact(x) for x in paths.iloc[:,1]]
        clps_df = pd.read_csv(clps_df)

    click.echo("{}s for loading qza's for analysis"
               .format(time.perf_counter() - start))

    if len(trim_lengths) == 0:
        trim_lengths = None

    return analysis_art(pre_artifacts, post_artifacts, clps_df,
                        trim_incr, num_trims, output_fp, trim_lengths)
Example #9
 def test_reconstruct_fragment_rep_seqs(self):
     recon_map = Artifact.import_data(
         'FeatureData[SidleReconstruction]', 
         pd.DataFrame(data=[['seq01|seq02'], 
                            ['seq01|seq02'], 
                            ['seq03|seq04'], 
                            ['seq03|seq04'], 
                            ['seq05']],
                   index=pd.Index(['seq01', 'seq02', 'seq03', 'seq04', 
                                   'seq05'], name='db-seq'),
                   columns=['clean_name'])
         )
     recon_summary = Artifact.import_data(
         'FeatureData[ReconstructionSummary]',
         Metadata(pd.DataFrame(data=[[1, 2, 2, 0, 'asv01|asv02'],
                                     [2, 3, 1.5, np.std([1, 2], ddof=1), 
                                      'asv03|asv04'],
                                     [2, 2, 1, 0, 'asv07|asv08']],
                              index=pd.Index(['seq01|seq02', 'seq03|seq04', 
                                              'seq05'], name='feature-id'),
                             columns=['num-regions', 'total-kmers-mapped', 
                                      'mean-kmer-per-region', 
                                      'stdv-kmer-per-region', 
                                      'mapped-asvs']))
     )
     aligned_seqs = Artifact.import_data(
         'FeatureData[AlignedSequence]', 
         skbio.TabularMSA([
             DNA('CTAGTCATGCGAAGCGGCTCAGGATGATGATGAAGAC-------------------'
                 '--------------', metadata={'id': 'seq01'}),
             DNA('CTAGTCATGCGAAGCGGCTCAGGATGATGATGAAGAC-------------------'
                 '--------------', metadata={'id': 'seq02'}),
             DNA('CATAGTCATWTCCGCGTTGGAGTTATGATGATGAWACCACCTCGTCCCAGTTCCGC'
                 'GCTTCTGACGTGC-', metadata={'id': 'seq03'}),
             DNA('------------------GGAGTTATGATGA--AGACCACCTCGTCCCAGTTCCGC'
                 'GCTTCTGACGTGCC', metadata={'id': 'seq04'}),
             DNA('CATAGTCATCGTTTATGTATGCCCATGATGATGCGAGCACCTCGTATGGATGTAGA'
                 'GCCACTGACGTGCG', metadata={'id': 'seq05'}),
         ])
     )
     known = pd.Series(
         data=['GCGAAGCGGCTCAGG',
               'WTCCGCGTTGGAGTTATGATGATGAGACCACCTCGTCCCAGTTCCGCGCTTC'],
         index=pd.Index(['seq01|seq02', 'seq03|seq04']),
         )
     test = sidle.reconstruct_fragment_rep_seqs(
         region=['Bludhaven', 'Gotham'],
         kmer_map=[Artifact.load(os.path.join(self.base_dir, 
                                 'frag_r1_db_map.qza')),
                   Artifact.load(os.path.join(self.base_dir, 
                                 'frag_r2_db_map.qza'))],
         reconstruction_map=recon_map, 
         reconstruction_summary=recon_summary, 
         aligned_sequences=aligned_seqs,
         ).representative_fragments
     pdt.assert_series_equal(known, test.view(pd.Series).astype(str))
Example #10
def pre_trims(input_fp, trim_length, trim_incr,
              num_trims, output_fp, num_cores):
    """Quality filters and then pre_trims sequences to various pre-trim lengths
    Saves qza's if specified. With naming format "deblurred_pre_<length>nt.qza

    Parameters
    ----------
    input_fp: path
        Path to qza of demuxed sequences
    trim_length: int, optional
        Length to trim to. If not supplied, longest possible length is used.
        This takes a while so do supply if possible
    trim_incr: int, optional
        Percent amount to decrement by.
    num_trims: int, optional
        Number of different lengths to trim to. Each trim_incr % less.
    output_fp: path, optional
        Path to output deblurred qza files
    num_cores: int, optional
        Number of cores to parallelize deblur

    Returns
    -------
    list of length trim_lengths of deblurred seq artifacts
    """
    start = time.perf_counter()
    click.echo("Importing seq data from " + input_fp)
    input_artifact = Artifact.load(input_fp)

    if output_fp.endswith('/'):
        output_fp = output_fp[:-1]

    click.echo("{}s for importing for pre".format(str(time.clock() - start)))
    return pre_trims_art(input_artifact, trim_length, trim_incr, num_trims,
                         output_fp, num_cores)
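
The "each trim_incr % less" scheme in the docstring presumably produces lengths like the following; this is a sketch of the assumed arithmetic, not the actual pre_trims_art implementation:

def assumed_trim_lengths(trim_length, trim_incr, num_trims):
    """E.g. trim_length=100, trim_incr=10, num_trims=3 -> [100, 90, 80]."""
    step = int(trim_length * trim_incr / 100)
    return [trim_length - i * step for i in range(num_trims)]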
Example #11
def post_trims(input_fp, output_fp, trim_lengths,
               output_name, time_out, time_out_append, partition_count,
               input_biom_fp, save_biom):
    start = time.perf_counter()

    if output_fp.endswith('/'):
        output_fp = output_fp[:-1]

    if input_fp is None and input_biom_fp is None:
        click.echo("No input given! See --help")
        return
    elif input_fp is None:
        input_artifact = None
        click.echo("Loading biom table")
        input_biom = biom.load_table(input_biom_fp)
    else:
        click.echo("Importing seq data from " + input_fp)
        input_artifact = Artifact.load(input_fp)
        input_biom = None

    click.echo("{}s for importing for post_trims"
               .format(time.perf_counter() - start))
    click.echo("partition_count: {}".format(partition_count))
    return post_trims_art(output_fp, input_artifact,
                          trim_lengths, output_name, time_out, time_out_append,
                          partition_count, input_biom, save_biom)
Example #12
def get_itol_barchart(fdata: pd.DataFrame, table_file: str, metadata_file: str,
                      metadata_column: str, output_file: str):
    '''Generate a table in QIIME 2 artifact format which can be directly
    parsed by iTOL and yield a multi-bar chart.
    '''
    # load sample feature table
    table = Artifact.load(table_file)

    # extract BIOM table
    table = table.view(biom.Table)

    # load sample metadata
    meta = Metadata.load(metadata_file)

    # generate a sample Id to category map
    column = meta.get_column(metadata_column).drop_missing_values()
    catmap = column.to_series().to_dict()

    # collapse feature table by category
    # note: when multiple samples map to one category, take **mean**
    table = table.collapse(lambda i, _: catmap[i], norm=True, axis='sample')

    # import BIOM table into QIIME 2 and save
    res = Artifact.import_data('FeatureTable[Frequency]', table)
    res.save(output_file)
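
A hypothetical invocation (all file names are placeholders; note that fdata is accepted but unused in the body shown above):

get_itol_barchart(
    fdata=None,  # unused by the body above
    table_file="table.qza",
    metadata_file="sample-metadata.tsv",
    metadata_column="body-site",
    output_file="itol-barchart.qza",
)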
Example #13
def read_results(path):
    """Read the results from a MICOM simulation.

    Parameters
    ----------
    path : str
        The path to a MicomResults artifact.

    Returns
    -------
    MicomResultsData
        A named tuple with the following attributes:

        growth_rates : pd.DataFrame
            The growth rates for each taxon and sample.

        exchange_fluxes : pd.DataFrame
            The exchange fluxes for each metabolite, sample and taxon. Fluxes
            that denote transport from and into the environment are denoted
            with the taxon `medium`.
    """
    from qiime2 import Artifact

    art = Artifact.load(path)
    return art.view(MicomResultsData)
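
Typical use is a short sketch like this; the .qza path is hypothetical:

results = read_results("micom-results.qza")
print(results.growth_rates.head())
print(results.exchange_fluxes.head())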
Example #14
def load_qiime2_artifact(feature_table):
    """
    Load the output of QIIME2 DADA2 (QIIME2 feature table artifact) into Python

    ** Will throw errors if the artifact type is NOT FeatureTable[Frequency] **
    You may check Artifact type by checking the "type" property of the Artifact
    object after loading the artifact via 'Artifact.load(artifact)'
    """
    # Make sure input actually exists
    if not os.path.isfile(feature_table):
        msg = "Input file '{in_file}' does NOT exist!".format(
                in_file=feature_table)
        raise FileNotFoundError(msg)

    try:
        feature_table_artifact = Artifact.load(feature_table)

        # Check Artifact type
        if str(feature_table_artifact.type) != "FeatureTable[Frequency]":
            msg = "Input QIIME2 Artifact is not of the type 'FeatureTable[Frequency]'!"
            raise ValueError(msg)

        feature_table_df = feature_table_artifact.view(pd.DataFrame)

        return feature_table_df
    except Exception as err:
        logger.error(err)
        raise
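
A hedged usage sketch (the path is hypothetical):

try:
    df = load_qiime2_artifact("dada2-feature-table.qza")
except (FileNotFoundError, ValueError) as err:
    raise SystemExit(err)
print(df.shape)  # samples x features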
Example #15
    def test_variadic_inputs(self):
        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')
        output_path = os.path.join(self.tempdir, 'output.qza')

        ints1 = Artifact.import_data('IntSequence1', [1, 2, 3]).save(
            os.path.join(self.tempdir, 'ints1.qza'))
        ints2 = Artifact.import_data('IntSequence2', [4, 5, 6]).save(
            os.path.join(self.tempdir, 'ints2.qza'))
        set1 = Artifact.import_data('SingleInt', 7).save(
            os.path.join(self.tempdir, 'set1.qza'))
        set2 = Artifact.import_data('SingleInt', 8).save(
            os.path.join(self.tempdir, 'set2.qza'))

        result = self.runner.invoke(command, [
            'variadic-input-method', '--i-ints', ints1, '--i-ints', ints2,
            '--i-int-set', set1, '--i-int-set', set2, '--p-nums', '9',
            '--p-nums', '10', '--p-opt-nums', '11', '--p-opt-nums', '12',
            '--p-opt-nums', '13', '--o-output', output_path, '--verbose'
        ])

        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(output_path))

        output = Artifact.load(output_path)
        self.assertEqual(output.view(list), list(range(1, 14)))
Example #16
def convert2table(infile, ofile):
    artifact = Artifact.load(infile)
    stats = artifact.view(pd.DataFrame)
    stats.index = stats.index.astype(str)
    ofile_path = abspath(ofile)
    stats.to_csv(ofile_path, sep='\t', index=True)
    return ofile_path
Example #17
    def test_repeated_multiple_option(self):
        input_path = os.path.join(self.tempdir, 'ints.qza')
        artifact = Artifact.import_data(IntSequence1, [0, 42, 43], list)
        artifact.save(input_path)

        metadata_path1 = os.path.join(self.tempdir, 'metadata1.tsv')
        with open(metadata_path1, 'w') as f:
            f.write('id\tcol1\nid1\tfoo\nid2\tbar\n')
        metadata_path2 = os.path.join(self.tempdir, 'metadata2.tsv')
        with open(metadata_path2, 'w') as f:
            f.write('id\tcol2\nid1\tbaz\nid2\tbaa\n')

        output_path = os.path.join(self.tempdir, 'out.qza')

        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')

        result = self.runner.invoke(command, [
            'identity-with-metadata', '--i-ints', input_path, '--o-out',
            output_path, '--m-metadata-file', metadata_path1,
            '--m-metadata-file', metadata_path2, '--verbose'
        ])

        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(output_path))
        self.assertEqual(Artifact.load(output_path).view(list), [0, 42, 43])
Example #19
 def setUp(self):
     super().setUp()
     self.demux_seqs = SingleLanePerSampleSingleEndFastqDirFmt(
         self.get_data_path('sample_seqs_other'), 'r')
     self.ref_ar = Artifact.load(
         self.get_data_path('../../assets/test_reference.qza'))
     self.ref = self.ref_ar.view(DNAFASTAFormat)
Example #21
def summarize(input_file, verbose=False):
    """Extract summary or verbose data from an Artifact file.

    This command automatically detects the input file's semantic type and
    then extracts summary or verbose data from it.

    Currently, the command supports the following semantic types:
    FeatureTable[Frequency], FeatureTable[RelativeFrequency],
    FeatureData[Sequence], FeatureData[AlignedSequence],
    FeatureData[Taxonomy], DistanceMatrix.

    Parameters
    ----------
    input_file : str
        Path to the input Artifact file.
    verbose : bool, default: False
        Print a verbose version of the results.
    """
    artifact = Artifact.load(input_file)

    if str(artifact.type) in [
            "FeatureTable[Frequency]", "FeatureTable[RelativeFrequency]"
    ]:
        _parse_feature_table(artifact, verbose)
    elif str(artifact.type) in [
            "FeatureData[Sequence]", "FeatureData[AlignedSequence]"
    ]:
        _parse_feature_data(artifact, verbose)
    elif str(artifact.type) in ["FeatureData[Taxonomy]"]:
        _parse_feature_data2(artifact, verbose)
    elif str(artifact.type) in ["DistanceMatrix"]:
        _parse_distance_matrix(artifact, verbose)
    else:
        raise TypeError(f"Unsupported Artifact type: '{artifact.type}'")
Example #22
def convert2otutab(infile, ofile):
    artifact = Artifact.load(infile)
    dada2_df = artifact.view(qiime2.Metadata).to_dataframe()
    dada2_df.index = dada2_df.index.astype(str)
    ofile_path = abspath(ofile)
    dada2_df.to_csv(ofile_path, sep='\t', index=True)
    return ofile_path
Example #23
def convert_qiime2_2_skbio(pcoa_artifact):
    """
    Convert QIIME2 PCoA artifact to skbio OrdinationResults object.

    ** Will throw errors if the artifact type is NOT PCoAResults **
    You may check Artifact type by checking the "type" property of the Artifact
    object after loading the artifact via 'Artifact.load(artifact)'
    """
    pcoa_artifact = Artifact.load(pcoa_artifact)

    # Check Artifact type
    if str(pcoa_artifact.type) != "PCoAResults":
        msg = "Input QIIME2 Artifact is not of the type 'PCoAResults'!"
        raise AXIOME3Error(msg)

    pcoa = pcoa_artifact.view(ordination.OrdinationResults)

    # Rename PCoA coordinates index (so left join can be performed later)
    coords = pcoa.samples

    coords.index.names = ['SampleID']

    # Rename columns to have more meaningful names
    num_col = coords.shape[1]
    col_names = ['Axis ' + str(i) for i in range(1, num_col + 1)]
    coords.columns = col_names

    pcoa.samples = coords

    return pcoa
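
The index rename above exists so the coordinates can later be left-joined onto sample metadata; a one-line sketch of that step, where the .qza path and metadata_df (a DataFrame indexed by SampleID) are hypothetical:

merged = convert_qiime2_2_skbio("pcoa.qza").samples.join(metadata_df, how='left')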
Example #24
    def test_no_optional_artifacts_provided(self):
        result = self._run_command(
            'optional-artifacts-method', '--i-ints', self.ints1,
            '--p-num1', '42', '--o-output', self.output, '--verbose')

        self.assertEqual(result.exit_code, 0)
        self.assertEqual(Artifact.load(self.output).view(list),
                         [0, 42, 43, 42])
Example #25
def load_classifier(c_path):
    # c_path = '/home/liaoth/data2/gg-13-8-99-nb-classifier.qza'
    # The original check was inverted: a prebuilt classifier *is* a .qza file,
    # and the non-.qza branch left `classifier` unbound.
    if c_path.endswith('.qza'):
        classifier = Artifact.load(c_path)
    else:
        # todo: implement a classifier training module.
        raise NotImplementedError(
            "training a classifier from raw reference data is not implemented")
    return classifier
Example #26
def tutorial(dir_name):
    print('\n', 'Running the Artifact API tutorial section', '\n')
    table = dir_name + '/table.qza'
    unrarefied_table = Artifact.load(table)
    rarefy_result = feature_table.methods.rarefy(table=unrarefied_table, sampling_depth=100)
    rarefied_table = rarefy_result.rarefied_table
    biom_table = rarefied_table.view(biom.Table)
    print(biom_table.head())
Example #27
    def test_no_optional_artifacts_provided(self):
        result = self._run_command('optional-artifacts-method', '--i-ints',
                                   self.ints1, '--p-num1', '42', '--o-output',
                                   self.output, '--verbose')

        self.assertEqual(result.exit_code, 0)
        self.assertEqual(
            Artifact.load(self.output).view(list), [0, 42, 43, 42])
Example #28
def qiime_to_biom(input_fp, output_fp):
    """Converts a .qza file to .biom file"""

    as_artifact = Artifact.load(input_fp)
    as_biom = as_artifact.view(biom.Table)
    as_json = as_biom.to_json(generated_by="deblur-testing")
    with open(output_fp, "w") as f:
        f.write(as_json)
Example #29
def load_artifact(artifact):
    '''Load a QIIME 2 artifact from a path and return it as a pandas DataFrame'''

    import pandas as pd
    from qiime2 import Artifact

    artifact = Artifact.load(artifact)
    return artifact.view(pd.DataFrame)
Example #30
    def setUp(self):
        print("Importing metadata for expected")
        self.exp_barcode_metadata = \
            Metadata.load(dir_path + "/data/mock-3/sample-metadata.tsv")

        self.exp_demux = Artifact.load(dir_path + "/data/mock-3/exp_demux.qza")
        self.exp_out = [self.exp_demux, self.exp_barcode_metadata]
        self.working_dir_fp = dir_path + "/data/mock-3"
Example #31
def _parse_input(input, temp_dir):
    """Parse the input QIIME 2 object and export the files."""
    if isinstance(input, qiime2.Artifact):
        fn = f'{temp_dir}/dokdo-temporary.qza'
        input.save(fn)
        input = fn
        Artifact.load(input).export_data(temp_dir)
    elif isinstance(input, qiime2.Visualization):
        fn = f'{temp_dir}/dokdo-temporary.qzv'
        input.save(fn)
        input = fn
        Visualization.load(input).export_data(temp_dir)
    elif isinstance(input, str) and input.endswith('.qza'):
        Artifact.load(input).export_data(temp_dir)
    elif isinstance(input, str) and input.endswith('.qzv'):
        Visualization.load(input).export_data(temp_dir)
    else:
        # Unrecognized input; nothing to export.
        pass
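
A sketch of how this parser might be driven, assuming my_artifact is a qiime2.Artifact already in memory:

import tempfile

with tempfile.TemporaryDirectory() as temp_dir:
    _parse_input(my_artifact, temp_dir)
    # The exported files now live under temp_dir.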
Example #32
def check_artifact_type(artifact_path, artifact_type):
    q2_artifact = Artifact.load(artifact_path)

    # Raise AXIOME3Error if the artifact is not of the expected type
    if str(q2_artifact.type) != ARTIFACT_TYPES[artifact_type]:
        msg = "Input QIIME2 Artifact is not of the type '{}'".format(
            ARTIFACT_TYPES[artifact_type])
        raise AXIOME3Error(msg)

    return q2_artifact
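
ARTIFACT_TYPES is defined elsewhere in the source. A plausible shape for it, plus a call; both are hypothetical:

ARTIFACT_TYPES = {
    "feature_table": "FeatureTable[Frequency]",
    "rep_seqs": "FeatureData[Sequence]",
    "taxonomy": "FeatureData[Taxonomy]",
}

table_artifact = check_artifact_type("table.qza", "feature_table")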
Example #33
def cross_validate_classifier(ref_taxa, ref_seqs, classifier_spec, obs_dir,
                              results_dir, intermediate_dir, n_jobs, log_file,
                              log_level, confidence, classifier_directory):

    classifier_spec = classifier_spec.read()

    # set up logging
    setup_logging(log_level, log_file)
    logging.info(locals())

    # load folds
    taxon_defaults_file = join(intermediate_dir, 'taxon_defaults.json')
    with open(taxon_defaults_file) as fh:
        taxon_defaults = json.load(fh)
    folds = glob.glob(join(intermediate_dir, 'fold-*'))
    logging.info('Got folds')

    # load ref_seq
    _, ref_seqs = load_references(ref_taxa, ref_seqs)
    ref_seqs = Artifact.import_data('FeatureData[Sequence]',
                                    DNAIterator(ref_seqs))

    # for each fold
    for fold in folds:
        # load new file for different folds
        weights_file = join(fold, 'weights.qza')
        training_taxa_file = join(fold, 'train_taxa.qza')

        # load the simulated test samples
        test_samples = load_simulated_samples(fold, results_dir)

        # load the test seqs, training taxa, training seqs, and weights
        weights = Artifact.load(weights_file)
        #test_seqs = Artifact.load(test_seqs_file)
        train_taxa = Artifact.load(training_taxa_file)

        # train the weighted classifier and classify the test samples
        classification = classify_samples_sklearn(test_samples, train_taxa,
                                                  ref_seqs, classifier_spec,
                                                  confidence, n_jobs, weights)
        # save the classified taxonomy artifacts
        save_observed(classifier_directory, test_samples, classification,
                      obs_dir)
        logging.info('Done ' + fold)
Example #34
def cluster_features(query_table: biom.Table, closed_reference_table: biom.Table,
                     query_sequences: DNAFASTAFormat,
                     reference_sequences: pd.Series, thr: float = 0.97,
                     threads: int = 1, output_log_file: str = None
                     ) -> (biom.Table, DNAFASTAFormat, DNAFASTAFormat):
    reference_sequences_fasta = get_reference_seqs_from_ids(closed_reference_table, reference_sequences)
    results = cluster_features_closed_reference(sequences=query_sequences, table=query_table,
                                                reference_sequences=reference_sequences_fasta,
                                                perc_identity=thr, threads=threads)

    clustered_table_biom = results[0]

    clustered_sequences_pd = Artifact.load(str(results[1])).view(pd.Series)
    unmatched_sequences_pd = Artifact.load(str(results[2])).view(pd.Series)

    # tempfile.mktemp() returns a plain string, which is not a context manager;
    # use NamedTemporaryFile to get a usable temporary log file.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.log') as tmp:
        tmp_fp = tmp.name
        logger_ins = LOG(tmp_fp).get_logger('clustering_features')
        logger_ins.info("The number of OTUs in the reference database is", _15(reference_sequences_fasta).size)
        logger_ins.info("The number of unmatched sequence to the reference alignment is", unmatched_sequences_pd.size)
        logger_ins.info("The number of matched sequences to the reference alignment is", clustered_sequences_pd.size)
        logger_ins.info("Before applying clustering, the total number of counts "
                        "in the original feature table was", np.sum(query_table.sum()))
        logger_ins.info("Before applying clustering, the number of non-zero elements"
                        " of the underlying feature table is", query_table.nnz)
        logger_ins.info("After applying clustering, the total number of counts "
                        "in the original feature table was", np.sum(clustered_table_biom.sum()))
        logger_ins.info("After applying clustering, the number of non-zero elements"
                        " of the underlying feature table is", clustered_table_biom.nnz)
        logger_ins.info("The percent of total counts retained is",
                        np.sum(query_table.sum()) / np.sum(clustered_table_biom.sum()) * 100, "%s")

        query_samples = clustered_table_biom.ids('sample')
        closed_reference_features = closed_reference_table.ids('observation')
        clustered_table_biom = closed_reference_table.merge(clustered_table_biom)
        clustered_table_biom.filter(ids_to_keep=query_samples, axis='sample', inplace=True)
        # Compare feature (observation) IDs, not sample IDs.
        if len(set(closed_reference_features) -
               set(clustered_table_biom.ids('observation'))) != 0:
            raise ValueError(
                "Merging two tables failed! There are fewer features in the "
                "final table than expected!"
            )
        if output_log_file:
            shutil.copy(tmp_fp, output_log_file)
    return clustered_table_biom, results[1], results[2]
Example #35
    def test_without_inputs_or_parameters(self):
        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')
        output_path = os.path.join(self.tempdir, 'output.qza')

        result = self.runner.invoke(
            command, ['no-input-method', '--o-out', output_path, '--verbose'])

        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(output_path))

        artifact = Artifact.load(output_path)
        self.assertEqual(artifact.view(dict), {'foo': '42'})
Example #36
    def test_multiple_metadata(self):
        for command in ('identity-with-metadata-category',
                        'identity-with-optional-metadata-category'):
            result = self._run_command(
                command, '--i-ints', self.input_artifact, '--o-out',
                self.output_artifact, '--m-metadata-file', self.metadata_file1,
                '--m-metadata-file', self.metadata_file2, '--m-metadata-file',
                self.metadata_artifact, '--m-metadata-category', 'col2',
                '--verbose')

            exp_yaml = "metadata: !metadata '%s:metadata.tsv'" % (
                Artifact.load(self.metadata_artifact).uuid)
            self._assertMetadataOutput(result, exp_tsv='0\tbaz\n',
                                       exp_yaml=exp_yaml)
Example #37
    def _assertMetadataOutput(self, result, *, exp_tsv, exp_yaml):
        self.assertEqual(result.exit_code, 0)

        artifact = Artifact.load(self.output_artifact)
        action_dir = artifact._archiver.provenance_dir / 'action'

        if exp_tsv is None:
            self.assertFalse((action_dir / 'metadata.tsv').exists())
        else:
            with (action_dir / 'metadata.tsv').open() as fh:
                self.assertEqual(fh.read(), exp_tsv)

        with (action_dir / 'action.yaml').open() as fh:
            self.assertIn(exp_yaml, fh.read())
Example #38
    def test_qza_extension(self):
        qiime_cli = RootCommand()
        command = qiime_cli.get_command(ctx=None, name='dummy-plugin')

        # build output parameter arguments and expected output file names
        left_path = os.path.join(self.tempdir, 'left')
        expected_left_path = os.path.join(self.tempdir, 'left.qza')
        right_path = os.path.join(self.tempdir, 'right')
        expected_right_path = os.path.join(self.tempdir, 'right.qza')

        result = self.runner.invoke(
            command, ['split-ints', '--i-ints', self.artifact1_path,
                      '--o-left', left_path, '--o-right', right_path,
                      '--verbose'])
        # command completes successfully and creates the correct
        # output files
        self.assertEqual(result.exit_code, 0)
        self.assertTrue(os.path.exists(expected_left_path))
        self.assertTrue(os.path.exists(expected_right_path))
        # results are correct
        left = Artifact.load(expected_left_path)
        right = Artifact.load(expected_right_path)
        self.assertEqual(left.view(list), [0])
        self.assertEqual(right.view(list), [42, 43])
Example #39
    def test_multiple_metadata(self):
        for command in ('identity-with-metadata',
                        'identity-with-optional-metadata'):
            result = self._run_command(
                command, '--i-ints', self.input_artifact, '--o-out',
                self.output_artifact, '--m-metadata-file',
                self.metadata_file_alt_id_header, '--m-metadata-file',
                self.metadata_file2, '--m-metadata-file',
                self.metadata_artifact, '--verbose')

            exp_tsv = (
                'id\tcol1\tcol2\ta\tb\n'
                '#q2:types\tcategorical\tcategorical\tcategorical\tcategorical'
                '\n0\tfoo\tbaz\tdog\tcat\n'
            )
            exp_yaml = "metadata: !metadata '%s:metadata.tsv'" % (
                Artifact.load(self.metadata_artifact).uuid)
            self._assertMetadataOutput(result, exp_tsv=exp_tsv,
                                       exp_yaml=exp_yaml)