Exemple #1
0
def main():
    """Train a naive Bayes taxonomy classifier and save it to disk.

    Reads the run options from ``get_args()``, resolves the reference
    sequence and taxonomy file locations with ``get_file_loc()``, imports
    both as QIIME 2 artifacts, extracts the reads bounded by the forward and
    reverse primers given in the options, fits a naive Bayes classifier on
    those reads, and saves the resulting classifier artifact.

    The output name is ``silva132_<perc_id>_v3v4_<gene>``.
    """
    # Parse arguments
    options = get_args()

    seq_loc, tax_loc = get_file_loc(options.gene, options.perc_id)

    # Training feature classifiers with q2-feature-classifier
    # https://docs.qiime2.org/2019.1/tutorials/feature-classifier/
    silva_132 = Artifact.import_data('FeatureData[Sequence]', seq_loc)

    # The taxonomy file is imported with the headerless TSV format.
    silva_132_taxonomy = Artifact.import_data(
        'FeatureData[Taxonomy]', tax_loc,
        view_type='HeaderlessTSVTaxonomyFormat')

    # extract reference reads
    # V3-V4: 341f: CCTACGGGNGGCWGCAG; 806r: GACTACHVGGGTATCTAATCC
    ref_seqs_s = extract_reads(sequences=silva_132,
                               f_primer=options.f_primer,
                               r_primer=options.r_primer)

    # train the classifier
    silva_classifier = fit_classifier_naive_bayes(
        reference_reads=ref_seqs_s.reads,
        reference_taxonomy=silva_132_taxonomy)

    # save the classifier
    # An f-string is used instead of "+" concatenation so that a non-string
    # perc_id (e.g. an int) does not raise TypeError; for string values the
    # resulting name is unchanged.
    # NOTE(review): the "v3v4" tag is hard-coded although the primers come
    # from the options -- confirm the name is still right for other regions.
    silva_classifier.classifier.save(
        f"silva132_{options.perc_id}_v3v4_{options.gene}")
Exemple #2
0
    def setUp(self):
        """Build the shared fixtures: temp dir, reference data and classifier.

        Creates a temporary directory, silences the 'The TaxonomicClassifier '
        UserWarning, imports the bundled sequence and taxonomy test files as
        artifacts, and fits a naive Bayes classifier on them. The artifacts
        are stored on ``self.reads``, ``self.taxonomy`` and
        ``self.classifier``.
        """
        self.temp_dir = tempfile.TemporaryDirectory(
            prefix='q2-clawback-test-temp-')

        filterwarnings('ignore', 'The TaxonomicClassifier ', UserWarning)

        # Reference sequences shipped with the test data.
        fasta_path = self.get_data_path('se-dna-sequences.fasta')
        self.reads = Artifact.import_data('FeatureData[Sequence]', fasta_path)

        # Matching taxonomy for those sequences.
        taxonomy_path = self.get_data_path('taxonomy.tsv')
        self.taxonomy = Artifact.import_data(
            'FeatureData[Taxonomy]', taxonomy_path)

        # Only the classifier artifact of the fit result is kept.
        fit_result = fit_classifier_naive_bayes(self.reads, self.taxonomy)
        self.classifier = fit_result.classifier
    def setUp(self):
        """Fit a classifier on the test data and record its pickled pipeline.

        After the parent ``setUp`` runs, the bundled sequences and taxonomy
        are imported, a naive Bayes classifier is fitted, and its pipeline is
        transformed into a ``TaxonomicClassiferTemporaryPickleDirFmt``. The
        string form of that directory's ``sklearn_pipeline`` member (viewed
        as ``PickleFormat``) is stored on ``self.sklearn_pipeline``.
        """
        super().setUp()

        fasta_path = self.get_data_path('se-dna-sequences.fasta')
        ref_reads = Artifact.import_data('FeatureData[Sequence]', fasta_path)
        taxonomy_path = self.get_data_path('taxonomy.tsv')
        ref_taxonomy = Artifact.import_data(
            'FeatureData[Taxonomy]', taxonomy_path)

        fit_result = fit_classifier_naive_bayes(ref_reads, ref_taxonomy)
        fitted_pipeline = fit_result.classifier.view(Pipeline)

        to_pickle_dir = self.get_transformer(
            Pipeline, TaxonomicClassiferTemporaryPickleDirFmt)
        # Kept on self -- presumably so the temporary directory format
        # outlives setUp; confirm before removing.
        self._sklp = to_pickle_dir(fitted_pipeline)

        pickle_view = self._sklp.sklearn_pipeline.view(PickleFormat)
        self.sklearn_pipeline = str(pickle_view)
    def setUp(self):
        """Prepare ``self.sklearn_pipeline`` from a freshly fitted classifier.

        Runs the parent ``setUp``, imports the sequence and taxonomy test
        files, fits a naive Bayes classifier, converts its ``Pipeline`` view
        with the ``Pipeline`` ->
        ``TaxonomicClassiferTemporaryPickleDirFmt`` transformer, and stores
        ``str()`` of the resulting ``PickleFormat`` view.
        """
        super().setUp()

        # Import the two reference inputs needed for fitting.
        seq_file = self.get_data_path('se-dna-sequences.fasta')
        sequences = Artifact.import_data('FeatureData[Sequence]', seq_file)
        tax_file = self.get_data_path('taxonomy.tsv')
        lineage = Artifact.import_data('FeatureData[Taxonomy]', tax_file)

        # Fit, then pull the pipeline object out of the classifier artifact.
        trained = fit_classifier_naive_bayes(sequences, lineage)
        sk_pipeline = trained.classifier.view(Pipeline)

        # Transform the pipeline into its temporary pickle directory format.
        transform = self.get_transformer(
            Pipeline, TaxonomicClassiferTemporaryPickleDirFmt)
        self._sklp = transform(sk_pipeline)

        pickled = self._sklp.sklearn_pipeline.view(PickleFormat)
        self.sklearn_pipeline = str(pickled)
"""
The following script was used to create two naive Bayes classifiers
for the two Silva reference databases. It must be run inside a qiime2
container; an example definition is provided.
This is an extremely resource-intensive process: it can take in excess
of 7 hours and use 42 GB of RAM.

The classifiers created during this process have been provided at
the OneDrive link
https://1drv.ms/u/s!Aus7JUVmM6BTgbsirlMW6ddWK-bn7Q?e=wNHBJD
"""

import qiime2
from qiime2.plugins.feature_classifier.methods import extract_reads, fit_classifier_naive_bayes

# Import the reference database files.
# Silva 111 is imported from raw FASTA / taxonomy-map files; Silva 138 is
# loaded from existing .qza artifacts.
seqs = qiime2.Artifact.import_data(
    "FeatureData[Sequence]", "99_Silva_111_rep_set.fasta")
taxa = qiime2.Artifact.import_data(
    "FeatureData[Taxonomy]", "99_Silva_111_taxa_map.txt")
seqs138 = qiime2.Artifact.load("silva-138-99-seqs.qza")
taxa138 = qiime2.Artifact.load("silva-138-99-tax.qza")

# Create and save the classifiers, one reference database at a time.
classifier = fit_classifier_naive_bayes(
    reference_reads=seqs, reference_taxonomy=taxa, verbose=True)
classifier.classifier.save("Silva_111")

classifier = fit_classifier_naive_bayes(
    reference_reads=seqs138, reference_taxonomy=taxa138, verbose=True)
classifier.classifier.save("Silva_138")
Exemple #6
0
def train_classifier(q2_refseq, q2_taxa):
    """Fit a naive Bayes classifier on the given reference data.

    Args:
        q2_refseq: Passed to ``fit_classifier_naive_bayes`` as
            ``reference_reads``.
        q2_taxa: Passed to ``fit_classifier_naive_bayes`` as
            ``reference_taxonomy``.

    Returns:
        The value returned by ``methods.fit_classifier_naive_bayes``,
        unmodified.
    """
    return methods.fit_classifier_naive_bayes(
        reference_reads=q2_refseq,
        reference_taxonomy=q2_taxa,
    )