예제 #1
0
    def test_rna_fasta_format_id_starts_with_space(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = '1 starts with a space'
        fasta_path = self.get_data_path(
            'dna-sequences-id-starts-with-space.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #2
0
    def test_rna_fasta_format_consecutive_IDs(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = 'consecutive descriptions.*1'
        fasta_path = self.get_data_path('dna-sequences-consecutive-ids.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #3
0
    def test_rna_fasta_format_invalid_characters(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = "Invalid character '1' .*0 on line 2"
        fasta_path = self.get_data_path('not-rna-sequences.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #4
0
    def test_rna_fasta_format_empty_file(self):
        # A file containing only a newline is written into the temporary
        # directory; validate() is expected to return without raising.
        empty_path = os.path.join(self.temp_dir.name, 'empty')
        with open(empty_path, 'w') as handle:
            handle.write('\n')

        RNAFASTAFormat(empty_path, mode='r').validate()
예제 #5
0
 def test_cull_seqs_rna_default_params(self):
     # cull_seqs is called with no optional arguments on sequences loaded
     # from an RNA FASTA fixture; only the IDs listed at the end should be
     # present in its output.
     rna_fasta = RNAFASTAFormat(
         self.get_data_path('cleanseq-test-1-rna.fasta'), mode='r')
     culled = cull_seqs(rna_fasta.view(DNAIterator))

     kept_ids = set()
     for record in culled.view(DNAIterator):
         kept_ids.add(record.metadata['id'])

     self.assertEqual(kept_ids, {'Ambig2', 'cleanseq'})
예제 #6
0
def get_silva_data(ctx,
                   version='138.1',
                   target='SSURef_NR99',
                   include_species_labels=False,
                   rank_propagation=True,
                   ranks=None,
                   download_sequences=True):
    """Download SILVA data and parse its taxonomy.

    ``version``, ``target`` and ``download_sequences`` are forwarded to
    ``_assemble_silva_data_urls``; the resulting queries are fetched with
    ``_retrieve_data_from_silva``. ``include_species_labels``, ``ranks`` and
    ``rank_propagation`` are forwarded to the ``parse_silva_taxonomy`` action
    obtained from ``ctx``.

    Returns a ``(sequences, taxonomy)`` tuple. When ``download_sequences`` is
    falsy, ``sequences`` is an artifact imported from a newly created
    ``RNAFASTAFormat`` instead of downloaded data.
    """
    print('Downloading raw files may take some time... get some coffee.')
    silva_files = _retrieve_data_from_silva(
        _assemble_silva_data_urls(version, target, download_sequences))

    # The action's result is unpacked as a one-element sequence.
    taxonomy_parser = ctx.get_action('rescript', 'parse_silva_taxonomy')
    (taxonomy,) = taxonomy_parser(
        taxonomy_tree=silva_files['taxonomy tree'],
        taxonomy_map=silva_files['taxonomy map'],
        taxonomy_ranks=silva_files['taxonomy ranks'],
        include_species_labels=include_species_labels,
        ranks=ranks,
        rank_propagation=rank_propagation)

    if download_sequences:
        sequences = silva_files['sequences']
    else:
        # Sequences were skipped, but a sequence output is still required,
        # so an empty sequence file is emitted in their place.
        sequences = qiime2.Artifact.import_data(
            'FeatureData[RNASequence]', RNAFASTAFormat())
    return sequences, taxonomy
예제 #7
0
    def test_rna_fasta_format_no_id(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = '1.*missing an ID'
        fasta_path = self.get_data_path('dna-sequences-no-id.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #8
0
    def test_rna_fasta_format_duplicate_ids(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = '6.*duplicate.*1'
        fasta_path = self.get_data_path(
            'rna-sequences-with-duplicate-ids.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #9
0
    def test_rna_fasta_format_bom_fails(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = 'First line'
        fasta_path = self.get_data_path('dna-with-bom-fails.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #10
0
    def test_rna_fasta_format_corrupt_characters(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = 'utf-8.*2'
        fasta_path = self.get_data_path(
            'dna-sequences-corrupt-characters.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #11
0
    def test_rna_fasta_format_missing_initial_ID(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = 'First line'
        fasta_path = self.get_data_path(
            'dna-sequences-first-line-not-id.fasta')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #12
0
    def test_rna_fasta_format_validate_negative(self):
        # Validating this fixture must raise a ValidationError whose message
        # matches the pattern below.
        expected_message = 'RNAFASTA'
        fasta_path = self.get_data_path('not-rna-sequences')
        rna_fasta = RNAFASTAFormat(fasta_path, mode='r')

        with self.assertRaisesRegex(ValidationError, expected_message):
            rna_fasta.validate()
예제 #13
0
    def test_rna_fasta_format_bom_passes(self):
        # validate() is expected to return without raising for this fixture.
        fasta_path = self.get_data_path('rna-with-bom-passes.fasta')

        RNAFASTAFormat(fasta_path, mode='r').validate()
예제 #14
0
    def test_rna_fasta_format_validate_positive(self):
        # validate() is expected to return without raising for this fixture.
        fasta_path = self.get_data_path('rna-sequences.fasta')

        RNAFASTAFormat(fasta_path, mode='r').validate()