コード例 #1
0
 def test_autotune_reads_per_batch_more_jobs_than_reads(self):
     """Autotuning with n_jobs=1105 is expected to give a batch size of 1."""
     observed = _autotune_reads_per_batch(self.seq_path, n_jobs=1105)
     self.assertEqual(observed, 1)
コード例 #2
0
def extract_reads(sequences: DNASequencesDirectoryFormat, f_primer: str,
                  r_primer: str, trunc_len: int = 0, trim_left: int = 0,
                  identity: float = 0.8, min_length: int = 50,
                  max_length: int = 0, n_jobs: int = 1,
                  batch_size: int = 'auto') -> DNAFASTAFormat:
    """Extract the reads selected by a primer or primer pair.

    Only sequences which match the primers at greater than the specified
    identity are returned. Sequences are processed in batches, each batch
    being dispatched to a pool of parallel workers, and every non-empty
    result is appended to a single FASTA file.

    Parameters
    ----------
    sequences : DNASequencesDirectoryFormat
        Query sequences. Viewed as a DNAIterator for processing.
    f_primer : str
        Forward primer sequence
    r_primer : str
        Reverse primer sequence
    trunc_len : int, optional
        Read is cut to trunc_len if trunc_len is positive. Applied before
        trim_left.
    trim_left : int, optional
        `trim_left` nucleotides are removed from the 5' end if trim_left is
        positive. Applied after trunc_len.
    identity : float, optional
        Minimum combined primer match identity threshold. Default: 0.8
    min_length : int, optional
        Minimum amplicon length. Shorter amplicons are discarded. Default: 50
    max_length : int, optional
        Maximum amplicon length. Longer amplicons are discarded.
    n_jobs : int, optional
        Number of separate processes to break the task into. The value is
        normalised through ``effective_n_jobs`` before use.
    batch_size : int or 'auto', optional
        Number of sequences to be processed in one batch. When 'auto'
        (the default) the value is chosen by ``_autotune_reads_per_batch``
        from the input sequences and the effective number of jobs.

    Returns
    -------
    q2_types.DNAFASTAFormat
        containing the reads

    Raises
    ------
    ValueError
        If trunc_len is positive and min_length exceeds
        trunc_len - trim_left.
    RuntimeError
        If the output file is empty after all batches were processed.
    """
    truncating = trunc_len > 0
    if truncating and min_length > trunc_len - trim_left:
        raise ValueError('The minimum length setting is greater than the '
                         'length of the truncated sequences. This will cause '
                         'all sequences to be removed from the dataset. To '
                         'proceed, set a min_length ≤ trunc_len - trim_left.')

    n_jobs = effective_n_jobs(n_jobs)
    if batch_size == 'auto':
        fasta_view = sequences.file.view(DNAFASTAFormat)
        batch_size = _autotune_reads_per_batch(fasta_view, n_jobs)
    reads = sequences.file.view(DNAIterator)

    # Positional arguments that are identical for every _gen_reads call.
    shared_args = (f_primer, r_primer, trunc_len, trim_left, identity,
                   min_length, max_length)

    result = DNAFASTAFormat()
    with open(str(result), 'a') as out_fh, Parallel(n_jobs) as pool:
        for batch in _chunks(reads, batch_size):
            hits = pool(delayed(_gen_reads)(read, *shared_args)
                        for read in batch)
            for hit in hits:
                # Workers hand back None for reads that yield no amplicon.
                if hit is None:
                    continue
                skbio.write(hit, format='fasta', into=out_fh)

    # An empty output file means nothing was written for any read.
    if os.path.getsize(str(result)) == 0:
        raise RuntimeError("No matches found")
    return result
コード例 #3
0
 def test_autotune_reads_per_batch_zero_jobs(self):
     """Autotuning with n_jobs=0 must raise ValueError with the given text."""
     expected_msg = "Value other than zero must be specified"
     with self.assertRaisesRegex(ValueError, expected_msg):
         _autotune_reads_per_batch(self.seq_path, n_jobs=0)
コード例 #4
0
 def test_autotune_reads_per_batch_ceil(self):
     """Autotuning with n_jobs=5 is expected to give a batch size of 221."""
     observed = _autotune_reads_per_batch(self.seq_path, n_jobs=5)
     self.assertEqual(observed, 221)
コード例 #5
0
 def test_autotune_reads_per_batch(self):
     """Autotuning with n_jobs=4 is expected to give a batch size of 276."""
     observed = _autotune_reads_per_batch(self.seq_path, n_jobs=4)
     self.assertEqual(observed, 276)
コード例 #6
0
 def test_autotune_reads_per_batch_disable_if_single_job(self):
     """Autotuning with n_jobs=1 is expected to give a batch size of 500000."""
     observed = _autotune_reads_per_batch(self.seq_path, n_jobs=1)
     self.assertEqual(observed, 500000)
コード例 #7
0
 def test_autotune_reads_per_batch_more_jobs_than_reads(self):
     """Check that requesting 1105 jobs autotunes to one read per batch."""
     n_jobs = 1105
     batch = _autotune_reads_per_batch(self.seq_path, n_jobs=n_jobs)
     self.assertEqual(batch, 1)
コード例 #8
0
 def test_autotune_reads_per_batch_ceil(self):
     """Check that requesting 5 jobs autotunes to 221 reads per batch."""
     n_jobs = 5
     batch = _autotune_reads_per_batch(self.seq_path, n_jobs=n_jobs)
     self.assertEqual(batch, 221)
コード例 #9
0
 def test_autotune_reads_per_batch_zero_jobs(self):
     """Check that requesting zero jobs is rejected with a ValueError."""
     pattern = "Value other than zero must be specified"
     with self.assertRaisesRegex(ValueError, pattern):
         _autotune_reads_per_batch(self.seq_path, n_jobs=0)
コード例 #10
0
 def test_autotune_reads_per_batch_disable_if_single_job(self):
     """Check that a single job autotunes to 20000 reads per batch."""
     n_jobs = 1
     batch = _autotune_reads_per_batch(self.seq_path, n_jobs=n_jobs)
     self.assertEqual(batch, 20000)
コード例 #11
0
 def test_autotune_reads_per_batch(self):
     """Check that requesting 4 jobs autotunes to 276 reads per batch."""
     n_jobs = 4
     batch = _autotune_reads_per_batch(self.seq_path, n_jobs=n_jobs)
     self.assertEqual(batch, 276)