def test_unaligned_muscle(self):
    """Check ``seqstats.get_differences`` on unaligned input using muscle.

    The test is reported as skipped, instead of silently passing, when the
    ``muscle`` executable cannot be located by ``functions.which``.
    """
    if not functions.which('muscle'):
        _LOG.warning('muscle not found... skipping tests.')
        # Raise SkipTest so the runner does not count this as a pass.
        self.skipTest('muscle not found')

    seq1 = SeqRecord(Seq('AC--GTNAC-TYATR'), id='1')
    seq2 = SeqRecord(Seq('ACN-GTAAC--CATT'), id='2')
    # The same two sequences with their gap characters removed:
    #   'ACGTNACTYATR'
    #   'ACNGTAACCATT'
    diffs, length = seqstats.get_differences(
        seq1, seq2,
        aligned=False,
        ignore_gaps=False,
        aligner_tools=['muscle'])
    expected = {11: ('R', 'T'), 3: ('T', 'G'), 6: ('C', 'A'), 7: ('T', 'C')}
    self.assertEqual(diffs, expected)
    self.assertEqual(length, 12)

    # Two sequences that differ only by a single missing base.
    seq1 = SeqRecord(Seq('ATCCGT'), id='1')
    seq2 = SeqRecord(Seq('ACCGT'), id='2')

    # With gaps ignored no difference is reported and the length is 5.
    diffs, length = seqstats.get_differences(
        seq1, seq2,
        aligned=False,
        ignore_gaps=True,
        aligner_tools=['muscle'])
    self.assertEqual(diffs, {})
    self.assertEqual(length, 5)

    # With gaps counted the indel shows up at index 1 and the length is 6.
    diffs, length = seqstats.get_differences(
        seq1, seq2,
        aligned=False,
        ignore_gaps=False,
        aligner_tools=['muscle'])
    self.assertEqual(diffs, {1: ('T', '-')})
    self.assertEqual(length, 6)
def test_unaligned_muscle(self):
    """Check ``seqstats.sample_distance_iter`` on unaligned input using muscle.

    The iterator is run twice over ``self.seqs`` (raw distances, then
    per-site distances). Every yielded distance is compared with
    ``self.expected`` and exactly six comparisons are expected per run.

    The test is reported as skipped, instead of silently passing, when the
    ``muscle`` executable cannot be located by ``functions.which``.
    """
    if not functions.which('muscle'):
        _LOG.warning('muscle not found... skipping tests.')
        # Raise SkipTest so the runner does not count this as a pass.
        self.skipTest('muscle not found')

    distance_iter = seqstats.sample_distance_iter(
        seq_iter=self.seqs,
        sample_size=2,
        per_site=False,
        aligned=False,
        ignore_gaps=True,
        aligner_tools=['muscle'])
    # Pre-initialise the counter so an empty iterator produces a clean
    # assertion failure instead of a NameError on an unbound loop variable.
    n_yielded = 0
    for n_yielded, (seq1, seq2, d, _drc) in enumerate(distance_iter, 1):
        self.assertEqual(self.expected[seq1.id][seq2.id], d)
    self.assertEqual(n_yielded, 6)

    distance_iter = seqstats.sample_distance_iter(
        seq_iter=self.seqs,
        sample_size=2,
        per_site=True,
        aligned=False,
        ignore_gaps=True,
        aligner_tools=['muscle'])
    n_yielded = 0
    for n_yielded, (seq1, seq2, d, _drc) in enumerate(distance_iter, 1):
        # NOTE(review): 4.0 is presumably the number of sites in the
        # fixture sequences -- confirm against the test set-up.
        self.assertAlmostEqual(self.expected[seq1.id][seq2.id] / 4.0, d)
    self.assertEqual(n_yielded, 6)
def test_unaligned_muscle(self):
    """Check ``seqstats.distance`` on unaligned input using muscle.

    Both the raw and the per-site distance are checked, with and without
    gaps being ignored.

    The test is reported as skipped, instead of silently passing, when the
    ``muscle`` executable cannot be located by ``functions.which``.
    """
    if not functions.which('muscle'):
        _LOG.warning('muscle not found... skipping tests.')
        # Raise SkipTest so the runner does not count this as a pass.
        self.skipTest('muscle not found')

    seq1 = SeqRecord(Seq('AC--GTNAC-TYATR'), id='1')
    seq2 = SeqRecord(Seq('ACN-GTAAC--CATT'), id='2')
    # The same two sequences with their gap characters removed:
    #   'ACGTNACTYATR'
    #   'ACNGTAACCATT'
    d = seqstats.distance(
        seq1, seq2,
        per_site=False,
        aligned=False,
        ignore_gaps=False,
        aligner_tools=['muscle'])
    self.assertEqual(d, 4)
    dps = seqstats.distance(
        seq1, seq2,
        per_site=True,
        aligned=False,
        ignore_gaps=False,
        aligner_tools=['muscle'])
    self.assertAlmostEqual(dps, 4 / float(12))

    # Two sequences that differ only by a single missing base.
    seq1 = SeqRecord(Seq('ATCCGT'), id='1')
    seq2 = SeqRecord(Seq('ACCGT'), id='2')

    # Gaps ignored: the indel does not count as a difference.
    d = seqstats.distance(
        seq1, seq2,
        per_site=False,
        aligned=False,
        ignore_gaps=True,
        aligner_tools=['muscle'])
    self.assertEqual(d, 0)
    dps = seqstats.distance(
        seq1, seq2,
        per_site=True,
        aligned=False,
        ignore_gaps=True,
        aligner_tools=['muscle'])
    self.assertEqual(dps, 0.0)

    # Gaps counted: the indel is one difference over six sites.
    d = seqstats.distance(
        seq1, seq2,
        per_site=False,
        aligned=False,
        ignore_gaps=False,
        aligner_tools=['muscle'])
    self.assertEqual(d, 1)
    dps = seqstats.distance(
        seq1, seq2,
        per_site=True,
        aligned=False,
        ignore_gaps=False,
        aligner_tools=['muscle'])
    # Compare the non-trivial float approximately, like the 4/12 case above.
    self.assertAlmostEqual(dps, 1 / float(6))
def test_rev_comp_gappy_muscle(self):
    """Check ``seqsum.summarize_distances`` on the gappy fixture with muscle.

    Sequences are read from ``self.gappy_path``. The call must return an
    empty second result and 31 entries in the first.

    The test is reported as skipped, instead of silently passing, when the
    ``muscle`` executable cannot be located by ``functions.which``.
    """
    if not functions.which('muscle'):
        _LOG.warning('muscle not found... skipping tests.')
        # Raise SkipTest so the runner does not count this as a pass.
        self.skipTest('muscle not found')

    self.rc_seqs = dataio.get_buffered_seq_iter([self.gappy_path])
    # NOTE(review): the second return value is presumably the collection of
    # reverse-complement errors -- confirm against summarize_distances.
    distances, errs = seqsum.summarize_distances(
        self.rc_seqs,
        sample_size=5,
        per_site=False,
        aligned=False,
        ignore_gaps=True,
        do_full_alignment=False,
        aligner_tools=['muscle'],
        full_aligner_tools=None)
    # assertEqual reports the actual length on failure, unlike assertTrue.
    self.assertEqual(len(errs), 0)
    self.assertEqual(len(distances), 31)
def __init__(self, exe='muscle', out_path=None, **kwargs):
    """Set up a wrapper around the muscle executable.

    Increments the class-level ``count``, builds ``self.name`` from the
    class name and that count, and resolves ``exe`` through
    ``functions.which``. ``out_path`` is passed through ``expand_path``
    when it is truthy. ``self.cmd`` starts out as ``None``.

    Raises:
        errors.ExternalToolNotFoundError: ``functions.which(exe)``
            returned a falsy value.
        ValueError: ``input`` or ``out`` was given as a keyword argument.
    """
    cls = self.__class__
    cls.count += 1
    self.name = '-'.join((cls.__name__, str(self.count)))

    located = functions.which(exe)
    self.exe = located
    if not located:
        raise errors.ExternalToolNotFoundError(
                'Cannot find muscle executable')
    _LOG.debug('{0}: Using exe {1!r}'.format(self.name, self.exe))

    # NOTE(review): kwargs are presumably forwarded to the muscle command
    # line later on -- confirm where self.kwargs is consumed.
    self.kwargs = kwargs
    if any(reserved in kwargs for reserved in ('input', 'out')):
        raise ValueError('MuscleAligner does not accept keyword '
                         'arguments `input`/`out`.')

    self.out_path = expand_path(out_path) if out_path else out_path
    self.cmd = None
def __init__(self, exe='mafft', out_path=None, **kwargs):
    """Set up a wrapper around the mafft executable.

    Increments the class-level ``count``, builds ``self.name`` from the
    class name and that count, and resolves ``exe`` through
    ``functions.which``. When no keyword arguments are supplied,
    ``self.kwargs`` defaults to ``{'auto': True}``. ``out_path`` is stored
    unchanged and ``self.cmd`` starts out as ``None``.

    Raises:
        errors.ExternalToolNotFoundError: ``functions.which(exe)``
            returned a falsy value.
        ValueError: ``input`` was given as a keyword argument.
    """
    cls = self.__class__
    cls.count += 1
    self.name = '-'.join((cls.__name__, str(self.count)))

    located = functions.which(exe)
    self.exe = located
    if not located:
        raise errors.ExternalToolNotFoundError(
                'Cannot find mafft executable')
    _LOG.debug('{0}: Using exe {1!r}'.format(self.name, self.exe))

    # NOTE(review): kwargs are presumably forwarded to the mafft command
    # line later on -- confirm where self.kwargs is consumed.
    self.kwargs = kwargs
    if 'input' in kwargs:
        raise ValueError('MafftAligner does not accept the keyword '
                         'argument `input`.')
    # Fall back to a default option set when the caller supplied none.
    self.kwargs = kwargs if kwargs else {'auto': True}

    self.out_path = out_path
    self.cmd = None
def __init__(self, exe='muscle', out_path=None, **kwargs):
    """Create a muscle aligner wrapper.

    The class-level ``count`` is incremented and used to build
    ``self.name``. ``exe`` is resolved with ``functions.which``, the extra
    keyword arguments are stored on ``self.kwargs``, and a truthy
    ``out_path`` is run through ``expand_path`` before being stored.
    ``self.cmd`` is initialised to ``None``.

    Raises:
        errors.ExternalToolNotFoundError: ``functions.which(exe)``
            returned a falsy value.
        ValueError: ``input`` or ``out`` was given as a keyword argument.
    """
    klass = self.__class__
    klass.count += 1
    self.name = '%s-%s' % (klass.__name__, self.count)

    self.exe = functions.which(exe)
    if not self.exe:
        raise errors.ExternalToolNotFoundError(
                'Cannot find muscle executable')
    _LOG.debug('{0}: Using exe {1!r}'.format(self.name, self.exe))

    self.kwargs = kwargs
    # `input` and `out` are rejected as user-supplied options.
    if not {'input', 'out'}.isdisjoint(kwargs):
        raise ValueError('MuscleAligner does not accept keyword '
                         'arguments `input`/`out`.')

    if out_path:
        self.out_path = expand_path(out_path)
    else:
        self.out_path = out_path
    self.cmd = None
def __init__(self, exe='mafft', out_path=None, **kwargs):
    """Create a mafft aligner wrapper.

    The class-level ``count`` is incremented and used to build
    ``self.name``. ``exe`` is resolved with ``functions.which``. The extra
    keyword arguments are stored on ``self.kwargs``, or ``{'auto': True}``
    when none are given. ``out_path`` is stored as passed and ``self.cmd``
    is initialised to ``None``.

    Raises:
        errors.ExternalToolNotFoundError: ``functions.which(exe)``
            returned a falsy value.
        ValueError: ``input`` was given as a keyword argument.
    """
    klass = self.__class__
    klass.count += 1
    self.name = '%s-%s' % (klass.__name__, self.count)

    self.exe = functions.which(exe)
    if not self.exe:
        raise errors.ExternalToolNotFoundError(
                'Cannot find mafft executable')
    _LOG.debug('{0}: Using exe {1!r}'.format(self.name, self.exe))

    self.kwargs = kwargs
    # `input` is rejected as a user-supplied option.
    if 'input' in kwargs:
        raise ValueError('MafftAligner does not accept the keyword '
                         'argument `input`.')
    if not kwargs:
        # No options supplied: default to {'auto': True}.
        self.kwargs = {'auto': True}

    self.out_path = out_path
    self.cmd = None
def test_full_alignment_muscle(self):
    """Check ``seqsum.summarize_distances`` with a full muscle alignment.

    The call must return an empty second result. The keys of the first
    result must match ``self.expected_means``, and each entry's ``maximum``
    and ``mean`` must match the expected fixtures.

    The test is reported as skipped, instead of silently passing, when the
    ``muscle`` executable cannot be located by ``functions.which``.
    """
    if not functions.which('muscle'):
        _LOG.warning('muscle not found... skipping tests.')
        # Raise SkipTest so the runner does not count this as a pass.
        self.skipTest('muscle not found')

    summaries, errs = seqsum.summarize_distances(
        self.seqs,
        sample_size=0,
        per_site=False,
        aligned=False,
        ignore_gaps=True,
        do_full_alignment=True,
        aligner_tools=['muscle'])
    self.assertEqual(errs, [])
    self.assertEqual(sorted(summaries.keys()),
                     sorted(self.expected_means.keys()))
    for key in iterkeys(summaries):
        self.assertEqual(summaries[key].maximum, self.expected_maxs[key])
        # The mean is a float, so compare approximately.
        self.assertAlmostEqual(summaries[key].mean,
                               self.expected_means[key])
def test_rev_comp_error_mafft_full(self):
    """Check ``seqsum.summarize_distances`` on ``self.rc_path`` with mafft.

    Runs with full alignment enabled and expects 11 entries in the second
    result, each containing ``'Homo_sapiens'``, plus 12 entries in the
    first result.

    The test is reported as skipped, instead of silently passing, when the
    ``mafft`` executable cannot be located by ``functions.which``.
    """
    if not functions.which('mafft'):
        _LOG.warning('mafft not found... skipping tests.')
        # Raise SkipTest so the runner does not count this as a pass.
        self.skipTest('mafft not found')

    self.rc_seqs = dataio.get_buffered_seq_iter([self.rc_path])
    # NOTE(review): the second return value is presumably the collection of
    # reverse-complement errors -- confirm against summarize_distances.
    distances, errs = seqsum.summarize_distances(
        self.rc_seqs,
        sample_size=0,
        per_site=False,
        aligned=False,
        ignore_gaps=False,
        do_full_alignment=True,
        aligner_tools=['mafft'],
        full_aligner_tools=['mafft'])
    self.assertEqual(len(errs), 11)
    for rce in errs:
        # assertIn shows both operands on failure, unlike assertTrue.
        self.assertIn('Homo_sapiens', rce)
    self.assertEqual(len(distances), 12)