def setUp(self):
    """Create the sequences and collections shared by the tests."""
    # (id, sequence) tuples carrying the same ids and strings as the
    # sequence objects built below.
    self.seqs1_t = [('d1', 'GATTACA'), ('d2', 'TTG')]
    self.seqs2_t = [
        ('r1', 'GAUUACA'),
        ('r2', 'UUG'),
        ('r3', 'U-----UGCC--'),
    ]
    self.seqs3_t = self.seqs1_t + self.seqs2_t

    # DNA sequences, in upper- and lower-case variants sharing the same ids.
    self.d1 = DNASequence('GATTACA', id="d1")
    self.d2 = DNASequence('TTG', id="d2")
    self.d1_lower = DNASequence('gattaca', id="d1")
    self.d2_lower = DNASequence('ttg', id="d2")

    # RNA sequences; r3 contains '-' characters.
    self.r1 = RNASequence('GAUUACA', id="r1")
    self.r2 = RNASequence('UUG', id="r2")
    self.r3 = RNASequence('U-----UGCC--', id="r3")

    # DNA sequence containing an 'X'; used for the "invalid" collection.
    self.i1 = DNASequence('GATXACA', id="i1")

    # Plain lists of the sequence objects.
    self.seqs1 = [self.d1, self.d2]
    self.seqs1_lower = [self.d1_lower, self.d2_lower]
    self.seqs2 = [self.r1, self.r2, self.r3]
    self.seqs3 = self.seqs1 + self.seqs2

    # Collections wrapping the lists above.
    self.s1 = SequenceCollection(self.seqs1)
    self.s1_lower = SequenceCollection(self.seqs1_lower)
    self.s2 = SequenceCollection(self.seqs2)
    self.s3 = SequenceCollection(self.seqs3)
    self.empty = SequenceCollection([])
    self.invalid_s1 = SequenceCollection([self.i1])
def setUp(self):
    """Prepare the strings, FASTA text and sequence objects for Fasta tests."""
    self.strings = ['AAAA', 'CCCC', 'gggg', 'uuuu']

    # FASTA text with numeric labels, with named labels, and with named
    # labels where every sequence is split over two-character lines.
    self.fasta_no_label = '>0\nAAAA\n>1\nCCCC\n>2\ngggg\n>3\nuuuu'
    self.fasta_with_label = (
        '>1st\nAAAA\n>2nd\nCCCC\n>3rd\nGGGG\n>4th\nUUUU')
    self.fasta_with_label_lw2 = (
        '>1st\nAA\nAA\n>2nd\nCC\nCC\n>3rd\nGG\nGG\n>4th\nUU\nUU')

    self.alignment_dict = {
        '1st': 'AAAA',
        '2nd': 'CCCC',
        '3rd': 'GGGG',
        '4th': 'UUUU',
    }

    # The same two records wrapped in two different sequence classes.
    self.sequence_objects_a = [
        DNASequence(seq, label)
        for seq, label in (('ACTCGAGATC', 'seq1'), ('GGCCT', 'seq2'))
    ]
    self.sequence_objects_b = [
        BiologicalSequence(seq, label)
        for seq, label in (('ACTCGAGATC', 'seq1'), ('GGCCT', 'seq2'))
    ]

    # Two equal-length gapped sequences wrapped in an Alignment.
    gapped_first = DNASequence("ACC--G-GGTA..", id="seq1")
    gapped_second = DNASequence("TCC--G-GGCA..", id="seqs2")
    self.alignment = Alignment([gapped_first, gapped_second])
def test_reverse_complement(self):
    """reverse_complement gives the expected sequences and raises for b3."""
    rc_b1 = self.b1.reverse_complement()
    self.assertEqual(rc_b1, DNASequence("TGTAATC"))

    rc_b2 = self.b2.reverse_complement()
    self.assertEqual(rc_b2, DNASequence("GGTACCGGT"))

    # b3 is expected to raise rather than return a sequence.
    with self.assertRaises(BiologicalSequenceError):
        self.b3.reverse_complement()

    rc_b4 = self.b4.reverse_complement()
    self.assertEqual(rc_b4, DNASequence("NVHDBMRSWYK"))
def __call__(self, seq_path, result_path=None, log_path=None,
             failure_path=None):
    """Align the sequences in ``seq_path`` against the template alignment.

    The template is read from ``self.Params['template_filepath']``; '.'
    characters in it are replaced with '-' and sequences are upper-cased
    before it is loaded as a validated DNA ``Alignment``.

    Parameters
    ----------
    seq_path : str
        Path to the FASTA file holding the candidate sequences.
    result_path : str, optional
        When given, the aligned sequences are written there as FASTA and
        ``None`` is returned.
    log_path : str, optional
        Passed to ``NastLogger``.
    failure_path : str, optional
        When given, the sequences reported as failed by ``pynast_seqs`` are
        written there as FASTA.

    Returns
    -------
    Alignment or None
        The aligned sequences when ``result_path`` is ``None``; otherwise
        ``None``.
    """
    # The candidate file stays open until pynast_seqs returns, because
    # parse_fasta may read from the handle lazily (assumption: pynast_seqs
    # has consumed all candidates once it returns its two result lists).
    # NOTE: the 'U' mode is kept from the original code; it was removed in
    # Python 3.11, so this needs revisiting if the module is ported.
    with open(seq_path, 'U') as seq_file:
        candidate_sequences = parse_fasta(seq_file)

        # Load the template sequences, replacing '.' with '-' characters.
        template_alignment_fp = self.Params['template_filepath']
        with open(template_alignment_fp) as template_file:
            template_records = [
                (seq_id, seq.replace('.', '-').upper())
                for seq_id, seq in parse_fasta(template_file)
            ]
        template_alignment = Alignment.from_fasta_records(
            template_records, DNASequence, validate=True)

        logger = NastLogger(log_path)

        # Look up the function for the configured pairwise alignment method.
        pairwise_alignment_f = pairwise_alignment_methods[
            self.Params['pairwise_alignment_method']]

        pynast_aligned, pynast_failed = pynast_seqs(
            candidate_sequences,
            template_alignment,
            min_pct=self.Params['min_pct'],
            min_len=self.Params['min_len'],
            align_unaligned_seqs_f=pairwise_alignment_f,
            logger=logger,
            temp_dir=get_qiime_temp_dir())

    logger.record(str(self))

    # Convert the PyNAST results into DNASequence-based collections.
    pynast_failed = SequenceCollection(
        [DNASequence(str(seq), id=seq.Name) for seq in pynast_failed])
    pynast_aligned = Alignment(
        [DNASequence(str(seq), id=seq.Name) for seq in pynast_aligned])

    if failure_path is not None:
        with open(failure_path, 'w') as fail_file:
            fail_file.write(pynast_failed.to_fasta())

    if result_path is None:
        return pynast_aligned

    with open(result_path, 'w') as result_file:
        result_file.write(pynast_aligned.to_fasta())
    return None
def test_nondegenerates_gap_mixed_case(self):
    """'-M.m' expands to four sequences; gaps and letter case are kept."""
    expected = [
        DNASequence(seq) for seq in ('-A.a', '-A.c', '-C.a', '-C.c')
    ]
    # Sort by sequence string so the comparison does not depend on the
    # order in which the expansions are produced.
    observed = list(DNASequence('-M.m').nondegenerates())
    observed.sort(key=str)
    self.assertEqual(observed, expected)
def test_nondegenerates_mixed_degens(self):
    """'RGY' expands to the four expected non-degenerate sequences."""
    expected = [DNASequence(seq) for seq in ('AGC', 'AGT', 'GGC', 'GGT')]
    # Sort by sequence string so the comparison does not depend on the
    # order in which the expansions are produced.
    observed = list(DNASequence('RGY').nondegenerates())
    observed.sort(key=str)
    self.assertEqual(observed, expected)
def test_subalignment(self):
    """subalignment filters by sequence and/or position, with inversion."""
    # Select sequences by id.
    obs = self.a1.subalignment(seqs_to_keep=['d1', 'd3'])
    exp = Alignment([self.d1, self.d3])
    self.assertEqual(obs, exp)

    # Select sequences by index.
    obs = self.a1.subalignment(seqs_to_keep=[0, 2])
    exp = Alignment([self.d1, self.d3])
    self.assertEqual(obs, exp)

    # Inverted id selection leaves only d2.
    obs = self.a1.subalignment(
        seqs_to_keep=['d1', 'd3'],
        invert_seqs_to_keep=True)
    exp = Alignment([self.d2])
    self.assertEqual(obs, exp)

    # Inverted index selection leaves only d2.
    obs = self.a1.subalignment(
        seqs_to_keep=[0, 2],
        invert_seqs_to_keep=True)
    exp = Alignment([self.d2])
    self.assertEqual(obs, exp)

    # Select positions 0, 2 and 3 from every sequence.
    obs = self.a1.subalignment(positions_to_keep=[0, 2, 3])
    exp = Alignment([
        DNASequence('.AC', id="d1"),
        DNASequence('TAC', id="d2"),
        DNASequence('.AC', id="d3"),
    ])
    self.assertEqual(obs, exp)

    # Inverted position selection drops positions 0, 2 and 3.
    obs = self.a1.subalignment(
        positions_to_keep=[0, 2, 3],
        invert_positions_to_keep=True)
    exp = Alignment([
        DNASequence('.C-GTTGG..', id="d1"),
        DNASequence('TCGGT-GGCC', id="d2"),
        DNASequence('-C-GTTGC--', id="d3"),
    ])
    self.assertEqual(obs, exp)

    # Sequence and position selection combined.
    obs = self.a1.subalignment(
        seqs_to_keep=[0, 2],
        positions_to_keep=[0, 2, 3])
    exp = Alignment([
        DNASequence('.AC', id="d1"),
        DNASequence('.AC', id="d3"),
    ])
    self.assertEqual(obs, exp)

    # Sequence and position selection combined, both inverted.
    obs = self.a1.subalignment(
        seqs_to_keep=[0, 2],
        positions_to_keep=[0, 2, 3],
        invert_seqs_to_keep=True,
        invert_positions_to_keep=True)
    exp = Alignment([DNASequence('TCGGT-GGCC', id="d2")])
    self.assertEqual(obs, exp)
def test_validate_lengths(self):
    """_validate_lengths is true unless sequence lengths differ."""
    for aln in (self.a1, self.a2, self.empty):
        self.assertTrue(aln._validate_lengths())

    # A single sequence passes.
    single = Alignment([DNASequence('TTT', id="d1")])
    self.assertTrue(single._validate_lengths())

    # Lengths 3 and 2 do not match.
    ragged = Alignment([
        DNASequence('TTT', id="d1"),
        DNASequence('TT', id="d2"),
    ])
    self.assertFalse(ragged._validate_lengths())
def test_to_phylip(self):
    """to_phylip without label mapping keeps the original ids."""
    aln = Alignment([
        DNASequence('..ACC-GTTGG..', id="d1"),
        DNASequence('TTACCGGT-GGCC', id="d2"),
        DNASequence('.-ACC-GTTGC--', id="d3"),
    ])

    phylip_text, label_map = aln.to_phylip(map_labels=False)

    # With map_labels=False every id maps to itself.
    self.assertEqual(label_map, {'d1': 'd1', 'd3': 'd3', 'd2': 'd2'})

    # Header line "3 13", then one "<id> <sequence>" line per sequence.
    expected_text = ("3 13\n"
                     "d1 ..ACC-GTTGG..\n"
                     "d2 TTACCGGT-GGCC\n"
                     "d3 .-ACC-GTTGC--")
    self.assertEqual(phylip_text, expected_text)
def test_to_phylip_map_labels(self):
    """to_phylip with label mapping emits prefixed labels and their map."""
    aln = Alignment([
        DNASequence('..ACC-GTTGG..', id="d1"),
        DNASequence('TTACCGGT-GGCC', id="d2"),
        DNASequence('.-ACC-GTTGC--', id="d3"),
    ])

    phylip_text, label_map = aln.to_phylip(map_labels=True,
                                           label_prefix="s")

    # The returned map goes from each generated label to the original id.
    self.assertEqual(label_map, {'s1': 'd1', 's3': 'd3', 's2': 'd2'})

    # Header line "3 13", then one "<label> <sequence>" line per sequence.
    expected_text = ("3 13\n"
                     "s1 ..ACC-GTTGG..\n"
                     "s2 TTACCGGT-GGCC\n"
                     "s3 .-ACC-GTTGC--")
    self.assertEqual(phylip_text, expected_text)
def test_is_valid(self):
    """is_valid accepts the fixture alignments and rejects malformed ones."""
    for aln in (self.a1, self.a2, self.empty):
        self.assertTrue(aln.is_valid())

    # Rejected: the second sequence is one character shorter than the first.
    longer = DNASequence('..ACC-GTTGG..', id="d1")
    shorter = DNASequence('TTACCGGT-GGC', id="d2")
    length_mismatch = Alignment([longer, shorter])
    self.assertFalse(length_mismatch.is_valid())

    # Rejected: the first sequence contains the invalid character 'X'.
    with_bad_char = DNASequence('..ACC-GTXGG..', id="d1")
    clean = DNASequence('TTACCGGT-GGCC', id="d2")
    invalid_chars = Alignment([with_bad_char, clean])
    self.assertFalse(invalid_chars.is_valid())
def setUp(self):
    """Create the DNA/RNA sequences and alignments shared by the tests."""
    # (id, sequence) tuples carrying the same ids and strings as the
    # sequence objects built below.
    self.seqs1_t = [
        ('d1', '..ACC-GTTGG..'),
        ('d2', 'TTACCGGT-GGCC'),
        ('d3', '.-ACC-GTTGC--'),
    ]
    self.seqs2_t = [('r1', 'UUAU-'), ('r2', 'ACGUU')]

    # Three gapped DNA sequences of equal length.
    self.d1 = DNASequence('..ACC-GTTGG..', id="d1")
    self.d2 = DNASequence('TTACCGGT-GGCC', id="d2")
    self.d3 = DNASequence('.-ACC-GTTGC--', id="d3")
    self.seqs1 = [self.d1, self.d2, self.d3]

    # Two RNA sequences of equal length.
    self.r1 = RNASequence('UUAU-', id="r1")
    self.r2 = RNASequence('ACGUU', id="r2")
    self.seqs2 = [self.r1, self.r2]

    # Alignments wrapping the lists above, plus an empty one.
    self.a1 = Alignment(self.seqs1)
    self.a2 = Alignment(self.seqs2)
    self.empty = Alignment([])
def test_majority_consensus(self):
    """majority_consensus returns the expected consensus sequences."""
    three_seqs = Alignment([
        DNASequence('TTT', id="d1"),
        DNASequence('TT-', id="d2"),
        DNASequence('TC-', id="d3"),
    ])
    self.assertEqual(three_seqs.majority_consensus(), DNASequence('TT-'))

    # With one 'T' and one 'A' at the only position, either is accepted.
    tied = Alignment([
        DNASequence('T', id="d1"),
        DNASequence('A', id="d2"),
    ])
    consensus = tied.majority_consensus()
    self.assertTrue(consensus in [DNASequence('T'), DNASequence('A')])

    # The empty alignment's consensus compares equal to the empty string.
    self.assertEqual(self.empty.majority_consensus(), '')
def test_init_validate(self):
    """Alignment(validate=True) accepts valid input and raises otherwise."""
    # Valid sequences: construction must not raise.
    Alignment(self.seqs1, validate=True)

    # An extra sequence containing the invalid DNA character 'X'.
    bad_char_seq = DNASequence('.-ACC-GTXGC--', id="i1")
    with self.assertRaises(SequenceCollectionError):
        Alignment([self.d1, self.d2, self.d3, bad_char_seq],
                  validate=True)

    # An extra sequence that is shorter than the others.
    short_seq = DNASequence('.-ACC-GTGC--', id="i2")
    with self.assertRaises(SequenceCollectionError):
        Alignment([self.d1, self.d2, self.d3, short_seq],
                  validate=True)
def setUp(self):
    """Create the DNASequence fixtures used by the tests."""
    self.empty = DNASequence('')
    self.b1 = DNASequence('GATTACA')
    self.b2 = DNASequence(
        'ACCGGTACC',
        id="test-seq-2",
        description="A test sequence")
    # Contains a 'U'; labelled as a bad (non-DNA) sequence.
    self.b3 = DNASequence(
        'ACCGGUACC',
        id="bad-seq-1",
        description="Not a DNA sequence")
    self.b4 = DNASequence(
        'MRWSYKVHDBN',
        id="degen",
        description="All of the degenerate bases")
    # Contains '.' and '-' characters.
    self.b5 = DNASequence('.G--ATTAC-A...')
def test_complement(self):
    """complement gives the expected sequences and raises for b3."""
    comp_b1 = self.b1.complement()
    self.assertEqual(comp_b1, DNASequence("CTAATGT"))

    comp_b2 = self.b2.complement()
    self.assertEqual(comp_b2, DNASequence("TGGCCATGG"))

    # b3 is expected to raise rather than return a sequence.
    with self.assertRaises(BiologicalSequenceError):
        self.b3.complement()

    comp_b4 = self.b4.complement()
    self.assertEqual(comp_b4, DNASequence("KYWSRMBDHVN"))

    # '.' and '-' characters stay in place in the expected result.
    comp_b5 = self.b5.complement()
    self.assertEqual(comp_b5, DNASequence(".C--TAATG-T..."))
def test_is_reverse_complement(self):
    """is_reverse_complement is false for b1 itself, true for real matches."""
    same_seq_result = self.b1.is_reverse_complement(self.b1)
    self.assertFalse(same_seq_result)

    rc_of_b1 = DNASequence('TGTAATC')
    self.assertTrue(self.b1.is_reverse_complement(rc_of_b1))

    rc_of_b4 = DNASequence('NVHDBMRSWYK')
    self.assertTrue(self.b4.is_reverse_complement(rc_of_b4))
def test_nondegenerates_invalid(self):
    """Expanding a sequence containing 'Z' raises BiologicalSequenceError."""
    with self.assertRaises(BiologicalSequenceError):
        invalid_seq = DNASequence('AZA')
        # list() consumes the result in full, so the error also surfaces
        # if nondegenerates() only raises while being iterated.
        list(invalid_seq.nondegenerates())
def test_nondegenerates_all_degens(self):
    """Sequences made only of degenerate characters expand as expected."""
    # Results are sorted by sequence string before comparing, because the
    # order in which expansions are produced is not guaranteed.

    # The same degenerate character twice.
    expected = [DNASequence(seq) for seq in ('CC', 'CG', 'GC', 'GG')]
    observed = list(DNASequence('SS').nondegenerates())
    observed.sort(key=str)
    self.assertEqual(observed, expected)

    # Two different degenerate characters.
    expected = [DNASequence(seq) for seq in ('AC', 'AG', 'GC', 'GG')]
    observed = list(DNASequence('RS').nondegenerates())
    observed.sort(key=str)
    self.assertEqual(observed, expected)

    # An odd number of characters: only the number of expansions is checked.
    expansions = list(DNASequence('NNN').nondegenerates())
    self.assertEqual(len(expansions), 4**3)