def main():
    """Command-line entry point: read sequences, transform them, write them.

    Options come from ``_build_option_parser()`` applied to ``sys.argv[1:]``.
    Any leftover positional argument is reported through ``parser.error``.
    The sequences are read with ``opts.reader`` from ``opts.fin`` and the
    following optional steps are applied, in this order:

    * ``opts.trans_seg``  -- run ``mask_low_complexity`` on every sequence.
    * ``opts.subsample``  -- keep each sequence when ``random()`` is below
      the given fraction.
    * ``opts.reverse``    -- replace every sequence by ``s.reverse()``.
    * ``opts.complement`` -- rebuild every sequence as a ``Seq`` over
      ``nucleic_alphabet``, then replace it by ``s.complement()``.

    The result is written with ``opts.writer`` to ``opts.fout``.
    """
    # ------ Parse command line ------
    option_parser = _build_option_parser()
    opts, leftover = option_parser.parse_args(sys.argv[1:])
    if leftover:
        option_parser.error("Unparsable arguments: %s " % leftover)

    records = opts.reader.read(opts.fin)

    if opts.trans_seg:
        records = SeqList([mask_low_complexity(rec) for rec in records])

    if opts.subsample is not None:
        from random import random

        keep_fraction = opts.subsample
        # One random() draw per sequence, in input order.
        records = SeqList(
            [rec for rec in records if random() < keep_fraction])

    if opts.reverse:
        records = SeqList([rec.reverse() for rec in records])

    if opts.complement:
        # Re-wrap over the nucleic alphabet first, then complement.
        as_nucleic = SeqList(
            [Seq(rec, alphabet=nucleic_alphabet) for rec in records])
        records = SeqList([rec.complement() for rec in as_nucleic])

    opts.writer.write(opts.fout, records)
def test_tally(self):
    """Check SeqList.tally() counts and its failure without an alphabet.

    Three short sequences over ``nucleic_alphabet`` are tallied together
    and compared against the expected per-symbol counts. A second list,
    built without an alphabet argument, is expected to raise ValueError.
    """
    s0 = Seq("ACTTT", nucleic_alphabet)
    s1 = Seq("ACCCC", nucleic_alphabet)
    s2 = Seq("GGGG", nucleic_alphabet)
    seqs = SeqList([s0, s1, s2], nucleic_alphabet)

    counts = seqs.tally()
    # Use the TestCase assertion rather than a bare ``assert`` so the check
    # still runs under ``python -O`` and matches the sibling tests.
    self.assertEqual(
        counts, [2, 5, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

    # This SeqList is created without an alphabet argument.
    seqs = SeqList(
        [Seq("AAACD", nucleic_alphabet), Seq("AAACD", nucleic_alphabet)])
    self.assertRaises(ValueError, seqs.tally)
def test_repr(self):
    """Smoke test: repr() of a SeqList of nucleic sequences must not raise."""
    members = [
        Seq(text, nucleic_alphabet)
        for text in (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
    ]
    collection = SeqList(members)
    # Only the absence of an exception is checked; the text is not inspected.
    repr(collection)
def test_create(self):
    """A SeqList built from three Seq objects reports a length of three."""
    members = [
        Seq(text, nucleic_alphabet)
        for text in (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
    ]
    collection = SeqList(members)
    self.assertEqual(len(collection), 3)
def test_profile(self):
    """Check SeqList.profile() column counts and its error cases.

    Four length-5 sequences over the alphabet "ABCD" are profiled and each
    column is compared with the expected per-symbol counts. profile() is
    then expected to raise ValueError for a list whose sequences have
    different lengths, and for a list created without an alphabet argument.
    """
    abcd = Alphabet("ABCD")
    rows = [Seq(text, abcd) for text in ("ABCDD", "AAAAD", "AAABD", "AAACD")]
    profile = SeqList(rows, abcd).profile()

    # Expected counts per column, ordered as the alphabet symbols A, B, C, D.
    expected_columns = (
        [4, 0, 0, 0],
        [3, 1, 0, 0],
        [3, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 4],
    )
    for position, expected in enumerate(expected_columns):
        self.assertEqual(list(profile[position]), expected)

    # Lookup by (position, symbol).
    self.assertEqual(profile[4, 'D'], 4)

    # Sequences of length 5 and 6 in the same list.
    unequal = SeqList([Seq("AAACD", abcd), Seq("AAACDA", abcd)], abcd)
    with self.assertRaises(ValueError):
        unequal.profile()

    # Same sequences twice, but the SeqList itself gets no alphabet argument.
    without_alphabet = SeqList([Seq("AAACD", abcd), Seq("AAACD", abcd)])
    with self.assertRaises(ValueError):
        without_alphabet.profile()
def test_create_annotated(self):
    """SeqList keeps the alphabet, name and description it was given."""
    members = [
        Seq(text, nucleic_alphabet)
        for text in (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
    ]
    annotated = SeqList(
        members,
        alphabet=nucleic_alphabet,
        name="alsdf",
        description='a',
    )

    self.assertEqual(annotated.name, 'alsdf')
    self.assertEqual(annotated.description, 'a')
    self.assertEqual(annotated.alphabet, nucleic_alphabet)
def test_ords(self):
    """Exercise SeqList.ords() with and without an explicit alphabet.

    The calls on the list that has an alphabet are smoke tests: only the
    absence of an exception is checked. For a list created without an
    alphabet, ords() must work when one is passed in and is expected to
    raise ValueError when none is given.
    """
    members = [
        Seq(text, nucleic_alphabet)
        for text in (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
    ]

    with_alphabet = SeqList(members, nucleic_alphabet)
    with_alphabet.ords()
    # Disabled shape check:
    # self.assertEqual( a.shape, (3, 28) )

    # FIXME? Disabled check that ords() fails if seqs are of different
    # lengths:
    # s3 = Seq("ACGTUR", nucleic_alphabet )
    # seqs2 = SeqList( [ s0,s1,s3,s2], nucleic_alphabet)
    # self.assertRaises(ValueError, seqs2.ords )

    # Pass the alphabet explicitly (here the same nucleic_alphabet).
    with_alphabet.ords(nucleic_alphabet)

    # The list has no alphabet of its own, so one is supplied per call.
    without_alphabet = SeqList(members)
    without_alphabet.ords(alphabet=Alphabet("ABC"))

    # Neither the list nor the call provides an alphabet.
    with self.assertRaises(ValueError):
        without_alphabet.ords()
def test_create_empty(self):
    """An empty SeqList can be filled with append() and extend().

    After one append and one two-element extend the list must hold three
    items and its type must still be SeqList.
    """
    first, second, third = (
        Seq(text, nucleic_alphabet)
        for text in (
            "ACGTURYBDHVNACGTURYSWKMBDHVN",
            "ACGTURYSWKMBDHVNACGTURYSWKMBDHVN",
            "ACGTURSWKMBDHVNACGTURKMBDHVN",
        )
    )

    collection = SeqList()
    collection.append(first)
    collection.extend((second, third))

    self.assertEqual(len(collection), 3)
    self.assertEqual(type(collection), SeqList)
def test_isaligned(self):
    """Check SeqList.isaligned() for matching and mismatching alphabets.

    Four length-5 sequences over Alphabet("ABCD") are expected to be
    aligned when the list uses the same alphabet, and not aligned when the
    list is given Alphabet("ABCDE") instead.
    """
    a = Alphabet("ABCD")
    s0 = Seq("ABCDD", a)
    s1 = Seq("AAAAD", a)
    s2 = Seq("AAABD", a)
    s3 = Seq("AAACD", a)

    # TestCase assertions are used instead of bare ``assert`` so the checks
    # still run under ``python -O`` and match the sibling tests.
    seqs = SeqList([s0, s1, s2, s3], a)
    self.assertTrue(seqs.isaligned())

    # Same sequences, but the list's alphabet differs from theirs.
    seqs = SeqList([s0, s1, s2, s3], Alphabet("ABCDE"))
    self.assertFalse(seqs.isaligned())