Example #1
0
 def test_no_labels(self):
     """parse_fasta should raise (strict) or skip (not strict) w/o seqs

     Consuming the parser over the ``labels`` fixture must raise
     RecordError when strict=True and yield nothing when strict=False.
     """
     # Build *and* consume the parser inside the guard, so an error
     # raised at call time is caught as well as one raised on iteration.
     with self.assertRaises(RecordError):
         list(parse_fasta(self.labels, strict=True))
     # if not strict, should skip the records
     self.assertEqual(list(parse_fasta(self.labels, strict=False)), [])
Example #2
0
 def test_empty(self):
     """parse_fasta should yield nothing from empty or label-less 'file'

     The ``empty`` fixture gives an empty list. The ``nolabels`` fixture
     gives an empty list when strict=False and raises RecordError with
     the default arguments.
     """
     self.assertEqual(list(parse_fasta(self.empty)), [])
     self.assertEqual(list(parse_fasta(self.nolabels, strict=False)),
                      [])
     # Build *and* consume the parser inside the guard, so an error
     # raised at call time is caught as well as one raised on iteration.
     with self.assertRaises(RecordError):
         list(parse_fasta(self.nolabels))
Example #3
0
 def test_no_labels(self):
     """parse_fasta should raise (strict) or skip (not strict) w/o seqs

     Consuming the parser over the ``labels`` fixture must raise
     RecordError when strict=True and yield nothing when strict=False.
     """
     # Build *and* consume the parser inside the guard, so an error
     # raised at call time is caught as well as one raised on iteration.
     with self.assertRaises(RecordError):
         list(parse_fasta(self.labels, strict=True))
     # if not strict, should skip the records
     self.assertEqual(list(parse_fasta(self.labels, strict=False)),
                      [])
Example #4
0
 def test_parse_fasta_ignore_comment(self):
     """parse_fasta correctly ignores label comments when requested

     With the default arguments the label '2 some other info' is
     expected back unchanged; with ignore_comment=True it is expected
     back as '2'.
     """
     lines = '>1\nCAG\n>2 some other info\nCCAG\n>3 \nA'.split('\n')

     # default call: the comment stays part of the label
     self.assertEqual(
         list(parse_fasta(lines)),
         [('1', 'CAG'), ('2 some other info', 'CCAG'), ('3', 'A')])

     # ignore_comment=True: the comment is dropped from the label
     self.assertEqual(
         list(parse_fasta(lines, ignore_comment=True)),
         [('1', 'CAG'), ('2', 'CCAG'), ('3', 'A')])
Example #5
0
 def test_parse_fasta_ignore_comment(self):
     """parse_fasta correctly ignores label comments when requested

     With the default arguments the label '2 some other info' is
     expected back unchanged; with ignore_comment=True it is expected
     back as '2'.
     """
     lines = '>1\nCAG\n>2 some other info\nCCAG\n>3 \nA'.split('\n')

     # default call: the comment stays part of the label
     self.assertEqual(
         list(parse_fasta(lines)),
         [('1', 'CAG'), ('2 some other info', 'CCAG'), ('3', 'A')])

     # ignore_comment=True: the comment is dropped from the label
     self.assertEqual(
         list(parse_fasta(lines, ignore_comment=True)),
         [('1', 'CAG'), ('2', 'CCAG'), ('3', 'A')])
Example #6
0
    def test_single(self):
        """parse_fasta should read single record as (label, seq) tuple

        Checked against both the ``oneseq`` and the ``multiline``
        fixtures; each must produce exactly one record.
        """
        cases = (
            (self.oneseq, ('abc', 'UCAG')),
            (self.multiline, ('xyz', 'UUUUCCAAAAAG')),
        )
        for fasta_lines, expected_record in cases:
            records = list(parse_fasta(fasta_lines))
            self.assertEqual(len(records), 1)
            self.assertEqual(records[0], expected_record)
Example #7
0
    def test_single(self):
        """parse_fasta should read single record as (label, seq) tuple

        Checked against both the ``oneseq`` and the ``multiline``
        fixtures; each must produce exactly one record.
        """
        cases = (
            (self.oneseq, ('abc', 'UCAG')),
            (self.multiline, ('xyz', 'UUUUCCAAAAAG')),
        )
        for fasta_lines, expected_record in cases:
            records = list(parse_fasta(fasta_lines))
            self.assertEqual(len(records), 1)
            self.assertEqual(records[0], expected_record)
Example #8
0
def combine_alignments(fp1, fp2):
    """Merge the records parsed from two FASTA file pointers into a dict.

    Both inputs are read with parse_fasta and turned into dicts keyed by
    the first element of each record. The merged dict is returned.

    Raises ValueError when any key occurs in both inputs.
    """
    first = dict(parse_fasta(fp1))
    second = dict(parse_fasta(fp2))

    # An id present in both inputs would be silently overwritten by the
    # merge below, so refuse to continue instead.
    shared_ids = set(first) & set(second)
    if shared_ids:
        raise ValueError("Conflicting sequence ids in fp1 and fp2")

    first.update(second)
    return first
    def test_generate_lane_mask(self):
        """generate_lane_mask should give the expected mask per threshold

        One four-sequence alignment is built once and the mask is then
        requested for a series of increasing second-argument values.
        """
        fasta_lines = """>1
        AAAAT
        >2
        AAAGG
        >3
        AACCC
        >4
        A----""".split('\n')
        aln = Alignment.from_fasta_records(parse_fasta(fasta_lines), DNA)

        # (second argument to generate_lane_mask, expected mask), in order
        expectations = (
            (0.00, "11111"),
            (0.10, "11100"),
            (0.20, "11100"),
            (0.40, "11000"),
            (0.60, "11000"),
            (0.80, "10000"),
            (1.00, "00000"),
        )
        for threshold, expected_mask in expectations:
            self.assertEqual(generate_lane_mask(aln, threshold),
                             expected_mask)
    def test_generate_lane_mask(self):
        """generate_lane_mask should give the expected mask per threshold

        One four-sequence alignment is built once and the mask is then
        requested for a series of increasing second-argument values.
        """
        fasta_lines = """>1
        AAAAT
        >2
        AAAGG
        >3
        AACCC
        >4
        A----""".split('\n')
        aln = Alignment.from_fasta_records(parse_fasta(fasta_lines), DNA)

        # (second argument to generate_lane_mask, expected mask), in order
        expectations = (
            (0.00, "11111"),
            (0.10, "11100"),
            (0.20, "11100"),
            (0.40, "11000"),
            (0.60, "11000"),
            (0.80, "10000"),
            (1.00, "00000"),
        )
        for threshold, expected_mask in expectations:
            self.assertEqual(generate_lane_mask(aln, threshold),
                             expected_mask)
Example #11
0
    def test_parse_fasta_label_to_name(self):
        """parse_fasta should name each record via label_to_name

        The converter used here returns the same name for any label, so
        all three records are expected back under that name.
        """
        def to_brofist(_):
            # the most powerful fasta label converter known to mankind
            return 'brofist'

        observed = list(parse_fasta(self.threeseq, label_to_name=to_brofist))
        expected = [('brofist', seq) for seq in ('a', 'caggac', 'cg')]

        self.assertEqual(observed, expected)
Example #12
0
 def test_multiple(self):
     """parse_fasta should read each record of the threeseq fixture

     Three (label, seq) tuples are expected, in input order.
     """
     records = list(parse_fasta(self.threeseq))
     self.assertEqual(len(records), 3)

     expected = [('123', 'a'), ('abc', 'caggac'), ('456', 'cg')]
     for record, wanted in zip(records, expected):
         self.assertEqual(record, wanted)
Example #13
0
 def test_multiple(self):
     """parse_fasta should read each record of the threeseq fixture

     Three (label, seq) tuples are expected, in input order.
     """
     records = list(parse_fasta(self.threeseq))
     self.assertEqual(len(records), 3)

     expected = [('123', 'a'), ('abc', 'caggac'), ('456', 'cg')]
     for record, wanted in zip(records, expected):
         self.assertEqual(record, wanted)
Example #14
0
    def test_parse_fasta_label_to_name(self):
        """parse_fasta should name each record via label_to_name

        The converter used here returns the same name for any label, so
        all three records are expected back under that name.
        """
        def to_brofist(_):
            # the most powerful fasta label converter known to mankind
            return 'brofist'

        observed = list(parse_fasta(self.threeseq, label_to_name=to_brofist))
        expected = [('brofist', seq) for seq in ('a', 'caggac', 'cg')]

        self.assertEqual(observed, expected)
Example #15
0
    def test_gt_bracket_in_seq(self):
        """parse_fasta handles alternate finder function

            A custom finder that pairs up consecutive lines is passed in,
            which shows how parse_fasta can cope with "sequences" that
            themselves start with a > symbol (as can happen when it is
            abused to parse fasta-like sequence quality files).
        """
        oneseq_w_gt = '>abc\n>CAG\n'.split('\n')

        def get_two_line_records(infile):
            # Hold the first line of a pair until its partner arrives;
            # a trailing unpaired line is never yielded.
            pending = None
            for current in infile:
                if pending is not None:
                    yield (pending, current)
                    pending = None
                else:
                    pending = current
        records = list(parse_fasta(oneseq_w_gt, finder=get_two_line_records))
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0], ('abc', '>CAG'))
Example #16
0
    def test_gt_bracket_in_seq(self):
        """parse_fasta handles alternate finder function

            A custom finder that pairs up consecutive lines is passed in,
            which shows how parse_fasta can cope with "sequences" that
            themselves start with a > symbol (as can happen when it is
            abused to parse fasta-like sequence quality files).
        """
        oneseq_w_gt = '>abc\n>CAG\n'.split('\n')

        def get_two_line_records(infile):
            # Hold the first line of a pair until its partner arrives;
            # a trailing unpaired line is never yielded.
            pending = None
            for current in infile:
                if pending is not None:
                    yield (pending, current)
                    pending = None
                else:
                    pending = current

        records = list(parse_fasta(oneseq_w_gt, finder=get_two_line_records))
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0], ('abc', '>CAG'))
Example #17
0
 def test_multiple_bad_strict(self):
     """parse_fasta with default arguments should raise on twogood"""
     with self.assertRaises(RecordError):
         records = parse_fasta(self.twogood)
         # consuming the parser is what is expected to raise
         list(records)
Example #18
0
 def test_multiple_bad_not_strict(self):
     """parse_fasta with strict=False should yield two records on twogood

     Only the content of the first record is checked.
     """
     records = list(parse_fasta(self.twogood, strict=False))
     self.assertEqual(len(records), 2)
     first = records[0]
     self.assertEqual(first, ('abc', 'caggac'))
Example #19
0
 def test_multiple_bad_strict(self):
     """parse_fasta with default arguments should raise on twogood"""
     with self.assertRaises(RecordError):
         records = parse_fasta(self.twogood)
         # consuming the parser is what is expected to raise
         list(records)
Example #20
0
 def test_multiple_bad_not_strict(self):
     """parse_fasta with strict=False should yield two records on twogood

     Only the content of the first record is checked.
     """
     records = list(parse_fasta(self.twogood, strict=False))
     self.assertEqual(len(records), 2)
     first = records[0]
     self.assertEqual(first, ('abc', 'caggac'))
Example #21
0
 def test_empty(self):
     """parse_fasta should yield nothing from empty or label-less 'file'

     The ``empty`` fixture gives an empty list. The ``nolabels`` fixture
     gives an empty list when strict=False and raises RecordError with
     the default arguments.
     """
     self.assertEqual(list(parse_fasta(self.empty)), [])
     self.assertEqual(list(parse_fasta(self.nolabels, strict=False)), [])
     # Build *and* consume the parser inside the guard, so an error
     # raised at call time is caught as well as one raised on iteration.
     with self.assertRaises(RecordError):
         list(parse_fasta(self.nolabels))