Ejemplo n.º 1
0
class TestHitList(test.Case):
    """Checks length, sorting and iteration of an HHpredHitList fixture."""

    def setUp(self):
        """Create two hits, give each an alignment, and wrap them in a list."""
        super(TestHitList, self).setUp()

        self.h1 = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        self.h2 = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        # Each hit is aligned against an identical query/subject row.
        for hit, row in ((self.h1, 'A-CD'), (self.h2, 'BCD')):
            hit.add_alignment(row, row)

        self.hits = HHpredHitList([self.h1, self.h2])

    def testLength(self):
        """The fixture list reports exactly two hits."""
        hit_count = len(self.hits)
        self.assertEqual(hit_count, 2)

    def testSort(self):
        """sort() reorders a list built as [h2, h1] into [h1, h2]."""
        shuffled = HHpredHitList([self.h2, self.h1])

        # Construction order is kept until sort() is called.
        for position, expected in enumerate((self.h2, self.h1)):
            self.assertEqual(shuffled[position], expected)

        shuffled.sort()

        for position, expected in enumerate((self.h1, self.h2)):
            self.assertEqual(shuffled[position], expected)

    def testIterator(self):
        """Iterating the list yields the hits in insertion order."""
        iterated = list(self.hits)
        self.assertEqual(iterated, [self.h1, self.h2])
Ejemplo n.º 2
0
class TestHitList(test.Case):
    """Checks length, sorting and iteration of an HHpredHitList fixture."""

    def setUp(self):
        """Create two hits, give each an alignment, and wrap them in a list."""
        super(TestHitList, self).setUp()

        self.h1 = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        self.h2 = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        # Each hit is aligned against an identical query/subject row.
        for hit, row in ((self.h1, 'A-CD'), (self.h2, 'BCD')):
            hit.add_alignment(row, row)

        self.hits = HHpredHitList([self.h1, self.h2])

    def testLength(self):
        """The fixture list reports exactly two hits."""
        hit_count = len(self.hits)
        self.assertEqual(hit_count, 2)

    def testSort(self):
        """sort() reorders a list built as [h2, h1] into [h1, h2]."""
        shuffled = HHpredHitList([self.h2, self.h1])

        # Construction order is kept until sort() is called.
        for position, expected in enumerate((self.h2, self.h1)):
            self.assertEqual(shuffled[position], expected)

        shuffled.sort()

        for position, expected in enumerate((self.h1, self.h2)):
            self.assertEqual(shuffled[position], expected)

    def testIterator(self):
        """Iterating the list yields the hits in insertion order."""
        iterated = list(self.hits)
        self.assertEqual(iterated, [self.h1, self.h2])
Ejemplo n.º 3
0
    def setUp(self):
        """Create two hits, give each an alignment, and wrap them in a list."""
        super(TestHitList, self).setUp()

        self.h1 = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        self.h2 = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        # Each hit is aligned against an identical query/subject row.
        for hit, row in ((self.h1, 'A-CD'), (self.h2, 'BCD')):
            hit.add_alignment(row, row)

        self.hits = HHpredHitList([self.h1, self.h2])
Ejemplo n.º 4
0
    def setUp(self):
        """Prepare the h1/h2 hits (with alignments) and the list holding them."""
        super(TestHitList, self).setUp()

        self.h1 = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        self.h2 = HHpredHit(2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        # Query and subject rows are the same string for both fixtures.
        for hit, row in ((self.h1, 'A-CD'), (self.h2, 'BCD')):
            hit.add_alignment(row, row)

        self.hits = HHpredHitList([self.h1, self.h2])
Ejemplo n.º 5
0
class TestHit(test.Case):
    """Checks comparison, length and alignment behaviour of HHpredHit."""

    def setUp(self):
        """Build the two reference hits shared by the tests."""
        super(TestHit, self).setUp()

        h1_arguments = (1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        h2_arguments = (2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        self.h1 = HHpredHit(*h1_arguments)
        self.h2 = HHpredHit(*h2_arguments)

    def testEquals(self):
        """A hit built from the same arguments as h1 equals h1."""
        twin = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        verdict = self.h1.equals(twin)
        self.assertTrue(verdict)

    def testSurpasses(self):
        """h1 is expected to surpass both comparison hits."""
        rivals = (
            (5, 'hitX', 2, 3, 3, 4, 0.5, 10),
            (5, 'hitX', 2, 5, 3, 6, 0.3, 10),
        )
        for arguments in rivals:
            self.assertTrue(self.h1.surpasses(HHpredHit(*arguments)))

    def testIncludes(self):
        """h1.includes() must accept or reject each probe as tabulated."""
        # (HHpredHit arguments, keyword options for includes(), expected)
        probes = (
            ((5, 'hit1', 2, 3, 3, 4, 0.5, 10), {}, True),
            ((5, 'hit1', 2, 4, 3, 5, 0.3, 10), {}, True),
            ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 3}, True),
            ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 2}, False),
            ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 1}, True),
            ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 0}, False),
        )
        for arguments, options, expected in probes:
            outcome = self.h1.includes(HHpredHit(*arguments), **options)
            verify = self.assertTrue if expected else self.assertFalse
            verify(outcome)

    def testLength(self):
        """h1 reports a length of 4."""
        observed = self.h1.length
        self.assertEqual(observed, 4)

    def testAlignment(self):
        """add_alignment() exposes the rows, two segments and an A3M view."""
        query_row, subject_row = 'AB-D', 'A-CD'
        self.h1.add_alignment(query_row, subject_row)

        # The stored rows are returned unchanged.
        self.assertEqual(self.h1.alignment.query, query_row)
        self.assertEqual(self.h1.alignment.subject, subject_row)

        # Expected (qstart, qend, start, end) of every segment, in order.
        expected_segments = ((3, 3, 2, 2), (5, 5, 4, 4))
        segments = list(self.h1.alignment.segments)

        self.assertEqual(len(segments), 2)
        for segment, bounds in zip(segments, expected_segments):
            qstart, qend, start, end = bounds
            self.assertEqual(segment.qstart, qstart)
            self.assertEqual(segment.qend, qend)
            self.assertEqual(segment.start, start)
            self.assertEqual(segment.end, end)

        # A3M rendering without headers.
        rendered = self.h1.alignment.to_a3m().format(headers=False)
        self.assertEqual(rendered.strip(), 'ABD\nA-cD')
Ejemplo n.º 6
0
 def testIncludes(self):
     """h1.includes() must accept or reject each probe as tabulated."""
     # (HHpredHit arguments, keyword options for includes(), expected)
     probes = (
         ((5, 'hit1', 2, 3, 3, 4, 0.5, 10), {}, True),
         ((5, 'hit1', 2, 4, 3, 5, 0.3, 10), {}, True),
         ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 3}, True),
         ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 2}, False),
         ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 1}, True),
         ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 0}, False),
     )
     for arguments, options, expected in probes:
         outcome = self.h1.includes(HHpredHit(*arguments), **options)
         verify = self.assertTrue if expected else self.assertFalse
         verify(outcome)
Ejemplo n.º 7
0
 def testSurpasses(self):
     """h1 is expected to surpass both comparison hits."""
     rivals = (
         (5, 'hitX', 2, 3, 3, 4, 0.5, 10),
         (5, 'hitX', 2, 5, 3, 6, 0.3, 10),
     )
     for arguments in rivals:
         self.assertTrue(self.h1.surpasses(HHpredHit(*arguments)))
Ejemplo n.º 8
0
 def testEquals(self):
     """A hit built from the same arguments as h1 equals h1."""
     twin = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
     verdict = self.h1.equals(twin)
     self.assertTrue(verdict)
Ejemplo n.º 9
0
    def setUp(self):
        """Build the two reference hits shared by the tests."""
        super(TestHit, self).setUp()

        h1_arguments = (1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        h2_arguments = (2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        self.h1 = HHpredHit(*h1_arguments)
        self.h2 = HHpredHit(*h2_arguments)
Ejemplo n.º 10
0
class TestHit(test.Case):
    """Checks comparison, length and alignment behaviour of HHpredHit."""

    def setUp(self):
        """Build the two reference hits shared by the tests."""
        super(TestHit, self).setUp()

        h1_arguments = (1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        h2_arguments = (2, 'hit2', 3, 5, 4, 6, 0.2, 10)

        self.h1 = HHpredHit(*h1_arguments)
        self.h2 = HHpredHit(*h2_arguments)

    def testEquals(self):
        """A hit built from the same arguments as h1 equals h1."""
        twin = HHpredHit(1, 'hit1', 2, 5, 3, 6, 0.5, 10)
        verdict = self.h1.equals(twin)
        self.assertTrue(verdict)

    def testSurpasses(self):
        """h1 is expected to surpass both comparison hits."""
        rivals = (
            (5, 'hitX', 2, 3, 3, 4, 0.5, 10),
            (5, 'hitX', 2, 5, 3, 6, 0.3, 10),
        )
        for arguments in rivals:
            self.assertTrue(self.h1.surpasses(HHpredHit(*arguments)))

    def testIncludes(self):
        """h1.includes() must accept or reject each probe as tabulated."""
        # (HHpredHit arguments, keyword options for includes(), expected)
        probes = (
            ((5, 'hit1', 2, 3, 3, 4, 0.5, 10), {}, True),
            ((5, 'hit1', 2, 4, 3, 5, 0.3, 10), {}, True),
            ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 3}, True),
            ((5, 'hit1', 2, 8, 3, 9, 0.3, 10), {'tolerance': 2}, False),
            ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 1}, True),
            ((5, 'hit1', 1, 5, 2, 6, 0.3, 10), {'tolerance': 0}, False),
        )
        for arguments, options, expected in probes:
            outcome = self.h1.includes(HHpredHit(*arguments), **options)
            verify = self.assertTrue if expected else self.assertFalse
            verify(outcome)

    def testLength(self):
        """h1 reports a length of 4."""
        observed = self.h1.length
        self.assertEqual(observed, 4)

    def testAlignment(self):
        """add_alignment() exposes the rows, two segments and an A3M view."""
        query_row, subject_row = 'AB-D', 'A-CD'
        self.h1.add_alignment(query_row, subject_row)

        # The stored rows are returned unchanged.
        self.assertEqual(self.h1.alignment.query, query_row)
        self.assertEqual(self.h1.alignment.subject, subject_row)

        # Expected (qstart, qend, start, end) of every segment, in order.
        expected_segments = ((3, 3, 2, 2), (5, 5, 4, 4))
        segments = list(self.h1.alignment.segments)

        self.assertEqual(len(segments), 2)
        for segment, bounds in zip(segments, expected_segments):
            qstart, qend, start, end = bounds
            self.assertEqual(segment.qstart, qstart)
            self.assertEqual(segment.qend, qend)
            self.assertEqual(segment.start, start)
            self.assertEqual(segment.end, end)

        # A3M rendering without headers.
        rendered = self.h1.alignment.to_a3m().format(headers=False)
        self.assertEqual(rendered.strip(), 'ABD\nA-cD')
Ejemplo n.º 11
0
 def setUp(self):
     """Build the two reference hits shared by the tests."""
     super(TestHit, self).setUp()

     h1_arguments = (1, 'hit1', 2, 5, 3, 6, 0.5, 10)
     h2_arguments = (2, 'hit2', 3, 5, 4, 6, 0.2, 10)

     self.h1 = HHpredHit(*h1_arguments)
     self.h2 = HHpredHit(*h2_arguments)
Ejemplo n.º 12
0
    def _parse(self, stream, header_only):
        """
        Parse HHpred-style text output into a sorted HHpredHitList.

        Lines are consumed in three stages: the header (key/value lines
        above the hits table), the fixed-width hits table, and the pairwise
        alignment blocks. Alignment blocks are only read when
        ``self.alignments`` is true.

        Args:
            stream: iterable yielding the lines of the output as strings.
            header_only: when true, only the header is parsed and reading
                stops at the hits table header line.

        Returns:
            An HHpredHitList of the parsed hits (empty in header-only mode),
            sorted with its ``sort()`` method, carrying the recognised
            header fields as attributes (query_name, match_columns,
            no_of_seqs, neff, searched_hmms, date, command).

        Raises:
            HHOutputFormatError: if an alignment block names a rank missing
                from the hits table, if alignment data appears before any
                "No <rank>" line, or if add_alignment() rejects the
                collected rows.
            ValueError: propagated from int()/float() when a numeric field
                cannot be converted.
        """
        qlen = None
        in_hits = False
        in_alis = False
        has_alis = False
        c_rank = 0
        header = {}
        hits = {}
        alis = {}

        for line in stream:

            if not in_hits and not in_alis:

                if line.replace(' ', '').startswith('NoHitProbE-value'):
                    if header_only:
                        # Nothing below the table header is read in
                        # header-only mode, so stop consuming the stream.
                        break
                    in_hits = True
                    continue
                elif line.strip() == '':
                    continue
                else:  # parse header data (stuff above the hits table)
                    columns = line.strip().split(None, 1)
                    if len(columns) == 2:

                        identifier, data = columns
                        if identifier in ('Query', 'Command'):
                            data = data.strip()
                        elif identifier == 'Neff':
                            data = float(data)
                        elif identifier in ('Searched_HMMs', 'Match_columns'):
                            data = int(data)

                        header[identifier] = data

                        # Match_columns is passed to every hit as the
                        # last HHpredHit argument.
                        if identifier == 'Match_columns':
                            qlen = data

            if in_hits and not header_only:
                if not line.strip():  # suboptimal way to handle block switch
                    in_hits = False
                    in_alis = True
                    if self.alignments:
                        continue
                    else:
                        break
                elif line.strip() == 'Done':
                    in_hits = False
                    in_alis = False
                    break

                # The hits table is fixed-width: every field is read from
                # a fixed character slice of the line.
                description = line[:34].split()
                rank = int(description[0])
                hit_id = description[1]

                pos = line[85:94].strip()
                start, end = map(int, pos.split('-'))

                qpos = line[75:84].strip()
                qstart, qend = map(int, qpos.split('-'))

                # Stored as a fraction (table value divided by 100).
                probability = float(line[35:40]) / 100.0

                hit = HHpredHit(rank, hit_id, start, end, qstart, qend,
                                probability, qlen)

                hit.evalue = float(line[41:48])
                hit.pvalue = float(line[49:56])
                hit.score = float(line[57:63])
                hit.ss_score = float(line[64:69])

                hit.slength = int(line[94:].replace('(', '').replace(')', ''))

                hits[hit.rank] = hit
                alis[hit.rank] = {'q': [], 's': []}

            elif in_alis and not header_only:
                if line.startswith('Done'):
                    in_alis = False
                    break

                elif line.startswith('No '):
                    c_rank = int(line[3:])
                    if c_rank not in hits:
                        raise HHOutputFormatError(
                            'Alignment {0}. refers to a non-existing hit'.
                            format(c_rank))

                elif (c_rank not in hits and
                        line.startswith(('>', 'Probab=', 'Q ', 'T '))):
                    # No valid "No <rank>" line has been seen yet, so there
                    # is no hit to attach this data to. Report a format
                    # error instead of failing with a bare KeyError below.
                    raise HHOutputFormatError(
                        'Alignment data found before any "No <rank>" line')

                elif line.startswith('>'):
                    hits[c_rank].name = line[1:].strip()

                elif line.startswith('Probab='):
                    for pair in line.split():
                        key, value = pair.split('=')
                        if key == 'Identities':
                            hits[c_rank].identity = float(
                                value.replace('%', ''))
                        elif key == 'Similarity':
                            hits[c_rank].similarity = float(value)
                        elif key == 'Sum_probs':
                            hits[c_rank].prob_sum = float(value)

                elif line.startswith('Q ') and line[:11].rstrip() not in (
                        'Q Consensus', 'Q ss_pred', 'Q ss_conf', 'Q ss_dssp'):
                    # Residues start at column 22 and run up to the first
                    # whitespace or digit.
                    for residue in line[22:]:
                        if residue.isspace() or residue.isdigit():
                            break
                        else:
                            alis[c_rank]['q'].append(residue)
                            has_alis = True

                elif line.startswith('T ') and line[:11].rstrip() not in (
                        'T Consensus', 'T ss_pred', 'T ss_conf', 'T ss_dssp'):
                    for residue in line[22:]:
                        if residue.isspace() or residue.isdigit():
                            break
                        else:
                            alis[c_rank]['s'].append(residue)

        if self.alignments and has_alis:
            for rank in alis:
                try:
                    hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])

                except (KeyError, ValueError) as er:
                    raise HHOutputFormatError(
                        'Corrupt alignment at hit No {0}.\n {1}'.format(
                            rank, er))

        del alis

        hits = HHpredHitList(hits.values())

        hits.sort()

        # Copy the recognised header fields onto the hit list; any other
        # header identifier is ignored.
        header_attributes = {
            'Query': 'query_name',
            'Match_columns': 'match_columns',
            'No_of_seqs': 'no_of_seqs',
            'Neff': 'neff',
            'Searched_HMMs': 'searched_hmms',
            'Date': 'date',
            'Command': 'command',
        }
        for identifier, data in header.items():
            if identifier in header_attributes:
                setattr(hits, header_attributes[identifier], data)

        return hits
Ejemplo n.º 13
0
    def _parse(self, stream, header_only):
        """
        Parse HHpred-style text output into a sorted HHpredHitList.

        Lines are consumed in three stages: the header (key/value lines
        above the hits table), the fixed-width hits table, and the pairwise
        alignment blocks. Alignment blocks are only read when
        ``self.alignments`` is true.

        Args:
            stream: iterable yielding the lines of the output as strings.
            header_only: when true, only the header is parsed and reading
                stops at the hits table header line.

        Returns:
            An HHpredHitList of the parsed hits (empty in header-only mode),
            sorted with its ``sort()`` method, carrying the recognised
            header fields as attributes (query_name, match_columns,
            no_of_seqs, neff, searched_hmms, date, command).

        Raises:
            HHOutputFormatError: if an alignment block names a rank missing
                from the hits table, if alignment data appears before any
                "No <rank>" line, or if add_alignment() rejects the
                collected rows.
            ValueError: propagated from int()/float() when a numeric field
                cannot be converted.
        """
        qlen = None
        in_hits = False
        in_alis = False
        has_alis = False
        c_rank = 0
        header = {}
        hits = {}
        alis = {}

        for line in stream:

            if not in_hits and not in_alis:

                if line.replace(' ', '').startswith('NoHitProbE-value'):
                    if header_only:
                        # Nothing below the table header is read in
                        # header-only mode, so stop consuming the stream.
                        break
                    in_hits = True
                    continue
                elif line.strip() == '':
                    continue
                else:  # parse header data (stuff above the hits table)
                    columns = line.strip().split(None, 1)
                    if len(columns) == 2:

                        identifier, data = columns
                        if identifier in ('Query', 'Command'):
                            data = data.strip()
                        elif identifier == 'Neff':
                            data = float(data)
                        elif identifier in ('Searched_HMMs', 'Match_columns'):
                            data = int(data)

                        header[identifier] = data

                        # Match_columns is passed to every hit as the
                        # last HHpredHit argument.
                        if identifier == 'Match_columns':
                            qlen = data

            if in_hits and not header_only:
                if not line.strip():  # suboptimal way to handle block switch
                    in_hits = False
                    in_alis = True
                    if self.alignments:
                        continue
                    else:
                        break
                elif line.strip() == 'Done':
                    in_hits = False
                    in_alis = False
                    break

                # The hits table is fixed-width: every field is read from
                # a fixed character slice of the line.
                description = line[:34].split()
                rank = int(description[0])
                hit_id = description[1]

                pos = line[85:94].strip()
                start, end = map(int, pos.split('-'))

                qpos = line[75:84].strip()
                qstart, qend = map(int, qpos.split('-'))

                # Stored as a fraction (table value divided by 100).
                probability = float(line[35:40]) / 100.0

                hit = HHpredHit(rank, hit_id, start, end, qstart, qend,
                                probability, qlen)

                hit.evalue = float(line[41:48])
                hit.pvalue = float(line[49:56])
                hit.score = float(line[57:63])
                hit.ss_score = float(line[64:69])

                hit.slength = int(line[94:].replace('(', '').replace(')', ''))

                hits[hit.rank] = hit
                alis[hit.rank] = {'q': [], 's': []}

            elif in_alis and not header_only:
                if line.startswith('Done'):
                    in_alis = False
                    break

                elif line.startswith('No '):
                    c_rank = int(line[3:])
                    if c_rank not in hits:
                        raise HHOutputFormatError(
                            'Alignment {0}. refers to a non-existing hit'.
                            format(c_rank))

                elif (c_rank not in hits and
                        line.startswith(('>', 'Probab=', 'Q ', 'T '))):
                    # No valid "No <rank>" line has been seen yet, so there
                    # is no hit to attach this data to. Report a format
                    # error instead of failing with a bare KeyError below.
                    raise HHOutputFormatError(
                        'Alignment data found before any "No <rank>" line')

                elif line.startswith('>'):
                    hits[c_rank].name = line[1:].strip()

                elif line.startswith('Probab='):
                    for pair in line.split():
                        key, value = pair.split('=')
                        if key == 'Identities':
                            hits[c_rank].identity = float(
                                value.replace('%', ''))
                        elif key == 'Similarity':
                            hits[c_rank].similarity = float(value)
                        elif key == 'Sum_probs':
                            hits[c_rank].prob_sum = float(value)

                elif line.startswith('Q ') and line[:11].rstrip() not in (
                        'Q Consensus', 'Q ss_pred', 'Q ss_conf', 'Q ss_dssp'):
                    # Residues start at column 22 and run up to the first
                    # whitespace or digit.
                    for residue in line[22:]:
                        if residue.isspace() or residue.isdigit():
                            break
                        else:
                            alis[c_rank]['q'].append(residue)
                            has_alis = True

                elif line.startswith('T ') and line[:11].rstrip() not in (
                        'T Consensus', 'T ss_pred', 'T ss_conf', 'T ss_dssp'):
                    for residue in line[22:]:
                        if residue.isspace() or residue.isdigit():
                            break
                        else:
                            alis[c_rank]['s'].append(residue)

        if self.alignments and has_alis:
            for rank in alis:
                try:
                    hits[rank].add_alignment(alis[rank]['q'], alis[rank]['s'])

                except (KeyError, ValueError) as er:
                    raise HHOutputFormatError(
                        'Corrupt alignment at hit No {0}.\n {1}'.format(
                            rank, er))

        del alis

        hits = HHpredHitList(hits.values())

        hits.sort()

        # Copy the recognised header fields onto the hit list; any other
        # header identifier is ignored.
        header_attributes = {
            'Query': 'query_name',
            'Match_columns': 'match_columns',
            'No_of_seqs': 'no_of_seqs',
            'Neff': 'neff',
            'Searched_HMMs': 'searched_hmms',
            'Date': 'date',
            'Command': 'command',
        }
        for identifier, data in header.items():
            if identifier in header_attributes:
                setattr(hits, header_attributes[identifier], data)

        return hits