Esempio n. 1
0
    def test_min_scores_filter(self):
        """We can keep the hits whose scores pass the given threshold.

        The 'score_threshold' filter is run on the blast.xml test file, first
        with a maximum expect value and then with a minimum similarity, and
        each match summary is compared with the expected per-name values.
        """
        # The with block closes the handle even when an assertion fails.
        # NOTE(review): both parsers share this handle without an explicit
        # rewind -- presumably BlastParser copes with that; confirm.
        with open(os.path.join(TEST_DATA_DIR, 'blast.xml')) as blast_file:
            # with evalue
            filters = [{'kind': 'score_threshold',
                        'score_key': 'expect',
                        'max_score': 1e-34,
                       }]
            expected = {'cCL1Contig2': 2, 'cCL1Contig3': 0,
                        'cCL1Contig4': 2, 'cCL1Contig5': 2}
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=filters)
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)

            # with similarity
            filters = [{'kind': 'score_threshold',
                        'score_key': 'similarity',
                        'min_score': 92,
                       }]
            expected = {'cCL1Contig2': 0, 'cCL1Contig3': 0,
                        'cCL1Contig4': 1, 'cCL1Contig5': 2}
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=filters)
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)
Esempio n. 2
0
    def _look_for_blast_matches(self, seqrecords, blastdb):
        """Run the blast, filter the results and index match parts by query.

        The blast is done by _do_blast_2 with self.program and self.params.
        When self.filters is not None the alignments go through
        filter_alignments. When self.elongate_for_global is set, the parts of
        every match are passed to elongate_match_parts_till_global with
        align_completely=SUBJECT before being collected.

        Returns a dict that maps each query name to a list with the match
        parts of all the matches of that query. Queries without matches get
        no entry.
        """
        blasts, blast_fhand = _do_blast_2(blastdb, seqrecords, self.program, params=self.params)
        # The blast file must stay open until the alignments are consumed and
        # has to be closed even if the filtering or the elongation fails.
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            indexed_match_parts = {}
            for blast in blasts:
                query = blast["query"]
                for match in blast["matches"]:
                    subject = match["subject"]
                    if self.elongate_for_global:
                        elongate_match_parts_till_global(
                            match["match_parts"],
                            query_length=query["length"],
                            subject_length=subject["length"],
                            align_completely=SUBJECT,
                        )
                    # A fresh list per query: storing the match's own list and
                    # extending it later would modify the blast result itself.
                    parts_for_query = indexed_match_parts.setdefault(query["name"], [])
                    parts_for_query.extend(match["match_parts"])
        finally:
            blast_fhand.close()
        return indexed_match_parts
Esempio n. 3
0
    def _look_for_blast_matches(self, seqrecords, blastdb, dbtype):
        """Run the blast, filter the results and index them by query name.

        The blast is done by _do_blast_2 with self.program, self.params, the
        given dbtype and self._remote. When self.filters is not None the
        alignments go through filter_alignments.

        Returns a dict that maps each query name to its blast result.
        """
        blasts, blast_fhand = _do_blast_2(
            blastdb, seqrecords, self.program, params=self.params, dbtype=dbtype, remote=self._remote
        )
        # The blast file must stay open until every alignment has been read
        # and has to be closed even if the filtering fails.
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            return {blast["query"]["name"]: blast for blast in blasts}
        finally:
            blast_fhand.close()
Esempio n. 4
0
    def test_min_length_filter(self):
        """We can keep the hits whose length is above the given one.

        The 'min_length' filter is run on the blast.xml test file with the
        minimum given as a number of residues, as a percentage with
        length_in_query=True and as a percentage with length_in_query=False.
        Each match summary is compared with the expected per-name values.
        """
        # The with block closes the handle even when an assertion fails.
        # NOTE(review): the three parsers share this handle without an
        # explicit rewind -- presumably BlastParser copes with that; confirm.
        with open(os.path.join(TEST_DATA_DIR, 'blast.xml')) as blast_file:
            # with the min length given in base pairs
            filters = [{'kind': 'min_length',
                        'min_num_residues': 500,
                        'length_in_query': True
                       }]
            expected = {'cCL1Contig2': 3, 'cCL1Contig3': 0,
                        'cCL1Contig4': 1, 'cCL1Contig5': 1}
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=filters)
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)

            # with the min length given in query %
            filters = [{'kind': 'min_length',
                        'min_percentage': 70,
                        'length_in_query': True
                       }]
            expected = {'cCL1Contig2': 0, 'cCL1Contig3': 0,
                        'cCL1Contig4': 2, 'cCL1Contig5': 0}
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=filters)
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)

            # with the min length given in subject %
            filters = [{'kind': 'min_length',
                        'min_percentage': 0.002,
                        'length_in_query': False
                       }]
            expected = {'cCL1Contig2': 3, 'cCL1Contig3': 0,
                        'cCL1Contig4': 1, 'cCL1Contig5': 2}
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=filters)
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)
Esempio n. 5
0
 def test_best_scores_filter(self):
     """We can keep the hits with the best expects.

     The 'best_scores' filter is run on the blast.xml test file using the
     expect score with a maximum of 1e-4 and a score tolerance of 10, and
     the match summary is compared with the expected per-name values.
     """
     # The with block closes the handle even when the check fails.
     with open(os.path.join(TEST_DATA_DIR, 'blast.xml')) as blast_file:
         filters = [{'kind': 'best_scores',
                     'score_key': 'expect',
                     'max_score': 1e-4,
                     'score_tolerance': 10
                    }]
         expected = {'cCL1Contig2': 2, 'cCL1Contig3': 1,
                     'cCL1Contig4': 1, 'cCL1Contig5': 2}
         blasts = BlastParser(fhand=blast_file)
         filtered_blasts = filter_alignments(blasts, config=filters)
         match_summary = _summarize_matches(filtered_blasts)
         _check_match_summary(match_summary, expected)
Esempio n. 6
0
    def _look_for_blast_matches(self, seqrecords, blastdb, dbtype):
        """Run the blast, filter the results and index them by query name.

        The blast is done by _do_blast_2 with self.program, self.params, the
        given dbtype and self._remote. When self.filters is not None the
        alignments go through filter_alignments.

        Returns a dict that maps each query name to its blast result.
        """
        blasts, blast_fhand = _do_blast_2(blastdb,
                                          seqrecords,
                                          self.program,
                                          params=self.params,
                                          dbtype=dbtype,
                                          remote=self._remote)
        # The blast file must stay open until every alignment has been read
        # and has to be closed even if the filtering fails.
        try:
            if self.filters is not None:
                blasts = filter_alignments(blasts, config=self.filters)

            return {blast['query']['name']: blast for blast in blasts}
        finally:
            blast_fhand.close()
Esempio n. 7
0
    def test_blast_no_result(self):
        """The xml output can be empty: filtering it yields no alignment.

        A new temporary file is handed to BlastParser and the filtered
        result is expected to be exhausted on the first request.
        """
        blast_file = NamedTemporaryFile()
        try:
            blasts = BlastParser(fhand=blast_file)

            filters = [{'kind': 'best_scores',
                        'score_key': 'expect',
                        'max_score': 1e-4,
                        'score_tolerance': 10
                       }]
            filt_b = filter_alignments(blasts, config=filters)
            # The next() builtin works with both the Python 2 and the Python 3
            # iterator protocols; the test fails if an item is returned.
            self.assertRaises(StopIteration, next, filt_b)
        finally:
            # closing the temporary file also removes it
            blast_file.close()
Esempio n. 8
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        """Look for the oligos in the given sequence file.

        The sequences in seq_fpath are written by seqio in fasta format into
        a temporary directory and that file is given to _do_blast_2 together
        with the oligos, self.params, self.program and dbtype=seqs_type. When
        self.filters is not None the alignments go through filter_alignments.
        When self.elongate_for_global is set, the parts of every match are
        passed to elongate_match_parts_till_global with
        align_completely=QUERY.

        Returns a dict that maps each subject (read) name to a list with the
        match parts of all the matches found for it.
        """
        # The temporary directory and the blast file have to be kept until the
        # alignments are consumed, otherwise they might be removed.
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both handles are closed before blasting, so the fasta file is
            # completely written when it is read.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as db_fhand:
                    seqio([in_fhand],
                          db_fhand,
                          out_format='fasta',
                          copy_if_same_format=False)

            blasts, blast_fhand = _do_blast_2(dbpath,
                                              oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match['match_parts'],
                                query_length=oligo['length'],
                                subject_length=read['length'],
                                align_completely=QUERY)

                        # A fresh list per read: storing the match's own list
                        # and extending it later would modify the blast result.
                        read_parts = indexed_match_parts.setdefault(read['name'],
                                                                    [])
                        read_parts.extend(match['match_parts'])
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Esempio n. 9
0
    def _look_for_blast_matches(self, seq_fpath, oligos):
        """Look for the oligos in the given sequence file.

        The sequences in seq_fpath are written by seqio in fasta format into
        a temporary directory and that file is given to _do_blast_2 together
        with the oligos, self.params and self.program. When self.filters is
        not None the alignments go through filter_alignments. When
        self.elongate_for_global is set, the parts of every match are passed
        to elongate_match_parts_till_global with align_completely=QUERY.

        Returns a dict that maps each subject (read) name to a list of match
        parts. With exactly one oligo the entry of a read is replaced by the
        parts of the last match seen for it; otherwise the parts of all the
        matches of the read are accumulated.
        """
        # The temporary directory and the blast file have to be kept until the
        # alignments are consumed, otherwise they might be removed.
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both handles are closed before blasting, so the fasta file is
            # completely written when it is read.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, "w") as db_fhand:
                    seqio([in_fhand], [db_fhand], out_format="fasta", copy_if_same_format=False)

            blasts, blast_fhand = _do_blast_2(dbpath, oligos, params=self.params, program=self.program)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                one_oligo = len(oligos) == 1
                for blast in blasts:
                    oligo = blast["query"]
                    for match in blast["matches"]:
                        read = match["subject"]
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                match["match_parts"],
                                query_length=oligo["length"],
                                subject_length=read["length"],
                                align_completely=QUERY,
                            )

                        match_parts = match["match_parts"]
                        if one_oligo:
                            indexed_match_parts[read["name"]] = match_parts
                        else:
                            # A fresh list per read: storing the match's own
                            # list and extending it later would modify the
                            # blast result.
                            indexed_match_parts.setdefault(read["name"], []).extend(match_parts)
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Esempio n. 10
0
    def _look_for_blast_matches(self, seq_fpath, oligos, seqs_type):
        """Look for the oligos in the given sequence file.

        The sequences in seq_fpath are written by seqio in fasta format into
        a temporary directory and that file is given to _do_blast_2 together
        with the oligos, self.params, self.program and dbtype=seqs_type. When
        self.filters is not None the alignments go through filter_alignments.
        When self.elongate_for_global is set, the parts of every match are
        passed to elongate_match_parts_till_global with
        align_completely=QUERY.

        Returns a dict that maps each subject (read) name to a list with the
        match parts of all the matches found for it.
        """
        # The temporary directory and the blast file have to be kept until the
        # alignments are consumed, otherwise they might be removed.
        temp_dir = TemporaryDir()
        try:
            dbpath = os.path.join(temp_dir.name, os.path.basename(seq_fpath))
            # Both handles are closed before blasting, so the fasta file is
            # completely written when it is read.
            with open(seq_fpath) as in_fhand:
                with open(dbpath, 'w') as db_fhand:
                    seqio([in_fhand], db_fhand, out_format='fasta',
                          copy_if_same_format=False)

            blasts, blast_fhand = _do_blast_2(dbpath, oligos,
                                              params=self.params,
                                              program=self.program,
                                              dbtype=seqs_type)
            try:
                if self.filters is not None:
                    blasts = filter_alignments(blasts, config=self.filters)

                # Which are the regions covered in each sequence?
                indexed_match_parts = {}
                for blast in blasts:
                    oligo = blast['query']
                    for match in blast['matches']:
                        read = match['subject']
                        if self.elongate_for_global:
                            elongate_match_parts_till_global(
                                                 match['match_parts'],
                                                 query_length=oligo['length'],
                                                 subject_length=read['length'],
                                                 align_completely=QUERY)

                        # A fresh list per read: storing the match's own list
                        # and extending it later would modify the blast result.
                        read_parts = indexed_match_parts.setdefault(read['name'],
                                                                    [])
                        read_parts.extend(match['match_parts'])
            finally:
                blast_fhand.close()
        finally:
            temp_dir.close()
        return indexed_match_parts
Esempio n. 11
0
    def test_min_length_mapper(self):
        'Matches can be filtered according to their length'

        def _part(query_end, subject_end):
            'It builds a match part that starts at 0 in query and subject'
            return {'query_start': 0, 'query_end': query_end,
                    'subject_start': 0, 'subject_end': subject_end}

        def _limits(end, subject_end):
            'It builds the expected limits of a match that starts at 0'
            return {'start': 0, 'end': end,
                    'subject_start': 0, 'subject_end': subject_end}

        def _apply(aligns, filter_conf):
            'It runs one filter and returns the result as a list'
            return list(filter_alignments(aligns, config=[filter_conf]))

        # Minimum number of residues, measured in the query
        in_query_conf = {'kind': 'min_length',
                         'min_num_residues': 100,
                         'length_in_query': True}
        two_parts_match = {'match_parts': [_part(100, 100), _part(50, 50)]}
        one_part_match = {'match_parts': [_part(50, 100)]}
        alignments = [{'matches': [two_parts_match, one_part_match]}]

        result = _apply(alignments, in_query_conf)
        _check_blast(result[0], {'matches': [_limits(100, 100)]})
        assert len(result) == 1
        assert len(result[0]['matches'][0]['match_parts']) == 2

        # Now filtering every match part
        per_part_conf = {'kind': 'min_length',
                         'min_num_residues': 100,
                         'length_in_query': True,
                         'filter_match_parts': True}
        result = _apply(alignments, per_part_conf)
        _check_blast(result[0], {'matches': [_limits(100, 100)]})
        assert len(result) == 1
        assert len(result[0]['matches'][0]['match_parts']) == 1

        # Minimum number of residues, not measured in the query
        not_in_query_conf = {'kind': 'min_length',
                             'min_num_residues': 100,
                             'length_in_query': False}
        result = _apply(alignments, not_in_query_conf)
        both_matches = {'matches': [_limits(100, 100), _limits(50, 100)]}
        _check_blast(result[0], both_matches)
        assert len(result) == 1

        # Minimum percentage with length_in_query and a query length
        query_percent_conf = {'kind': 'min_length',
                              'min_percentage': 90,
                              'length_in_query': True}
        alignments = [{'query': {'length': 100},
                       'matches': [{'match_parts': [_part(90, 100)]},
                                   {'match_parts': [_part(50, 100)]}]}]

        result = _apply(alignments, query_percent_conf)
        _check_blast(result[0], {'matches': [_limits(90, 100)]})
        assert len(result) == 1

        # Minimum percentage without length_in_query and subject lengths
        subject_percent_conf = {'kind': 'min_length',
                                'min_percentage': 90,
                                'length_in_query': False}
        alignments = [{'matches': [{'subject': {'length': 100},
                                    'match_parts': [_part(100, 90)]},
                                   {'subject': {'length': 100},
                                    'match_parts': [_part(100, 89)]}]}]

        result = _apply(alignments, subject_percent_conf)
        _check_blast(result[0], {'matches': [_limits(100, 90)]})
        assert len(result) == 1
Esempio n. 12
0
    def test_max_score_mapper(self):
        'The best_scores filter is applied with a max expect of 1e-3'

        def _part(expect, start, end):
            'It builds a match part with equal query and subject limits'
            return {'scores': {'expect': expect},
                    'query_start': start, 'query_end': end,
                    'subject_start': start, 'subject_end': end}

        def _bare_match(expect):
            'It builds a match with only scores and one scored match part'
            return {'scores': {'expect': expect},
                    'match_parts': [{'scores': {'expect': expect}}]}

        def _located_match(end, parts):
            'It builds the 1e-4 match that spans from 0 to end'
            return {'scores': {'expect': 1e-4},
                    'start': 0,
                    'end': end,
                    'subject_start': 0,
                    'subject_end': end,
                    'match_parts': parts}

        best_scores_conf = {'kind': 'best_scores',
                            'score_key': 'expect',
                            'max_score': 1e-3}

        input_parts = [_part(1e-4, 0, 10), _part(5e-4, 30, 40),
                       _part(1e-3, 50, 60), _part(1e-2, 80, 100)]
        first_align = {'matches': [_located_match(100, input_parts),
                                   _bare_match(1e-3),
                                   _bare_match(1e-2)]}
        second_align = {'matches': [_bare_match(1e-2)]}

        result = list(filter_alignments([first_align, second_align],
                                        config=[best_scores_conf]))

        # The expected dicts are built anew, they share nothing with the input
        kept_parts = [_part(1e-4, 0, 10), _part(5e-4, 30, 40),
                      _part(1e-3, 50, 60)]
        expected_first = {'matches': [_located_match(60, kept_parts),
                                      _bare_match(1e-3)]}
        _check_blast(result[0], expected_first)
        assert len(result) == 1
Esempio n. 13
0
    def test_min_score_mapper(self):
        'The matches with scores above the threshold are kept'

        def _part(score, start, end):
            'It builds a match part with equal query and subject limits'
            return {'scores': {'score': score},
                    'query_start': start, 'query_end': end,
                    'subject_start': start, 'subject_end': end}

        def _bare_match(score):
            'It builds a match with only scores and one scored match part'
            return {'scores': {'score': score},
                    'match_parts': [{'scores': {'score': score}}]}

        def _located_match(end, parts):
            'It builds the 400 score match that spans from 0 to end'
            return {'scores': {'score': 400},
                    'start': 0,
                    'end': end,
                    'subject_start': 0,
                    'subject_end': end,
                    'match_parts': parts}

        threshold_conf = {'kind': 'score_threshold',
                          'score_key': 'score',
                          'min_score': 100}

        input_parts = [_part(400, 0, 10), _part(300, 30, 40),
                       _part(50, 50, 60), _part(40, 80, 100)]
        first_align = {'matches': [_located_match(100, input_parts),
                                   _bare_match(20),
                                   _bare_match(90)]}
        second_align = {'matches': [_bare_match(20)]}

        result = list(filter_alignments([first_align, second_align],
                                        config=[threshold_conf]))

        # The expected dicts are built anew, they share nothing with the input
        kept_parts = [_part(400, 0, 10), _part(300, 30, 40)]
        expected_first = {'matches': [_located_match(40, kept_parts)]}
        _check_blast(result[0], expected_first)
        assert len(result) == 1