def test_min_scores_filter(self):
    '''We can keep the hits whose score passes the given threshold.

    The 'score_threshold' filter is exercised twice over blast.xml: once
    with an upper bound on the expect value and once with a lower bound on
    the similarity. For each case the per-query match summary is compared
    with the expected one.
    '''
    # (filter configuration, expected match summary per query name)
    cases = [
        # with evalue
        ({'kind': 'score_threshold',
          'score_key': 'expect',
          'max_score': 1e-34},
         {'cCL1Contig2': 2, 'cCL1Contig3': 0,
          'cCL1Contig4': 2, 'cCL1Contig5': 2}),
        # with similarity
        ({'kind': 'score_threshold',
          'score_key': 'similarity',
          'min_score': 92},
         {'cCL1Contig2': 0, 'cCL1Contig3': 0,
          'cCL1Contig4': 1, 'cCL1Contig5': 2}),
    ]

    blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
    # The context manager guarantees the file is closed even when a check
    # fails; everything that reads from the handle stays inside the block.
    with open(blast_fpath) as blast_file:
        # NOTE(review): the same handle is shared by every parser;
        # presumably BlastParser rewinds it before reading -- confirm.
        for filter_, expected in cases:
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=[filter_])
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)
def _do_blast_2(db_fpath, queries, program, dbtype=None, blast_format=None,
                params=None, remote=False):
    '''It returns an alignment result with the blast.

    It is an alternative interface to the one based on fpaths.
    db_fpath should be a plain sequence file.
    queries should be a SeqRecord list.
    If an alternative blast output format is given it should be tabular, so
    blast_format is a list of fields.

    When remote is True, db_fpath is handed to blast untouched as the
    database and the output format defaults to 'XML'.
    NOTE(review): in the remote case blast_format is upper-cased, so it has
    to be a string there, not a list of fields; the result is nevertheless
    always read with BlastParser -- confirm which formats are really
    supported remotely.

    params is an optional dict with extra blast parameters. It is not
    modified: the 'outfmt' entry is set on a private copy.

    It returns a tuple with the alignment parser and the open temporary
    file that holds the blast output.
    '''
    query_fhand = write_seqs(queries, file_format='fasta')
    # Make sure the sequences are on disk before blast reads the file by name.
    query_fhand.flush()

    if remote:
        blastdb = db_fpath
        fmt = 'XML' if blast_format is None else blast_format.upper()
    else:
        blastdb = get_or_create_blastdb(db_fpath, dbtype=dbtype)
        if blast_format is None:
            blast_format = [
                'query', 'subject', 'query_length', 'subject_length',
                'query_start', 'query_end', 'subject_start', 'subject_end',
                'expect', 'identity',
            ]
        fmt = generate_tabblast_format(blast_format)

    # Work on a copy so the dict given by the caller is left untouched.
    params = {} if params is None else dict(params)
    params['outfmt'] = fmt

    blast_fhand = tempfile.NamedTemporaryFile(suffix='.blast')
    do_blast(query_fhand.name, blastdb, program, blast_fhand.name, params,
             remote=remote)

    if remote:
        blasts = BlastParser(blast_fhand)
    else:
        blasts = TabularBlastParser(blast_fhand, blast_format)

    # The file handle is returned along with the parser; presumably so the
    # temporary file outlives this call while the parser is consumed.
    return blasts, blast_fhand
def test_no_filter(self):
    '''It summarizes the matches of the blast parser with no filter applied.

    The parsed blast.xml is summarized directly, without going through
    filter_alignments, and compared with the expected summary per query.
    '''
    # lcl|2_0 cCL1Contig2
    # lcl|3_0 cCL1Contig3
    # lcl|4_0 cCL1Contig4
    # lcl|5_0 cCL1Contig5
    expected = {'cCL1Contig2': 3, 'cCL1Contig3': 1,
                'cCL1Contig4': 5, 'cCL1Contig5': 8}

    blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
    # The file is closed on exit, even if the check fails.
    with open(blast_fpath) as blast_file:
        parser = BlastParser(fhand=blast_file)
        match_summary = _summarize_matches(parser)
        _check_match_summary(match_summary, expected)
def test_min_length_filter(self):
    '''We can keep the hits length above the given one.

    The 'min_length' filter is exercised three times over blast.xml: with a
    minimum number of residues, with a minimum percentage measured in the
    query and with a minimum percentage measured in the subject. For each
    case the per-query match summary is compared with the expected one.
    '''
    # (filter configuration, expected match summary per query name)
    cases = [
        # with the min length given as a number of residues
        ({'kind': 'min_length',
          'min_num_residues': 500,
          'length_in_query': True},
         {'cCL1Contig2': 3, 'cCL1Contig3': 0,
          'cCL1Contig4': 1, 'cCL1Contig5': 1}),
        # with the min length given in query %
        ({'kind': 'min_length',
          'min_percentage': 70,
          'length_in_query': True},
         {'cCL1Contig2': 0, 'cCL1Contig3': 0,
          'cCL1Contig4': 2, 'cCL1Contig5': 0}),
        # with the min length given in subject %
        ({'kind': 'min_length',
          'min_percentage': 0.002,
          'length_in_query': False},
         {'cCL1Contig2': 3, 'cCL1Contig3': 0,
          'cCL1Contig4': 1, 'cCL1Contig5': 2}),
    ]

    blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
    # The context manager guarantees the file is closed even when a check
    # fails; everything that reads from the handle stays inside the block.
    with open(blast_fpath) as blast_file:
        # NOTE(review): the same handle is shared by every parser;
        # presumably BlastParser rewinds it before reading -- confirm.
        for filter_, expected in cases:
            blasts = BlastParser(fhand=blast_file)
            filtered_blasts = filter_alignments(blasts, config=[filter_])
            match_summary = _summarize_matches(filtered_blasts)
            _check_match_summary(match_summary, expected)
def test_best_scores_filter(self):
    '''We can keep the hits with the best expects.

    The 'best_scores' filter is applied to blast.xml using the expect as the
    score, and the per-query match summary is compared with the expected
    one.
    '''
    filters = [{'kind': 'best_scores',
                'score_key': 'expect',
                'max_score': 1e-4,
                'score_tolerance': 10}]
    expected = {'cCL1Contig2': 2, 'cCL1Contig3': 1,
                'cCL1Contig4': 1, 'cCL1Contig5': 2}

    blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
    # The file is closed on exit, even if the check fails; the filtered
    # alignments are consumed before leaving the block.
    with open(blast_fpath) as blast_file:
        blasts = BlastParser(fhand=blast_file)
        filtered_blasts = filter_alignments(blasts, config=filters)
        match_summary = _summarize_matches(filtered_blasts)
        _check_match_summary(match_summary, expected)
def test_blast_parser(self):
    '''It tests the blast parser.

    blast.xml is parsed twice and every yielded blast is compared, by
    position, with an expected structure (an empty dict is used where no
    particular content is listed). The first pass also checks that exactly
    four blasts are yielded. Finally the name of the first subject parsed
    from melon_tair.xml is checked.
    '''
    blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
    # The context manager guarantees the file is closed even when a check
    # fails.
    with open(blast_fpath) as blast_file:
        parser = BlastParser(fhand=blast_file)
        expected_results = [
            {'query': {'name': 'cCL1Contig2',
                       'description': "<unknown description>",
                       'length': 1924},
             'matches': [
                 {'subject': {'name': 'chr18',
                              'description': 'No definition line found',
                              'length': 19691255},
                  'scores': {'expect': 4.60533e-35},
                  'match_parts': [{'query_start': 276,
                                   'query_end': 484,
                                   'query_strand': -1,
                                   'subject_start': 477142,
                                   'subject_end': 477350,
                                   'subject_strand': 1,
                                   'scores': {'expect': 4.60533e-35,
                                              'similarity': 84.2,
                                              'identity': 84.2}}],
                  },
             ]},
            {'query': {'name': 'cCL1Contig3',
                       'description': "<unknown description>",
                       'length': 629}},
            {},
            {},
        ]
        n_blasts = 0
        for index, blast in enumerate(parser):
            _check_blast(blast, expected_results[index])
            n_blasts = index + 1
        assert n_blasts == 4

        # with the subject id given in the xml blast
        expected_results = [
            {'query': {'name': 'cCL1Contig2',
                       'description': '<unknown description>',
                       'length': 1924}},
            {},
            {},
            {},
        ]
        # NOTE(review): the handle already read above is given to a new
        # parser; presumably BlastParser rewinds it -- confirm.
        parser = BlastParser(fhand=blast_file)
        for index, blast in enumerate(parser):
            _check_blast(blast, expected_results[index])

    # Check using the def as accession in all the blasts.
    # It changes depending on the blast output format, which depends on the
    # blast version.
    tair_fpath = os.path.join(TEST_DATA_DIR, 'melon_tair.xml')
    with open(tair_fpath) as blast_file:
        parser = BlastParser(fhand=blast_file)
        assert parser.next()['matches'][0]['subject']['name'] == 'tair1'
def test_blast_no_result(self):
    '''It tests that the xml output can be an empty string.

    An empty temporary file is parsed and filtered; asking the filtered
    alignments for their first item has to raise StopIteration.
    '''
    filters = [{'kind': 'best_scores',
                'score_key': 'expect',
                'max_score': 1e-4,
                'score_tolerance': 10}]

    # The temporary file is closed (and so removed) on exit, even if the
    # assertion fails.
    with NamedTemporaryFile() as blast_file:
        blasts = BlastParser(fhand=blast_file)
        filt_b = filter_alignments(blasts, config=filters)
        # No result is expected, so the very first request must find the
        # filtered alignments exhausted.
        self.assertRaises(StopIteration, filt_b.next)