Example #1
0
    def test_min_scores_filter(self):
        'We can keep the hits with scores beyond the given threshold'
        blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')

        # Each case pairs a filter configuration with the match summary
        # expected once that filter has been applied.
        # (presumably the summary maps query names to the number of hits
        # kept -- confirm against _summarize_matches)
        cases = [
            # with evalue
            ([{'kind': 'score_threshold',
               'score_key': 'expect',
               'max_score': 1e-34,
              }],
             {'cCL1Contig2': 2, 'cCL1Contig3': 0,
              'cCL1Contig4': 2, 'cCL1Contig5': 2}),
            # with similarity
            ([{'kind': 'score_threshold',
               'score_key': 'similarity',
               'min_score': 92,
              }],
             {'cCL1Contig2': 0, 'cCL1Contig3': 0,
              'cCL1Contig4': 1, 'cCL1Contig5': 2}),
        ]

        # The with block guarantees that the blast file is closed even when
        # one of the checks fails.
        with open(blast_fpath) as blast_file:
            for filters, expected in cases:
                # NOTE(review): the same file handle is handed to a new
                # parser for every case; presumably BlastParser rewinds it
                # -- confirm.
                blasts = BlastParser(fhand=blast_file)
                filtered_blasts = filter_alignments(blasts, config=filters)
                match_summary = _summarize_matches(filtered_blasts)
                _check_match_summary(match_summary, expected)
Example #2
0
def _do_blast_2(db_fpath,
                queries,
                program,
                dbtype=None,
                blast_format=None,
                params=None,
                remote=False):
    '''It returns an alignment result with the blast.

    It is an alternative interface to the one based on fpaths.
    db_fpath should be a plain sequence file.
    queries should be a SeqRecord list.
    If an alternative blast output format is given it should be tabular, so
    blast_format is a list of fields.

    program, dbtype and remote are handed over to the blast helpers
    (do_blast and get_or_create_blastdb).
    params is an optional dict with extra blast parameters. It is not
    modified: the output format is set in a private copy.

    It returns a tuple with the blast parser and the temporary file that
    holds the blast output. A NamedTemporaryFile is removed when it is
    closed, so the caller should presumably keep the file open while the
    parser is being consumed.
    '''

    query_fhand = write_seqs(queries, file_format='fasta')
    # make sure that the sequences are on disk before blast reads them
    query_fhand.flush()

    if remote:
        # for the remote searches the database is used as given
        blastdb = db_fpath
        # NOTE(review): here blast_format is treated as a string although
        # the docstring describes a list of fields, and the result is parsed
        # with BlastParser regardless of the format -- confirm the intended
        # usage.
        fmt = 'XML' if blast_format is None else blast_format.upper()
    else:
        blastdb = get_or_create_blastdb(db_fpath, dbtype=dbtype)
        if blast_format is None:
            blast_format = [
                'query',
                'subject',
                'query_length',
                'subject_length',
                'query_start',
                'query_end',
                'subject_start',
                'subject_end',
                'expect',
                'identity',
            ]
        fmt = generate_tabblast_format(blast_format)

    # We work on a copy, otherwise the outfmt key would leak into the dict
    # owned by the caller.
    params = {} if params is None else dict(params)
    params['outfmt'] = fmt

    blast_fhand = tempfile.NamedTemporaryFile(suffix='.blast')
    do_blast(query_fhand.name,
             blastdb,
             program,
             blast_fhand.name,
             params,
             remote=remote)
    if remote:
        blasts = BlastParser(blast_fhand)
    else:
        blasts = TabularBlastParser(blast_fhand, blast_format)

    return blasts, blast_fhand
Example #3
0
 def test_no_filter(self):
     'It checks the match summary of the blast parsed without any filter'
     blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
     #lcl|2_0 cCL1Contig2
     #lcl|3_0 cCL1Contig3
     #lcl|4_0 cCL1Contig4
     #lcl|5_0 cCL1Contig5
     expected = {'cCL1Contig2': 3, 'cCL1Contig3': 1,
                 'cCL1Contig4': 5, 'cCL1Contig5': 8}
     # The with block guarantees that the blast file is closed even when
     # the check fails.
     with open(blast_fpath) as blast_file:
         parser = BlastParser(fhand=blast_file)
         match_summary = _summarize_matches(parser)
         _check_match_summary(match_summary, expected)
Example #4
0
    def test_min_length_filter(self):
        'We can keep the hits length above the given one'
        blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')

        # Each case pairs a filter configuration with the match summary
        # expected once that filter has been applied.
        cases = [
            # with the min length given in base pairs
            ([{'kind': 'min_length',
               'min_num_residues': 500,
               'length_in_query': True
              }],
             {'cCL1Contig2': 3, 'cCL1Contig3': 0,
              'cCL1Contig4': 1, 'cCL1Contig5': 1}),
            # with the min length given in query %
            ([{'kind': 'min_length',
               'min_percentage': 70,
               'length_in_query': True
              }],
             {'cCL1Contig2': 0, 'cCL1Contig3': 0,
              'cCL1Contig4': 2, 'cCL1Contig5': 0}),
            # with the min length given in subject %
            ([{'kind': 'min_length',
               'min_percentage': 0.002,
               'length_in_query': False
              }],
             {'cCL1Contig2': 3, 'cCL1Contig3': 0,
              'cCL1Contig4': 1, 'cCL1Contig5': 2}),
        ]

        # The with block guarantees that the blast file is closed even when
        # one of the checks fails.
        with open(blast_fpath) as blast_file:
            for filters, expected in cases:
                # NOTE(review): the same file handle is handed to a new
                # parser for every case; presumably BlastParser rewinds it
                # -- confirm.
                blasts = BlastParser(fhand=blast_file)
                filtered_blasts = filter_alignments(blasts, config=filters)
                match_summary = _summarize_matches(filtered_blasts)
                _check_match_summary(match_summary, expected)
Example #5
0
 def test_best_scores_filter(self):
     'We can keep the hits with the bests expects'
     blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')
     filters = [{'kind': 'best_scores',
                 'score_key': 'expect',
                 'max_score': 1e-4,
                 'score_tolerance': 10
                }]
     expected = {'cCL1Contig2': 2, 'cCL1Contig3': 1,
                 'cCL1Contig4': 1, 'cCL1Contig5': 2}
     # The with block guarantees that the blast file is closed even when
     # the check fails.
     with open(blast_fpath) as blast_file:
         blasts = BlastParser(fhand=blast_file)
         filtered_blasts = filter_alignments(blasts, config=filters)
         match_summary = _summarize_matches(filtered_blasts)
         _check_match_summary(match_summary, expected)
Example #6
0
    def test_blast_parser(self):
        'It checks the results yielded by the blast parser'
        blast_fpath = os.path.join(TEST_DATA_DIR, 'blast.xml')

        # The with blocks guarantee that the blast files are closed even
        # when one of the checks fails.
        with open(blast_fpath) as blast_file:
            parser = BlastParser(fhand=blast_file)

            # One expected structure per result; the empty dicts leave the
            # corresponding results without expectations.
            expected_results = [
                {'query': {'name': 'cCL1Contig2',
                           'description': "<unknown description>",
                           'length': 1924},
                 'matches': [
                     {'subject': {'name': 'chr18',
                                  'description': 'No definition line found',
                                  'length': 19691255},
                      'scores': {'expect': 4.60533e-35},
                      'match_parts': [{'query_start': 276,
                                       'query_end': 484,
                                       'query_strand': -1,
                                       'subject_start': 477142,
                                       'subject_end': 477350,
                                       'subject_strand': 1,
                                       'scores': {'expect': 4.60533e-35,
                                                  'similarity': 84.2,
                                                  'identity': 84.2}
                                      }],
                     }
                 ]
                },
                {'query': {'name': 'cCL1Contig3',
                           'description': "<unknown description>",
                           'length': 629},
                },
                {}, {}
            ]
            n_blasts = 0
            for index, blast in enumerate(parser):
                _check_blast(blast, expected_results[index])
                n_blasts += 1
            assert n_blasts == 4

            #with the subject id given in the xml blast
            # NOTE(review): this second parser reuses the file handle that
            # has already been read; unless BlastParser rewinds it this loop
            # may not check anything -- confirm.
            expected_results = [
                {'query': {'name': 'cCL1Contig2',
                           'description': '<unknown description>',
                           'length': 1924}}, {}, {}, {}]
            parser = BlastParser(fhand=blast_file)
            for index, blast in enumerate(parser):
                _check_blast(blast, expected_results[index])

        # Check using def as accession in all the blasts
        #It changes depending on the blast output format. depends on version
        melon_fpath = os.path.join(TEST_DATA_DIR, 'melon_tair.xml')
        with open(melon_fpath) as blast_file:
            parser = BlastParser(fhand=blast_file)
            assert parser.next()['matches'][0]['subject']['name'] == 'tair1'
Example #7
0
    def test_blast_no_result(self):
        'It tests that the xml output can be an empty string'
        filters = [{'kind': 'best_scores',
                    'score_key': 'expect',
                    'max_score': 1e-4,
                    'score_tolerance': 10
                   }]
        # Nothing is written to the temporary file, so the parser gets an
        # empty blast output. The with block closes (and removes) the
        # temporary file once the test is done.
        with NamedTemporaryFile() as blast_file:
            blasts = BlastParser(fhand=blast_file)
            filt_b = filter_alignments(blasts, config=filters)
            # no result should be yielded
            self.assertRaises(StopIteration, filt_b.next)