Example #1
0
    def test_format_database_command(self):
        '''Test command to format database made correctly for blast+ and blastall.

        Also checks that format_database_command() returns None once the
        .nin/.nhr/.nsq files exist alongside the first file given to Blast.
        '''
        b_all_nuc = blast.Blast('ref.fasta', 'qry.fasta', blastall=True)
        b_all_pro = blast.Blast('ref.fasta', 'qry.fasta', blast_type='blastp', blastall=True)
        b_plus_nuc = blast.Blast('ref.fasta', 'qry.fasta')
        b_plus_pro = blast.Blast('ref.fasta', 'qry.fasta', blast_type='blastp')

        blastall_nuc = 'formatdb -p F -i ref.fasta'
        blastall_pro = 'formatdb -p T -i ref.fasta'
        blastplus_nuc = 'makeblastdb -dbtype nucl -in ref.fasta'
        blastplus_pro = 'makeblastdb -dbtype prot -in ref.fasta'

        self.assertEqual(b_all_nuc.format_database_command(), blastall_nuc)
        self.assertEqual(b_all_pro.format_database_command(), blastall_pro)
        self.assertEqual(b_plus_nuc.format_database_command(), blastplus_nuc)
        self.assertEqual(b_plus_pro.format_database_command(), blastplus_pro)

        # NOTE(review): despite its name, query_file is passed in the first
        # (reference) position, matching the 'ref.fasta' calls above.
        query_file = 'tmp.test_blast_db_exists.fa'
        b_blastn = blast.Blast(query_file, 'ref.fasta')
        files = [query_file + '.' + x for x in ['nin', 'nhr', 'nsq']]
        try:
            # Create empty placeholder files so the database appears to exist
            for f in files:
                open(f, 'w').close()
            self.assertIsNone(b_blastn.format_database_command())
        finally:
            # Always tidy up, even when the assertion above fails
            for f in files:
                if os.path.exists(f):
                    os.unlink(f)
Example #2
0
    def test_make_options_string(self):
        '''Test options set correctly'''
        # Each case pairs a Blast object with the options string it should
        # produce, so an object can never be left without an expectation.
        cases = [
            (blast.Blast('ref', 'qry'), '-outfmt 6'),
            (blast.Blast('ref', 'qry', evalue=0.1), '-outfmt 6 -evalue 0.1'),
            (blast.Blast('ref', 'qry', word_size=42), '-outfmt 6 -word_size 42'),
            (blast.Blast('ref', 'qry', no_filter=True), '-outfmt 6 -dust no'),
            (blast.Blast('ref', 'qry', extra_options='-x 1'), '-outfmt 6 -x 1'),
            (blast.Blast('ref', 'qry', blast_type='blastp'), '-outfmt 6 -seg yes'),
            (blast.Blast('ref', 'qry', blastall=True), '-m 8'),
            (blast.Blast('ref', 'qry', blastall=True, evalue=0.1), '-m 8 -e 0.1'),
            (blast.Blast('ref', 'qry', blastall=True, word_size=42), '-m 8 -W 42'),
            (blast.Blast('ref', 'qry', blastall=True, no_filter=True), '-m 8 -F F'),
        ]

        for blast_obj, expected in cases:
            self.assertEqual(expected, blast_obj._make_options_string())
Example #3
0
    def test_make_io_string(self):
        '''Test input/output files string'''
        # Each case pairs a Blast object with its expected input/output string
        # (first blast+ style flags, then blastall style flags).
        cases = [
            (blast.Blast('ref', 'qry'), '-db ref -query qry -out blast.out'),
            (blast.Blast('ref', 'qry', blastall=True), '-d ref -i qry -o blast.out'),
        ]

        for blast_obj, expected in cases:
            self.assertEqual(expected, blast_obj._make_io_string())
Example #4
0
    def test_get_run_command(self):
        '''Test the run command is the type, io and options strings joined by spaces'''
        runner = blast.Blast('qry.fasta', 'ref.fasta', evalue=0.1)

        # Build the expected command from the same three pieces, in order
        type_part = runner._make_blast_type_string()
        io_part = runner._make_io_string()
        options_part = runner._make_options_string()
        wanted = ' '.join([type_part, io_part, options_part])

        self.assertEqual(wanted, runner.get_run_command())
Example #5
0
    def test_blast_db_exists(self):
        '''Test detection or not of blast database.

        For each set of database file suffixes, checks that the matching
        Blast object reports the database as existing only when every file
        of the set is present, and that the other object never does.
        '''
        query_file = 'tmp.test_blast_db_exists.fa'
        b_blastn = blast.Blast(query_file, 'ref.fasta')
        b_blastp = blast.Blast(query_file, 'ref.fasta', blast_type='blastp')

        nuc_suffixes = ['nin', 'nhr', 'nsq']
        nuc_suffixes2 = ['00.' + x for x in nuc_suffixes]
        pro_suffixes = ['pin', 'phr', 'psq']
        pro_suffixes2 = ['00.' + x for x in pro_suffixes]

        # (suffixes to create, object expected to see the database,
        #  object expected not to see it)
        tuples = [
            (nuc_suffixes, b_blastn, b_blastp),
            (nuc_suffixes2, b_blastn, b_blastp),
            (pro_suffixes, b_blastp, b_blastn),
            (pro_suffixes2, b_blastp, b_blastn)
        ]

        # Every file this test may create, so cleanup works wherever it fails
        created = [query_file]
        for suffixes, _, _ in tuples:
            created.extend([query_file + '.' + suff for suff in suffixes])

        try:
            open(query_file, 'w').close()
            self.assertFalse(b_blastn.blast_db_exists())
            self.assertFalse(b_blastp.blast_db_exists())

            for suffixes, blast1, blast2 in tuples:
                # Shown on failure to identify which case went wrong
                context = str((suffixes, blast1.blast_type, blast2.blast_type))
                db_files = [query_file + '.' + suff for suff in suffixes]

                for f in db_files:
                    open(f, 'w').close()

                self.assertTrue(blast1.blast_db_exists(), context)
                self.assertFalse(blast2.blast_db_exists(), context)

                # Removing any single file must stop the database being detected
                for missing_file in db_files:
                    os.unlink(missing_file)
                    self.assertFalse(blast1.blast_db_exists(), context)
                    open(missing_file, 'w').close()

                for f in db_files:
                    os.unlink(f)
        finally:
            # Remove anything left behind by a failed assertion
            for f in created:
                if os.path.exists(f):
                    os.unlink(f)
Example #6
0
    def __init__(self, options, farm_blast_script):
        '''Set up the object from an options object.

        options: object whose attributes (outdir, reference, query, blastall,
            blast_type, evalue, word_size, no_filter, blast_options,
            bsub_queue, bsub_name_prefix, no_bsub, test, act, debug,
            split_bases_tolerance, blast_mem, split_bases) are read here.
            If options.outdir is None, it is set on the options object itself
            to a name built from the reference and query basenames, the blast
            version and the blast type.
        farm_blast_script: stored unchanged as self.farm_blast_script.
        '''
        if options.outdir is None:
            flavour = 'blastall' if options.blastall else 'blast_plus'
            name_parts = [
                'Farm_blast',
                os.path.basename(options.reference),
                os.path.basename(options.query),
                flavour,
                options.blast_type,
                'out',
            ]
            options.outdir = '.'.join(name_parts)

        # Paths are stored in absolute form
        self.outdir = os.path.abspath(options.outdir)
        self.reference = os.path.abspath(options.reference)
        self.query = os.path.abspath(options.query)

        self.bsub_queue = options.bsub_queue
        self.farm_blast_script = farm_blast_script
        self.test = options.test
        self.union_for_act = options.act

        # The query and output names given here contain the literal text INDEX
        self.blast = blast.Blast(
            self.reference,
            'query.split.INDEX',
            outfile='tmp.array.out.INDEX',
            blastall=options.blastall,
            blast_type=options.blast_type,
            evalue=options.evalue,
            word_size=options.word_size,
            no_filter=options.no_filter,
            extra_options=options.blast_options,
        )

        self.setup_script = '01.setup.sh'
        self.start_array_script = '02.run_array.sh'
        self.combine_script = '03.combine.sh'

        # Fall back to a name derived from the output directory
        self.bsub_name_prefix = (
            'farm_blast:' + self.outdir
            if options.bsub_name_prefix is None
            else options.bsub_name_prefix
        )

        self.no_bsub = True if options.no_bsub else False
        self.memory_units = 'MB' if self.no_bsub else None

        self.debug = options.debug
        self.split_bases_tolerance = options.split_bases_tolerance

        self.files_to_delete = [
            'tmp.array.*',
            'query.split.*',
            'blast.out.tmp.gz',
            '02.array.id',
            '03.combine.sh.id',
        ]

        def blastall_tblastx():
            # Evaluated lazily: only consulted when a default is needed
            return self.blast.blastall and self.blast.blast_type == 'tblastx'

        # Memory default: larger for blastall tblastx, doubled with no_filter
        if options.blast_mem:
            self.array_mem = options.blast_mem
        else:
            self.array_mem = 5 if blastall_tblastx() else 0.5
            if self.blast.no_filter:
                self.array_mem *= 2

        # Split size default: smaller for blastall tblastx
        if options.split_bases:
            self.split_bases = options.split_bases
        else:
            self.split_bases = 200000 if blastall_tblastx() else 500000
Example #7
0
    def test_make_blast_type_string(self):
        '''Test the blast type string for each blast_type, with and without blastall'''
        # (Blast object, string expected from _make_blast_type_string())
        cases = [
            (blast.Blast('ref', 'qry'), 'blastn -task blastn'),
            (blast.Blast('ref', 'qry', blast_type='blastn'), 'blastn -task blastn'),
            (blast.Blast('ref', 'qry', blast_type='blastn-short'), 'blastn -task blastn-short'),
            (blast.Blast('ref', 'qry', blast_type='dc-megablast'), 'blastn -task dc-megablast'),
            (blast.Blast('ref', 'qry', blast_type='megablast'), 'blastn -task megablast'),
            (blast.Blast('ref', 'qry', blast_type='rmblastn'), 'blastn -task rmblastn'),
            (blast.Blast('ref', 'qry', blast_type='blastx'), 'blastx'),
            (blast.Blast('ref', 'qry', blast_type='blastp'), 'blastp -task blastp'),
            (blast.Blast('ref', 'qry', blast_type='blastp-short'), 'blastp -task blastp-short'),
            (blast.Blast('ref', 'qry', blast_type='deltablast'), 'blastp -task deltablast'),
            (blast.Blast('ref', 'qry', blast_type='tblastn'), 'tblastn'),
            (blast.Blast('ref', 'qry', blast_type='tblastx'), 'tblastx'),
            (blast.Blast('ref', 'qry', blastall=True), 'blastall -p blastn'),
            (blast.Blast('ref', 'qry', blastall=True, blast_type='blastn'), 'blastall -p blastn'),
            (blast.Blast('ref', 'qry', blastall=True, blast_type='blastx'), 'blastall -p blastx'),
            (blast.Blast('ref', 'qry', blastall=True, blast_type='blastp'), 'blastall -p blastp'),
            (blast.Blast('ref', 'qry', blastall=True, blast_type='tblastn'), 'blastall -p tblastn'),
            (blast.Blast('ref', 'qry', blastall=True, blast_type='tblastx'), 'blastall -p tblastx'),
            (blast.Blast('ref', 'qry', blastall=True, blast_type='megablast'), 'blastall -p blastn -n T'),
        ]

        for blast_obj, wanted in cases:
            self.assertEqual(wanted, blast_obj._make_blast_type_string())

        # An unknown blast type must be rejected at construction time
        self.assertRaises(blast.Error, blast.Blast, 'ref', 'qry', blast_type='oops')
Example #8
0
 def test_check_blast_type(self):
     '''Constructing a Blast with an unrecognised blast_type raises blast.Error'''
     self.assertRaises(
         blast.Error,
         blast.Blast, 'query.fasta', 'ref.fasta', blast_type='oops'
     )