Ejemplo n.º 1
0
def qiime_blast_seqs(seqs,
                     blast_constructor=Blastall,
                     blast_program='blastn',
                     blast_db=None,
                     refseqs=None,
                     refseqs_fp=None,
                     blast_mat_root=None,
                     params=None,
                     WorkingDir=None,
                     seqs_per_blast_run=1000,
                     HALT_EXEC=False):
    """Blast a list of sequences in batches and merge the results.

    seqs: a list (or object with list-like interface) of (seq_id, seq)
     tuples (e.g., the output of MinimalFastaParser)
    blast_constructor: callable that builds the blast application; it is
     called with the params, blast_mat_root, InputHandler, WorkingDir,
     SuppressStderr and HALT_EXEC keyword arguments
    blast_program: value passed to the application as the "-p" parameter
    blast_db: database passed as the "-d" parameter; it is replaced by a
     freshly built database when refseqs_fp or refseqs is provided
    refseqs: reference sequences handed to
     build_blast_db_from_fasta_file (only used when refseqs_fp is not
     provided)
    refseqs_fp: fasta filepath handed to build_blast_db_from_fasta_path;
     takes precedence over refseqs and blast_db
    blast_mat_root: forwarded unchanged to blast_constructor
    params: optional dict of extra application parameters; it is copied,
     so neither the caller's dict nor any default is modified
    WorkingDir: forwarded to blast_constructor and used as the output
     directory when a database is built
    seqs_per_blast_run: number of sequences sent to the application per
     invocation
    HALT_EXEC: forwarded unchanged to blast_constructor

    Returns a BlastResult holding the merged output of every run.

    Raises AssertionError when none of blast_db, refseqs_fp and refseqs
    is provided.
    """
    assert blast_db or refseqs_fp or refseqs, \
        'Must provide either a blast_db or a fasta ' +\
        'filepath containing sequences to build one.'

    if refseqs_fp:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            refseqs_fp, output_dir=WorkingDir)
    elif refseqs:
        blast_db, db_files_to_remove = build_blast_db_from_fasta_file(
            refseqs, output_dir=WorkingDir)
    else:
        # A pre-built database was supplied, so there is nothing to delete.
        db_files_to_remove = []

    # Work on a private copy: writing "-d"/"-p" into the argument itself
    # would leak these settings into the caller's dict (and, with a mutable
    # default, into every later call that relies on the default).
    params = {} if params is None else dict(params)
    params["-d"] = blast_db
    params["-p"] = blast_program

    blast_app = blast_constructor(params=params,
                                  blast_mat_root=blast_mat_root,
                                  InputHandler='_input_as_seq_id_seq_pairs',
                                  WorkingDir=WorkingDir,
                                  SuppressStderr=True,
                                  HALT_EXEC=HALT_EXEC)

    current_seqs = []
    blast_results = BlastResult([])
    for seq in seqs:
        current_seqs.append(seq)
        # current_seqs is reset after every run, so this fires each time
        # exactly seqs_per_blast_run sequences have accumulated.
        if len(current_seqs) % seqs_per_blast_run == 0:
            if blast_results:
                blast_results.update(
                    BlastResult(blast_app(current_seqs)['StdOut']))
            else:
                # Nothing accumulated yet: adopt the first non-empty result
                # instead of merging into the empty placeholder.
                blast_results = BlastResult(blast_app(current_seqs)['StdOut'])
            current_seqs = []

    # clean-up run: blast the remaining sequences (this call is made even
    # when no sequences are left over)
    blast_results.update(
        BlastResult(blast_app(current_seqs)['StdOut']))

    remove_files(db_files_to_remove)

    return blast_results
Ejemplo n.º 2
0
def blastn(seqs,
           blast_db="nt",
           e_value="1e-20",
           max_hits=200,
           working_dir="/tmp",
           blast_mat_root=None,
           extra_params={}):
    """Run blastn on seqs through Blastall and wrap the output.

    seqs: sequences handed unchanged to blast_seqs
    blast_db: database forwarded to blast_seqs as blast_db
    e_value: expectation value, sent as the "-e" option
    max_hits: used for both "-b" (max seqs to show) and "-v" (max one
     line descriptions)
    working_dir: forwarded to blast_seqs as WorkingDir
    blast_mat_root: forwarded unchanged to blast_seqs
    extra_params: options merged over the defaults below; entries here
     win on key collisions (the dict itself is only read)

    Returns a BlastResult built from the StdOut lines of the run, or None
    when the run's StdOut is falsy.
    """
    # Default blastall options; anything in extra_params overrides them.
    blastall_options = {
        "-M": "BLOSUM62",   # matrix
        "-a": "1",          # max procs
        "-e": e_value,      # expectation
        "-b": max_hits,     # max seqs to show
        "-v": max_hits,     # max one line descriptions
        "-p": "blastn",     # program
    }
    blastall_options.update(extra_params)

    raw_result = blast_seqs(seqs,
                            Blastall,
                            blast_mat_root=blast_mat_root,
                            blast_db=blast_db,
                            params=blastall_options,
                            add_seq_names=False,
                            WorkingDir=working_dir)

    # No usable output: signal that to the caller with None.
    if not raw_result['StdOut']:
        return None

    # Materialise the output lines before handing them to the parser.
    return BlastResult(list(raw_result['StdOut']))
Ejemplo n.º 3
0
 def setUp(self):
     """Build the BlastResult fixture and write the temporary fasta files.

     Stores the parsed BLAST_LINES on the instance, creates three
     temporary fasta paths (subject database plus two queries), fills
     them with the module-level test sequences, and records the paths in
     self._paths_to_clean_up.
     """
     self.blast_lines = BLAST_LINES
     self.blast_result = BlastResult(self.blast_lines)

     self.subjectdb_fp = get_tmp_filename_as_str(
         prefix='ExcludeByBlastTests_', suffix='.fasta')
     self.query_fp = get_tmp_filename_as_str(
         prefix='ExcludeByBlastTests_', suffix='.fasta')
     self.query2_fp = get_tmp_filename_as_str(
         prefix='ExcludeByBlastTests_', suffix='.fasta')

     # Write through a context manager so every handle is flushed and
     # closed before the tests read the files, instead of relying on
     # garbage collection of the anonymous file object.
     fixtures = ((self.subjectdb_fp, TEST_BLAST_DB_LINES),
                 (self.query_fp, TEST_BLAST_DB_LINES),
                 (self.query2_fp, TEST_BLAST_DB2_LINES))
     for fasta_fp, fasta_lines in fixtures:
         with open(fasta_fp, "w") as fasta_file:
             fasta_file.writelines(fasta_lines)

     self._paths_to_clean_up = [self.subjectdb_fp, self.query_fp,
                                self.query2_fp]
Ejemplo n.º 4
0
    def setUp(self):
        """Build the BlastResult fixture and write the temporary fasta files.

        Stores the parsed BLAST_LINES on the instance, creates three
        temporary fasta files with mkstemp (subject database plus two
        queries), fills them with the module-level test sequences, and
        records the paths in self._paths_to_clean_up.
        """
        self.blast_lines = BLAST_LINES
        self.blast_result = BlastResult(self.blast_lines)

        # mkstemp returns an open descriptor together with the path; the
        # descriptor is closed right away because the file is reopened by
        # path below.
        fd, self.subjectdb_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                        suffix='.fasta')
        close(fd)
        fd, self.query_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                    suffix='.fasta')
        close(fd)
        fd, self.query2_fp = mkstemp(prefix='ExcludeByBlastTests_',
                                     suffix='.fasta')
        close(fd)

        # Write through a context manager so every handle is flushed and
        # closed before the tests read the files, instead of relying on
        # garbage collection of the anonymous file object.
        fixtures = ((self.subjectdb_fp, TEST_BLAST_DB_LINES),
                    (self.query_fp, TEST_BLAST_DB_LINES),
                    (self.query2_fp, TEST_BLAST_DB2_LINES))
        for fasta_fp, fasta_lines in fixtures:
            with open(fasta_fp, "w") as fasta_file:
                fasta_file.writelines(fasta_lines)

        self._paths_to_clean_up = [self.subjectdb_fp, self.query_fp,
                                   self.query2_fp]