Esempio n. 1
0
    def _get_blast_hits(self, blast_db, seqs):
        """Blast each seq in seqs against blast_db and retain good hits.

        blast_db -- BLAST database, passed through to blast_seqs
        seqs -- sequence records; the first item of each record is used
            as its sequence id, and the records themselves are handed
            unchanged to blast_seqs

        Returns a dict mapping every sequence id to a list of
        (subject id, e-value) tuples, one per hit whose e-value is at
        most self.Params['Max E value'] and whose percent identity is at
        least self.Params['Min percent identity']. Ids without
        qualifying hits map to an empty list. Every id owns its own list
        object, so callers may mutate one entry without affecting others.
        """
        max_evalue = self.Params['Max E value']
        min_percent_identity = self.Params['Min percent identity']
        # Thresholds below 1.0 are rescaled by 100 before being compared
        # with the '% IDENTITY' field. Note that exactly 1.0 is NOT
        # rescaled.
        if min_percent_identity < 1.0:
            min_percent_identity *= 100.0
        seq_ids = [s[0] for s in seqs]

        blast_result = blast_seqs(
            seqs, Blastall, blast_db=blast_db,
            params={'-p': 'blastn', '-n': 'T'},
            add_seq_names=False)

        if not blast_result['StdOut']:
            # No BLAST output at all: report "no hits" for every query.
            # A fresh list is built per id on purpose;
            # dict.fromkeys(seq_ids, []) would make every id share a
            # single list object.
            return {seq_id: [] for seq_id in seq_ids}

        blast_result = BlastResult(list(blast_result['StdOut']))

        result = {}
        for seq_id in seq_ids:
            # The parsed result is looked up by the first
            # whitespace-delimited token of the id.
            # NOTE(review): presumably BLAST reports only that token as
            # the query id -- confirm against BlastResult.
            blast_result_id = seq_id.split()[0]
            try:
                # Only the first element of the query's entry is scanned.
                result[seq_id] = [(e['SUBJECT ID'], float(e['E-VALUE']))
                                  for e in blast_result[blast_result_id][0]
                                  if (float(e['E-VALUE']) <= max_evalue and
                                      float(e['% IDENTITY']) >= min_percent_identity)]
            except KeyError:
                # Any KeyError (query missing from the parsed result, or
                # a hit record missing one of the fields read above) is
                # treated as "no hits" for this id.
                result[seq_id] = []

        return result
Esempio n. 2
0
    def _get_blast_hits(self, blast_db, seqs):
        """Blast each seq in seqs against blast_db and retain good hits.

        blast_db -- BLAST database, passed through to blast_seqs
        seqs -- sequence records; the first item of each record is used
            as its sequence id, and the records themselves are handed
            unchanged to blast_seqs

        Returns a dict mapping every sequence id to a list of
        (subject id, e-value) tuples, one per hit whose e-value is at
        most self.Params['Max E value'] and whose percent identity is at
        least self.Params['Min percent identity']. Ids without
        qualifying hits map to an empty list. Every id owns its own list
        object, so callers may mutate one entry without affecting others.
        """
        max_evalue = self.Params['Max E value']
        min_percent_identity = self.Params['Min percent identity']
        # Thresholds below 1.0 are rescaled by 100 before being compared
        # with the '% IDENTITY' field. Note that exactly 1.0 is NOT
        # rescaled.
        if min_percent_identity < 1.0:
            min_percent_identity *= 100.0
        seq_ids = [s[0] for s in seqs]

        blast_result = blast_seqs(
            seqs, Blastall, blast_db=blast_db,
            params={'-p': 'blastn', '-n': 'T'},
            add_seq_names=False)

        if not blast_result['StdOut']:
            # No BLAST output at all: report "no hits" for every query.
            # A fresh list is built per id on purpose;
            # dict.fromkeys(seq_ids, []) would make every id share a
            # single list object.
            return {seq_id: [] for seq_id in seq_ids}

        blast_result = BlastResult(list(blast_result['StdOut']))

        result = {}
        for seq_id in seq_ids:
            # The parsed result is looked up by the first
            # whitespace-delimited token of the id.
            # NOTE(review): presumably BLAST reports only that token as
            # the query id -- confirm against BlastResult.
            blast_result_id = seq_id.split()[0]
            try:
                # Only the first element of the query's entry is scanned.
                result[seq_id] = [(e['SUBJECT ID'], float(e['E-VALUE']))
                                  for e in blast_result[blast_result_id][0]
                                  if (float(e['E-VALUE']) <= max_evalue and
                                      float(e['% IDENTITY']) >= min_percent_identity)]
            except KeyError:
                # Any KeyError (query missing from the parsed result, or
                # a hit record missing one of the fields read above) is
                # treated as "no hits" for this id.
                result[seq_id] = []

        return result
Esempio n. 3
0
def blast_genome(seqs,
                 blast_db,
                 e_value,
                 max_hits,
                 word_size,
                 working_dir,
                 blast_mat_root,
                 extra_params=None,
                 DEBUG=True):
    """Blast sequences against all genes in a genome

    seqs -- input sequences, passed unchanged to blast_seqs
    blast_db -- path to blast database
    e_value -- e_value (float), sent to BLAST as -e
    max_hits -- maximum sequences detected by BLAST to show; sent as
        both -b and -v
    word_size -- word size for initial BLAST screen, sent as -W
    working_dir -- passed to blast_seqs as WorkingDir
    blast_mat_root -- location of BLAST matrix files
    extra_params -- additional parameters to pass to BLAST: a mapping
        (or iterable of (option, value) pairs) merged over the defaults
        built here, so they win on conflict. None (the default) or an
        empty container adds nothing.
    DEBUG -- accepted for backward compatibility; not used by this
        function

    Returns a list holding each item obtained by iterating over the
    'StdOut' entry of the blast_seqs result (presumably the raw output
    lines -- confirm against blast_seqs).
    """

    # Default option set for the blastall run; extra_params may override
    # any of these.
    params = {
        # matrix
        "-M": "BLOSUM62",

        # max procs
        "-a": "1",

        # expectation
        "-e": e_value,

        # max seqs to show
        "-b": max_hits,

        # Word size
        "-W": word_size,

        # max one line descriptions
        "-v": max_hits,

        # tabular output
        "-m": "9",

        # program
        "-p": "blastn"
    }
    # None replaces the former mutable [] default; skipping the update
    # is equivalent to updating with an empty container.
    if extra_params is not None:
        params.update(extra_params)

    output = blast_seqs(seqs,
                        Blastall,
                        blast_db=blast_db,
                        params=params,
                        WorkingDir=working_dir,
                        add_seq_names=False,
                        blast_mat_root=blast_mat_root)

    # Materialise the output so the caller gets a plain list.
    return list(output['StdOut'])
def blast_genome(seqs, blast_db, e_value, max_hits, word_size, working_dir,
                 blast_mat_root, extra_params=None, DEBUG=True):
    """Blast sequences against all genes in a genome

    seqs -- input sequences, passed unchanged to blast_seqs
    blast_db -- path to blast database
    e_value -- e_value (float), sent to BLAST as -e
    max_hits -- maximum sequences detected by BLAST to show; sent as
        both -b and -v
    word_size -- word size for initial BLAST screen, sent as -W
    working_dir -- passed to blast_seqs as WorkingDir
    blast_mat_root -- location of BLAST matrix files
    extra_params -- additional parameters to pass to BLAST: a mapping
        (or iterable of (option, value) pairs) merged over the defaults
        built here, so they win on conflict. None (the default) or an
        empty container adds nothing.
    DEBUG -- accepted for backward compatibility; not used by this
        function

    Returns a list holding each item obtained by iterating over the
    'StdOut' entry of the blast_seqs result (presumably the raw output
    lines -- confirm against blast_seqs).
    """

    # Default option set for the blastall run; extra_params may override
    # any of these.
    params = {
        # matrix
        "-M": "BLOSUM62",

        # max procs
        "-a": "1",

        # expectation
        "-e": e_value,

        # max seqs to show
        "-b": max_hits,

        # Word size
        "-W": word_size,

        # max one line descriptions
        "-v": max_hits,

        # tabular output
        "-m": "9",

        # program
        "-p": "blastn"
    }
    # None replaces the former mutable [] default; skipping the update
    # is equivalent to updating with an empty container.
    if extra_params is not None:
        params.update(extra_params)

    output = blast_seqs(seqs,
                        Blastall,
                        blast_db=blast_db,
                        params=params,
                        WorkingDir=working_dir,
                        add_seq_names=False,
                        blast_mat_root=blast_mat_root)

    # Materialise the output so the caller gets a plain list.
    return list(output['StdOut'])