Example #1
0
def getHits(gene):
    """Write the best BLAST hits of the requested queries to ``out``.

    Iterates over the standalone-BLAST report at ``o.blast`` using
    Biopython's ``NCBIStandalone.Iterator``.  The query id of a record is the
    text before the first space of ``record.query``.  For every record whose
    query id is a key of ``gene`` the function writes a header line holding
    ``gene[query]`` followed by tab-separated hit lines
    ``name<TAB>subject<TAB>e-value<TAB>label``, at most three lines per query:

    * ``High``  -- e-value < 0.0001
    * ``Low``   -- e-value < 1.0
    * ``Scare`` -- e-value < 5.0

    A ``name<TAB>NA<TAB>NA<TAB>NA`` line is written instead when the record
    has no alignments, or when an HSP with an e-value of 5.0 or more is met
    before anything was written for that query.

    Args:
        gene: dict mapping query id to the name used in the output.  It is
            mutated: every query found in the report is deleted, so on return
            it holds only the queries that were not found.

    Module-level names read: ``o.blast`` (report path), ``o.verbose``
    (progress on stderr) and ``out`` (writable output handle).
    """
    total = len(gene)
    if o.verbose:
        sys.stderr.write("\nGetting hits...\n")

    # The context manager guarantees the report is closed even if parsing
    # raises part-way through.
    with open(o.blast, 'rU') as handle:
        # NOTE(review): BlastErrorParser's argument looks like it is meant to
        # be a handle for dumping bad reports, not the input handle -- kept
        # as in the original; confirm against the Biopython documentation.
        error_parser = NCBIStandalone.BlastErrorParser(handle)

        for record in NCBIStandalone.Iterator(handle, error_parser):
            query = record.query.split(" ")[0]
            if query not in gene:
                continue

            name = gene[query]
            out.write("%s\n" % name)

            # Emptiness must be tested by truth value: ``x is []`` compares
            # identity with a brand-new list and can never be true.
            if not record.alignments:
                out.write("%s\tNA\tNA\tNA\n" % name)
            else:
                written = 0  # lines already emitted for this query
                for alignment in record.alignments:
                    for hsp in alignment.hsps:
                        expect = float(hsp.expect)
                        if written < 3 and expect < 5.0:
                            if expect < 0.0001:
                                label = "High"
                            elif expect < 1.0:
                                label = "Low"
                            else:
                                label = "Scare"
                            out.write(
                                "%s\t%s\t%s\t%s\n" %
                                (name, alignment.title.split(">")[1],
                                 expect, label))
                            written += 1
                        elif written < 1 and expect > 1.0:
                            # Nothing usable so far and this HSP is too weak.
                            out.write("%s\tNA\tNA\tNA\n" % name)
                            written += 1

            del gene[query]

            if o.verbose:
                # ``query in gene`` held above, so ``total`` is > 0 here.
                done = total - len(gene)
                sys.stderr.write("\r%d%%" % (done * 100 // total))
                # Progress goes to stderr, so that is the stream to flush.
                sys.stderr.flush()

    # Whatever is left in ``gene`` never appeared in the report.
    if gene:
        sys.stderr.write("\nGenes not found:\n%s" % list(gene))