Ejemplo n.º 1
0
def run_infernal(cmfile,
                 rnd,
                 seqs,
                 outfolder,
                 cpus=1,
                 score=0.0,
                 calibrate=False):
    """Search one round of sequences with cmsearch and write the hits.

    The hits are written FASTA-style to ``<outfolder>/R<rnd>hits.fna``.
    If that file already exists the function returns immediately without
    searching, so an already-processed round is not redone.

    Parameters
    ----------
    cmfile : str
        Path to the covariance model file given to ``cmsearch_from_file``.
    rnd : int
        Round number; used in the output file name and in the log line.
    seqs
        Sequence collection passed to ``cmsearch_from_file``. It must offer
        ``getSeq(label)``, which is used to fetch the sequence of each hit.
    outfolder : str
        Folder that receives the hits file; ``log.txt`` is looked up here.
    cpus : int, optional
        Passed as the ``--cpu`` search parameter and to ``calibrate_file``.
    score : float, optional
        Passed as ``cutoff`` to ``cmsearch_from_file``.
    calibrate : bool, optional
        When true, ``calibrate_file`` is run on ``cmfile`` before searching.

    Raises
    ------
    IOError
        If ``cmfile`` does not exist.
    """
    hits_path = "%s/R%ihits.fna" % (outfolder, rnd)
    if exists(hits_path):
        return
    if not exists(cmfile):
        raise IOError("cmfile path provided does not exist: %s" % cmfile)
    params = {
        '--mid': True,
        '--Fmid': 0.0002,
        '--notrunc': True,
        '--toponly': True,
        '--cpu': cpus
    }  # '-g': True,
    if calibrate:
        calibrate_file(cmfile, cpus=cpus)
    result = cmsearch_from_file(cmfile, seqs, RNA, cutoff=score, params=params)
    with open(hits_path, 'w') as fout:
        for hit in result:
            # %g keeps very small e-values (e.g. 1e-20) visible; %f has six
            # fixed decimals and would write anything below 5e-7 as 0.000000.
            fout.write(">%s score:%0.1f e-val:%g\n%s\n" %
                       (hit[0], hit[14], hit[15], seqs.getSeq(hit[0])))
    # The log is only appended to, never created: no log.txt means no logging.
    log_path = "%s/log.txt" % outfolder
    if exists(log_path):
        with open(log_path, 'a') as fout:
            fout.write("Round %i: %i hits\n" % (rnd, len(result)))
        group = groupinfo[0]
        if group == "fasta_groups":
            continue
        secs = time()
        skip = False
        if exists(otufolder + group + "/R1hits.txt"):
            skip = True
        #only run infernal if this group has no R1hits.txt yet
        if not skip:
            currotufolder = otufolder + group
            #create the cm file and calibrate it
            cmfile = open(currotufolder + "/infernal_" + group + ".cm", 'w')
            cmfile.write(cmbuild_from_file(currotufolder + "/bayesfold-aln.sto"))
            cmfile.close()
            cmfile = currotufolder + "/infernal_" + group + ".cm"
            calibrate_file(cmfile, cpus=args.c)

            #Run all rounds of selection through infernal at once
            #make a pool of workers, one for each cpu available
            manager = Manager()
            lock = manager.Lock()
            #calculate maximal amount of CPU power and threads we can use
            procs = int(floor(args.c/args.r))
            if procs == 0:
                procs = 1
            poolsize = args.r
            if args.c < args.r:
                poolsize = args.c
            pool = Pool(processes=poolsize)
            extracpus = args.c - (procs*poolsize)
            for i in range(args.r, 0, -1):
        log = logfile.readlines()
        logfile.close()
        print ''.join(log)
        seqs = int(log[1].split()[0])
        skip = False
        if exists(otufolder + group + "/R1hits.txt"):
            skip = True
        #only run infernal if the group has at least 100 total sequences and no R1hits.txt yet
        if seqs > 99 and not skip:
            currotufolder = otufolder + group
            #create the cm file and calibrate it
            cmfile = open(currotufolder + "/infernal_" + group + ".cm", 'w')
            cmfile.write(cmbuild_from_file(currotufolder + "/locarnap-aln.sto"))
            cmfile.close()
            cmfile = currotufolder + "/infernal_" + group + ".cm"
            calibrate_file(cmfile)

            #Run all rounds of selection through infernal at once
            #make a pool of workers, one for each cpu available
            manager = Manager()
            lock = manager.Lock()
            #calculate maximal amount of CPU power and threads we can use
            procs = int(floor(args.c/args.r))
            if procs == 0:
                procs = 1
            poolsize = args.r
            if args.c < args.r:
                poolsize = args.c
            pool = Pool(processes=poolsize)
            extracpus = args.c - (procs*poolsize)
            for i in range(args.r, 0, -1):