Esempio n. 1
0
def run_infernal(cmfile, rnd, seqs, outfolder,
                 cpus=1, score=0.0, calibrate=False):
    """Search ``seqs`` with the covariance model ``cmfile`` and save the hits.

    Hits are written to ``<outfolder>/R<rnd>hits.fna``, one FASTA-style record
    per hit, with the score and e-value in the header line. If that file
    already exists the function returns immediately without doing anything.
    When ``<outfolder>/log.txt`` already exists, a one-line hit count for the
    round is appended to it; the log is never created here.

    Args:
        cmfile: Path to the covariance model file; must exist.
        rnd: Round number, used in the output file name and the log line.
        seqs: Sequence collection handed to ``cmsearch_from_file``; its
            ``getSeq(name)`` is used to fetch the sequence of each hit.
        outfolder: Folder that receives the hits file (and holds ``log.txt``).
        cpus: Value passed to cmsearch as ``--cpu``.
        score: Cutoff forwarded to ``cmsearch_from_file``.
        calibrate: When true, ``calibrate_file`` is run on ``cmfile`` first.

    Raises:
        IOError: If ``cmfile`` does not exist.
    """
    hits_path = "%s/R%ihits.fna" % (outfolder, rnd)
    # The hits file doubles as a "this round is already done" marker.
    if exists(hits_path):
        return
    if not exists(cmfile):
        raise IOError("cmfile path provided does not exist: %s" % cmfile)

    if calibrate:
        calibrate_file(cmfile, cpus=cpus)

    # '-g': True was considered here but is left disabled.
    search_options = {
        '--mid': True,
        '--Fmid': 0.0002,
        '--notrunc': True,
        '--toponly': True,
        '--cpu': cpus,
    }
    result = cmsearch_from_file(
        cmfile, seqs, RNA, cutoff=score, params=search_options)

    with open(hits_path, 'w') as hits_file:
        for hit in result:
            # Fields 0, 14 and 15 of a hit are used as name, score and e-value.
            seq_name = hit[0]
            record = ">%s score:%0.1f e-val:%f\n%s\n" % (
                seq_name, hit[14], hit[15], seqs.getSeq(seq_name))
            hits_file.write(record)

    log_path = "%s/log.txt" % outfolder
    if not exists(log_path):
        return
    with open(log_path, 'a') as log_file:
        log_file.write("Round %i: %i hits\n" % (rnd, len(result)))
Esempio n. 2
0
def run_infernal(lock, cmfile, rnd, basefolder, outfolder, cpus=1, score=0.0, mpi=False):
    """Run cmsearch for one round and record the hits; never raises.

    Loads the unique sequences of round ``rnd`` from ``basefolder`` (preferring
    the ``-Unique-Remaining.fasta`` file when it exists), searches them with
    the covariance model in ``cmfile``, writes the hits as CSV to
    ``<outfolder>/R<rnd>hits.txt`` and appends a hit-count line to
    ``<outfolder>/log.txt`` while holding ``lock``.

    Any exception is caught and printed (best-effort behaviour). The lock is
    released only if this call actually acquired it.

    Args:
        lock: Object with ``acquire()``/``release()`` guarding ``log.txt``.
        cmfile: Path to the covariance model file.
        rnd: Round number used to build the input and output file names.
        basefolder: Prefix of the per-round input folders; it is concatenated
            directly with ``"R<rnd>/"``, so it must end with a path separator.
        outfolder: Folder that receives the hits file and the log.
        cpus: Value passed to cmsearch as ``--cpu``.
        score: Cutoff forwarded to ``cmsearch_from_file``.
        mpi: When true, an ``'mpi'`` entry is added to the search parameters.
    """
    try:
        # Only search unique sequences to save time. If a previous run has
        # already removed some sequences, load the reduced file instead.
        round_prefix = basefolder + "R" + str(rnd) + "/R" + str(rnd)
        seq_path = round_prefix + "-Unique-Remaining.fasta"
        if not exists(seq_path):
            seq_path = round_prefix + "-Unique.fasta"
        seqs = LoadSeqs(seq_path, moltype=RNA, aligned=False)

        params = {'--mid': True, '--Fmid': 0.0002, '--notrunc': True, '--toponly': True, '--cpu': cpus}  # '-g': True,
        if mpi:
            # NOTE(review): unlike the other keys this one has no leading
            # "--"; confirm this is the spelling the search wrapper expects.
            params['mpi'] = True
        result = cmsearch_from_file(cmfile, seqs, RNA, cutoff=score, params=params)

        # `with` guarantees the hits file is closed even if a write fails.
        with open(outfolder + "/R" + str(rnd) + "hits.txt", 'w') as fout:
            fout.write(str(len(result)) + " hits\nheader,bitscore,e-value\n")
            for hit in result:
                fout.write(hit[0] + "," + str(hit[14]) + "," + str(hit[15]) + "\n")

        # Hold the lock only around the shared log, and release it exactly
        # once and only when it was really acquired.
        lock.acquire()
        try:
            with open(outfolder + "/log.txt", 'a') as log:
                log.write("Round " + str(rnd) + ": " + str(len(result)) + " hits\n")
        finally:
            lock.release()
    except Exception as e:
        # Best-effort: report the problem and let the caller carry on.
        print(str(e))
Esempio n. 3
0
        # Write the alignment, annotated with its consensus structure under the
        # "SS_cons" key, to locarnap-aln.sto via stockholm_from_alignment.
        alnout = open(otufolder + "/locarnap-aln.sto", "w")
        struct_dict = {"SS_cons": struct}
        alnout.write(stockholm_from_alignment(aln, GC_annotation=struct_dict))
        alnout.close()
        print struct
        # CLUSTER THE SECONDA
        # NOTE(review): the comment above appears to be truncated.

        print "Creating CM and running Infernal over all rounds"
        # Create the CM file once. We could call cmsearch_from_alignment, but we
        # don't want to build the CM file multiple times since that is
        # time-consuming and processor-intensive.
        cmfile = cmbuild_from_alignment(aln, struct, calibrate=True)
        # Rounds are processed from 7 down to 1.
        for i in range(7, 0, -1):
            # Run cmsearch over every round of SELEX.
            # Only search unique sequences to save time.
            # NOTE(review): the input location is a hard-coded absolute path.
            seqs = LoadSeqs(
                "/Users/Ely/Desktop/Ely_selection/R" + str(i) + "/R" + str(i) + "-Unique.fasta",
                moltype=RNA,
                aligned=False,
            )
            # Per-round cutoff: log2(number of sequences * len(seqs)).
            # Presumably len(seqs) is the sequence length -- TODO confirm.
            score = log2(seqs.getNumSeqs() * len(seqs))
            print "R" + str(i) + " (" + str(score) + "):"
            args = {"--toponly": True}
            result = cmsearch_from_file(cmfile.name, seqs, RNA, cutoff=score, params=args)
            print str(len(result)) + " hits"
            # One CSV file per round; fields 0, 14 and 15 of each hit are
            # written under the header/bitscore/e-value columns.
            fout = open(otufolder + "/R" + str(i) + "hits.txt", "w")
            fout.write("header,bitscore,e-value\n")
            for hit in result:
                fout.write(hit[0] + "," + str(hit[14]) + "," + str(hit[15]) + "\n")
            fout.close()
            # Remove found sequences from the round files.
     # cm file multiple times since it is time-consuming and processor-intensive
     cmfile = cmbuild_from_alignment(aln, struct, calibrate=True)
     # Rounds are processed from 7 down to 1.
     for i in range(7, 0, -1):
         # Run cmsearch over every round of SELEX.
         # Only search unique sequences to save time.
         seqs = 0
         # Check whether a previous run has removed some sequences and load the
         # matching file: "-Unique-Remaining.fasta" if present, else "-Unique.fasta".
         # args.f is concatenated directly with "R<i>/", so it is used as a path
         # prefix here.
         if exists(args.f + "R" + str(i) + "/R" + str(i) + "-Unique-Remaining.fasta"):
             print "Previous round run detected, runnning over remaining seqs"
             seqs = LoadSeqs(
                 args.f + "R" + str(i) + "/R" + str(i) + "-Unique-Remaining.fasta", moltype=RNA, aligned=False
             )
         else:
             seqs = LoadSeqs(args.f + "R" + str(i) + "/R" + str(i) + "-Unique.fasta", moltype=RNA, aligned=False)
         # Search this round with the shared CM; args.c is passed as --cpu.
         result = cmsearch_from_file(
             cmfile.name, seqs, RNA, cutoff=infernalscore, params={"--toponly": True, "--cpu": args.c}
         )
         print "R" + str(i) + ": " + str(len(result)) + " hits"
         # One CSV file per round; fields 0, 14 and 15 of each hit are written
         # under the header/bitscore/e-value columns.
         fout = open(currotufolder + "/R" + str(i) + "hits.txt", "w")
         fout.write("header,bitscore,e-value\n")
         for hit in result:
             fout.write(hit[0] + "," + str(hit[14]) + "," + str(hit[15]) + "\n")
         fout.close()
     # Clean up by removing the CM file.
     remove(cmfile.name)
     # Remove found sequences from the round files.
     # NOTE(review): the elapsed value is printed with an "m" suffix; if time()
     # is time.time the difference is in seconds -- confirm the intended unit.
     print "Runtime: " + str(time() - secs) + "m"
     currgroup += 1
 # Skip the group if it has fewer than 100 sequences.
 else:
     print "Group has less than 100 sequences, skipping infernal run"