] while len(spec_to_get) < numspec: r = random.choice(possible_spec) spec_to_get.append(r) possible_spec.remove(r) print "------" print "+Getting sequences" filelist = os.listdir(indir) numgenes = 0 for each in filelist: if each.find(".fa") == -1: continue infilename = indir + each inseqs = core.fastaGetDict(infilename) if all(s in inseqs for s in spec_to_get): #print "Getting: " + each; numgenes = numgenes + 1 outfilename = outdir + each ofile = open(outfilename, "w") ofile.write("") ofile.close() for seq in inseqs: if seq in spec_to_get: core.writeSeqOL(outfilename, inseqs[seq], seq) print "Got " + str(numgenes) + " genes." print "-----------"
continue sn = 0 finalseqs = {} for gid in tmpline: if sn == 0: outfilename = outdir + gid + ".fa" specid = gid[:6] if gid in main_seq_dict[specid]: curseq = main_seq_dict[specid][gid] if remstart == 1 and curseq[0] == "M": curseq = curseq[1:] finalseqs[gid] = curseq sn = sn + 1 if len(finalseqs) == numspec: core.filePrep(outfilename, "") for title in finalseqs: core.writeSeqOL(outfilename, finalseqs[title], ">" + title) i = i + 1 pstring = "100.0% complete." sys.stderr.write('\b' * len(pstring) + pstring) print "\n# " + core.getTime() + " Done!" print nonorth, "lines skipped." print "# ======================================================================="
] while len(spec_to_get) < numspec: r = random.choice(possible_spec) spec_to_get.append(r) possible_spec.remove(r) print "------" print "+Getting sequences" filelist = os.listdir(indir) numgenes = 0 for each in filelist: if each.find(".fa") == -1: continue infilename = indir + each inseqs = core.fastaGetDict(infilename) if all(s in inseqs for s in spec_to_get): # print "Getting: " + each; numgenes = numgenes + 1 outfilename = outdir + each ofile = open(outfilename, "w") ofile.write("") ofile.close() for seq in inseqs: if seq in spec_to_get: core.writeSeqOL(outfilename, inseqs[seq], seq) print "Got " + str(numgenes) + " genes." print "-----------"
else: for title in inseqs: pseqs[title] = pseqs[title] + inseqs[title][x]; inseqs = pseqs; else: for title in inseqs: inseqs[title] = inseqs[title].replace(r1,r2); writeseqs = {}; for title in inseqs: if seqkeep != [""]: if any(s in title for s in seqkeep): writeseqs[title] = inseqs[title]; else: writeseqs[title] = inseqs[title]; if len(writeseqs) == len(seqkeep) or seqkeep == [""]: outfile = open(outfilename, "w"); outfile.write(""); outfile.close(); for title in writeseqs: # Writes the sequences to the output file. core.writeSeqOL(outfilename, writeseqs[title], title); if not os.path.isfile(ins): pstring = "100.0% complete."; sys.stderr.write('\b' * len(pstring) + pstring); print "\n" + core.getTime() + " Done!"; print "==============================================================================================";
pseqs[title] = pseqs[title] + inseqs[title][x] else: for title in inseqs: pseqs[title] = pseqs[title] + inseqs[title][x] inseqs = pseqs else: for title in inseqs: inseqs[title] = inseqs[title].replace(r1, r2) writeseqs = {} for title in inseqs: if seqkeep != [""]: if any(s in title for s in seqkeep): writeseqs[title] = inseqs[title] else: writeseqs[title] = inseqs[title] if len(writeseqs) == len(seqkeep) or seqkeep == [""]: outfile = open(outfilename, "w") outfile.write("") outfile.close() for title in writeseqs: # Writes the sequences to the output file. core.writeSeqOL(outfilename, writeseqs[title], title) if not os.path.isfile(ins): pstring = "100.0% complete." sys.stderr.write('\b' * len(pstring) + pstring) print "\n" + core.getTime() + " Done!" print "=============================================================================================="