Esempio n. 1
0
]
while len(spec_to_get) < numspec:
    r = random.choice(possible_spec)
    spec_to_get.append(r)
    possible_spec.remove(r)
print "------"
print "+Getting sequences"
filelist = os.listdir(indir)
numgenes = 0

for each in filelist:
    if each.find(".fa") == -1:
        continue

    infilename = indir + each
    inseqs = core.fastaGetDict(infilename)

    if all(s in inseqs for s in spec_to_get):
        #print "Getting: " + each;
        numgenes = numgenes + 1
        outfilename = outdir + each
        ofile = open(outfilename, "w")
        ofile.write("")
        ofile.close()
        for seq in inseqs:
            if seq in spec_to_get:
                core.writeSeqOL(outfilename, inseqs[seq], seq)

print "Got " + str(numgenes) + " genes."
print "-----------"
Esempio n. 2
0
        continue

    # Gather the sequence for every gene ID on this line and write them to
    # one file, but only if a sequence was found for exactly numspec IDs.
    sn = 0
    finalseqs = {}
    for gid in tmpline:
        if sn == 0:
            # The first ID on the line names the output file.
            outfilename = outdir + gid + ".fa"

        # The first six characters of an ID select the per-species dict.
        specid = gid[:6]
        if gid in main_seq_dict[specid]:
            curseq = main_seq_dict[specid][gid]
            # Strip a leading "M" when requested. Slicing (rather than
            # indexing [0]) keeps an empty sequence from raising IndexError.
            if remstart == 1 and curseq[:1] == "M":
                curseq = curseq[1:]

            finalseqs[gid] = curseq

        sn += 1

    if len(finalseqs) == numspec:
        core.filePrep(outfilename, "")
        for title in finalseqs:
            core.writeSeqOL(outfilename, finalseqs[title], ">" + title)

    i += 1

pstring = "100.0% complete."
sys.stderr.write('\b' * len(pstring) + pstring)
print "\n# " + core.getTime() + " Done!"
print nonorth, "lines skipped."
print "# ======================================================================="
Esempio n. 3
0
]
while len(spec_to_get) < numspec:
    r = random.choice(possible_spec)
    spec_to_get.append(r)
    possible_spec.remove(r)
print "------"
print "+Getting sequences"
filelist = os.listdir(indir)
numgenes = 0

for each in filelist:
    if each.find(".fa") == -1:
        continue

    infilename = indir + each
    inseqs = core.fastaGetDict(infilename)

    if all(s in inseqs for s in spec_to_get):
        # print "Getting: " + each;
        numgenes = numgenes + 1
        outfilename = outdir + each
        ofile = open(outfilename, "w")
        ofile.write("")
        ofile.close()
        for seq in inseqs:
            if seq in spec_to_get:
                core.writeSeqOL(outfilename, inseqs[seq], seq)

print "Got " + str(numgenes) + " genes."
print "-----------"
Esempio n. 4
0
File: fa_edit.py Progetto: gwct/core
					else:
						for title in inseqs:
							pseqs[title] = pseqs[title] + inseqs[title][x];
				inseqs = pseqs;
			else:
				for title in inseqs:
					inseqs[title] = inseqs[title].replace(r1,r2);		

	# Choose the sequences to write: all of them when no filter was given
	# (seqkeep == [""]), otherwise those whose title contains any seqkeep entry.
	keep_all = seqkeep == [""];
	writeseqs = {};
	for title in inseqs:
		if keep_all or any(s in title for s in seqkeep):
			writeseqs[title] = inseqs[title];

	# With a filter, write only when the number of matched sequences equals
	# the number of seqkeep entries.
	if keep_all or len(writeseqs) == len(seqkeep):
		# Create/truncate the output file; the context manager closes the
		# handle even if the write fails.
		with open(outfilename, "w") as outfile:
			outfile.write("");
		for title in writeseqs:
			# Writes the sequences to the output file.
			core.writeSeqOL(outfilename, writeseqs[title], title);


if not os.path.isfile(ins):
	pstring = "100.0% complete.";
	sys.stderr.write('\b' * len(pstring) + pstring);
print "\n" + core.getTime() + " Done!";
print "==============================================================================================";
Esempio n. 5
0
                                pseqs[title] = pseqs[title] + inseqs[title][x]
                    else:
                        for title in inseqs:
                            pseqs[title] = pseqs[title] + inseqs[title][x]
                inseqs = pseqs
            else:
                for title in inseqs:
                    inseqs[title] = inseqs[title].replace(r1, r2)

    # Choose the sequences to write: all of them when no filter was given
    # (seqkeep == [""]), otherwise those whose title contains any seqkeep entry.
    keep_all = seqkeep == [""]
    writeseqs = {}
    for title in inseqs:
        if keep_all or any(s in title for s in seqkeep):
            writeseqs[title] = inseqs[title]

    # With a filter, write only when the number of matched sequences equals
    # the number of seqkeep entries.
    if keep_all or len(writeseqs) == len(seqkeep):
        # Create/truncate the output file; the context manager closes the
        # handle even if the write fails.
        with open(outfilename, "w") as outfile:
            outfile.write("")
        for title in writeseqs:
            # Writes the sequences to the output file.
            core.writeSeqOL(outfilename, writeseqs[title], title)

if not os.path.isfile(ins):
    pstring = "100.0% complete."
    sys.stderr.write('\b' * len(pstring) + pstring)
print "\n" + core.getTime() + " Done!"
print "=============================================================================================="