def getseqlist(peakfilename, size=2000):
    """Collect the sequences around every peak listed in a peak file.

    The peaks are read with ``getpeaklist(peakfilename)``.  For each name in
    the module-level ``falist`` the file ``fadir + "/" + name + ".fa"`` is
    opened with ``fafile`` and every peak whose first element equals that
    name is looked up with ``getsequence(peak[1], size)``.

    NOTE(review): ``falist``, ``fadir``, ``getpeaklist`` and ``fafile`` are
    defined outside this function; their exact contracts should be confirmed
    there.  ``peaklist`` is iterated once per entry of ``falist``, so it is
    assumed to be a re-iterable sequence (e.g. a list) -- TODO confirm.

    Args:
        peakfilename: Path of the peak file handed to ``getpeaklist``.
        size: Passed through unchanged as the second argument of
            ``getsequence``.

    Returns:
        A list of the non-``None`` values returned by ``getsequence``,
        ordered by ``falist`` entry first and by peak order second.
    """
    result = []
    peaklist = getpeaklist(peakfilename)
    for faname in falist:
        fa = fafile(fadir + "/" + faname + ".fa")
        # Close the sequence file even if a lookup raises, so a failure on
        # one peak does not leak the open handle.
        try:
            for peak in peaklist:
                if peak[0] != faname:
                    continue
                subseq = fa.getsequence(peak[1], size)
                # getsequence signals "no sequence available" with None.
                if subseq is not None:
                    result.append(subseq)
        finally:
            fa.close()
    return result
def extractsubseq(peakname, size=2000):
    """Write the sequence around each peak of one peak file to ``.seq`` files.

    Reads ``../GeneData/GSE11431_RAW/<peakname>.txt`` through ``peakpoint``
    and, for every ``(chrname, peak)`` pair it yields, fetches a sequence
    from ``../GeneData/FA/<chrname>.fa`` with ``getsequence(peak, size)``.
    Each sequence found is appended as the line ``"<peak>\\t<sequence>\\n"``
    to ``../GeneData/SEQ/<peakname>/<chrname>.seq``.  Pairs for which
    ``getsequence`` returns ``None`` are skipped.

    NOTE(review): ``peakpoint`` and ``fafile`` are defined outside this
    function; iteration stops when ``getnext()`` returns ``None`` as the
    chromosome name.  All paths are relative to the current working
    directory.

    Args:
        peakname: Base name of the peak file (without ``.txt``); also used
            as the name of the output directory under ``../GeneData/SEQ``.
        size: Passed through unchanged as the second argument of
            ``getsequence``.

    Returns:
        A list of the chromosome names for which a ``.seq`` file was
        opened.  A file is opened as soon as its chromosome is seen, even if
        no sequence ends up being written to it.

    Raises:
        OSError: From ``os.mkdir`` if the output directory already exists
            or its parent directory is missing.
    """
    fadir = "../GeneData/FA"
    peakfiledir = "../GeneData/GSE11431_RAW"
    seqdir = "../GeneData/SEQ"
    pseqdir = seqdir + "/" + peakname
    peakfilename = peakfiledir + "/" + peakname + ".txt"

    fafiles = {}   # chromosome name -> open fafile reader
    seqfiles = {}  # chromosome name -> open output .seq file

    # Deliberately os.mkdir (not makedirs): an existing directory from an
    # earlier run is reported as an error rather than silently overwritten.
    os.mkdir(pseqdir)
    peakfile = peakpoint(peakfilename)
    # Everything opened from here on is closed in the finally clause, so an
    # error part-way through does not leak file handles or leave output
    # files unflushed.
    try:
        chrname, peak = peakfile.getnext()
        while chrname is not None:
            # Open the per-chromosome input and output files lazily, the
            # first time a chromosome is encountered.
            if chrname not in fafiles:
                filename = fadir + "/" + chrname + ".fa"
                fafiles[chrname] = fafile(filename)
            if chrname not in seqfiles:
                seqfilename = pseqdir + "/" + chrname + ".seq"
                seqfiles[chrname] = open(seqfilename, 'w')

            subseq = fafiles[chrname].getsequence(peak, size)
            if subseq is not None:
                seqfiles[chrname].write(str(peak) + '\t' + subseq + '\n')

            chrname, peak = peakfile.getnext()
    finally:
        peakfile.close()
        for fadatafile in fafiles.values():
            fadatafile.close()
        for seqdatafile in seqfiles.values():
            seqdatafile.close()

    # Return a real list on both Python 2 and Python 3 (dict.keys() is a
    # view object on Python 3).
    return list(seqfiles)