예제 #1
0
def getseqlist(peakfilename, size=2000):
    """Collect the sequences extracted around every peak in a peak file.

    The peaks are loaded once with ``getpeaklist`` and then scanned once per
    entry of ``falist``. For each entry, the matching ``.fa`` file under
    ``fadir`` is opened and every peak whose first field equals that entry's
    name is looked up in it.

    NOTE(review): ``falist`` and ``fadir`` are not defined in this function;
    they are presumably module-level settings -- confirm against the rest of
    the file.

    Args:
        peakfilename: Path handed to ``getpeaklist``.
        size: Forwarded unchanged to ``fafile.getsequence`` as its second
            argument (presumably the window length -- confirm).

    Returns:
        A list of the non-``None`` values returned by ``getsequence``, grouped
        in ``falist`` order and, within one file, in peak-list order.
    """
    result = []
    peaklist = getpeaklist(peakfilename)
    for faname in falist:
        fa = fafile(fadir + "/" + faname + ".fa")
        # Close the FASTA handle even if a lookup raises, so a failure does
        # not leak the open file.
        try:
            for peak in peaklist:
                # peak[0] names the FASTA file the peak belongs to; skip
                # peaks that belong to another file.
                if peak[0] != faname:
                    continue
                subseq = fa.getsequence(peak[1], size)
                # getsequence signals "no sequence available" with None.
                if subseq is not None:
                    result.append(subseq)
        finally:
            fa.close()
    return result
예제 #2
0
def extractsubseq(peakname, size=2000):
    """Write the sequence around every peak of one peak file to ``.seq`` files.

    Peaks are read from ``../GeneData/GSE11431_RAW/<peakname>.txt``. For each
    ``(chrname, peak)`` pair the sequence is fetched from
    ``../GeneData/FA/<chrname>.fa`` and, when one is returned, appended to
    ``../GeneData/SEQ/<peakname>/<chrname>.seq`` as a line of the form
    ``<peak>\\t<sequence>\\n``. All paths are relative to the current working
    directory.

    Args:
        peakname: Base name of the peak file (without ``.txt``); also used as
            the name of the output directory created under the SEQ directory.
        size: Forwarded unchanged to ``fafile.getsequence`` as its second
            argument (presumably the window length -- confirm).

    Returns:
        ``seqfiles.keys()``: the chromosome names for which an output file
        was opened. A file is opened as soon as a chromosome is seen, so a
        name is included even if no sequence was written for it.

    Raises:
        OSError: from ``os.mkdir`` when the output directory already exists
            or its parent is missing.
    """
    fadir = "../GeneData/FA"
    peakfiledir = "../GeneData/GSE11431_RAW"
    seqdir = "../GeneData/SEQ"
    pseqdir = seqdir + "/" + peakname

    peakfilename = peakfiledir + "/" + peakname + ".txt"

    # One lazily opened handle per chromosome for both input and output.
    fafiles = {}
    seqfiles = {}

    os.mkdir(pseqdir)
    peakfile = peakpoint(peakfilename)
    # Everything opened from here on is released in the ``finally`` clause,
    # so an error part-way through (e.g. a missing .fa file) does not leak
    # the handles that were already opened.
    try:
        chrname, peak = peakfile.getnext()
        # getnext() reports the end of the peak file with chrname set to None.
        while chrname is not None:
            if chrname not in fafiles:
                filename = fadir + "/" + chrname + ".fa"
                fafiles[chrname] = fafile(filename)

            if chrname not in seqfiles:
                seqfilename = pseqdir + "/" + chrname + ".seq"
                seqfiles[chrname] = open(seqfilename, 'w')

            subseq = fafiles[chrname].getsequence(peak, size)

            # Peaks for which no sequence is returned are skipped.
            if subseq is not None:
                seqfiles[chrname].write(str(peak) + '\t' + subseq + '\n')

            chrname, peak = peakfile.getnext()
    finally:
        peakfile.close()
        for fadatafile in fafiles.values():
            fadatafile.close()
        for seqdatafile in seqfiles.values():
            seqdatafile.close()
    return seqfiles.keys()