예제 #1
0
def calcGC(targetDic, faPath):
    """Compute the GC fraction of every target interval.

    Args:
        targetDic: mapping of chromosome id -> {item id -> region}. Elements
            0 and 1 of each region are used as the slice start and end into
            that chromosome's sequence.
        faPath: path passed to ``Fasta.Parse``.
            NOTE(review): assumes the parsed object exposes ``seq`` as a
            mapping of chromosome id -> sequence string -- confirm.

    Returns:
        dict of chromosome id -> {item id -> GC fraction as a float}.
        An interval whose slice is empty gets 0.0 instead of raising
        ZeroDivisionError.
    """
    fa = Fasta.Parse(faPath)
    gcDic = {}
    for chrid in targetDic:
        # Progress output; a single parenthesised argument prints the same
        # text under both Python 2 and Python 3.
        print(chrid)
        gcDic[chrid] = {}
        for item in targetDic[chrid]:
            region = targetDic[chrid][item]
            seq = fa.seq[chrid][region[0]:region[1]].upper()
            if not seq:
                # Nothing to count; avoid dividing by zero.
                gcDic[chrid][item] = 0.0
                continue
            gcCount = seq.count('G') + seq.count('C')
            # Force true division: under Python 2, int / int floors and the
            # fraction would collapse to 0 or 1.
            gcDic[chrid][item] = float(gcCount) / len(seq)
    return gcDic
예제 #2
0
def _writeBaseTable(path, countDic, centerBase):
    """Write one tab-separated base table for offsets -10..10 to *path*.

    One row per base in the order T, G, C, A. The offset-0 column is 1 for
    *centerBase* and 0 for every other base; every other column is
    ``calcRatio(countDic[offset], base)`` formatted with ``%f``.
    """
    # The with-block closes the file even if calcRatio raises mid-write.
    with open(path, 'w') as savefile:
        for b in 'TGCA':
            savefile.write(b)
            for n in range(-10, 11):
                if n == 0:
                    savefile.write('\t1' if b == centerBase else '\t0')
                else:
                    savefile.write('\t%f' % (calcRatio(countDic[n], b)))
            savefile.write('\n')


def main():
    """Tabulate flanking-base counts around C and T centred 21-mers.

    Command line: ``[ref fasta path] [ref bed path] [output prefix]``.
    Prints a usage line and exits when any of the three is missing.

    Every 21-base window of every BED interval is examined. A window whose
    middle base is not C or T is passed through ``revCom`` first. Windows
    whose middle base is then T or C contribute their flanking bases
    (offsets -10..-1 and 1..10) to the T or C count table respectively.
    The tables are written to ``<prefix>.C10`` and ``<prefix>.T10``.
    """
    try:
        reffaPath = sys.argv[1]
        refbedPath = sys.argv[2]
        prefix = sys.argv[3]
    except IndexError:
        # Only a missing argument should trigger the usage message.
        print(sys.argv[0] + ' [ref fasta path] [ref bed path] [output prefix]')
        sys.exit()

    print('Import fasta file.')
    fa = Fasta.Parse(reffaPath)
    print('Import bed file.')
    bedDic = importBed(refbedPath)

    flank = 10
    kmerLen = 2 * flank + 1
    offsets = [n for n in range(-flank, flank + 1) if n != 0]

    # One A/T/C/G counter per non-zero offset, kept separately per centre base.
    tdic, cdic = {}, {}
    for n in offsets:
        tdic[n] = {'A': 0, 'T': 0, 'C': 0, 'G': 0}
        cdic[n] = {'A': 0, 'T': 0, 'C': 0, 'G': 0}

    print('Calculate bases.')
    for item in bedDic:
        entry = bedDic[item]
        seq = fa.seq[entry[0]][entry[1]:entry[2]].upper()
        # "+ 1" so the final full-length window is included; an interval of
        # exactly kmerLen bases yields one window, shorter ones yield none.
        for pos in range(len(seq) - kmerLen + 1):
            k = seq[pos:pos + kmerLen]
            if k[flank] not in 'CT':
                k = revCom(k)
            # The centre base is fixed for the window, so choose the table
            # once instead of re-testing it for every offset.
            if k[flank] == 'T':
                counts = tdic
            elif k[flank] == 'C':
                counts = cdic
            else:
                continue
            for n in offsets:
                base = k[n + flank]
                # Skip bases outside A/T/C/G (e.g. 'N'), which would
                # otherwise raise KeyError.
                if base in counts[n]:
                    counts[n][base] += 1

    print('Write outputs.')
    _writeBaseTable(prefix + '.C10', cdic, 'C')
    _writeBaseTable(prefix + '.T10', tdic, 'T')