예제 #1
0
def pwmmaker():
    """Accumulate per-position base weights into the global ``pwm`` table.

    For every run ``1..numofruns``, every key of ``hamdict[run]`` (used as
    the k-mer length) and every motif ``1..nmotifs``:

    * each key of ``hamdict[run][length][motif]`` is decoded with
      ``hash2kmer``;
    * each key of ``rhamdict[run][length][motif]`` is decoded with
      ``hash2kmer`` and then passed through ``revComp``.

    The value stored under that key is then added to
    ``pwm[run][length][motif][position][base]`` for every 1-based position
    whose character is ``A``, ``C``, ``T`` or ``G``.  Any other character is
    ignored.  Nothing is returned; ``pwm`` is updated in place.
    """
    bases = ("A", "C", "T", "G")

    def tally(run, length, motif, weights, decode):
        # Walk every k-mer of one table and credit its weight to the column
        # of the base found at each position.
        for hashed in weights:
            sequence = decode(hashed, length)
            for position in range(1, length + 1):
                base = sequence[position - 1]
                if base in bases:
                    pwm[run][length][motif][position][base] += weights[hashed]

    def forward(hashed, length):
        return hash2kmer(hashed, length)

    def reverse(hashed, length):
        return revComp(hash2kmer(hashed, length))

    for run in range(1, numofruns + 1):
        for length in hamdict[run]:
            for motif in range(1, nmotifs + 1):
                tally(run, length, motif, hamdict[run][length][motif], forward)
                tally(run, length, motif, rhamdict[run][length][motif], reverse)
예제 #2
0
def makeyaxis2c(i, k):
    """Return the per-run combined frequency of ``i`` and its reverse complement.

    The reverse complement key is obtained with
    ``kmer2hash(revComp(hash2kmer(i, k)))``.  For every index
    ``1..len(kmercount) - 1`` the count of ``i`` plus the count of that key in
    ``kmercount[index][k]`` is divided by the sum of all counts in the same
    table.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one fraction per run, or an empty list when ``i`` is not
        in ``top6(k)``.
    """
    partner = kmer2hash(revComp(hash2kmer(i, k)))
    highlighted = top6(k)
    fractions = []
    for run in range(1, len(kmercount)):
        table = kmercount[run][k]
        denominator = sum(table.values())
        if i not in highlighted:
            continue
        fractions.append((table[i] + table[partner]) / denominator)
    return fractions
예제 #3
0
def makeyaxis2b(i, k):
    """Return the per-run combined frequency of ``i`` for the ``colours1`` set.

    Identical in shape to the other ``makeyaxis2*`` series: for every index
    ``1..len(kmercount) - 1`` the counts of ``i`` and of its reverse
    complement key (``kmer2hash(revComp(hash2kmer(i, k)))``) in
    ``kmercount[index][k]`` are summed and divided by the total of that table.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one fraction per run, or an empty list when ``i`` is not
        in ``colours1(k)``.
    """
    reverse_key = kmer2hash(revComp(hash2kmer(i, k)))
    selected = colours1(k)
    series = []
    run = 1
    while run < len(kmercount):
        per_run = kmercount[run][k]
        run_total = sum(per_run.values())
        if i in selected:
            combined = per_run[i] + per_run[reverse_key]
            series.append(combined / run_total)
        run += 1
    return series
예제 #4
0
def listhammer():
    """Group k-mers around a consensus for every run, k-mer length and motif.

    Fills the global ``hamlist`` and ``rhamlist`` tables and extends the
    global ``removelist`` sets.  For each run ``1..numofruns``, each key ``i``
    of ``kmercount[runnum]`` and each motif ``n`` in ``1..nmotifs``:

    1. A consensus is chosen.  For ``n == 1`` it is the key with the highest
       count in ``kmercount[runnum][i]``.  For later motifs it is the
       highest-count entry of ``top6all[runnum][i]`` after dropping every
       entry found in ``removelist[runnum][i][n]`` together with its reverse
       complement.
    2. Every key within ``allowham`` Hamming distance of the consensus that
       is not in ``removelist[runnum][i][n]`` is examined.  It is appended to
       ``hamlist[runnum][i][n]`` when its count is at least that of its
       reverse complement, or when the reverse complement has no count at
       all.  In the first case a distinct reverse complement is appended to
       ``rhamlist[runnum][i][n]``.
    3. Each recorded key and its reverse complement are added to the
       ``removelist`` sets of all later motifs.

    If every entry of ``top6all[runnum][i]`` has already been claimed, the
    motif is left with empty lists.

    Raises:
        ValueError: if ``kmercount[runnum][i]`` is empty (no consensus can be
            chosen for the first motif).
    """
    for runnum in range(1, numofruns + 1):
        for i in kmercount[runnum]:
            counts = kmercount[runnum][i]
            hamlist[runnum][i] = {}
            rhamlist[runnum][i] = {}
            for n in range(1, nmotifs + 1):
                hamlist[runnum][i][n] = []
                rhamlist[runnum][i][n] = []
                claimed = removelist[runnum][i][n]

                if n == 1:
                    hconsensus = max(counts, key=lambda key: counts[key])
                else:
                    # Collect the exclusions first and filter afterwards.
                    # Removing from the list while iterating over it skips
                    # the element that follows each removal, which let
                    # already-claimed k-mers survive as consensus candidates.
                    excluded = set()
                    for x in top6all[runnum][i]:
                        if x in claimed:
                            excluded.add(x)
                            excluded.add(kmer2hash(revComp(hash2kmer(x, i))))
                    candidates = [
                        x for x in top6all[runnum][i] if x not in excluded
                    ]
                    if not candidates:
                        # NOTE(review): nothing unclaimed is left among the
                        # top k-mers, so this motif stays empty - confirm
                        # that downstream code accepts an empty motif.
                        continue
                    hconsensus = max(candidates, key=lambda key: counts[key])

                consensus = hash2kmer(hconsensus, i)
                # Snapshot of the keys, as in the original loop, so the
                # table may change size while it is being scanned.
                for x in list(counts.keys()):
                    values = hash2kmer(x, i)
                    if hamming_distance(consensus, values) > allowham:
                        continue
                    if x in claimed:
                        continue
                    rx = kmer2hash(revComp(values))

                    # Only this lookup is expected to fail: the reverse
                    # complement may never have been counted in this run.
                    try:
                        rcount = counts[rx]
                    except KeyError:
                        rcount = None

                    if rcount is not None and counts[x] < rcount:
                        # The reverse complement is more frequent; x is not
                        # recorded here.  rx is itself a key of this table
                        # and gets its own pass through this loop.
                        continue

                    hamlist[runnum][i][n].append(x)
                    for j in range(n + 1, nmotifs + 1):
                        removelist[runnum][i][j].add(x)
                        removelist[runnum][i][j].add(rx)
                    # A reverse complement is only listed when it was
                    # actually counted and differs from x (x == rx for
                    # palindromic k-mers).
                    if rcount is not None and x != rx:
                        rhamlist[runnum][i][n].append(rx)
예제 #5
0
def makeyaxis1c(i, k):
    """Return the per-run ``log10(f / (1 - f))`` series for k-mer ``i``.

    ``f`` is the count of ``i`` plus the count of its reverse complement key
    (``kmer2hash(revComp(hash2kmer(i, k)))``) in ``kmercount[index][k]``,
    divided by the sum of all counts in that table.  One value is produced
    for every index ``1..len(kmercount) - 1``.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one log-odds value per run, or an empty list when ``i``
        is not in ``top6(k)``.
    """
    mirror = kmer2hash(revComp(hash2kmer(i, k)))
    leaders = top6(k)
    log_odds = []
    for run in range(1, len(kmercount)):
        tally = kmercount[run][k]
        grand_total = sum(tally.values())
        if i not in leaders:
            continue
        share = (tally[i] + tally[mirror]) / grand_total
        log_odds.append(math.log10(share / (1 - share)))
    return log_odds
예제 #6
0
def makeyaxis2b(i, k):
    """Return the per-run combined frequency of ``i`` for the ``colours`` set.

    For every run ``1..numofruns`` the count of ``i`` plus the count of its
    reverse complement key (``kmer2hash(revComp(hash2kmer(i, k)))``) in
    ``kmercount[run][k]`` is divided by ``totaldict[run][k]``.  A count that
    cannot be looked up is taken as 0.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one fraction per run, or an empty list when ``i`` is not
        in ``colours[k]``.

    Raises:
        ZeroDivisionError: if a run's total is 0 (with plain Python numbers).
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    # The membership test does not depend on the run, so decide it once.
    if i not in colours[k]:
        return []

    yaxis2b = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Catch only failed lookups: a bare ``except`` would also hide
        # KeyboardInterrupt and unrelated programming errors.
        try:
            num = kmercount[l][k][i]
        except LookupError:
            num = 0
        try:
            rnum = kmercount[l][k][rkmer]
        except LookupError:
            rnum = 0
        yaxis2b.append((num + rnum) / total)
    return yaxis2b
예제 #7
0
def top6(k):
    """Return the leading k-mers of the last run paired with their reverse complements.

    Keys of ``kmercount[len(kmercount) - 1][k]`` are visited in the table's
    iteration order.  Each key and then its reverse complement key
    (``kmer2hash(revComp(hash2kmer(key, k)))``) is appended unless already
    present, until the list holds at least 12 entries.
    NOTE(review): presumably the table is ordered most-frequent-first, so
    these are the six most frequent pairs - confirm where ``kmercount`` is
    built.

    Args:
        k: Selects which table of the last run is read.

    Returns:
        A list of keys.  It normally has 12 entries.  It can have 13 when a
        k-mer that equals its own reverse complement made the length odd, and
        fewer than 12 when the table runs out of keys first.
    """
    keys = list(kmercount[(len(kmercount) - 1)][k].keys())
    top6s = []
    # Iterating the keys directly (instead of indexing with a counter) ends
    # the search cleanly when the table has too few k-mers, where the indexed
    # loop raised IndexError.
    for hashed in keys:
        if len(top6s) > 11:
            break
        partner = kmer2hash(revComp(hash2kmer(hashed, k)))
        if hashed not in top6s:
            top6s.append(hashed)
        if partner not in top6s:
            top6s.append(partner)
    return top6s
예제 #8
0
def makeyaxis2c(i, k):
    """Return the per-run combined frequency of ``i`` for the top k-mer set.

    For every run ``1..numofruns`` the count of ``i`` plus the count of its
    reverse complement key (``kmer2hash(revComp(hash2kmer(i, k)))``) in
    ``kmercount[run][k]`` is divided by ``totaldict[run][k]``.  A count that
    cannot be looked up is taken as 0.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one fraction per run, or an empty list when ``i`` is not
        in ``top6all[numofruns][k]``.

    Raises:
        ZeroDivisionError: if a run's total is 0 (with plain Python numbers).
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    top6s = top6all[numofruns][k]
    # The membership test does not depend on the run, so decide it once.
    if i not in top6s:
        return []

    yaxis2c = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Catch only failed lookups: a bare ``except`` would also hide
        # KeyboardInterrupt and unrelated programming errors.
        try:
            num = kmercount[l][k][i]
        except LookupError:
            num = 0
        try:
            rnum = kmercount[l][k][rkmer]
        except LookupError:
            rnum = 0
        yaxis2c.append((num + rnum) / total)
    return yaxis2c
예제 #9
0
def makeyaxis1b(i, k):
    """Return the per-run ``log10(f / (1 - f))`` series of ``i`` for the ``colours`` set.

    ``f`` is the count of ``i`` plus the count of its reverse complement key
    (``kmer2hash(revComp(hash2kmer(i, k)))``) in ``kmercount[run][k]``,
    divided by ``totaldict[run][k]``.  A count that cannot be looked up is
    taken as 0.  One value is produced for every run ``1..numofruns``.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one log-odds value per run, or an empty list when ``i``
        is not in ``colours[k]``.

    Raises:
        ValueError: if the combined frequency of a run is 0, because
            ``math.log10(0)`` is undefined.
        ZeroDivisionError: if a run's total is 0 or the combined frequency is
            exactly 1 (with plain Python numbers).
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    # The membership test does not depend on the run, so decide it once.
    if i not in colours[k]:
        return []

    yaxis1b = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Catch only failed lookups: a bare ``except`` would also hide
        # KeyboardInterrupt and unrelated programming errors.
        try:
            num = kmercount[l][k][i]
        except LookupError:
            num = 0
        try:
            rnum = kmercount[l][k][rkmer]
        except LookupError:
            rnum = 0
        f = (num + rnum) / total
        yaxis1b.append(math.log10(f / (1 - f)))
    return yaxis1b
예제 #10
0
def makeyaxis1c(i, k):
    """Return the per-run ``log10(f / (1 - f))`` series of ``i`` for the top k-mer set.

    ``f`` is the count of ``i`` plus the count of its reverse complement key
    (``kmer2hash(revComp(hash2kmer(i, k)))``) in ``kmercount[run][k]``,
    divided by ``totaldict[run][k]``.  A count that cannot be looked up is
    taken as 0.  One value is produced for every run ``1..numofruns``.

    Args:
        i: Key of the ``kmercount[...][k]`` tables, as accepted by
            ``hash2kmer(i, k)``.
        k: Selects which table of each run is read.

    Returns:
        A list with one log-odds value per run, or an empty list when ``i``
        is not in ``top6all[numofruns][k]``.

    Raises:
        ValueError: if the combined frequency of a run is 0, because
            ``math.log10(0)`` is undefined.
        ZeroDivisionError: if a run's total is 0 or the combined frequency is
            exactly 1 (with plain Python numbers).
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    top6s = top6all[numofruns][k]
    # The membership test does not depend on the run, so decide it once.
    if i not in top6s:
        return []

    yaxis1c = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Catch only failed lookups: a bare ``except`` would also hide
        # KeyboardInterrupt and unrelated programming errors.
        try:
            num = kmercount[l][k][i]
        except LookupError:
            num = 0
        try:
            rnum = kmercount[l][k][rkmer]
        except LookupError:
            rnum = 0
        f = (num + rnum) / total
        yaxis1c.append(math.log10(f / (1 - f)))
    return yaxis1c
예제 #11
0
def _plot_kmer_lines(axis, k, xaxis, top6s, colourslist, series_makers,
                     background_alpha, merge_revcomp):
    """Draw the k-mer series of every run on one axis.

    Args:
        axis: Matplotlib axes that receives the lines.
        k: Selects which ``kmercount[run][k]`` tables are scanned.
        xaxis: X values shared by every line.
        top6s: Keys drawn as thick coloured lines; the colour is
            ``colourslist[top6s.index(key) // 2]``.
        colourslist: Colour names used for the ``top6s`` lines.
        series_makers: Three callables ``(top, highlight, background)``, each
            called as ``maker(key, k)`` to obtain the Y values for keys in
            ``top6s``, keys in ``colours[k]`` and all remaining keys.
        background_alpha: Alpha of the grey background lines.
        merge_revcomp: When true a key is drawn if its count is at least that
            of its reverse complement, and a key whose reverse complement
            partner was already handled in the same run is skipped.  When
            false a key is drawn only if its count is strictly greater.

    A key that qualifies in several runs is drawn once per such run, each
    time with that run's count as z-order.
    """
    make_top, make_highlight, make_background = series_makers
    for l in range(1, numofruns + 1):
        # A set keeps the "already handled" test O(1) per k-mer.
        already = set()
        for i in kmercount[l][k]:
            num = kmercount[l][k][i]
            rkmer = kmer2hash(revComp(hash2kmer(i, k)))
            # Plotting is best-effort per k-mer: a missing reverse complement
            # count or a series that cannot be computed skips this k-mer
            # without aborting the figure.  ``Exception`` (not a bare except)
            # lets KeyboardInterrupt and SystemExit through.
            try:
                rnum = kmercount[l][k][rkmer]
                if merge_revcomp:
                    wanted = num >= rnum
                    if wanted:
                        already.add(rkmer)
                        wanted = i not in already
                else:
                    wanted = num > rnum
                if not wanted:
                    continue

                if i in top6s:
                    axis.plot(xaxis,
                              make_top(i, k),
                              color=colourslist[top6s.index(i) // 2],
                              linewidth=2,
                              marker="s",
                              markevery=None,
                              zorder=int(num))
                if i in colours[k]:
                    axis.plot(xaxis,
                              make_highlight(i, k),
                              linewidth=1,
                              marker=".",
                              markevery=None,
                              zorder=int(num))
                # Background only for k-mers in neither highlighted group.
                # The previous ``i not in top6s or colours[k]`` parsed as
                # ``(i not in top6s) or colours[k]`` and so drew the grey
                # line for every k-mer whenever ``colours[k]`` was non-empty.
                if i not in top6s and i not in colours[k]:
                    axis.plot(xaxis,
                              make_background(i, k),
                              color='0.75',
                              linestyle='--',
                              linewidth=0.5,
                              marker="x",
                              markevery=None,
                              alpha=background_alpha,
                              zorder=0)
            except Exception:
                continue


def grapher(k):
    """Plot and save the k-mer frequency figure for k-mer length ``k``.

    The figure has three panels: a log-odds plot (top), a plain frequency
    plot (bottom left) and a bar chart of ``makeyaxis3(k)`` (bottom right).
    Keys in ``top6all[numofruns][k]`` are drawn as thick coloured lines and
    annotated; keys in ``colours[k]`` are drawn as thin lines; all other keys
    are drawn as faint grey dashed lines.  The global ``revcompwanted``
    selects how a k-mer is compared with its reverse complement.  A value
    equal to neither ``True`` nor ``False`` draws no lines.

    The figure is written to
    ``figures/<identifier>/kmer_frequency/kmerfreq_<identifier>_<k>`` at
    600 dpi and then closed.

    Args:
        k: Selects which tables are read; also used in the title and file
            name.
    """
    xaxis = makexaxis()
    last = (len(xaxis) - 1)
    top6s = top6all[numofruns][k]

    fig = plt.figure(figsize=(10, 10))
    grid = plt.GridSpec(2, 3, wspace=0.4, hspace=0.3)

    top = fig.add_subplot(grid[:-1, :])
    top.set_xlabel("SELEX round")
    top.set_ylabel("log(f/(1-f))")
    top.set_title("Kmer frequency" + ', K: ' + str(k))
    top.set_xlim([((startround - 1) - 0.5), (numofruns + startround)])
    top.set_xticks(
        np.linspace((startround - 1) - 0.5, (numofruns + startround),
                    num=((2 * (numofruns + 1)) + 2),
                    endpoint=True))

    bottom = fig.add_subplot(grid[-1, :-1])
    bottom.set_xlabel("SELEX round")
    bottom.set_ylabel("f = (kmer/total)")
    bottom.set_title("Kmer frequency")
    bottom.set_xlim([((startround - 1) - 0.5), (numofruns + (startround - 1))])
    bottom.set_xticks(range((startround - 1), (numofruns + 1)))

    bar = fig.add_subplot(grid[-1, -1:])
    bar.set_xlabel("SELEX round")
    bar.set_ylabel("Total kmers")
    bar.set_title("Kmer total distribution")
    bar.set_xticks(makexaxis())

    colourslist = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']

    # ``in`` compares with ``==``, matching the original
    # ``revcompwanted == False`` / ``revcompwanted == True`` tests.
    if revcompwanted in (False, True):
        merge_revcomp = bool(revcompwanted)
        _plot_kmer_lines(top, k, xaxis, top6s, colourslist,
                         (makeyaxis1c, makeyaxis1b, makeyaxis1a),
                         0.5, merge_revcomp)
        _plot_kmer_lines(bottom, k, xaxis, top6s, colourslist,
                         (makeyaxis2c, makeyaxis2b, makeyaxis2a),
                         0.8, merge_revcomp)

    # Twenty evenly spaced heights per panel give the labels fixed slots
    # counted down from the top of the current Y range.
    ymint, ymaxt = top.get_ylim()
    ypost = np.linspace(ymint, ymaxt, num=20, endpoint=True)

    yminb, ymaxb = bottom.get_ylim()
    yposb = np.linspace(yminb, ymaxb, num=20, endpoint=True)

    # Every second entry of top6s is annotated; its neighbour shares the
    # same colour index (``index // 2``).
    top6labels = []
    for n, i in enumerate(top6s[::2]):
        kmer = str(hash2kmer(i, k))
        yaxis1c = makeyaxis1c(i, k)
        yaxis2c = makeyaxis2c(i, k)
        top.annotate((str(n + 1)), (xaxis[last], yaxis1c[last]),
                     (xaxis[last] + 0.2, ypost[-(n + 2)]),
                     size=10,
                     fontname='monospace',
                     weight='bold',
                     arrowprops=dict(color=colourslist[n],
                                     shrink=0.05,
                                     width=0.05,
                                     headwidth=0.4),
                     color=colourslist[n])
        top6labels.append(str(n + 1) + ". " + kmer + " / " + revComp(kmer))
        bottom.annotate((str(n + 1) + ". " + kmer),
                        (xaxis[last], yaxis2c[last]),
                        (xaxis[last] + 0.3, yposb[-(n + 2)]),
                        size=10,
                        fontname='monospace',
                        weight='bold',
                        color=colourslist[n],
                        arrowprops=dict(color=colourslist[n],
                                        shrink=0.05,
                                        width=0.05,
                                        headwidth=0.4))

    # Legend-style text on the top panel, one line per annotated k-mer.
    for p, label in enumerate(top6labels):
        top.text(s=(str(label)),
                 x=(numofruns - 0.5 + (startround - 1)),
                 y=(ypost[-(p + 3)]),
                 size=14,
                 fontname='monospace',
                 color=colourslist[p])

    bar.bar(xaxis, makeyaxis3(k))

    plt.savefig("figures/" + str(identifier) + "/kmer_frequency/kmerfreq_" +
                str(identifier) + "_" + str(k),
                dpi=600)
    plt.close()