Exemple #1
0
def top6split(run, x, k, n):
    """Return the top k-mers (and their reverse complements) found in one bucket.

    Every second entry of ``top6all[run][k]`` is used as a candidate.  A
    candidate is kept when it is a key of ``khams[run][k][n][x]``; it is
    returned followed by the hash of its reverse complement.  A reverse
    complement that is not yet a key of that bucket is inserted there with a
    count of 0 (side effect on ``khams``).

    For ``n != 1`` every candidate listed in ``removelist[run][k][n]`` is
    dropped first, together with its reverse complement.

    Args:
        run: index into ``khams`` / ``top6all`` / ``removelist``.
        x: key of the bucket inside ``khams[run][k][n]``.
        k: k-mer length, forwarded to ``hash2kmer``.
        n: motif index; ``1`` disables the ``removelist`` filter.

    Returns:
        list: ``[kmer, revcomp, kmer, revcomp, ...]`` as k-mer hashes.
    """
    bucket = khams[run][k][n][x]
    # Snapshot of the keys: reverse complements inserted below must not turn
    # later candidates into hits.
    present = set(bucket)

    top6s = top6all[run][k].copy()
    if n != 1:
        # Removing from a list while iterating over it makes the iterator
        # skip the element that follows each removal, so the entries to drop
        # are collected first and filtered out in a second step.
        removed = removelist[run][k][n]
        dropped = set()
        for j in top6s:
            if j in removed:
                dropped.add(j)
                dropped.add(kmer2hash(revComp(hash2kmer(j, k))))
        top6s = [j for j in top6s if j not in dropped]

    vals = []
    for i in top6s[::2]:
        if i not in present:
            continue
        rk = kmer2hash(revComp(hash2kmer(i, k)))
        vals.extend((i, rk))
        # Give the reverse complement an entry so later lookups succeed.
        bucket.setdefault(rk, 0)
    return vals
Exemple #2
0
def scatter(run, k):
    """Draw and save the Hamming-distance scatter plot for one run and k.

    For every bucket ``x`` of ``khams[run][k]`` the background points from
    ``xaxismaker`` / ``yaxismaker`` are drawn in grey, then the points from
    ``top6plotx`` / ``top6ploty`` are drawn in consecutive entries of
    ``colours`` and annotated with "<kmer> <count>".  The figure is written
    to ``figures/hamdist_<identifier>_<run>_<k>`` and closed.

    Args:
        run: index of the run to plot.
        k: k-mer length.
    """
    top6(run, k)  # result unused here; call kept as in the original
    split = top6splitter(run, k)
    labels = []
    #greys = [0.6, 0.75, 0.9]
    c = 0   # running colour index for the highlighted points
    cc = 0  # running colour index for the annotations

    for x in khams[run][k]:

        xaxis = xaxismaker(run, x, k)
        yaxis = yaxismaker(run, x, k)

        top6x = top6plotx(run, x, k)
        top6y = top6ploty(run, x, k)

        plt.scatter(xaxis, yaxis, color='0.75', alpha=0.7, s=1)

        bucket = khams[run][k][x]
        highlighted = split[run][x]

        for i in range(len(top6x)):
            kmer = highlighted[i]
            plt.scatter(top6x[i], top6y[i], label=kmer, color=colours[c], s=3)
            labels.append(str(hash2kmer(kmer, k) + ' ' + str(bucket[kmer])))
            c += 1

        for p, j in enumerate(highlighted):
            # Even positions are labelled above the point, odd ones below,
            # so the two labels of a pair do not overlap.
            offset = 0.6 if p % 2 == 0 else -0.6
            plt.annotate(str(hash2kmer(j, k) + ' ' + str(bucket[j])),
                         (float(top6x[p]), float(top6y[p]) + offset),
                         color=colours[cc], fontsize=5)
            cc += 1

    labelcolours = colours[::2]

    leg = plt.legend(labels[::2], fontsize=7)
    # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles; accept
    # whichever attribute this installation provides.
    handles = getattr(leg, "legend_handles", None)
    if handles is None:
        handles = leg.legendHandles
    # At most six legend entries are restyled, and never more than exist
    # (the fixed range(0, 6) raised IndexError on shorter legends).
    for handle, colour in zip(handles[:6], labelcolours):
        handle.set_color(colour)
        handle._sizes = [8]

    plt.xlabel("Hamming distance")
    plt.ylabel("Kmer count")
    plt.title("Run number:"+' '+str(run)+'\n'+'K: '+str(k)+'\n'+'Total kmers: '+str(len(runlists[run][k])))
    #plt.show()
    plt.savefig('figures/hamdist_'+str(identifier)+"_"+str(run)+"_"+str(k), dpi=600)
    plt.close()
Exemple #3
0
def hammer(k):
    """Bucket the k-mers of every run by Hamming distance to the consensus.

    For each index ``j`` from 1 to ``len(kmercount) - 1`` the consensus is
    the key of ``kmercount[j][k]`` with the largest value.  Every k-mer is
    stored, with its count, in ``khams[j][k][d]`` where ``d`` is the smaller
    of the Hamming distances of the k-mer and of its reverse complement to
    that consensus.

    Args:
        k: k-mer length; also the largest possible distance, so buckets
            ``0..k`` are created.

    Returns:
        The module-level ``khams`` list, updated in place.

    Raises:
        ValueError: from ``max`` when ``kmercount[j][k]`` is empty.
    """
    # Pad khams only up to the size that is actually indexed.  Appending
    # numofruns + 1 fresh dicts on every call made the list grow with each k.
    needed = max(numofruns + 1, len(kmercount))
    while len(khams) < needed:
        khams.append({})

    for j in range(1, len(kmercount)):
        counts = kmercount[j][k]
        buckets = {dist: {} for dist in range(k + 1)}
        khams[j][k] = buckets
        consensus = hash2kmer(max(counts, key=counts.get), k)
        for x, count in list(counts.items()):
            kmer = hash2kmer(x, k)
            ham = hamming_distance(consensus, kmer)
            rham = hamming_distance(consensus, revComp(kmer))
            buckets[min(ham, rham)][x] = count
    return khams
Exemple #4
0
def makeyaxis2c(i, k):
    """Return the per-run frequency of k-mer *i* plus its reverse complement.

    One value is produced for each index ``l`` from 1 to
    ``len(kmercount) - 1``: the count of *i* plus the count of its reverse
    complement in ``kmercount[l][k]``, divided by the sum of all counts of
    that table.  A k-mer that is absent from a table counts as 0 instead of
    raising ``KeyError``.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: the frequencies, or ``[]`` when *i* is not in ``top6(k)``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    top6s = top6(k)
    # The membership test does not depend on the run, so decide it once.
    if i not in top6s:
        return []

    yaxis2c = []
    for l in range(1, len(kmercount)):
        counts = kmercount[l][k]
        total = sum(counts.values())
        yaxis2c.append((counts.get(i, 0) + counts.get(rkmer, 0)) / total)
    return yaxis2c
Exemple #5
0
def makeyaxis2b(i, k):
    """Return the per-run frequency of k-mer *i* plus its reverse complement.

    One value is produced for each index ``l`` from 1 to
    ``len(kmercount) - 1``: the count of *i* plus the count of its reverse
    complement in ``kmercount[l][k]``, divided by the sum of all counts of
    that table.  A k-mer that is absent from a table counts as 0 instead of
    raising ``KeyError``.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: the frequencies, or ``[]`` when *i* is not in ``colours1(k)``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    colours = colours1(k)
    # The membership test does not depend on the run, so decide it once.
    if i not in colours:
        return []

    yaxis2b = []
    for l in range(1, len(kmercount)):
        counts = kmercount[l][k]
        total = sum(counts.values())
        yaxis2b.append((counts.get(i, 0) + counts.get(rkmer, 0)) / total)
    return yaxis2b
Exemple #6
0
def makeyaxis1c(i, k):
    """Return the per-run log-odds ``log10(f / (1 - f))`` of k-mer *i*.

    ``f`` is the count of *i* plus the count of its reverse complement in
    ``kmercount[l][k]`` divided by the sum of all counts of that table, for
    each index ``l`` from 1 to ``len(kmercount) - 1``.  A k-mer that is
    absent from a table counts as 0 instead of raising ``KeyError``.

    The log-odds is only defined for ``0 < f < 1``.  Outside that range NaN
    is stored, so the result keeps one entry per run instead of failing with
    ``ValueError`` / ``ZeroDivisionError``.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: the log-odds values, or ``[]`` when *i* is not in ``top6(k)``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    top6s = top6(k)
    # The membership test does not depend on the run, so decide it once.
    if i not in top6s:
        return []

    yaxis1c = []
    for l in range(1, len(kmercount)):
        counts = kmercount[l][k]
        total = sum(counts.values())
        f = (counts.get(i, 0) + counts.get(rkmer, 0)) / total
        if 0 < f < 1:
            yaxis1c.append(math.log10(f / (1 - f)))
        else:
            yaxis1c.append(float("nan"))
    return yaxis1c
Exemple #7
0
def hammer():
    """Bucket all k-mers by Hamming distance to a per-motif consensus.

    Fills ``khams[run][k][n][d]`` for every run in ``1..numofruns``, every k
    in ``mink..maxk`` and every motif index n in ``1..nmotifs``.  ``d`` is
    the smaller of the Hamming distances of a k-mer and of its reverse
    complement to the consensus; the stored value is the k-mer's count from
    ``kmercount[run][k]``.  K-mers listed in ``removelist[run][k][n]`` are
    left out.

    The consensus for ``n == 1`` is the k-mer with the largest count.  For
    other n it is the highest-count entry of ``top6all[run][k]`` after
    dropping everything in ``removelist[run][k][n]`` plus the reverse
    complements of those entries.

    Returns:
        The module-level ``khams`` list, updated in place.

    Raises:
        ValueError: from ``max`` when there is no consensus candidate.
    """
    for _ in range(numofruns + 1):
        khams.append({})

    for run in range(1, numofruns + 1):
        for k in range(mink, maxk + 1):
            counts = kmercount[run][k]
            khams[run][k] = {}
            for n in range(1, nmotifs + 1):
                buckets = {dist: {} for dist in range(k + 1)}
                khams[run][k][n] = buckets
                removed = removelist[run][k][n]

                if n == 1:
                    hconsensus = max(counts, key=counts.get)
                else:
                    # Collect first, filter afterwards: removing from a list
                    # while iterating over it skips the element that follows
                    # each removal.
                    dropped = set()
                    for j in top6all[run][k]:
                        if j in removed:
                            dropped.add(j)
                            dropped.add(kmer2hash(revComp(hash2kmer(j, k))))
                    candidates = [
                        j for j in top6all[run][k] if j not in dropped
                    ]
                    # An entry without a count of its own ranks as 0 rather
                    # than raising KeyError.
                    hconsensus = max(candidates,
                                     key=lambda key: counts.get(key, 0))

                consensus = hash2kmer(hconsensus, k)
                for x in list(counts.keys()):
                    if x in removed:
                        continue
                    kmer = hash2kmer(x, k)
                    ham = hamming_distance(consensus, kmer)
                    rham = hamming_distance(consensus, revComp(kmer))
                    buckets[min(ham, rham)][x] = counts[x]
    return khams
Exemple #8
0
def top6(run, k):
    """Collect the leading k-mers of a run together with reverse complements.

    Walks the keys of ``kmercount[run][k]`` in iteration order (presumably
    sorted by descending count -- TODO confirm against the code that builds
    ``kmercount``).  Each key is appended, followed by the hash of its
    reverse complement, skipping values that are already in the result.
    Collection stops once the result holds at least 12 entries.

    Args:
        run: index into ``kmercount``.
        k: k-mer length.

    Returns:
        list: k-mer hashes.  It is shorter than 12 when the table runs out
        of keys first; the original indexed past the end and raised
        ``IndexError`` in that case.
    """
    top6s = []
    for kmer_hash in kmercount[run][k]:
        if len(top6s) > 11:
            break
        rc_hash = kmer2hash(revComp(hash2kmer(kmer_hash, k)))
        if kmer_hash not in top6s:
            top6s.append(kmer_hash)
        if rc_hash not in top6s:
            top6s.append(rc_hash)
    return top6s
Exemple #9
0
def makeyaxis2b(i, k):
    """Return the per-run frequency of k-mer *i* plus its reverse complement.

    For each run ``l`` in ``1..numofruns`` the count of *i* and the count of
    its reverse complement are read from ``kmercount[l][k]``, summed and
    divided by ``totaldict[l][k]``.  A missing table or a missing k-mer
    counts as 0.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: one frequency per run, or ``[]`` when *i* is not in
        ``colours[k]``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    # The membership test does not depend on the run, so decide it once.
    if i not in colours[k]:
        return []

    yaxis2b = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Only failed lookups mean "not observed"; the former bare except
        # also hid every unrelated error.
        try:
            counts = kmercount[l][k]
        except (KeyError, IndexError):
            counts = {}
        yaxis2b.append((counts.get(i, 0) + counts.get(rkmer, 0)) / total)
    return yaxis2b
Exemple #10
0
def makeyaxis2c(i, k):
    """Return the per-run frequency of k-mer *i* plus its reverse complement.

    For each run ``l`` in ``1..numofruns`` the count of *i* and the count of
    its reverse complement are read from ``kmercount[l][k]``, summed and
    divided by ``totaldict[l][k]``.  A missing table or a missing k-mer
    counts as 0.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: one frequency per run, or ``[]`` when *i* is not in
        ``top6all[numofruns][k]``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    top6s = top6all[numofruns][k]
    # The membership test does not depend on the run, so decide it once.
    if i not in top6s:
        return []

    yaxis2c = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Only failed lookups mean "not observed"; the former bare except
        # also hid every unrelated error.
        try:
            counts = kmercount[l][k]
        except (KeyError, IndexError):
            counts = {}
        yaxis2c.append((counts.get(i, 0) + counts.get(rkmer, 0)) / total)
    return yaxis2c
Exemple #11
0
def makeyaxis1b(i, k):
    """Return the per-run log-odds ``log10(f / (1 - f))`` of k-mer *i*.

    ``f`` is the count of *i* plus the count of its reverse complement in
    ``kmercount[l][k]`` divided by ``totaldict[l][k]``, for each run ``l`` in
    ``1..numofruns``.  A missing table or a missing k-mer counts as 0.

    The log-odds is only defined for ``0 < f < 1``.  The fallback to a zero
    count used to run straight into ``math.log10(0)`` (``ValueError``), and
    ``f == 1`` divided by zero.  NaN is stored for such runs instead, so the
    result always has one entry per run.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: one value per run, or ``[]`` when *i* is not in ``colours[k]``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    # The membership test does not depend on the run, so decide it once.
    if i not in colours[k]:
        return []

    yaxis1b = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Only failed lookups mean "not observed"; the former bare except
        # also hid every unrelated error.
        try:
            counts = kmercount[l][k]
        except (KeyError, IndexError):
            counts = {}
        f = (counts.get(i, 0) + counts.get(rkmer, 0)) / total
        if 0 < f < 1:
            yaxis1b.append(math.log10(f / (1 - f)))
        else:
            yaxis1b.append(float("nan"))
    return yaxis1b
Exemple #12
0
def makeyaxis1c(i, k):
    """Return the per-run log-odds ``log10(f / (1 - f))`` of k-mer *i*.

    ``f`` is the count of *i* plus the count of its reverse complement in
    ``kmercount[l][k]`` divided by ``totaldict[l][k]``, for each run ``l`` in
    ``1..numofruns``.  A missing table or a missing k-mer counts as 0.

    The log-odds is only defined for ``0 < f < 1``.  The fallback to a zero
    count used to run straight into ``math.log10(0)`` (``ValueError``), and
    ``f == 1`` divided by zero.  NaN is stored for such runs instead, so the
    result always has one entry per run.

    Args:
        i: k-mer hash.
        k: k-mer length.

    Returns:
        list: one value per run, or ``[]`` when *i* is not in
        ``top6all[numofruns][k]``.
    """
    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
    top6s = top6all[numofruns][k]
    # The membership test does not depend on the run, so decide it once.
    if i not in top6s:
        return []

    yaxis1c = []
    for l in range(1, numofruns + 1):
        total = totaldict[l][k]
        # Only failed lookups mean "not observed"; the former bare except
        # also hid every unrelated error.
        try:
            counts = kmercount[l][k]
        except (KeyError, IndexError):
            counts = {}
        f = (counts.get(i, 0) + counts.get(rkmer, 0)) / total
        if 0 < f < 1:
            yaxis1c.append(math.log10(f / (1 - f)))
        else:
            yaxis1c.append(float("nan"))
    return yaxis1c
Exemple #13
0
def xaxismaker(run, p, k, n):
    """Return jittered x positions for the background points of bucket *p*.

    One value ``float(p) + uniform(-0.3, 0.3)`` is produced for every key of
    ``khams[run][k][n][p]`` that is not among the highlighted k-mers.  The
    highlighted k-mers are ``top6all[run][k]``; for ``n != 1`` the entries
    listed in ``removelist[run][k][n]`` and their reverse complements are
    taken out of that set first.

    Args:
        run: index of the run.
        p: bucket key inside ``khams[run][k][n]``; also the x centre.
        k: k-mer length.
        n: motif index; ``1`` disables the ``removelist`` filter.

    Returns:
        list of float: one x coordinate per background k-mer.
    """
    top6s = top6all[run][k].copy()
    if n != 1:
        # Collect first, filter afterwards: removing from a list while
        # iterating over it skips the element that follows each removal.
        removed = removelist[run][k][n]
        dropped = set()
        for j in top6s:
            if j in removed:
                dropped.add(j)
                dropped.add(kmer2hash(revComp(hash2kmer(j, k))))
        top6s = [j for j in top6s if j not in dropped]

    highlighted = set(top6s)
    background = [
        key for key in khams[run][k][n][p] if key not in highlighted
    ]
    return [float(p) + random.uniform(-0.3, 0.3) for _ in background]
Exemple #14
0
def yaxismaker(run, x, k, n):
    """Return jittered y positions for the background points of bucket *x*.

    For every key of ``khams[run][k][n][x]`` that is not among the
    highlighted k-mers, the stored count plus ``uniform(-0.1, 0.1)`` is
    returned, in the bucket's iteration order.  The highlighted k-mers are
    ``top6all[run][k]``; for ``n != 1`` the entries listed in
    ``removelist[run][k][n]`` and their reverse complements are taken out of
    that set first.

    Args:
        run: index of the run.
        x: bucket key inside ``khams[run][k][n]``.
        k: k-mer length.
        n: motif index; ``1`` disables the ``removelist`` filter.

    Returns:
        list of float: one y coordinate per background k-mer.
    """
    top6s = top6all[run][k].copy()
    if n != 1:
        # Collect first, filter afterwards: removing from a list while
        # iterating over it skips the element that follows each removal.
        removed = removelist[run][k][n]
        dropped = set()
        for j in top6s:
            if j in removed:
                dropped.add(j)
                dropped.add(kmer2hash(revComp(hash2kmer(j, k))))
        top6s = [j for j in top6s if j not in dropped]

    highlighted = set(top6s)
    bucket = khams[run][k][n][x]
    return [
        float(count) + random.uniform(-0.1, 0.1)
        for key, count in bucket.items()
        if key not in highlighted
    ]
Exemple #15
0
def top6plotx(run, p, k, n):
    """Return jittered x positions for the highlighted points of bucket *p*.

    One value ``float(p) + uniform(-0.3, 0.3)`` is produced for every entry
    of ``top6all[run][k]`` that is a key of ``khams[run][k][n][p]``.  For
    ``n != 1`` the entries listed in ``removelist[run][k][n]`` and their
    reverse complements are excluded first.

    Args:
        run: index of the run.
        p: bucket key inside ``khams[run][k][n]``; also the x centre.
        k: k-mer length.
        n: motif index; ``1`` disables the ``removelist`` filter.

    Returns:
        list of float: one x coordinate per highlighted k-mer in the bucket.
    """
    top6s = top6all[run][k].copy()
    if n != 1:
        # Collect first, filter afterwards: removing from a list while
        # iterating over it skips the element that follows each removal.
        removed = removelist[run][k][n]
        dropped = set()
        for j in top6s:
            if j in removed:
                dropped.add(j)
                dropped.add(kmer2hash(revComp(hash2kmer(j, k))))
        top6s = [j for j in top6s if j not in dropped]

    bucket = khams[run][k][n][p]
    return [
        float(p) + random.uniform(-0.3, 0.3) for i in top6s if i in bucket
    ]
Exemple #16
0
def _plot_kmer_lines(axis, xaxis, i, k, top6s, colourslist, zorder,
                     series_top6, series_coloured, series_other, other_alpha):
    """Draw the line(s) of k-mer *i* on *axis* in the style of its group."""
    in_top6 = i in top6s
    if in_top6:
        axis.plot(xaxis,
                  series_top6(i, k),
                  color=colourslist[top6s.index(i) // 2],
                  linewidth=2,
                  marker="s",
                  markevery=None,
                  zorder=zorder)
    in_colours = i in colours[k]
    if in_colours:
        axis.plot(xaxis,
                  series_coloured(i, k),
                  linewidth=1,
                  marker=".",
                  markevery=None,
                  zorder=zorder)
    # Grey background line only for k-mers in neither highlighted group.
    # The original test `i not in top6s or colours[k]` parsed as
    # `(i not in top6s) or colours[k]` and was true for every k-mer whenever
    # colours[k] was non-empty.
    if not (in_top6 or in_colours):
        axis.plot(xaxis,
                  series_other(i, k),
                  color='0.75',
                  linestyle='--',
                  linewidth=0.5,
                  marker="x",
                  markevery=None,
                  alpha=other_alpha,
                  zorder=0)


def grapher(k):
    """Plot k-mer frequencies across rounds for one k and save the figure.

    The figure has three panels: log-odds per round (top), plain frequency
    per round (bottom left) and a bar chart of ``makeyaxis3(k)`` (bottom
    right).  K-mers in ``top6all[numofruns][k]`` get thick coloured lines,
    k-mers in ``colours[k]`` get thin lines, and all others get grey dashed
    lines.  Every other entry of the top list is annotated and listed.  The
    result is written to
    ``figures/<identifier>/kmer_frequency/kmerfreq_<identifier>_<k>``.

    A k-mer is drawn for a round only if its reverse complement is also a key
    of that round's table, and then
      * ``revcompwanted == False``: only when its count is strictly larger
        than the reverse complement's count;
      * ``revcompwanted == True``: also on ties, but only for the member of
        the pair that is met first.

    Args:
        k: k-mer length.
    """
    xaxis = makexaxis()
    last = (len(xaxis) - 1)
    top6s = top6all[numofruns][k]

    fig = plt.figure(figsize=(10, 10))
    grid = plt.GridSpec(2, 3, wspace=0.4, hspace=0.3)

    top = fig.add_subplot(grid[:-1, :])
    top.set_xlabel("SELEX round")
    top.set_ylabel("log(f/(1-f))")
    top.set_title("Kmer frequency" + ', K: ' + str(k))
    top.set_xlim([((startround - 1) - 0.5), (numofruns + startround)])
    top.set_xticks(
        np.linspace((startround - 1) - 0.5, (numofruns + startround),
                    num=((2 * (numofruns + 1)) + 2),
                    endpoint=True))

    bottom = fig.add_subplot(grid[-1, :-1])
    bottom.set_xlabel("SELEX round")
    bottom.set_ylabel("f = (kmer/total)")
    bottom.set_title("Kmer frequency")
    bottom.set_xlim([((startround - 1) - 0.5), (numofruns + (startround - 1))])
    bottom.set_xticks(range((startround - 1), (numofruns + 1)))

    bar = fig.add_subplot(grid[-1, -1:])
    bar.set_xlabel("SELEX round")
    bar.set_ylabel("Total kmers")
    bar.set_title("Kmer total distribution")
    bar.set_xticks(makexaxis())

    colourslist = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7']

    # (axis, top-list series, coloured series, background series,
    #  background alpha) -- the top panel is filled before the bottom one.
    panels = (
        (top, makeyaxis1c, makeyaxis1b, makeyaxis1a, 0.5),
        (bottom, makeyaxis2c, makeyaxis2b, makeyaxis2a, 0.8),
    )

    # The flag is still matched with ==, as the two separate
    # `== False` / `== True` blocks did: a value equal to neither draws
    # no lines at all.
    if revcompwanted in (True, False):
        include_ties = bool(revcompwanted)
        for axis, series_top6, series_coloured, series_other, alpha in panels:
            for l in range(1, numofruns + 1):
                counts = kmercount[l][k]
                # Reverse complements already claimed by their partner in
                # this round (a set: membership is tested once per k-mer).
                claimed = set()
                for i in counts:
                    num = counts[i]
                    rkmer = kmer2hash(revComp(hash2kmer(i, k)))
                    if rkmer not in counts:
                        continue
                    rnum = counts[rkmer]
                    if include_ties:
                        if num < rnum:
                            continue
                        claimed.add(rkmer)
                        if i in claimed:
                            continue
                    elif num <= rnum:
                        continue
                    try:
                        _plot_kmer_lines(axis, xaxis, i, k, top6s,
                                         colourslist, int(num), series_top6,
                                         series_coloured, series_other, alpha)
                    except Exception:
                        # Best effort, as before: a k-mer whose series cannot
                        # be computed or drawn is skipped.  Unlike the bare
                        # except, KeyboardInterrupt/SystemExit now propagate.
                        continue

    ymint, ymaxt = top.get_ylim()
    ypost = np.linspace(ymint, ymaxt, num=20, endpoint=True)

    yminb, ymaxb = bottom.get_ylim()
    yposb = np.linspace(yminb, ymaxb, num=20, endpoint=True)

    top6labels = []
    for n, i in enumerate(top6s[::2]):
        yaxis1c = makeyaxis1c(i, k)
        yaxis2c = makeyaxis2c(i, k)
        kmer = str(hash2kmer(i, k))
        # Arrow from the last data point to a numbered label on the right.
        top.annotate((str(n + 1)), (xaxis[last], yaxis1c[last]),
                     (xaxis[last] + 0.2, ypost[-(n + 2)]),
                     size=10,
                     fontname='monospace',
                     weight='bold',
                     arrowprops=dict(color=colourslist[n],
                                     shrink=0.05,
                                     width=0.05,
                                     headwidth=0.4),
                     color=colourslist[n])
        top6labels.append(str(n + 1) + ". " + kmer + " / " + revComp(kmer))
        bottom.annotate((str(n + 1) + ". " + kmer),
                        (xaxis[last], yaxis2c[last]),
                        (xaxis[last] + 0.3, yposb[-(n + 2)]),
                        size=10,
                        fontname='monospace',
                        weight='bold',
                        color=colourslist[n],
                        arrowprops=dict(color=colourslist[n],
                                        shrink=0.05,
                                        width=0.05,
                                        headwidth=0.4))

    # Key on the top panel: "<rank>. <kmer> / <reverse complement>".
    for p, label in enumerate(top6labels):
        top.text(s=(str(label)),
                 x=(numofruns - 0.5 + (startround - 1)),
                 y=(ypost[-(p + 3)]),
                 size=14,
                 fontname='monospace',
                 color=colourslist[p])

    bar.bar(xaxis, makeyaxis3(k))

    plt.savefig("figures/" + str(identifier) + "/kmer_frequency/kmerfreq_" +
                str(identifier) + "_" + str(k),
                dpi=600)
    plt.close()
Exemple #17
0
def scatter(run, k, n):
    """Draw and save the Hamming-distance scatter plot for one run, k and motif.

    For every bucket ``x`` of ``khams[run][k][n]`` the background points from
    ``xaxismaker`` / ``yaxismaker`` are drawn in grey, then the points from
    ``top6plotx`` / ``top6ploty`` are drawn in consecutive entries of
    ``colours`` and labelled "<kmer> <count>".  ``adjust_text`` then
    rearranges the labels.  The figure is written to
    ``figures/<identifier>/hamming_distance/hamdist_<identifier>_<round>_<k>_<n>``
    where ``<round>`` is ``run + startround - 1``, and closed.

    Args:
        run: index of the run to plot.
        k: k-mer length.
        n: motif index; for ``n != 1`` the k-mers in
            ``removelist[run][k][n]`` and their reverse complements do not
            count towards the legend entries, and ``removedkmers[run][k][n]``
            is subtracted from the total shown in the title.
    """
    #top6(run, k)
    split = top6splitter(run, k, n)
    labels = []

    c = 0   # running colour index for the highlighted points
    cc = 0  # running colour index for the text labels
    texts = []

    for x in khams[run][k][n]:

        xaxis = xaxismaker(run, x, k, n)
        yaxis = yaxismaker(run, x, k, n)

        top6x = top6plotx(run, x, k, n)
        top6y = top6ploty(run, x, k, n)

        plt.scatter(xaxis, yaxis, color='0.75', alpha=0.7, s=1)

        bucket = khams[run][k][n][x]
        highlighted = split[run][x]

        for i in range(len(top6x)):
            kmer = highlighted[i]
            plt.scatter(top6x[i],
                        top6y[i],
                        label=kmer,
                        color=colours[c],
                        s=3)
            labels.append(str(hash2kmer(kmer, k) + ' ' + str(bucket[kmer])))
            c += 1

        for p, j in enumerate(highlighted):
            texts.append(
                plt.text(float(top6x[p]),
                         float(top6y[p] - 0.3),
                         str(hash2kmer(j, k) + ' ' + str(bucket[j])),
                         color=colours[cc],
                         fontsize=5))
            cc += 1

    adjust_text(texts, lw=0.5)

    leg = plt.legend(labels[::2], fontsize=7)

    top6s = top6all[run][k].copy()
    if n != 1:
        # Collect first, filter afterwards: removing from a list while
        # iterating over it skips the element that follows each removal.
        removed = removelist[run][k][n]
        dropped = set()
        for j in top6s:
            if j in removed:
                dropped.add(j)
                dropped.add(kmer2hash(revComp(hash2kmer(j, k))))
        top6s = [j for j in top6s if j not in dropped]

    # Matplotlib 3.7 renamed Legend.legendHandles to legend_handles; accept
    # whichever attribute this installation provides.
    handles = getattr(leg, "legend_handles", None)
    if handles is None:
        handles = leg.legendHandles
    # One legend entry per remaining (k-mer, reverse complement) pair.
    for i in range(len(top6s) // 2):
        handles[i].set_color(labelcolours[i])
        handles[i]._sizes = [8]

    plt.xticks(np.arange(0, len(khams[run][k][n]), step=1))

    plt.xlabel("Hamming distance")
    plt.ylabel("Kmer count")

    total = totaldict[run][k]
    if n != 1:
        total = total - removedkmers[run][k][n]
    plt.title("Run number:" + ' ' + str(run + (startround - 1)) + '\n' +
              'K: ' + str(k) + '\n' + 'Total kmers: ' + str(total))

    plt.savefig("figures/" + str(identifier) + "/hamming_distance/hamdist_" +
                str(identifier) + "_" + str(run + (startround - 1)) + "_" +
                str(k) + "_" + str(n),
                dpi=600)
    plt.close()